1 #ifndef NOISEQPROFILE_H_
2 #define NOISEQPROFILE_H_
17 memset(c, 0, sizeof(c));
18 memset(p, 0, sizeof(p));
21 NoiseQProfile& operator=(const NoiseQProfile&);
24 void updateC(const std::string&, const std::string&);
25 void update(const std::string&, const std::string&, double frac);
27 void calcInitParams();
29 double getProb(const std::string&, const std::string&);
30 double getLogP() { return logp; }
32 void collect(const NoiseQProfile&);
37 void startSimulation();
38 std::string simulate(simul*, int, const std::string&);
39 void finishSimulation();
42 static const int NCODES = 5; // number of possible codes
43 static const int SIZE = 100;
45 double logp; //log prob;
46 double c[SIZE][NCODES]; //counts in N0;
47 double p[SIZE][NCODES]; //p[q][c] = p(c|q)
49 int c2q(char c) { assert(c >= 33 && c <= 126); return c - 33; }
51 double (*pc)[NCODES]; // for simulation
54 NoiseQProfile& NoiseQProfile::operator=(const NoiseQProfile& rv) {
55 if (this == &rv) return *this;
57 memcpy(c, rv.c, sizeof(rv.c));
58 memcpy(p, rv.p, sizeof(rv.p));
62 void NoiseQProfile::init() {
63 memset(p, 0, sizeof(p));
66 void NoiseQProfile::updateC(const std::string& readseq, const std::string& qual) {
67 int len = readseq.size();
68 for (int i = 0; i < len; i++) {
69 ++c[c2q(qual[i])][get_base_id(readseq[i])];
73 void NoiseQProfile::update(const std::string& readseq, const std::string& qual, double frac) {
74 int len = readseq.size();
75 for (int i = 0; i < len; i++) {
76 p[c2q(qual[i])][get_base_id(readseq[i])] += frac;
80 void NoiseQProfile::finish() {
83 //If N0 is 0, p(c|q) = 0 for all c, q
85 for (int i = 0; i < SIZE; i++) {
87 for (int j = 0; j < NCODES; j++) sum += (p[i][j] + c[i][j]);
88 if (sum <= 0.0) continue;
89 //if (isZero(sum)) continue;
90 for (int j = 0; j < NCODES; j++) {
91 p[i][j] = (p[i][j] + c[i][j]) /sum;
92 if (c[i][j] > 0.0) { logp += c[i][j] * log(p[i][j]); }
97 //make init parameters not zero
98 void NoiseQProfile::calcInitParams() {
102 for (int i = 0; i < SIZE; i++) {
104 for (int j = 0; j < NCODES; j++) sum += (1.0 + c[i][j]); // 1.0 pseudo count
105 for (int j = 0; j < NCODES; j++) {
106 p[i][j] = (c[i][j] + 1.0) / sum;
107 if (c[i][j] > 0.0) { logp += c[i][j] * log(p[i][j]); }
112 double NoiseQProfile::getProb(const std::string& readseq, const std::string& qual) {
114 int len = readseq.size();
116 for (int i = 0; i < len; i++) {
117 prob *= p[c2q(qual[i])][get_base_id(readseq[i])];
123 void NoiseQProfile::collect(const NoiseQProfile& o) {
124 for (int i = 0; i < SIZE; i++) {
125 for (int j = 0; j < NCODES; j++)
126 p[i][j] += o.p[i][j];
130 //If read from file, assume do not need to estimate from data
131 void NoiseQProfile::read(FILE *fi) {
132 int tmp_size, tmp_ncodes;
135 memset(c, 0, sizeof(c));
139 fscanf(fi, "%d %d", &tmp_size, &tmp_ncodes);
140 assert(tmp_size == SIZE && tmp_ncodes == NCODES);
141 for (int i = 0; i < SIZE; i++) {
142 for (int j = 0; j < NCODES; j++) {
143 fscanf(fi, "%lf", &p[i][j]);
145 //if (c[i][j] > 0) logp += c[i][j] * log(p[i][j]);
151 void NoiseQProfile::write(FILE *fo) {
152 fprintf(fo, "%d %d\n", SIZE, NCODES);
153 for (int i = 0; i < SIZE; i++) {
154 for (int j = 0; j < NCODES - 1; j++) { fprintf(fo, "%.10g ", p[i][j]); }
155 fprintf(fo, "%.10g\n", p[i][NCODES - 1]);
159 void NoiseQProfile::startSimulation() {
160 pc = new double[SIZE][NCODES];
162 for (int i = 0; i < SIZE; i++)
163 for (int j = 0; j < NCODES; j++) {
165 if (j > 0) pc[i][j] += pc[i][j - 1];
169 std::string NoiseQProfile::simulate(simul* sampler, int len, const std::string& qual) {
170 std::string readseq = "";
172 for (int i = 0; i < len; i++) {
173 readseq.push_back(getCharacter(sampler->sample(pc[c2q(qual[i])], NCODES)));
178 void NoiseQProfile::finishSimulation() {
182 #endif /* NOISEQPROFILE_H_ */