1 #ifndef NOISEQPROFILE_H_
2 #define NOISEQPROFILE_H_
18 memset(c, 0, sizeof(c));
19 memset(p, 0, sizeof(p));
22 NoiseQProfile& operator=(const NoiseQProfile&);
25 void updateC(const std::string&, const std::string&);
26 void update(const std::string&, const std::string&, double frac);
28 void calcInitParams();
30 double getProb(const std::string&, const std::string&);
31 double getLogP() { return logp; }
33 void collect(const NoiseQProfile&);
38 void startSimulation();
39 std::string simulate(simul*, int, const std::string&);
40 void finishSimulation();
43 static const int NCODES = 5; // number of possible codes
44 static const int SIZE = 100;
46 double logp; //log prob;
47 double c[SIZE][NCODES]; //counts in N0;
48 double p[SIZE][NCODES]; //p[q][c] = p(c|q)
50 int c2q(char c) { assert(c >= 33 && c <= 126); return c - 33; }
52 double (*pc)[NCODES]; // for simulation
55 NoiseQProfile& NoiseQProfile::operator=(const NoiseQProfile& rv) {
56 if (this == &rv) return *this;
58 memcpy(c, rv.c, sizeof(rv.c));
59 memcpy(p, rv.p, sizeof(rv.p));
63 void NoiseQProfile::init() {
64 memset(p, 0, sizeof(p));
67 void NoiseQProfile::updateC(const std::string& readseq, const std::string& qual) {
68 int len = readseq.size();
69 for (int i = 0; i < len; i++) {
70 ++c[c2q(qual[i])][get_base_id(readseq[i])];
74 void NoiseQProfile::update(const std::string& readseq, const std::string& qual, double frac) {
75 int len = readseq.size();
76 for (int i = 0; i < len; i++) {
77 p[c2q(qual[i])][get_base_id(readseq[i])] += frac;
81 void NoiseQProfile::finish() {
84 //If N0 is 0, p(c|q) = 0 for all c, q
86 for (int i = 0; i < SIZE; i++) {
88 for (int j = 0; j < NCODES; j++) sum += (p[i][j] + c[i][j]);
89 if (sum <= 0.0) continue;
90 //if (isZero(sum)) continue;
91 for (int j = 0; j < NCODES; j++) {
92 p[i][j] = (p[i][j] + c[i][j]) /sum;
93 if (c[i][j] > 0.0) { logp += c[i][j] * log(p[i][j]); }
98 //make init parameters not zero
99 void NoiseQProfile::calcInitParams() {
103 for (int i = 0; i < SIZE; i++) {
105 for (int j = 0; j < NCODES; j++) sum += (1.0 + c[i][j]); // 1.0 pseudo count
106 for (int j = 0; j < NCODES; j++) {
107 p[i][j] = (c[i][j] + 1.0) / sum;
108 if (c[i][j] > 0.0) { logp += c[i][j] * log(p[i][j]); }
113 double NoiseQProfile::getProb(const std::string& readseq, const std::string& qual) {
115 int len = readseq.size();
117 for (int i = 0; i < len; i++) {
118 prob *= p[c2q(qual[i])][get_base_id(readseq[i])];
124 void NoiseQProfile::collect(const NoiseQProfile& o) {
125 for (int i = 0; i < SIZE; i++) {
126 for (int j = 0; j < NCODES; j++)
127 p[i][j] += o.p[i][j];
131 //If read from file, assume do not need to estimate from data
132 void NoiseQProfile::read(FILE *fi) {
133 int tmp_size, tmp_ncodes;
135 memset(c, 0, sizeof(c));
137 assert(fscanf(fi, "%d %d", &tmp_size, &tmp_ncodes) == 2);
138 assert(tmp_size == SIZE && tmp_ncodes == NCODES);
139 for (int i = 0; i < SIZE; i++) {
140 for (int j = 0; j < NCODES; j++)
141 assert(fscanf(fi, "%lf", &p[i][j]) == 1);
145 void NoiseQProfile::write(FILE *fo) {
146 fprintf(fo, "%d %d\n", SIZE, NCODES);
147 for (int i = 0; i < SIZE; i++) {
148 for (int j = 0; j < NCODES - 1; j++) { fprintf(fo, "%.10g ", p[i][j]); }
149 fprintf(fo, "%.10g\n", p[i][NCODES - 1]);
153 void NoiseQProfile::startSimulation() {
154 pc = new double[SIZE][NCODES];
156 for (int i = 0; i < SIZE; i++)
157 for (int j = 0; j < NCODES; j++) {
159 if (j > 0) pc[i][j] += pc[i][j - 1];
163 std::string NoiseQProfile::simulate(simul* sampler, int len, const std::string& qual) {
164 std::string readseq = "";
166 for (int i = 0; i < len; i++) {
167 readseq.push_back(getCharacter(sampler->sample(pc[c2q(qual[i])], NCODES)));
172 void NoiseQProfile::finishSimulation() {
176 #endif /* NOISEQPROFILE_H_ */