16 QProfile& operator=(const QProfile&);
19 void update(const std::string&, const std::string&, const RefSeq&, int, int, double);
22 double getProb(const std::string&, const std::string&, const RefSeq&, int, int);
24 void collect(const QProfile&);
29 void startSimulation();
30 std::string simulate(simul*, int, int, int, const std::string&, const RefSeq&);
31 void finishSimulation();
34 static const int NCODES = 5; // number of possible codes
35 static const int SIZE = 100;
37 double p[SIZE][NCODES][NCODES]; // p[q][r][c] = p(c|r,q)
39 //make sure that quality score in [0, 93]
40 int c2q(char c) { assert(c >= 33 && c <= 126); return c - 33; }
42 double (*pc)[NCODES][NCODES]; // for simulation
45 QProfile::QProfile() {
46 memset(p, sizeof(p), 0);
48 //make initialized parameters
49 //ASSUME order of A, C, G, T, N
52 double probC, probO; // current, other
54 for (int i = 0; i < SIZE; i++) {
55 for (int j = 0; j < NCODES - 1; j++) {
58 probO = exp(-i / 10.0 * log(10.0));
60 probO /= (NCODES - 2);
62 probC *= (1.0 - probN);
63 probO *= (1.0 - probN);
65 assert(probC >= 0.0 && probO >= 0.0);
67 for (int k = 0; k < NCODES - 1; k++) {
68 if (j == k) p[i][j][k] = probC;
69 else p[i][j][k] = probO;
73 for (int k = 0; k < NCODES - 1; k++)
74 p[i][N][k] = (1.0 - probN) / (NCODES - 1);
78 QProfile& QProfile::operator=(const QProfile& rv) {
79 if (this == &rv) return *this;
80 memcpy(p, rv.p, sizeof(rv.p));
84 void QProfile::init() {
85 memset(p, 0, sizeof(p));
88 void QProfile::update(const std::string& readseq, const std::string& qual, const RefSeq& refseq, int pos, int dir, double frac) {
89 int len = readseq.size();
90 for (int i = 0; i < len; i++) {
91 p[c2q(qual[i])][refseq.get_id(i + pos, dir)][get_base_id(readseq[i])] += frac;
95 void QProfile::finish() {
98 for (int i = 0; i < SIZE; i++) {
99 for (int j = 0; j < NCODES; j++) {
101 for (int k = 0; k < NCODES; k++) sum += p[i][j][k];
103 for (int k = 0; k < NCODES; k++) p[i][j][k] = 0.0;
106 for (int k = 0; k < NCODES; k++) p[i][j][k] /= sum;
111 double QProfile::getProb(const std::string& readseq, const std::string& qual, const RefSeq& refseq, int pos, int dir) {
113 int len = readseq.size();
115 for (int i = 0; i < len; i++) {
116 prob *= p[c2q(qual[i])][refseq.get_id(i + pos, dir)][get_base_id(readseq[i])];
122 void QProfile::collect(const QProfile& o) {
123 for (int i = 0; i < SIZE; i++)
124 for (int j = 0; j < NCODES; j++)
125 for (int k = 0; k < NCODES; k++)
126 p[i][j][k] += o.p[i][j][k];
129 void QProfile::read(FILE *fi) {
130 int tmp_size, tmp_ncodes;
131 fscanf(fi, "%d %d", &tmp_size, &tmp_ncodes);
132 assert(tmp_size == SIZE && tmp_ncodes == NCODES);
133 for (int i = 0; i < SIZE; i++)
134 for (int j = 0; j < NCODES; j++)
135 for (int k = 0; k < NCODES; k++)
136 fscanf(fi, "%lf", &p[i][j][k]);
139 void QProfile::write(FILE *fo) {
140 fprintf(fo, "%d %d\n", SIZE, NCODES);
141 for (int i = 0; i < SIZE; i++) {
142 for (int j = 0; j < NCODES; j++) {
143 for (int k = 0; k < NCODES - 1; k++)
144 fprintf(fo, "%.10g ", p[i][j][k]);
145 fprintf(fo, "%.10g\n", p[i][j][NCODES - 1]);
147 if (i < SIZE - 1) { fprintf(fo, "\n"); }
151 void QProfile::startSimulation() {
152 pc = new double[SIZE][NCODES][NCODES];
153 for (int i = 0; i < SIZE; i++) {
154 for (int j = 0; j < NCODES; j++)
155 for (int k = 0; k < NCODES; k++) {
156 pc[i][j][k] = p[i][j][k];
157 if (k > 0) pc[i][j][k] += pc[i][j][k - 1];
160 //avoid sampling from 0.0!!!
161 double cp_sum, cp_d, cp_n;
162 cp_sum = cp_d = cp_n = 0.0;
164 for (int j = 0; j < NCODES - 1; j++) {
165 cp_sum += pc[i][j][NCODES - 1];
167 cp_n += p[i][j][NCODES - 1];
170 if (cp_sum == 0.0) continue;
172 double p_d, p_o, p_n;
175 p_o = (1.0 - p_d - p_n) / (NCODES - 2);
176 for (int j = 0; j < NCODES - 1; j++) {
177 if (pc[i][j][NCODES - 1] > 0.0) continue;
178 for (int k = 0; k < NCODES; k++) {
179 if (k == j) pc[i][j][k] = p_d;
180 else if (k == NCODES - 1) pc[i][j][k] = p_n;
181 else pc[i][j][k] = p_o;
182 if (k > 0) pc[i][j][k] += pc[i][j][k - 1];
185 if (pc[i][NCODES - 1][NCODES - 1] == 0.0) {
186 p_o = (1.0 - p_n) / (NCODES - 1);
187 for (int k = 0; k < NCODES; k++) {
188 pc[i][NCODES - 1][k] = (k < NCODES - 1 ? p_o : p_n);
189 if (k > 0) pc[i][NCODES - 1][k] += pc[i][NCODES - 1][k - 1];
195 std::string QProfile::simulate(simul* sampler, int len, int pos, int dir, const std::string& qual, const RefSeq& refseq) {
196 std::string readseq = "";
198 for (int i = 0; i < len; i++) {
199 readseq.push_back(getCharacter(sampler->sample(pc[c2q(qual[i])][refseq.get_id(i + pos, dir)], NCODES)));
204 void QProfile::finishSimulation() {
208 #endif /* QPROFILE_H_ */