1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
\r
11 S[i] = Score of col i: 0=no SNP, +1 = Y, -3 = N or A.
\r
13 V[k] = Best segment score from j, j+1 .. k for all possible j
\r
14 max(j) Sum i=j..k S[i]
\r
17 V[k] = S[k] + max (V[k-1], 0)
\r
20 void AlignChimeGlobal3(const string &Q3, const string &A3, const string &B3,
\r
21 const string &QLabel, const string &ALabel, const string &BLabel,
\r
24 void Make3Way(const SeqData &SDQ, const SeqData &SDA, const SeqData &SDB,
\r
25 const string &PathQA, const string &PathQB,
\r
26 string &Q3, string &A3, string &B3);
\r
28 double GetScore2(double Y, double N, double A);
\r
30 void AlignChimeLocal3(const string &Q3, const string &A3, const string &B3,
\r
31 const string &QLabel, const string &ALabel, const string &BLabel,
\r
36 const byte *Q3Seq = (const byte *) Q3.c_str();
\r
37 const byte *A3Seq = (const byte *) A3.c_str();
\r
38 const byte *B3Seq = (const byte *) B3.c_str();
\r
40 const unsigned ColCount = SIZE(Q3);
\r
41 asserta(SIZE(A3) == ColCount && SIZE(B3) == ColCount);
\r
43 vector<float> ColScoresA(ColCount, 0.0f);
\r
44 vector<float> ColScoresB(ColCount, 0.0f);
\r
46 float ScoreN = -(float) opt_xn;
\r
48 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
50 char q = Q3Seq[Col];
\r
51 char a = A3Seq[Col];
\r
52 char b = B3Seq[Col];
\r
57 if (q == a && q == b && a == b)
\r
60 if (isgap(q) || isgap(a) || isgap(b))
\r
63 if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))
\r
66 if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))
\r
69 if (q == a && q != b)
\r
70 ColScoresA[Col] = 1;
\r
72 ColScoresA[Col] = ScoreN;
\r
74 if (q == b && q != a)
\r
75 ColScoresB[Col] = 1;
\r
77 ColScoresB[Col] = ScoreN;
\r
80 vector<float> LVA(ColCount, 0.0f);
\r
81 vector<float> LVB(ColCount, 0.0f);
\r
83 LVA[0] = ColScoresA[0];
\r
84 LVB[0] = ColScoresB[0];
\r
85 for (unsigned Col = 1; Col < ColCount; ++Col)
\r
87 LVA[Col] = max(LVA[Col-1], 0.0f) + ColScoresA[Col];
\r
88 LVB[Col] = max(LVB[Col-1], 0.0f) + ColScoresB[Col];
\r
91 vector<float> RVA(ColCount, 0.0f);
\r
92 vector<float> RVB(ColCount, 0.0f);
\r
94 RVA[ColCount-1] = ColScoresA[ColCount-1];
\r
95 RVB[ColCount-1] = ColScoresB[ColCount-1];
\r
96 for (int Col = ColCount-2; Col >= 0; --Col)
\r
98 RVA[Col] = max(RVA[Col+1], 0.0f) + ColScoresA[Col];
\r
99 RVB[Col] = max(RVB[Col+1], 0.0f) + ColScoresB[Col];
\r
102 bool FirstA = true;
\r
103 float MaxSum = 0.0;
\r
104 unsigned ColX = UINT_MAX;
\r
105 for (unsigned Col = 1; Col < ColCount-1; ++Col)
\r
107 float Sum = LVA[Col] + RVB[Col+1];
\r
116 for (unsigned Col = 1; Col < ColCount-1; ++Col)
\r
118 float Sum = LVB[Col] + RVA[Col+1];
\r
126 if (ColX == UINT_MAX)
\r
129 unsigned ColLo = UINT_MAX;
\r
130 unsigned ColHi = UINT_MAX;
\r
134 for (int Col = ColX; Col >= 0; --Col)
\r
136 Sum += ColScoresA[Col];
\r
137 if (Sum >= LVA[ColX])
\r
143 asserta(Sum >= LVA[ColX]);
\r
145 for (unsigned Col = ColX+1; Col < ColCount; ++Col)
\r
147 Sum += ColScoresB[Col];
\r
148 if (Sum >= RVB[ColX])
\r
154 asserta(Sum >= RVB[ColX]);
\r
159 for (int Col = ColX; Col >= 0; --Col)
\r
161 Sum += ColScoresB[Col];
\r
162 if (Sum >= LVB[ColX])
\r
168 asserta(Sum >= LVB[ColX]);
\r
170 for (unsigned Col = ColX+1; Col < ColCount; ++Col)
\r
172 Sum += ColScoresA[Col];
\r
173 if (Sum >= RVA[ColX])
\r
179 asserta(Sum >= RVA[ColX]);
\r
182 unsigned ColXHi = ColX;
\r
183 for (unsigned Col = ColX + 1; Col < ColCount; ++Col)
\r
185 char q = Q3Seq[Col];
\r
186 char a = A3Seq[Col];
\r
187 char b = B3Seq[Col];
\r
189 if (q == a && q == b && !isgap(q))
\r
195 unsigned ColXLo = ColX;
\r
196 for (int Col = (int) ColX - 1; Col >= 0; --Col)
\r
198 char q = Q3Seq[Col];
\r
199 char a = A3Seq[Col];
\r
200 char b = B3Seq[Col];
\r
202 if (q == a && q == b && !isgap(q))
\r
214 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
216 char q = Q3Seq[Col];
\r
217 char a = A3Seq[Col];
\r
218 char b = B3Seq[Col];
\r
220 if (!isgap(q) && !isgap(a))
\r
227 if (!isgap(q) && !isgap(b))
\r
234 if (!isgap(a) && !isgap(b))
\r
242 Hit.PctIdQA = Pct(IdQA, NQA);
\r
243 Hit.PctIdQB = Pct(IdQB, NQB);
\r
244 Hit.PctIdAB = Pct(IdAB, NAB);
\r
246 unsigned LIdQA = 0;
\r
247 unsigned LIdQB = 0;
\r
248 for (unsigned Col = ColLo; Col < ColXLo; ++Col)
\r
250 char q = Q3Seq[Col];
\r
251 char a = A3Seq[Col];
\r
252 char b = B3Seq[Col];
\r
254 if (!isgap(q) && !isgap(a))
\r
260 if (!isgap(q) && !isgap(b))
\r
267 unsigned RIdQA = 0;
\r
268 unsigned RIdQB = 0;
\r
269 for (unsigned Col = ColXHi+1; Col <= ColHi; ++Col)
\r
271 char q = Q3Seq[Col];
\r
272 char a = A3Seq[Col];
\r
273 char b = B3Seq[Col];
\r
275 if (!isgap(q) && !isgap(a))
\r
281 if (!isgap(q) && !isgap(b))
\r
288 unsigned IdDiffL = max(LIdQA, LIdQB) - min(LIdQA, LIdQB);
\r
289 unsigned IdDiffR = max(RIdQA, RIdQB) - min(RIdQA, RIdQB);
\r
290 unsigned MinIdDiff = min(IdDiffL, IdDiffR);
\r
291 unsigned ColRange = ColHi - ColLo + 1;
\r
292 if (opt_queryfract > 0.0f && float(ColRange)/float(QL) < opt_queryfract)
\r
295 // double Div = Pct(MinIdDiff, QSD.L);
\r
299 Log(" Col A Q B ScoreA ScoreB LVA LVB RVA RVB\n");
\r
300 Log("----- - - - ------- ------- ------- ------- ------- -------\n");
\r
301 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
303 if (ColScoresA[Col] == 0.0 && ColScoresB[Col] == 0.0)
\r
306 char q = Q3Seq[Col];
\r
307 char a = A3Seq[Col];
\r
308 char b = B3Seq[Col];
\r
309 Log("%5u %c %c %c", Col, a, q, b);
\r
311 if (ColScoresA[Col] == 0.0)
\r
314 Log(" %7.1f", ColScoresA[Col]);
\r
316 if (ColScoresB[Col] == 0.0)
\r
319 Log(" %7.1f", ColScoresB[Col]);
\r
321 Log(" %7.1f %7.1f %7.1f %7.1f", LVA[Col], LVB[Col], RVA[Col], RVB[Col]);
\r
326 Log("MaxSum %.1f, ColLo %u, ColXLo %u, ColX %u, ColXHi %u, ColHi %u, AF %c\n",
\r
327 MaxSum, ColLo, ColXLo, ColX, ColXHi, ColHi, tof(FirstA));
\r
328 Log(" LIdQA %u, LIdQB %u, RIdQA %u, RIdQB %u\n", LIdQA, LIdQB, RIdQA, RIdQB);
\r
335 for (unsigned Col = ColLo; Col <= ColHi; ++Col)
\r
346 AlignChimeGlobal3(Q3L, A3L, B3L, QLabel, ALabel, BLabel, Hit);
\r
356 for (unsigned Col = ColLo; Col <= ColHi; ++Col)
\r
358 char q = Q3Seq[Col];
\r
359 char a = A3Seq[Col];
\r
360 char b = B3Seq[Col];
\r
361 if (q == a && q == b && a == b)
\r
363 if (isgap(q) || isgap(a) || isgap(b))
\r
365 if (Col > 0 && (isgap(Q3Seq[Col-1]) || isgap(A3Seq[Col-1]) || isgap(B3Seq[Col-1])))
\r
367 if (Col + 1 < ColCount && (isgap(Q3Seq[Col+1]) || isgap(A3Seq[Col+1]) || isgap(B3Seq[Col+1])))
\r
375 if (q == a && q != b)
\r
377 else if (q == b && q != a)
\r
382 else if (Col > ColXHi)
\r
384 if (q == b && q != a)
\r
386 else if (q == a && q != b)
\r
393 double ScoreL = GetScore2(Hit.CS_LY, Hit.CS_LN, Hit.CS_LA);
\r
394 double ScoreR = GetScore2(Hit.CS_RY, Hit.CS_RN, Hit.CS_RA);
\r
395 Hit.Score = ScoreL*ScoreR;
\r
402 // Hit.PathQA = PathQA;
\r
403 // Hit.PathQB = PathQB;
\r
411 //Hit.ColLo = ColLo;
\r
412 //Hit.ColXLo = ColXLo;
\r
413 //Hit.ColXHi = ColXHi;
\r
414 //Hit.ColHi = ColHi;
\r