1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
\r
13 extern FILE *g_fUChime;
\r
15 void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,
\r
16 vector<unsigned> &Parents);
\r
18 void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,
\r
19 const string &PathQA, const string &PathQB, ChimeHit2 &Hit);
\r
21 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo);
\r
23 static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path,
\r
24 vector<unsigned> &IdVec, unsigned d)
\r
27 const unsigned ColCount = SIZE(Path);
\r
29 const byte *Q = QSD.Seq;
\r
30 const byte *P = PSD.Seq;
\r
32 const unsigned QL = QSD.L;
\r
33 const unsigned PL = PSD.L;
\r
37 IdVec.resize(QSD.L, 0);
\r
44 vector<bool> SameVec;
\r
45 SameVec.reserve(QL);
\r
46 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
55 Same = (toupper(q) == toupper(p));
\r
58 if (c == 'M' || c == 'D')
\r
61 SameVec.push_back(Same);
\r
64 if (c == 'M' || c == 'I')
\r
68 asserta(SIZE(SameVec) == QL);
\r
71 for (unsigned QPos = 0; QPos < d; ++QPos)
\r
78 for (unsigned QPos = d; QPos < QL; ++QPos)
\r
83 if (SameVec[QPos-d])
\r
86 asserta(SIZE(IdVec) == QL);
\r
91 Log("GetSmoothedIdVec\n");
\r
94 Log("Q P Same Id\n");
\r
95 Log("- - ---- -------\n");
\r
96 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
105 Same = (toupper(q) == toupper(p));
\r
106 Log("%c %c %4c %7d\n", q, p, tof(Same), IdVec[QPos]);
\r
109 if (c == 'M' || c == 'D')
\r
111 if (c == 'M' || c == 'I')
\r
118 bool SearchChime(Ultra &U, const SeqData &QSD, float QAb,
\r
119 const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,
\r
120 float MinFractId, ChimeHit2 &Hit)
\r
123 Hit.QLabel = QSD.Label;
\r
128 Log("SearchChime()\n");
\r
129 Log("Query>%s\n", QSD.Label);
\r
132 vector<unsigned> Parents;
\r
133 GetCandidateParents(U, QSD, QAb, Parents);
\r
135 unsigned ParentCount = SIZE(Parents);
\r
136 if (ParentCount <= 1)
\r
139 Log("%u candidate parents, done.\n", ParentCount);
\r
145 HSPFinder *ptrHF = (opt_fastalign ? &HF : 0);
\r
147 unsigned ChunkLength;
\r
148 vector<unsigned> ChunkLos;
\r
149 GetChunkInfo(QSD.L, ChunkLength, ChunkLos);
\r
150 const unsigned ChunkCount = SIZE(ChunkLos);
\r
152 vector<unsigned> ChunkIndexToBestId(ChunkCount, 0);
\r
153 vector<unsigned> ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX);
\r
155 vector<SeqData> PSDs;
\r
156 vector<string> Paths;
\r
157 double TopPctId = 0.0;
\r
158 unsigned TopParentIndex = UINT_MAX;
\r
159 unsigned QL = QSD.L;
\r
160 vector<unsigned> MaxIdVec(QL, 0);
\r
161 for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)
\r
163 unsigned ParentSeqIndex = Parents[ParentIndex];
\r
166 //PSD.Label = U.GetSeedLabel(ParentSeqIndex);
\r
167 //PSD.Seq = U.GetSeedSeq(ParentSeqIndex);
\r
168 //PSD.L = U.GetSeedLength(ParentSeqIndex);
\r
169 //PSD.Index = ParentSeqIndex;
\r
170 U.GetSeqData(ParentSeqIndex, PSD);
\r
171 PSDs.push_back(PSD);
\r
179 bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD);
\r
182 Paths.push_back("");
\r
186 double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true);
\r
187 if (opt_selfid && PctId == 100.0)
\r
189 Paths.push_back("");
\r
193 if (PctId > TopPctId)
\r
195 TopParentIndex = ParentIndex;
\r
197 if (TopPctId >= 100.0 - opt_mindiv)
\r
201 Log(" %.1f%% >%s\n", TopPctId, PSD.Label);
\r
202 Log(" Top hit exceeds ctl threshold, done.\n");
\r
208 string Path = PD.Start;
\r
209 Paths.push_back(Path);
\r
211 vector<unsigned> IdVec;
\r
212 GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);
\r
214 for (unsigned QPos = 0; QPos < QL; ++QPos)
\r
215 if (IdVec[QPos] > MaxIdVec[QPos])
\r
216 MaxIdVec[QPos] = IdVec[QPos];
\r
219 vector<unsigned> BestParents;
\r
220 for (unsigned k = 0; k < opt_maxp; ++k)
\r
222 unsigned BestParent = UINT_MAX;
\r
223 unsigned BestCov = 0;
\r
224 for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)
\r
226 const SeqData &PSD = PSDs[ParentIndex];
\r
227 const string &Path = Paths[ParentIndex];
\r
231 vector<unsigned> IdVec;
\r
232 GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);
\r
235 for (unsigned QPos = 0; QPos < QL; ++QPos)
\r
236 if (IdVec[QPos] == MaxIdVec[QPos])
\r
241 BestParent = ParentIndex;
\r
246 if (BestParent == UINT_MAX)
\r
249 BestParents.push_back(BestParent);
\r
250 vector<unsigned> IdVec;
\r
252 const SeqData &PSD = PSDs[BestParent];
\r
253 const string &Path = Paths[BestParent];
\r
254 GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);
\r
255 for (unsigned QPos = 0; QPos < QL; ++QPos)
\r
256 if (IdVec[QPos] == MaxIdVec[QPos])
\r
257 MaxIdVec[QPos] = UINT_MAX;
\r
260 unsigned BestParentCount = SIZE(BestParents);
\r
264 Log("%u/%u best parents\n", BestParentCount, ParentCount);
\r
265 for (unsigned k = 0; k < BestParentCount; ++k)
\r
267 unsigned i = BestParents[k];
\r
268 Log(" %s\n", PSDs[i].Label);
\r
272 bool Found = false;
\r
273 for (unsigned k1 = 0; k1 < BestParentCount; ++k1)
\r
275 unsigned i1 = BestParents[k1];
\r
276 asserta(i1 < ParentCount);
\r
278 const SeqData &PSD1 = PSDs[i1];
\r
279 const string &Path1 = Paths[i1];
\r
281 for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2)
\r
283 unsigned i2 = BestParents[k2];
\r
284 asserta(i2 < ParentCount);
\r
287 const SeqData &PSD2 = PSDs[i2];
\r
288 const string &Path2 = Paths[i2];
\r
291 AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2);
\r
292 Hit2.PctIdQT = TopPctId;
\r
297 if (Hit2.Score > Hit.Score)
\r