11 extern FILE *g_fUChime;
\r
13 void GetCandidateParents(Ultra &U, const SeqData &QSD, float AbQ,
\r
14 vector<unsigned> &Parents);
\r
16 void AlignChime(const SeqData &QSD, const SeqData &ASD, const SeqData &BSD,
\r
17 const string &PathQA, const string &PathQB, ChimeHit2 &Hit);
\r
19 double GetFractIdGivenPath(const byte *A, const byte *B, const char *Path, bool Nucleo);
\r
21 static void GetSmoothedIdVec(const SeqData &QSD, const SeqData &PSD, const string &Path,
\r
22 vector<unsigned> &IdVec, unsigned d)
\r
25 const unsigned ColCount = SIZE(Path);
\r
27 const byte *Q = QSD.Seq;
\r
28 const byte *P = PSD.Seq;
\r
30 const unsigned QL = QSD.L;
\r
31 const unsigned PL = PSD.L;
\r
35 IdVec.resize(QSD.L, 0);
\r
42 vector<bool> SameVec;
\r
43 SameVec.reserve(QL);
\r
44 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
53 Same = (toupper(q) == toupper(p));
\r
56 if (c == 'M' || c == 'D')
\r
59 SameVec.push_back(Same);
\r
62 if (c == 'M' || c == 'I')
\r
66 asserta(SIZE(SameVec) == QL);
\r
69 for (unsigned QPos = 0; QPos < d; ++QPos)
\r
76 for (unsigned QPos = d; QPos < QL; ++QPos)
\r
81 if (SameVec[QPos-d])
\r
84 asserta(SIZE(IdVec) == QL);
\r
89 Log("GetSmoothedIdVec\n");
\r
92 Log("Q P Same Id\n");
\r
93 Log("- - ---- -------\n");
\r
94 for (unsigned Col = 0; Col < ColCount; ++Col)
\r
103 Same = (toupper(q) == toupper(p));
\r
104 Log("%c %c %4c %7d\n", q, p, tof(Same), IdVec[QPos]);
\r
107 if (c == 'M' || c == 'D')
\r
109 if (c == 'M' || c == 'I')
\r
116 bool SearchChime(Ultra &U, const SeqData &QSD, float QAb,
\r
117 const AlnParams &AP, const AlnHeuristics &AH, HSPFinder &HF,
\r
118 float MinFractId, ChimeHit2 &Hit)
\r
121 Hit.QLabel = QSD.Label;
\r
126 Log("SearchChime()\n");
\r
127 Log("Query>%s\n", QSD.Label);
\r
130 vector<unsigned> Parents;
\r
131 GetCandidateParents(U, QSD, QAb, Parents);
\r
133 unsigned ParentCount = SIZE(Parents);
\r
134 if (ParentCount <= 1)
\r
137 Log("%u candidate parents, done.\n", ParentCount);
\r
143 HSPFinder *ptrHF = (opt_fastalign ? &HF : 0);
\r
145 unsigned ChunkLength;
\r
146 vector<unsigned> ChunkLos;
\r
147 GetChunkInfo(QSD.L, ChunkLength, ChunkLos);
\r
148 const unsigned ChunkCount = SIZE(ChunkLos);
\r
150 vector<unsigned> ChunkIndexToBestId(ChunkCount, 0);
\r
151 vector<unsigned> ChunkIndexToBestParentIndex(ChunkCount, UINT_MAX);
\r
153 vector<SeqData> PSDs;
\r
154 vector<string> Paths;
\r
155 double TopPctId = 0.0;
\r
156 unsigned TopParentIndex = UINT_MAX;
\r
157 unsigned QL = QSD.L;
\r
158 vector<unsigned> MaxIdVec(QL, 0);
\r
159 for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)
\r
161 unsigned ParentSeqIndex = Parents[ParentIndex];
\r
164 //PSD.Label = U.GetSeedLabel(ParentSeqIndex);
\r
165 //PSD.Seq = U.GetSeedSeq(ParentSeqIndex);
\r
166 //PSD.L = U.GetSeedLength(ParentSeqIndex);
\r
167 //PSD.Index = ParentSeqIndex;
\r
168 U.GetSeqData(ParentSeqIndex, PSD);
\r
169 PSDs.push_back(PSD);
\r
177 bool Found = GlobalAlign(QSD, PSD, AP, AH, *ptrHF, MinFractId, HSPId, PD);
\r
180 Paths.push_back("");
\r
184 double PctId = 100.0*GetFractIdGivenPath(QSD.Seq, PSD.Seq, PD.Start, true);
\r
185 if (opt_selfid && PctId == 100.0)
\r
187 Paths.push_back("");
\r
191 if (PctId > TopPctId)
\r
193 TopParentIndex = ParentIndex;
\r
195 if (TopPctId >= 100.0 - opt_mindiv)
\r
199 Log(" %.1f%% >%s\n", TopPctId, PSD.Label);
\r
200 Log(" Top hit exceeds ctl threshold, done.\n");
\r
206 string Path = PD.Start;
\r
207 Paths.push_back(Path);
\r
209 vector<unsigned> IdVec;
\r
210 GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);
\r
212 for (unsigned QPos = 0; QPos < QL; ++QPos)
\r
213 if (IdVec[QPos] > MaxIdVec[QPos])
\r
214 MaxIdVec[QPos] = IdVec[QPos];
\r
217 vector<unsigned> BestParents;
\r
218 for (unsigned k = 0; k < opt_maxp; ++k)
\r
220 unsigned BestParent = UINT_MAX;
\r
221 unsigned BestCov = 0;
\r
222 for (unsigned ParentIndex = 0; ParentIndex < ParentCount; ++ParentIndex)
\r
224 const SeqData &PSD = PSDs[ParentIndex];
\r
225 const string &Path = Paths[ParentIndex];
\r
229 vector<unsigned> IdVec;
\r
230 GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);
\r
233 for (unsigned QPos = 0; QPos < QL; ++QPos)
\r
234 if (IdVec[QPos] == MaxIdVec[QPos])
\r
239 BestParent = ParentIndex;
\r
244 if (BestParent == UINT_MAX)
\r
247 BestParents.push_back(BestParent);
\r
248 vector<unsigned> IdVec;
\r
250 const SeqData &PSD = PSDs[BestParent];
\r
251 const string &Path = Paths[BestParent];
\r
252 GetSmoothedIdVec(QSD, PSD, Path, IdVec, opt_idsmoothwindow);
\r
253 for (unsigned QPos = 0; QPos < QL; ++QPos)
\r
254 if (IdVec[QPos] == MaxIdVec[QPos])
\r
255 MaxIdVec[QPos] = UINT_MAX;
\r
258 unsigned BestParentCount = SIZE(BestParents);
\r
262 Log("%u/%u best parents\n", BestParentCount, ParentCount);
\r
263 for (unsigned k = 0; k < BestParentCount; ++k)
\r
265 unsigned i = BestParents[k];
\r
266 Log(" %s\n", PSDs[i].Label);
\r
270 bool Found = false;
\r
271 for (unsigned k1 = 0; k1 < BestParentCount; ++k1)
\r
273 unsigned i1 = BestParents[k1];
\r
274 asserta(i1 < ParentCount);
\r
276 const SeqData &PSD1 = PSDs[i1];
\r
277 const string &Path1 = Paths[i1];
\r
279 for (unsigned k2 = k1 + 1; k2 < BestParentCount; ++k2)
\r
281 unsigned i2 = BestParents[k2];
\r
282 asserta(i2 < ParentCount);
\r
285 const SeqData &PSD2 = PSDs[i2];
\r
286 const string &Path2 = Paths[i2];
\r
289 AlignChime(QSD, PSD1, PSD2, Path1, Path2, Hit2);
\r
290 Hit2.PctIdQT = TopPctId;
\r
295 if (Hit2.Score > Hit.Score)
\r