5 * Created by westcott on 9/23/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
12 /***********************************************************************/
13 Maligner::Maligner(vector<Sequence*> temp, int num, int match, int misMatch, float div, int minCov) :
14 db(temp), numWanted(num), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minCoverage(minCov) {}
15 /***********************************************************************/
16 string Maligner::getResults(Sequence* q) {
19 outputResults.clear();
21 //make copy so trimming doesn't destroy query from calling class - remember to deallocate
22 query = new Sequence(q->getName(), q->getAligned());
26 decalc = new DeCalculator();
28 //find closest seqs to query in template - returns copies of seqs so trim does not destroy - remember to deallocate
29 refSeqs = decalc->findClosest(query, db, numWanted);
31 refSeqs = minCoverageFilter(refSeqs);
33 if (refSeqs.size() < 2) {
34 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
35 percentIdenticalQueryChimera = 0.0;
39 int chimeraPenalty = computeChimeraPenalty();
41 //trims seqs to first non gap char in all seqs and last non gap char in all seqs
42 decalc->trimSeqs(query, refSeqs);
44 vector<Sequence*> temp = refSeqs;
45 temp.push_back(query);
49 vector< vector<score_struct> > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes
51 fillScoreMatrix(matrix, refSeqs, chimeraPenalty);
53 vector<score_struct> path = extractHighestPath(matrix);
55 vector<trace_struct> trace = mapTraceRegionsToAlignment(path, refSeqs);
57 if (trace.size() > 1) { chimera = "yes"; }
58 else { chimera = "no"; }
60 int traceStart = path[0].col;
61 int traceEnd = path[path.size()-1].col;
63 string queryInRange = query->getAligned();
64 queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1));
66 string chimeraSeq = constructChimericSeq(trace, refSeqs);
68 percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);
73 for (int i = 0; i < trace.size(); i++) {
74 int regionStart = trace[i].col;
75 int regionEnd = trace[i].oldCol;
76 int seqIndex = trace[i].row;
80 temp.parent = refSeqs[seqIndex]->getName();
81 temp.regionStart = regionStart;
82 temp.regionEnd = regionEnd;
84 string parentInRange = refSeqs[seqIndex]->getAligned();
85 parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1));
87 temp.queryToParent = computePercentID(queryInRange, parentInRange);
88 temp.divR = (percentIdenticalQueryChimera / temp.queryToParent);
90 string queryInRegion = query->getAligned();
91 queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1));
93 string parentInRegion = refSeqs[seqIndex]->getAligned();
94 parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1));
96 temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion);
98 outputResults.push_back(temp);
103 for (int i = 0; i < refSeqs.size(); i++) { delete refSeqs[i]; }
107 catch(exception& e) {
108 errorOut(e, "Maligner", "getResults");
112 /***********************************************************************/
113 //removes top matches that do not have minimum coverage with query.
114 vector<Sequence*> Maligner::minCoverageFilter(vector<Sequence*> ref){
116 vector<Sequence*> newRefs;
118 string queryAligned = query->getAligned();
120 for (int i = 0; i < ref.size(); i++) {
122 string refAligned = ref[i]->getAligned();
128 for (int j = 0; j < queryAligned.length(); j++) {
130 if (isalpha(queryAligned[j])) {
133 if (isalpha(refAligned[j])) {
139 int coverage = ((numCovered/(float)numBases)*100);
141 //if coverage above minimum
142 if (coverage > minCoverage) {
143 newRefs.push_back(ref[i]);
149 catch(exception& e) {
150 errorOut(e, "Maligner", "minCoverageFilter");
154 /***********************************************************************/
155 // a breakpoint should yield fewer mismatches than this number with respect to the best parent sequence.
156 int Maligner::computeChimeraPenalty() {
159 int numAllowable = ((1.0 - (1.0/minDivR)) * query->getNumBases());
161 int penalty = int(numAllowable + 1) * misMatchPenalty;
166 catch(exception& e) {
167 errorOut(e, "Maligner", "computeChimeraPenalty");
171 /***********************************************************************/
172 //this is a vertical filter
173 void Maligner::verticalFilter(vector<Sequence*> seqs) {
175 vector<int> gaps; gaps.resize(query->getAligned().length(), 0);
177 string filterString = (string(query->getAligned().length(), '1'));
180 for (int i = 0; i < seqs.size(); i++) {
182 string seqAligned = seqs[i]->getAligned();
184 for (int j = 0; j < seqAligned.length(); j++) {
185 //if this spot is a gap
186 if ((seqAligned[j] == '-') || (seqAligned[j] == '.')) { gaps[j]++; }
190 //zero out spot where all sequences have blanks
191 int numColRemoved = 0;
192 for(int i = 0; i < seqs[0]->getAligned().length(); i++){
193 if(gaps[i] == seqs.size()) { filterString[i] = '0'; numColRemoved++; }
197 for (int i = 0; i < seqs.size(); i++) {
199 string seqAligned = seqs[i]->getAligned();
200 string newAligned = "";
202 for (int j = 0; j < seqAligned.length(); j++) {
203 //if this spot is not a gap
204 if (filterString[j] == '1') { newAligned += seqAligned[j]; }
207 seqs[i]->setAligned(newAligned);
212 catch(exception& e) {
213 errorOut(e, "Maligner", "verticalFilter");
217 //***************************************************************************************************************
218 vector< vector<score_struct> > Maligner::buildScoreMatrix(int cols, int rows) {
221 vector< vector<score_struct> > m; m.resize(rows);
223 for (int i = 0; i < m.size(); i++) {
224 for (int j = 0; j < cols; j++) {
226 //initialize each cell
229 temp.score = -9999999;
233 m[i].push_back(temp);
239 catch(exception& e) {
240 errorOut(e, "Maligner", "buildScoreMatrix");
244 //***************************************************************************************************************
245 void Maligner::fillScoreMatrix(vector<vector<score_struct> >& m, vector<Sequence*> seqs, int penalty) {
248 //get matrix dimensions
249 int numCols = query->getAligned().length();
250 int numRows = seqs.size();
252 //initialize first col
253 string queryAligned = query->getAligned();
254 for (int i = 0; i < numRows; i++) {
255 string subjectAligned = seqs[i]->getAligned();
258 if ((!isalpha(queryAligned[0])) && (!isalpha(subjectAligned[0]))) {
260 }else if (queryAligned[0] == subjectAligned[0]) {
261 m[i][0].score = matchScore;
267 //fill rest of matrix
268 for (int j = 1; j < numCols; j++) { //iterate through matrix columns
270 for (int i = 0; i < numRows; i++) { //iterate through matrix rows
272 string subjectAligned = seqs[i]->getAligned();
274 int matchMisMatchScore = 0;
276 if ((!isalpha(queryAligned[j])) && (!isalpha(subjectAligned[j]))) {
278 }else if ((toupper(queryAligned[j]) == 'N') || (toupper(subjectAligned[j]) == 'N')) {
280 }else if (queryAligned[j] == subjectAligned[j]) {
281 matchMisMatchScore = matchScore;
282 }else if (queryAligned[j] != subjectAligned[j]) {
283 matchMisMatchScore = misMatchPenalty;
286 //compute score based on previous columns scores
287 for (int prevIndex = 0; prevIndex < numRows; prevIndex++) { //iterate through rows
289 int sumScore = matchMisMatchScore + m[prevIndex][j-1].score;
291 //you are not at yourself
292 if (prevIndex != i) { sumScore += penalty; }
293 if (sumScore < 0) { sumScore = 0; }
295 if (sumScore > m[i][j].score) {
296 m[i][j].score = sumScore;
297 m[i][j].prev = prevIndex;
304 catch(exception& e) {
305 errorOut(e, "Maligner", "fillScoreMatrix");
309 //***************************************************************************************************************
310 vector<score_struct> Maligner::extractHighestPath(vector<vector<score_struct> > m) {
313 //get matrix dimensions
314 int numCols = query->getAligned().length();
315 int numRows = m.size();
318 //find highest score scoring matrix
319 score_struct highestStruct;
320 int highestScore = 0;
322 for (int i = 0; i < numRows; i++) {
323 for (int j = 0; j < numCols; j++) {
324 if (m[i][j].score > highestScore) {
325 highestScore = m[i][j].score;
326 highestStruct = m[i][j];
331 vector<score_struct> path;
333 int rowIndex = highestStruct.row;
334 int pos = highestStruct.col;
335 int score = highestStruct.score;
337 while (pos >= 0 && score > 0) {
338 score_struct temp = m[rowIndex][pos];
341 if (score > 0) { path.push_back(temp); }
343 rowIndex = temp.prev;
347 reverse(path.begin(), path.end());
352 catch(exception& e) {
353 errorOut(e, "Maligner", "extractHighestPath");
357 //***************************************************************************************************************
358 vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> path, vector<Sequence*> seqs) {
360 vector<trace_struct> trace;
362 int region_index = path[0].row;
363 int region_start = path[0].col;
365 for (int i = 1; i < path.size(); i++) {
367 int next_region_index = path[i].row;
369 if (next_region_index != region_index) {
372 int col_index = path[i].col;
374 temp.col = region_start;
375 temp.oldCol = col_index-1;
376 temp.row = region_index;
378 trace.push_back(temp);
380 region_index = path[i].row;
381 region_start = col_index;
387 temp.col = region_start;
388 temp.oldCol = path[path.size()-1].col;
389 temp.row = region_index;
390 trace.push_back(temp);
395 catch(exception& e) {
396 errorOut(e, "Maligner", "mapTraceRegionsToAlignment");
400 //***************************************************************************************************************
401 string Maligner::constructChimericSeq(vector<trace_struct> trace, vector<Sequence*> seqs) {
405 for (int i = 0; i < trace.size(); i++) {
406 string seqAlign = seqs[trace[i].row]->getAligned();
407 seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1));
413 catch(exception& e) {
414 errorOut(e, "Maligner", "constructChimericSeq");
418 //***************************************************************************************************************
419 float Maligner::computePercentID(string queryAlign, string chimera) {
422 if (queryAlign.length() != chimera.length()) {
423 mothurOut("Error, alignment strings are of different lengths: "); mothurOutEndLine();
424 mothurOut(toString(queryAlign.length())); mothurOutEndLine(); mothurOutEndLine(); mothurOutEndLine(); mothurOutEndLine();
425 mothurOut(toString(chimera.length())); mothurOutEndLine();
431 int numIdentical = 0;
433 for (int i = 0; i < queryAlign.length(); i++) {
434 if ((isalpha(queryAlign[i])) || (isalpha(chimera[i]))) {
436 if (queryAlign[i] == chimera[i]) {
442 if (numBases == 0) { return 0; }
444 float percentIdentical = (numIdentical/(float)numBases) * 100;
446 return percentIdentical;
449 catch(exception& e) {
450 errorOut(e, "Maligner", "computePercentID");
454 //***************************************************************************************************************