]> git.donarmstrong.com Git - mothur.git/blob - maligner.cpp
chimera.slayer debugging
[mothur.git] / maligner.cpp
1 /*
2  *  maligner.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 9/23/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "maligner.h"
11
12 /***********************************************************************/ //int num, int match, int misMatch, , string mode, Database* dataLeft, Database* dataRight
13 Maligner::Maligner(vector<Sequence*> temp, int match, int misMatch, float div, int ms, int minCov) : db(temp), matchScore(match), misMatchPenalty(misMatch), minDivR(div), minSimilarity(ms), minCoverage(minCov) { 
14                         //numWanted(num),  , searchMethod(mode), databaseLeft(dataLeft), databaseRight(dataRight)
15                         
16                         m = MothurOut::getInstance(); 
17                         
18 }
19 /***********************************************************************/
20 string Maligner::getResults(Sequence* q, DeCalculator* decalc) {
21         try {
22                 
23                 outputResults.clear();
24                 
25                 //make copy so trimming doesn't destroy query from calling class - remember to deallocate
26                 query = new Sequence(q->getName(), q->getAligned());
27                 
28                 string chimera;
29                 
30                 //copy refSeqs so that filter does not effect original
31                 for(int i = 0; i < db.size(); i++) {  
32                         Sequence* newSeq = new Sequence(db[i]->getName(), db[i]->getAligned());
33                         refSeqs.push_back(newSeq);
34                 }
35                 
36                 refSeqs = minCoverageFilter(refSeqs);
37                 
38                 if (refSeqs.size() < 2)  { 
39                         for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
40                         percentIdenticalQueryChimera = 0.0;
41                         return "unknown"; 
42                 }
43                 
44                 int chimeraPenalty = computeChimeraPenalty();
45                 
46                 //fills outputResults
47                 chimera = chimeraMaligner(chimeraPenalty, decalc);
48                 
49                 if (m->control_pressed) { return chimera;  }
50                                 
51                 //free memory
52                 delete query;
53
54                 for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
55                 
56                 return chimera;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "Maligner", "getResults");
60                 exit(1);
61         }
62 }
63 /***********************************************************************/
64 string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
65         try {
66                 
67                 string chimera;
68                 
69                 //trims seqs to first non gap char in all seqs and last non gap char in all seqs
70                 spotMap = decalc->trimSeqs(query, refSeqs);
71                 
72                 //you trimmed the whole sequence, skip
73                 if (query->getAligned() == "") { return "no"; }
74
75                 vector<Sequence*> temp = refSeqs;
76                 
77 //              for(int i=0;i<refSeqs.size();i++){
78 //                      cout << refSeqs[i]->getName() << endl;
79 //              }
80                 
81                 temp.push_back(query);
82                         
83                 verticalFilter(temp);
84                 
85                 //for (int i = 0; i < refSeqs.size(); i++) { cout << refSeqs[i]->getName() << endl ; }//<< refSeqs[i]->getAligned() << endl
86
87                 vector< vector<score_struct> > matrix = buildScoreMatrix(query->getAligned().length(), refSeqs.size()); //builds and initializes
88                 
89                 if (m->control_pressed) { return chimera;  }
90                 
91                 fillScoreMatrix(matrix, refSeqs, chimeraPenalty);
92                 
93                 vector<score_struct> path = extractHighestPath(matrix);
94                 
95                 if (m->control_pressed) { return chimera;  }
96                 
97                 vector<trace_struct> trace = mapTraceRegionsToAlignment(path, refSeqs);
98                         
99                 if (trace.size() > 1) {         chimera = "yes";        }
100                 else { chimera = "no";  return chimera; }
101                 
102                 int traceStart = path[0].col;
103                 int traceEnd = path[path.size()-1].col; 
104                 string queryInRange = query->getAligned();
105                 queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1));
106 //              cout << queryInRange << endl;
107                 string chimeraSeq = constructChimericSeq(trace, refSeqs);
108 //              cout << chimeraSeq << endl;
109                 
110 //              cout << queryInRange.length() << endl;
111 //              cout << chimeraSeq.length() << endl;
112                 
113                 percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);
114                 
115 //              cout << percentIdenticalQueryChimera << endl;
116         /*      
117                 vector<trace_struct> trace = extractHighestPath(matrix);
118                                 
119                 //cout << "traces\n";
120                 //for(int i=0;i<trace.size();i++){
121                 //      cout << trace[i].col << '\t' << trace[i].oldCol << '\t' << refSeqs[trace[i].row]->getName() << endl;
122                 //}
123                 
124                 if (trace.size() > 1) {         chimera = "yes";        }
125                 else { chimera = "no";  return chimera; }
126                 
127                 int traceStart = trace[0].col;
128                 int traceEnd = trace[trace.size()-1].oldCol;    
129                 string queryInRange = query->getAligned();
130                 queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart));*/
131                 
132                 if (m->control_pressed) { return chimera;  }
133                 
134                 //save output results
135                 for (int i = 0; i < trace.size(); i++) {
136                         int regionStart = trace[i].col;
137                         int regionEnd = trace[i].oldCol;
138                         int seqIndex = trace[i].row;
139                         
140 //                      cout << regionStart << '\t' << regionEnd << '\t' << seqIndex << endl;
141                         results temp;
142                         
143                         temp.parent = refSeqs[seqIndex]->getName();
144                         temp.parentAligned = db[seqIndex]->getAligned();
145                         temp.nastRegionStart = spotMap[regionStart];
146                         temp.nastRegionEnd = spotMap[regionEnd];
147                         temp.regionStart = regionStart;
148                         temp.regionEnd = regionEnd;
149                         
150                         string parentInRange = refSeqs[seqIndex]->getAligned();
151                         parentInRange = parentInRange.substr(traceStart, (traceEnd-traceStart+1));
152                         
153                         temp.queryToParent = computePercentID(queryInRange, parentInRange);
154                         temp.divR = (percentIdenticalQueryChimera / temp.queryToParent);
155
156                         string queryInRegion = query->getAligned();
157                         queryInRegion = queryInRegion.substr(regionStart, (regionEnd-regionStart+1));
158                         
159                         string parentInRegion = refSeqs[seqIndex]->getAligned();
160                         parentInRegion = parentInRegion.substr(regionStart, (regionEnd-regionStart+1));
161                         
162                         temp.queryToParentLocal = computePercentID(queryInRegion, parentInRegion);
163                         
164 //                      cout << query->getName() << '\t' << temp.parent << '\t' << "NAST:" << temp.nastRegionStart << '-' << temp.nastRegionEnd << " G:" << temp.queryToParent << " L:" << temp.queryToParentLocal << ", " <<  temp.divR << endl;
165
166                         outputResults.push_back(temp);
167                 }
168                 
169                 return chimera;
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "Maligner", "chimeraMaligner");
173                 exit(1);
174         }
175 }
176 /***********************************************************************/
177 //removes top matches that do not have minimum coverage with query.
178 vector<Sequence*> Maligner::minCoverageFilter(vector<Sequence*> ref){  
179         try {
180                 vector<Sequence*> newRefs;
181                 
182                 string queryAligned = query->getAligned();
183                 
184                 for (int i = 0; i < ref.size(); i++) {
185                         
186                         string refAligned = ref[i]->getAligned();
187                         
188                         int numBases = 0;
189                         int numCovered = 0;
190                         
191                         //calculate coverage
192                         for (int j = 0; j < queryAligned.length(); j++) {
193                                 
194                                 if (isalpha(queryAligned[j])) {
195                                         numBases++;
196                                         
197                                         if (isalpha(refAligned[j])) {
198                                                 numCovered++;
199                                         }
200                                 }
201                         }
202                         
203                         int coverage = ((numCovered/(float)numBases)*100);
204                         
205                         //if coverage above minimum
206                         if (coverage > minCoverage) {
207                                 newRefs.push_back(ref[i]);
208                         }else {
209                                 delete ref[i];
210                         }
211                 }
212                 
213                 return newRefs;
214         }
215         catch(exception& e) {
216                 m->errorOut(e, "Maligner", "minCoverageFilter");
217                 exit(1);
218         }
219 }
220 /***********************************************************************/
221 // a breakpoint should yield fewer mismatches than this number with respect to the best parent sequence.
222 int Maligner::computeChimeraPenalty() {
223         try {
224                 
225                 int numAllowable = ((1.0 - (1.0/minDivR)) * query->getNumBases());
226
227 //              if(numAllowable < 1){   numAllowable = 1;       }
228                 
229                 int penalty = int(numAllowable + 1) * misMatchPenalty;
230
231                 return penalty;
232
233         }
234         catch(exception& e) {
235                 m->errorOut(e, "Maligner", "computeChimeraPenalty");
236                 exit(1);
237         }
238 }
239 /***********************************************************************/
240 //this is a vertical filter
241 void Maligner::verticalFilter(vector<Sequence*> seqs) {
242         try {
243                 vector<int> gaps;       gaps.resize(query->getAligned().length(), 0);
244                 
245                 string filterString = (string(query->getAligned().length(), '1'));
246                 
247                 //for each sequence
248                 for (int i = 0; i < seqs.size(); i++) {
249                 
250                         string seqAligned = seqs[i]->getAligned();
251                         
252                         for (int j = 0; j < seqAligned.length(); j++) {
253                                 //if this spot is a gap
254                                 if ((seqAligned[j] == '-') || (seqAligned[j] == '.'))   {       gaps[j]++;      }
255                         }
256                 }
257                 
258                 //zero out spot where all sequences have blanks
259                 int numColRemoved = 0;
260                 for(int i = 0; i < seqs[0]->getAligned().length(); i++){
261                         if(gaps[i] == seqs.size())      {       filterString[i] = '0';  numColRemoved++;  }
262                 }
263                 
264                 map<int, int> newMap;
265                 //for each sequence
266                 for (int i = 0; i < seqs.size(); i++) {
267                 
268                         string seqAligned = seqs[i]->getAligned();
269                         string newAligned = "";
270                         int count = 0;
271                         
272                         for (int j = 0; j < seqAligned.length(); j++) {
273                                 //if this spot is not a gap
274                                 if (filterString[j] == '1') { 
275                                         newAligned += seqAligned[j]; 
276                                         newMap[count] = spotMap[j];
277                                         count++;
278                                 }
279                         }
280                         
281                         seqs[i]->setAligned(newAligned);
282                 }
283
284                 spotMap = newMap;
285         }
286         catch(exception& e) {
287                 m->errorOut(e, "Maligner", "verticalFilter");
288                 exit(1);
289         }
290 }
291 //***************************************************************************************************************
292 vector< vector<score_struct> > Maligner::buildScoreMatrix(int cols, int rows) {
293         try{
294                 
295                 vector< vector<score_struct> > m(rows);
296                 
297                 for (int i = 0; i < rows; i++) {
298                         for (int j = 0; j < cols; j++) {
299                                 
300                                 //initialize each cell
301                                 score_struct temp;
302                                 temp.prev = -1;
303                                 temp.score = -9999999;
304                                 temp.col = j;
305                                 temp.row = i;
306                                 
307                                 m[i].push_back(temp);
308                         }
309                 }
310                 
311                 return m;
312         }
313         catch(exception& e) {
314                 m->errorOut(e, "Maligner", "buildScoreMatrix");
315                 exit(1);
316         }
317 }
318
319 //***************************************************************************************************************
320
321 void Maligner::fillScoreMatrix(vector<vector<score_struct> >& ms, vector<Sequence*> seqs, int penalty) {
322         try{
323                 
324                 //get matrix dimensions
325                 int numCols = query->getAligned().length();
326                 int numRows = seqs.size();
327                 
328 //              cout << numRows << endl;
329                 
330                 //initialize first col
331                 string queryAligned = query->getAligned();
332                 for (int i = 0; i < numRows; i++) {
333                         string subjectAligned = seqs[i]->getAligned();
334                         
335                         //are you both gaps?
336                         if ((!isalpha(queryAligned[0])) && (!isalpha(subjectAligned[0]))) {
337                                 ms[i][0].score = 0;
338 //                              ms[i][0].mismatches = 0;
339                         }else if (queryAligned[0] == subjectAligned[0])  { //|| subjectAligned[0] == 'N')
340                                 ms[i][0].score = matchScore;
341 //                              ms[i][0].mismatches = 0;
342                         }else{
343                                 ms[i][0].score = 0;
344 //                              ms[i][0].mismatches = 1;
345                         }
346                 }
347                 
348                 //fill rest of matrix
349                 for (int j = 1; j < numCols; j++) {  //iterate through matrix columns
350                 
351 //                      for (int i = 0; i < 1; i++) {  //iterate through matrix rows
352                         for (int i = 0; i < numRows; i++) {  //iterate through matrix rows
353                                 
354                                 string subjectAligned = seqs[i]->getAligned();
355                                 
356                                 int matchMisMatchScore = 0;
357                                 //are you both gaps?
358                                 if ((!isalpha(queryAligned[j])) && (!isalpha(subjectAligned[j]))) {
359                                         //leave the same
360                                 }else if ((toupper(queryAligned[j]) == 'N') || (toupper(subjectAligned[j]) == 'N')) {
361                                         //leave the same
362                                 }else if (queryAligned[j] == subjectAligned[j]) {
363                                         matchMisMatchScore = matchScore;
364                                 }else if (queryAligned[j] != subjectAligned[j]) {
365                                         matchMisMatchScore = misMatchPenalty;
366                                 }
367
368                                 //compute score based on previous columns scores
369                                 for (int prevIndex = 0; prevIndex < numRows; prevIndex++) { //iterate through rows
370                                         
371                                         int sumScore = matchMisMatchScore + ms[prevIndex][j-1].score;
372                                         
373                                         //you are not at yourself
374                                         if (prevIndex != i) {   sumScore += penalty;    }
375                                         if (sumScore < 0)       {       sumScore = 0;                   }
376                                         
377                                         if (sumScore > ms[i][j].score) {
378                                                 ms[i][j].score = sumScore;
379                                                 ms[i][j].prev = prevIndex;
380                                         }
381                                 }
382 //                              cout << i << '\t' << j << '\t' << queryAligned[j] << '\t' << subjectAligned[j] << '\t' << matchMisMatchScore << '\t' << ms[i][j].score << endl;
383
384                         }
385                         
386                 }
387                 
388                 
389                 
390                 
391                 
392 //              cout << numRows << '\t' << numCols << endl;
393 //              for(int i=0;i<numRows;i++){
394 //                      cout << seqs[i]->getName();
395 //                      for(int j=0;j<numCols;j++){
396 //                              cout << '\t' << ms[i][j].score;
397 //                      }
398 //                      cout << endl;
399 //              }
400 //              cout << endl;
401 //              
402 //              for(int i=0;i<numRows;i++){
403 //                      cout << seqs[i]->getName();
404 //                      for(int j=0;j<numCols;j++){
405 //                              cout << '\t' << ms[i][j].prev;
406 //                      }
407 //                      cout << endl;
408 //              }
409                 
410                 
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "Maligner", "fillScoreMatrix");
414                 exit(1);
415         }
416 }
417 //***************************************************************************************************************
418 vector<score_struct> Maligner::extractHighestPath(vector<vector<score_struct> > ms) {
419         try {
420                 
421                 //get matrix dimensions
422                 int numCols = query->getAligned().length();
423                 int numRows = ms.size();
424                 
425                 
426                 //find highest score scoring matrix
427                 score_struct highestStruct;
428                 int highestScore = 0;
429                 
430                 for (int i = 0; i < numRows; i++) {
431                         for (int j = 0; j < numCols; j++) {
432                                 if (ms[i][j].score > highestScore) {
433                                         highestScore = ms[i][j].score;
434                                         highestStruct = ms[i][j];
435                                 }
436                         }
437                 }
438                 
439 //              cout << highestScore << endl;
440                 vector<score_struct> path;
441                 
442                 int rowIndex = highestStruct.row;
443                 int pos = highestStruct.col;
444                 int score = highestStruct.score;
445                 
446                 while (pos >= 0 && score > 0) {
447                         score_struct temp = ms[rowIndex][pos];
448                         score = temp.score;
449                         
450                         if (score > 0) {        path.push_back(temp);   }
451                         
452                         rowIndex = temp.prev;
453                         pos--;
454                 }
455                 
456                 reverse(path.begin(), path.end());
457                 
458                 return path;
459                 
460         }
461         catch(exception& e) {
462                 m->errorOut(e, "Maligner", "extractHighestPath");
463                 exit(1);
464         }
465 }
466 //***************************************************************************************************************
467 vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> path, vector<Sequence*> seqs) {
468         try {
469                 vector<trace_struct> trace;
470                 
471                 int region_index = path[0].row;
472                 int region_start = path[0].col;
473                 
474                 for (int i = 1; i < path.size(); i++) {
475                         
476                         int next_region_index = path[i].row;
477 //                      cout << i << '\t' << next_region_index << endl;
478
479                         if (next_region_index != region_index) {
480                                 
481                                 // add trace region
482                                 int col_index = path[i].col;
483                                 trace_struct temp;
484                                 temp.col = region_start;
485                                 temp.oldCol = col_index-1;
486                                 temp.row = region_index;
487                                 
488                                 trace.push_back(temp);
489                                 
490                                 region_index = path[i].row;
491                                 region_start = col_index;
492                         }
493                 }
494                 
495                 // get last one
496                 trace_struct temp;
497                 temp.col = region_start;
498                 temp.oldCol = path[path.size()-1].col;
499                 temp.row = region_index;
500                 trace.push_back(temp);
501                 
502                 return trace;
503                 
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "Maligner", "mapTraceRegionsToAlignment");
507                 exit(1);
508         }
509 }
510
511 /***************************************************************************************************************
512
513 vector<trace_struct> Maligner::extractHighestPath(vector<vector<score_struct> > ms) {
514         try {
515         
516                 
517                 //get matrix dimensions
518                 int numCols = query->getAligned().length();
519                 int numRows = ms.size();
520         
521         
522                 //find highest score scoring matrix
523                 vector<score_struct> highestStruct;
524                 int highestScore = 0;
525                 
526                 for (int i = 0; i < numRows; i++) {
527                         for (int j = 0; j < numCols; j++) {
528                                 if (ms[i][j].score > highestScore) {
529                                         highestScore = ms[i][j].score;
530                                         highestStruct.resize(0);
531                                         highestStruct.push_back(ms[i][j]);
532                                 }
533                                 else if(ms[i][j].score == highestScore){
534                                         highestStruct.push_back(ms[i][j]);
535                                 }
536                         }
537                 }
538                         
539                 //cout << endl << highestScore << '\t' << highestStruct.size() << '\t' << highestStruct[0].row << endl; 
540                 
541                 vector<trace_struct> maxTrace;
542                 double maxPercentIdenticalQueryAntiChimera = 0;
543                 
544                 for(int i=0;i<highestStruct.size();i++){
545                         
546                         vector<score_struct> path;
547
548                         int rowIndex = highestStruct[i].row;
549                         int pos = highestStruct[i].col;
550                         int score = highestStruct[i].score;
551                                         
552                         while (pos >= 0 && score > 0) {
553                                 score_struct temp = ms[rowIndex][pos];
554                                 score = temp.score;
555                                 
556                                 if (score > 0) {        path.push_back(temp);   }
557                                 
558                                 rowIndex = temp.prev;
559                                 pos--;
560                         }
561
562                         reverse(path.begin(), path.end());
563
564                         vector<trace_struct> trace = mapTraceRegionsToAlignment(path, refSeqs);
565                 
566                         //cout << "traces\n";
567                         //for(int j=0;j<trace.size();j++){
568                         //      cout << trace[j].col << '\t' << trace[j].oldCol << '\t' << refSeqs[trace[j].row]->getName() << endl;
569                         //}
570                                                 
571                         int traceStart = path[0].col;
572                         int traceEnd = path[path.size()-1].col; 
573 //                      cout << "traceStart/End\t" << traceStart << '\t' << traceEnd << endl;
574                         
575                         string queryInRange = query->getAligned();
576                         queryInRange = queryInRange.substr(traceStart, (traceEnd-traceStart+1));
577 //                      cout << "here" << endl;
578                         string chimeraSeq = constructChimericSeq(trace, refSeqs);
579                         string antiChimeraSeq = constructAntiChimericSeq(trace, refSeqs);
580                 
581                         percentIdenticalQueryChimera = computePercentID(queryInRange, chimeraSeq);                      
582                         double percentIdenticalQueryAntiChimera = computePercentID(queryInRange, antiChimeraSeq);
583 //                      cout << i << '\t' << percentIdenticalQueryChimera << '\t' << percentIdenticalQueryAntiChimera << endl;
584                         
585                         if(percentIdenticalQueryAntiChimera > maxPercentIdenticalQueryAntiChimera){
586                                 maxPercentIdenticalQueryAntiChimera = percentIdenticalQueryAntiChimera;
587                                 maxTrace = trace;
588                         }
589                 }
590 //              cout << maxPercentIdenticalQueryAntiChimera << endl;
591                 return maxTrace;
592         
593                 
594         }
595         catch(exception& e) {
596                 m->errorOut(e, "Maligner", "extractHighestPath");
597                 exit(1);
598         }
599 }
600
601 //***************************************************************************************************************
602
603 vector<trace_struct> Maligner::mapTraceRegionsToAlignment(vector<score_struct> path, vector<Sequence*> seqs) {
604         try {
605                 vector<trace_struct> trace;
606                 
607                 int region_index = path[0].row;
608                 int region_start = path[0].col;
609         
610                 for (int i = 1; i < path.size(); i++) {
611                 
612                         int next_region_index = path[i].row;
613                         
614                         if (next_region_index != region_index) {
615                                 
616                                 // add trace region
617                                 int col_index = path[i].col;
618                                 trace_struct temp;
619                                 temp.col = region_start;
620                                 temp.oldCol = col_index-1;
621                                 temp.row = region_index;
622                                 
623                                 trace.push_back(temp);
624                                                         
625                                 region_index = path[i].row;
626                                 region_start = col_index;
627                         }
628                 }
629         
630                 // get last one
631                 trace_struct temp;
632                 temp.col = region_start;
633                 temp.oldCol = path[path.size()-1].col;
634                 temp.row = region_index;
635                 trace.push_back(temp);
636
637 //              cout << endl;
638 //              cout << trace.size() << endl;
639 //              for(int i=0;i<trace.size();i++){
640 //                      cout << seqs[trace[i].row]->getName() << endl;
641 //              }
642 //              cout << endl;
643                 
644                 return trace;
645                 
646         }
647         catch(exception& e) {
648                 m->errorOut(e, "Maligner", "mapTraceRegionsToAlignment");
649                 exit(1);
650         }
651 }
652 */
653 //***************************************************************************************************************
654
655 string Maligner::constructChimericSeq(vector<trace_struct> trace, vector<Sequence*> seqs) {
656         try {
657                 string chimera = "";
658                 
659                 for (int i = 0; i < trace.size(); i++) {
660 //                      cout << i << '\t' << trace[i].row << '\t' << trace[i].col << '\t' << trace[i].oldCol << endl;
661                         
662                         string seqAlign = seqs[trace[i].row]->getAligned();
663                         seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1));
664                         chimera += seqAlign;
665                 }
666 //              cout << chimera << endl;
667 //              if (chimera != "") { chimera = chimera.substr(0, (chimera.length()-1)); }       //this was introducing a fence post error
668 //              cout << chimera << endl;
669                 return chimera;
670         }
671         catch(exception& e) {
672                 m->errorOut(e, "Maligner", "constructChimericSeq");
673                 exit(1);
674         }
675 }
676
677 //***************************************************************************************************************
678
679 string Maligner::constructAntiChimericSeq(vector<trace_struct> trace, vector<Sequence*> seqs) {
680         try {
681                 string antiChimera = "";
682                 
683                 for (int i = 0; i < trace.size(); i++) {
684 //                      cout << i << '\t' << (trace.size() - i - 1) << '\t' << trace[i].row << '\t' << trace[i].col << '\t' << trace[i].oldCol << endl;
685                         
686                         int oppositeIndex = trace.size() - i - 1;
687                         
688                         string seqAlign = seqs[trace[oppositeIndex].row]->getAligned();
689                         seqAlign = seqAlign.substr(trace[i].col, (trace[i].oldCol-trace[i].col+1));
690                         antiChimera += seqAlign;
691                 }
692                 
693                 return antiChimera;
694         }
695         catch(exception& e) {
696                 m->errorOut(e, "Maligner", "constructChimericSeq");
697                 exit(1);
698         }
699 }
700
701 //***************************************************************************************************************
702 float Maligner::computePercentID(string queryAlign, string chimera) {
703         try {
704         
705                 if (queryAlign.length() != chimera.length()) {
706                         m->mothurOut("Error, alignment strings are of different lengths: "); m->mothurOutEndLine();
707                         m->mothurOut(toString(queryAlign.length())); m->mothurOutEndLine(); 
708                         m->mothurOut(toString(chimera.length())); m->mothurOutEndLine();
709                         return -1.0;
710                 }
711
712 //              cout << queryAlign.length() << endl;
713                 int numIdentical = 0;
714                 int countA = 0;
715                 int countB = 0;
716                 for (int i = 0; i < queryAlign.length(); i++) {
717                         if (((queryAlign[i] != 'G') && (queryAlign[i] != 'T') && (queryAlign[i] != 'A') && (queryAlign[i] != 'C')&& (queryAlign[i] != '.') && (queryAlign[i] != '-')) ||
718                                 ((chimera[i] != 'G') && (chimera[i] != 'T') && (chimera[i] != 'A') && (chimera[i] != 'C')&& (chimera[i] != '.') && (chimera[i] != '-'))) {}
719                         else {
720
721                                 bool charA = false; bool charB = false;
722                                 if ((queryAlign[i] == 'G') || (queryAlign[i] == 'T') || (queryAlign[i] == 'A') || (queryAlign[i] == 'C')) { charA = true; }
723                                 if ((chimera[i] == 'G') || (chimera[i] == 'T') || (chimera[i] == 'A') || (chimera[i] == 'C')) { charB = true; }
724                                         
725                                 
726                                 if (charA || charB) {
727                                                 
728                                         if (charA) { countA++; }
729                                         if (charB) { countB++; }
730                                                 
731                                         if (queryAlign[i] == chimera[i]) {
732                                                 numIdentical++;
733                                         }
734                                 }
735 //                              cout << queryAlign[i] << '\t' << chimera[i] << '\t' << countA << '\t' << countB << endl;
736
737                         }
738                 }
739                 
740 //              cout << "pat\t" << countA << '\t' << countB << '\t' << numIdentical << endl;
741
742                 
743                 float numBases = (countA + countB) /(float) 2;
744                 
745                 if (numBases == 0) { return 0; }
746         
747 //              cout << numIdentical << '\t' << numBases << endl;
748                 
749                 float percentIdentical = (numIdentical/(float)numBases) * 100;
750                 
751 //              cout << percentIdentical << endl;
752                 
753                 return percentIdentical;
754                 
755         }
756         catch(exception& e) {
757                 m->errorOut(e, "Maligner", "computePercentID");
758                 exit(1);
759         }
760 }
761 //***************************************************************************************************************