]> git.donarmstrong.com Git - mothur.git/blob - decalc.cpp
working on chimeras
[mothur.git] / decalc.cpp
1 /*
2  *  decalc.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "decalc.h"
11 #include "chimera.h"
12 #include "dist.h"
13 #include "eachgapdist.h"
14
15
16 //***************************************************************************************************************
17 void DeCalculator::setMask(string m) { 
18         try {
19                 seqMask = m; 
20                 int count = 0;
21                 maskMap.clear();
22                 
23                 if (seqMask.length() != 0) {
24                         //whereever there is a base in the mask, save that value is query and subject
25                         for (int i = 0; i < seqMask.length(); i++) {
26                                 if (isalpha(seqMask[i])) {
27                                         h.insert(i);
28                                         maskMap[count] = i;
29                                         count++;
30                                         
31                                 }
32                         }
33                 }else {
34                         for (int i = 0; i < alignLength; i++) {   
35                                 h.insert(i); 
36                                 maskMap[count] = i;
37                                 count++;
38                         }
39                 }
40         }
41         catch(exception& e) {
42                 errorOut(e, "DeCalculator", "setMask");
43                 exit(1);
44         } 
45 }
46 //***************************************************************************************************************
47 void DeCalculator::runMask(Sequence* seq) {
48         try{
49                 
50                 string q = seq->getAligned();
51                 string tempQuery = "";
52                 
53                 //whereever there is a base in the mask, save that value is query and subject
54                 set<int>::iterator setit;
55                 for ( setit=h.begin() ; setit != h.end(); setit++ )  {
56                         tempQuery += q[*setit];
57                 }
58                 
59                 //save masked values
60                 seq->setAligned(tempQuery);
61                 seq->setUnaligned(tempQuery);
62         }
63         catch(exception& e) {
64                 errorOut(e, "DeCalculator", "runMask");
65                 exit(1);
66         }
67 }
68 //***************************************************************************************************************
69 //num is query's spot in querySeqs
70 void DeCalculator::trimSeqs(Sequence* query, Sequence* subject, map<int, int>& trim) {
71         try {
72                 
73                 string q = query->getAligned();
74                 string s = subject->getAligned();
75                 
76                 int front = 0;
77                 for (int i = 0; i < q.length(); i++) {
78 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
79                         if (isalpha(q[i]) && isalpha(s[i])) { front = i; break;  }
80                 }
81 //cout << endl << endl;         
82                 int back = 0;           
83                 for (int i = q.length(); i >= 0; i--) {
84 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
85                         if (isalpha(q[i]) && isalpha(s[i])) { back = i; break;  }
86                 }
87                 
88                 trim[front] = back;
89                 
90         }
91         catch(exception& e) {
92                 errorOut(e, "DeCalculator", "trimSeqs");
93                 exit(1);
94         }
95 }
96 //***************************************************************************************************************
97 vector<int>  DeCalculator::findWindows(Sequence* query, int front, int back, int& size, int increment) {
98         try {
99                 
100                 vector<int> win; 
101                 
102                 int cutoff = back - front;  //back - front 
103                         
104                 //if window is set to default
105                 if (size == 0) {  if (cutoff > 1200) {  size = 300; }
106                                                         else{  size = (cutoff / 4); }  } 
107                 else if (size > (cutoff / 4)) { 
108                                 mothurOut("You have selected too large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
109                                 size = (cutoff / 4); 
110                 }
111         
112         /*      string seq = query->getAligned().substr(front, cutoff);
113                         
114                 //count bases
115                 int numBases = 0;
116                 for (int l = 0; l < seq.length(); l++) {  if (isalpha(seq[l])) { numBases++; }  }
117 //cout << "num Bases = " << numBases << endl;                   
118                 //save start of seq
119                 win.push_back(front);
120 //cout << front << '\t';                
121                 //move ahead increment bases at a time until all bases are in a window
122                 int countBases = 0;
123                 int totalBases = 0;  //used to eliminate window of blanks at end of sequence
124                         
125                 seq = query->getAligned();
126                 for (int m = front; m < (back - size) ; m++) {
127                                 
128                         //count number of bases you see
129                         if (isalpha(seq[m])) { countBases++;  }
130                                 
131                         //if you have seen enough bases to make a new window
132                         if (countBases >= increment) {
133                                 //total bases is the number of bases in a window already.
134                                 totalBases += countBases;
135 //cout << "total bases = " << totalBases << endl;
136                                 win.push_back(m);  //save spot in alignment
137 //cout << m << '\t';
138                                 countBases = 0;                         //reset bases you've seen in this window
139                         }
140                                 
141                         //no need to continue if all your bases are in a window
142                         if (totalBases == numBases) {   break;  }
143                 }
144         
145
146                 //get last window if needed
147                 if (totalBases < numBases) {   win.push_back(back-size);  }
148 //cout << endl << endl;
149 */      
150                 
151                 //this follows wigeon, but we may want to consider that it chops off the end values if the sequence cannot be evenly divided into steps
152                 for (int m = front; m < (back - size) ; m+=increment) {  win.push_back(m);  }
153
154
155         
156                 return win;
157         
158         }
159         catch(exception& e) {
160                 errorOut(e, "DeCalculator", "findWindows");
161                 exit(1);
162         }
163 }
164
165 //***************************************************************************************************************
166 vector<float> DeCalculator::calcObserved(Sequence* query, Sequence* subject, vector<int> window, int size) {
167         try {
168                 
169                 vector<float> temp;     
170                 //int gaps = 0;         
171                 for (int m = 0; m < window.size(); m++) {
172                                                 
173                         string seqFrag = query->getAligned().substr(window[m], size);
174                         string seqFragsub = subject->getAligned().substr(window[m], size);
175                                 
176                         int diff = 0;
177                         for (int b = 0; b < seqFrag.length(); b++) {
178                                 //if at least one is a base and they are not equal
179                                 if( (isalpha(seqFrag[b]) || isalpha(seqFragsub[b])) && (seqFrag[b] != seqFragsub[b]) ) { diff++; }
180                                 
181                                 //ignore gaps
182                                 //if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
183                         }
184                
185                         //percentage of mismatched bases
186                         float dist;
187                         
188                         //if the whole fragment is 0 distance = 0
189                         //if ((seqFrag.length()-gaps) == 0)  { dist =  0.0; }
190                
191                         //percentage of mismatched bases
192                         //else {  dist = diff / (float) (seqFrag.length()-gaps) * 100;   } 
193                         
194                         dist = diff / (float) (seqFrag.length()) * 100; 
195                         
196                         temp.push_back(dist);
197                 }
198                         
199                 return temp;
200         }
201         catch(exception& e) {
202                 errorOut(e, "DeCalculator", "calcObserved");
203                 exit(1);
204         }
205 }
206 //***************************************************************************************************************
207 float DeCalculator::calcDist(Sequence* query, Sequence* subject, int front, int back) {
208         try {
209                 
210                 //so you only look at the trimmed part of the sequence
211                 int cutoff = back - front;  
212                 int gaps = 0;
213                         
214                 //from first startpoint with length back-front
215                 string seqFrag = query->getAligned().substr(front, cutoff);
216                 string seqFragsub = subject->getAligned().substr(front, cutoff);
217                                                                                                                 
218                 int diff = 0;
219                 for (int b = 0; b < seqFrag.length(); b++) {
220                         //ignore gaps
221                         if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
222                         if (seqFrag[b] != seqFragsub[b]) { diff++; }
223                 }
224                 
225                 //if the whole fragment is 0 distance = 0
226                 if ((seqFrag.length()-gaps) == 0)  { return 0.0; }
227                
228                 //percentage of mismatched bases
229                 float dist = diff / (float) (seqFrag.length()-gaps) * 100;       
230                                 
231                 return dist;
232         }
233         catch(exception& e) {
234                 errorOut(e, "DeCalculator", "calcDist");
235                 exit(1);
236         }
237 }
238
239 //***************************************************************************************************************
240 vector<float> DeCalculator::calcExpected(vector<float> qav, float coef) {
241         try {
242                 
243                 //for each window
244                 vector<float> queryExpected;
245                         
246                 for (int m = 0; m < qav.size(); m++) {          
247                                 
248                         float expected = qav[m] * coef;
249                                 
250                         queryExpected.push_back(expected);      
251                 }
252                         
253                 return queryExpected;
254                                 
255         }
256         catch(exception& e) {
257                 errorOut(e, "DeCalculator", "calcExpected");
258                 exit(1);
259         }
260 }
261 //***************************************************************************************************************
262 float DeCalculator::calcDE(vector<float> obs, vector<float> exp) {
263         try {
264                 
265                 //for each window
266                 float sum = 0.0;  //sum = sum from 1 to m of (oi-ei)^2
267                 int numZeros = 0;
268                 for (int m = 0; m < obs.size(); m++) {          
269                         
270                         //if (obs[m] != 0.0) {
271                                 sum += ((obs[m] - exp[m]) * (obs[m] - exp[m])); 
272                         //}else {  numZeros++;   }
273                         
274                 }
275                         
276                 float de = sqrt((sum / (obs.size() - 1 - numZeros)));
277                         
278                 return de;
279         }
280         catch(exception& e) {
281                 errorOut(e, "DeCalculator", "calcDE");
282                 exit(1);
283         }
284 }
285
286 //***************************************************************************************************************
287
288 vector<float> DeCalculator::calcFreq(vector<Sequence*> seqs, string filename) {
289         try {
290
291                 vector<float> prob;
292                 string freqfile = getRootName(filename) + "freq";
293                 ofstream outFreq;
294                 
295                 openOutputFile(freqfile, outFreq);
296                 
297                 string length = toString(seqs.size());  //if there are 5000 seqs in the template then set precision to 3
298                 int precision = length.length() - 1;
299                 
300                 //format output
301                 outFreq.setf(ios::fixed, ios::floatfield); outFreq.setf(ios::showpoint);
302                 
303                 //at each position in the sequence
304                 for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
305                         
306                         vector<int> freq;   freq.resize(4,0);
307                         int gaps = 0;
308                         
309                         //find the frequency of each nucleotide
310                         for (int j = 0; j < seqs.size(); j++) {
311                                 
312                                 char value = seqs[j]->getAligned()[i];
313                                 
314                                 if(toupper(value) == 'A')                                                                       {       freq[0]++;      }
315                                 else if(toupper(value) == 'T' || toupper(value) == 'U')         {       freq[1]++;      }
316                                 else if(toupper(value) == 'G')                                                          {       freq[2]++;      }
317                                 else if(toupper(value) == 'C')                                                          {       freq[3]++;      }
318                                 else { gaps++; }
319                         }
320                         
321                         //find base with highest frequency
322                         int highest = 0;
323                         for (int m = 0; m < freq.size(); m++) {   if (freq[m] > highest) {  highest = freq[m];  }               }
324                         
325                         float highFreq = highest / (float) (seqs.size());       
326                         
327                         float Pi;
328                         Pi =  (highFreq - 0.25) / 0.75; 
329                         
330                         //cannot have probability less than 0.
331                         if (Pi < 0) { Pi = 0.0; }
332                         
333                         //saves this for later
334                         outFreq << setprecision(precision) << i << '\t' << highFreq << endl;
335         
336                         if (h.count(i) > 0) {
337                                 prob.push_back(Pi); 
338                         }
339                 }
340                 
341                 outFreq.close();
342                 
343                 return prob;
344                                 
345         }
346         catch(exception& e) {
347                 errorOut(e, "DeCalculator", "calcFreq");
348                 exit(1);
349         }
350 }
351 //***************************************************************************************************************
352 vector<float>  DeCalculator::findQav(vector<int> window, int size, vector<float> probabilityProfile) {
353         try {
354                 vector<float>  averages; 
355                                 
356                 //for each window find average
357                 for (int m = 0; m < window.size(); m++) {
358                                 
359                         float average = 0.0;
360                                 
361                         //while you are in the window for this sequence
362                         int count = 0;
363                         for (int j = window[m]; j < (window[m]+size); j++) {   
364                                 average += probabilityProfile[j];
365                                 count++;
366                         }
367                                 
368                         average = average / count;
369         
370                         //save this windows average
371                         averages.push_back(average);
372                 }
373                                 
374                 return averages;
375         }
376         catch(exception& e) {
377                 errorOut(e, "DeCalculator", "findQav");
378                 exit(1);
379         }
380 }
381 //***************************************************************************************************************
382 //seqs have already been masked
383 vector< vector<quanMember> > DeCalculator::getQuantiles(vector<Sequence*> seqs, vector<int> windowSizesTemplate, int window, vector<float> probProfile, int increment, int start, int end) {
384         try {
385                 vector< vector<quanMember> > quan; 
386                 
387                 //percentage of mismatched pairs 1 to 100
388                 quan.resize(100);
389 //ofstream o;
390 //string out = "getQuantiles.out";
391 //openOutputFile(out, o);
392                 
393                 //for each sequence
394                 for(int i = start; i < end; i++){
395                 
396                         mothurOut("Processing sequence " + toString(i)); mothurOutEndLine();
397                         Sequence* query = new Sequence(seqs[i]->getName(), seqs[i]->getAligned());
398                         
399                         //compare to every other sequence in template
400                         for(int j = 0; j < i; j++){
401                                 
402                                 Sequence* subject = new Sequence(seqs[j]->getName(), seqs[j]->getAligned());
403                                 
404                                 map<int, int> trim;
405                                 map<int, int>::iterator it;
406                                 
407                                 trimSeqs(query, subject, trim);
408                                 
409                                 it = trim.begin();
410                                 int front = it->first; int back = it->second;
411                                 
412                                 //reset window for each new comparison
413                                 windowSizesTemplate[i] = window;
414                                 
415                                 vector<int> win = findWindows(query, front, back, windowSizesTemplate[i], increment);
416                                 
417                                 vector<float> obsi = calcObserved(query, subject, win, windowSizesTemplate[i]);
418                                 
419                                 vector<float> q = findQav(win, windowSizesTemplate[i], probProfile);
420                                                                         
421                                 float alpha = getCoef(obsi, q);
422                                                 
423                                 vector<float> exp = calcExpected(q, alpha);
424                                 
425                                 float de = calcDE(obsi, exp);
426                                                                 
427                                 float dist = calcDist(query, subject, front, back); 
428         //o << i << '\t' <<  j << '\t' << dist << '\t' << de << endl;                   
429                                 dist = ceil(dist);
430                                 
431                                 quanMember newScore(de, i, j);
432                                 
433                                 quan[dist].push_back(newScore);
434                                 
435                                 delete subject;
436                         }
437                         
438                         delete query;
439                 }
440
441                 return quan;
442                                                 
443         }
444         catch(exception& e) {
445                 errorOut(e, "DeCalculator", "getQuantiles");
446                 exit(1);
447         }
448 }
449 //********************************************************************************************************************
450 //sorts lowest to highest
451 inline bool compareQuanMembers(quanMember left, quanMember right){
452         return (left.score < right.score);      
453
454 //***************************************************************************************************************
455 //this was going to be used by pintail to increase the sensitivity of the chimera detection, but it wasn't quite right.  may want to revisit in the future...
456 void DeCalculator::removeObviousOutliers(vector< vector<quanMember> >& quantiles, int num) {
457         try {
458                                                 
459                 for (int i = 0; i < quantiles.size(); i++) {
460                 
461                         //find mean of this quantile score
462                         sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
463                         
464                         float high = quantiles[i][int(quantiles[i].size() * 0.99)].score;
465                         float low =  quantiles[i][int(quantiles[i].size() * 0.01)].score;
466                         
467                         vector<quanMember> temp;
468                         
469                         //look at each value in quantiles to see if it is an outlier
470                         for (int j = 0; j < quantiles[i].size(); j++) {
471                                 //is this score between 1 and 99%
472                                 if ((quantiles[i][j].score > low) && (quantiles[i][j].score < high)) {
473                                         temp.push_back(quantiles[i][j]);
474                                 }
475                         }
476                         
477                         quantiles[i] = temp;
478                 }
479
480 /*
481                 //find contributer with most offending score related to it
482                 int largestContrib = findLargestContrib(seen);
483         
484                 //while you still have guys to eliminate
485                 while (contributions.size() > 0) {
486                 
487                         mothurOut("Removing scores contributed by sequence " + toString(largestContrib) + " in your template file."); mothurOutEndLine();
488                         
489                         //remove from quantiles all scores that were made using this largestContrib
490                         for (int i = 0; i < quantiles.size(); i++) {
491 //cout << "before remove " << quantiles[i].size() << '\t';
492                                 removeContrib(largestContrib, quantiles[i]);
493 //cout << "after remove " << quantiles[i].size() << endl;
494                         }
495 //cout << count << " largest contrib = " << largestContrib << endl;  count++;
496                         //remove from contributions all scores that were made using this largestContrib
497                         removeContrib(largestContrib, contributions);
498                         
499                         //"erase" largestContrib
500                         seen[largestContrib] = -1;
501                         
502                         //get next largestContrib
503                         largestContrib = findLargestContrib(seen);
504                 }
505 ABOVE IS ATTEMPT #1             
506 **************************************************************************************************
507 BELOW IS ATTEMPT #2             
508                 vector<int> marked = returnObviousOutliers(quantiles, num);
509                 vector<int> copyMarked = marked;
510                 
511                 //find the 99% of marked
512                 sort(copyMarked.begin(), copyMarked.end());
513                 int high = copyMarked[int(copyMarked.size() * 0.99)];
514 cout << "high = " << high << endl;              
515                 
516                 for(int i = 0; i < marked.size(); i++) {
517                         if (marked[i] > high) { 
518                                 mothurOut("Removing scores contributed by sequence " + toString(marked[i]) + " in your template file."); mothurOutEndLine();
519                                 for (int i = 0; i < quantiles.size(); i++) {
520                                         removeContrib(marked[i], quantiles[i]);
521                                 }
522                         }
523
524                 }
525                 
526                 
527                 //adjust quantiles
528                 for (int i = 0; i < quantiles.size(); i++) {
529                         vector<float> temp;
530                         
531                         if (quantiles[i].size() == 0) {
532                                 //in case this is not a distance found in your template files
533                                 for (int g = 0; g < 6; g++) {
534                                         temp.push_back(0.0);
535                                 }
536                         }else{
537                                 
538                                 sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
539                                 
540                                 //save 10%
541                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)].score);
542                                 //save 25%
543                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)].score);
544                                 //save 50%
545                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)].score);
546                                 //save 75%
547                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)].score);
548                                 //save 95%
549                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)].score);
550                                 //save 99%
551                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)].score);
552                                 
553                         }
554                         
555                         quan[i] = temp;
556                         
557                 }
558 */
559                 
560         }
561         catch(exception& e) {
562                 errorOut(e, "DeCalculator", "removeObviousOutliers");
563                 exit(1);
564         }
565 }
566 //***************************************************************************************************************
567 //put quanMember in the vector based on how far they are from the 99% or 1%.  Biggest offenders in front.
568 /*vector<quanMember> DeCalculator::sortContrib(map<quanMember*, float> quan) {
569         try{
570                 
571                 vector<quanMember> newQuan;
572                 map<quanMember*, float>::iterator it;
573                 
574                 while (quan.size() > 0) {
575                         
576                          map<quanMember*, float>::iterator largest = quan.begin(); 
577                           
578                         //find biggest member
579                         for (it = quan.begin(); it != quan.end(); it++) {
580                                 if (it->second > largest->second) {  largest = it;  }
581                         }
582 cout << largest->second << '\t' << largest->first->score << '\t' << largest->first->member1 << '\t' << largest->first->member2 << endl;
583                         //save it 
584                         newQuan.push_back(*(largest->first));
585                 
586                         //erase from quan
587                         quan.erase(largest);
588                 }
589                 
590                 return newQuan;
591                 
592         }
593         catch(exception& e) {
594                 errorOut(e, "DeCalculator", "sortContrib");
595                 exit(1);
596         }
597 }
598
599 //***************************************************************************************************************
600 //used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used...
601 int DeCalculator::findLargestContrib(vector<int> seen) {
602         try{
603                 
604                 int largest = 0;
605                 
606                 int largestContribs;
607                 
608                 for (int i = 0; i < seen.size(); i++)  {  
609                         
610                         if (seen[i] > largest) {
611                                 largestContribs = i;
612                                 largest = seen[i];
613                         }
614                 }
615                 
616                 return largestContribs;
617                 
618         }
619         catch(exception& e) {
620                 errorOut(e, "DeCalculator", "findLargestContribs");
621                 exit(1);
622         }
623 }
624 //***************************************************************************************************************
625 void DeCalculator::removeContrib(int bad, vector<quanMember>& quan) {
626         try{
627         
628                 vector<quanMember> newQuan;
629                 for (int i = 0; i < quan.size(); i++)  {  
630                         if ((quan[i].member1 != bad) && (quan[i].member2 != bad) ) {  
631                                 newQuan.push_back(quan[i]);  
632                         }
633                 }
634                 
635                 quan = newQuan;
636                 
637         }
638         catch(exception& e) {
639                 errorOut(e, "DeCalculator", "removeContrib");
640                 exit(1);
641         }
642 }
643 */
644 //***************************************************************************************************************
645 float DeCalculator::findAverage(vector<float> myVector) {
646         try{
647                 
648                 float total = 0.0;
649                 for (int i = 0; i < myVector.size(); i++)  {  total += myVector[i];  }
650                 
651                 float average = total / (float) myVector.size();
652                 
653                 return average;
654                 
655         }
656         catch(exception& e) {
657                 errorOut(e, "DeCalculator", "findAverage");
658                 exit(1);
659         }
660 }
661
662 //***************************************************************************************************************
663 float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
664         try {
665         
666                 //find average prob for this seqs windows
667                 float probAverage = findAverage(qav);
668                                 
669                 //find observed average 
670                 float obsAverage = findAverage(obs);
671                         
672                 float coef = obsAverage / probAverage;
673                                                 
674                 return coef;
675         }
676         catch(exception& e) {
677                 errorOut(e, "DeCalculator", "getCoef");
678                 exit(1);
679         }
680 }
681 //***************************************************************************************************************
682 //gets closest matches to each end, since chimeras will most likely have different parents on each end
683 vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int numWanted) {
684         try {
685                 
686                 vector<Sequence*> seqsMatches;  
687                 vector<SeqDist> dists;
688                 
689                 Dist* distcalculator = new eachGapDist();
690                 
691                 string queryAligned = querySeq->getAligned();
692                 string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
693                 string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
694                 
695                 Sequence queryLeft(querySeq->getName(), leftQuery);
696                 Sequence queryRight(querySeq->getName(), rightQuery);
697                                 
698                 for(int j = 0; j < db.size(); j++){
699                         
700                         string dbAligned = db[j]->getAligned();
701                         string leftDB = dbAligned.substr(0, (dbAligned.length() / 3)); //first 1/3 of the sequence
702                         string rightDB = dbAligned.substr(((dbAligned.length() / 3)*2)); //last 1/3 of the sequence
703                         
704                         Sequence dbLeft(db[j]->getName(), leftDB);
705                         Sequence dbRight(db[j]->getName(), rightDB);
706                         
707                         distcalculator->calcDist(queryLeft, dbLeft);
708                         float distLeft = distcalculator->getDist();
709                         
710                         distcalculator->calcDist(queryRight, dbRight);
711                         float distRight = distcalculator->getDist();
712                         
713                         float dist = min(distLeft, distRight); //keep smallest distance
714                         
715                         SeqDist subject;
716                         subject.seq = db[j];
717                         subject.dist = dist;
718                         
719                         dists.push_back(subject);
720                 }
721                 
722                 delete distcalculator;
723                 
724                 sort(dists.begin(), dists.end(), compareSeqDist);
725                 
726                 for (int i = 0; i < numWanted; i++) {
727                         Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
728                         seqsMatches.push_back(temp);
729                 }
730                 
731                 return seqsMatches;
732         }
733         catch(exception& e) {
734                 errorOut(e, "DeCalculator", "findClosest");
735                 exit(1);
736         }
737 }
738 /***************************************************************************************************************/
739 map<int, int> DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
740         try {
741                 
742                 int frontPos = 0;  //should contain first position in all seqs that is not a gap character
743                 int rearPos = query->getAligned().length();
744                 
745                 //********find first position in topMatches that is a non gap character***********//
746                 //find first position all query seqs that is a non gap character
747                 for (int i = 0; i < topMatches.size(); i++) {
748                         
749                         string aligned = topMatches[i]->getAligned();
750                         int pos = 0;
751                         
752                         //find first spot in this seq
753                         for (int j = 0; j < aligned.length(); j++) {
754                                 if (isalpha(aligned[j])) {
755                                         pos = j;
756                                         break;
757                                 }
758                         }
759                         
760                         //save this spot if it is the farthest
761                         if (pos > frontPos) { frontPos = pos; }
762                 }
763                 
764                 
765                 string aligned = query->getAligned();
766                 int pos = 0;
767                         
768                 //find first position in query that is a non gap character
769                 for (int j = 0; j < aligned.length(); j++) {
770                         if (isalpha(aligned[j])) {
771                                 pos = j;
772                                 break;
773                         }
774                 }
775                 
776                 //save this spot if it is the farthest
777                 if (pos > frontPos) { frontPos = pos; }
778                 
779                 
780                 //********find last position in topMatches that is a non gap character***********//
781                 for (int i = 0; i < topMatches.size(); i++) {
782                         
783                         string aligned = topMatches[i]->getAligned();
784                         int pos = aligned.length();
785                         
786                         //find first spot in this seq
787                         for (int j = aligned.length()-1; j >= 0; j--) {
788                                 if (isalpha(aligned[j])) {
789                                         pos = j;
790                                         break;
791                                 }
792                         }
793                         
794                         //save this spot if it is the farthest
795                         if (pos < rearPos) { rearPos = pos; }
796                 }
797                 
798                 
799                 aligned = query->getAligned();
800                 pos = aligned.length();
801                 
802                 //find last position in query that is a non gap character
803                 for (int j = aligned.length()-1; j >= 0; j--) {
804                         if (isalpha(aligned[j])) {
805                                 pos = j;
806                                 break;
807                         }
808                 }
809                 
810                 //save this spot if it is the farthest
811                 if (pos < rearPos) { rearPos = pos; }
812
813                 //check to make sure that is not whole seq
814                 if ((rearPos - frontPos - 1) <= 0) {  mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1);  }
815 //cout << "front = " << frontPos << " rear = " << rearPos << endl;              
816                 //trim query
817                 string newAligned = query->getAligned();
818                 newAligned = newAligned.substr(frontPos, (rearPos-frontPos));
819                 query->setAligned(newAligned);
820                 
821                 //trim topMatches
822                 for (int i = 0; i < topMatches.size(); i++) {
823                         newAligned = topMatches[i]->getAligned();
824                         newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
825                         topMatches[i]->setAligned(newAligned);
826                 }
827                 
828                 map<int, int> trimmedPos;
829                 
830                 for (int i = 0; i < newAligned.length(); i++) {
831                         trimmedPos[i] = i+frontPos;
832                 }
833                 
834                 return trimmedPos;
835         }
836         catch(exception& e) {
837                 errorOut(e, "DeCalculator", "trimSequences");
838                 exit(1);
839         }
840
841 }
842 //***************************************************************************************************************
843
844
845