]> git.donarmstrong.com Git - mothur.git/blob - decalc.cpp
started adding chimeraslayer method and fixed minor bug in treegroupscommand deconstr...
[mothur.git] / decalc.cpp
1 /*
2  *  decalc.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 7/22/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "decalc.h"
11 #include "chimera.h"
12 #include "dist.h"
13 #include "eachgapdist.h"
14
15
16 //***************************************************************************************************************
17 void DeCalculator::setMask(string m) { 
18         try {
19                 seqMask = m; 
20                 int count = 0;
21                 maskMap.clear();
22                 
23                 if (seqMask.length() != 0) {
24                         //whereever there is a base in the mask, save that value is query and subject
25                         for (int i = 0; i < seqMask.length(); i++) {
26                                 if (isalpha(seqMask[i])) {
27                                         h.insert(i);
28                                         maskMap[count] = i;
29                                         count++;
30                                         
31                                 }
32                         }
33                 }else {
34                         for (int i = 0; i < alignLength; i++) {   
35                                 h.insert(i); 
36                                 maskMap[count] = i;
37                                 count++;
38                         }
39                 }
40         }
41         catch(exception& e) {
42                 errorOut(e, "DeCalculator", "setMask");
43                 exit(1);
44         } 
45 }
46 //***************************************************************************************************************
47 void DeCalculator::runMask(Sequence* seq) {
48         try{
49                 
50                 string q = seq->getAligned();
51                 string tempQuery = "";
52                 
53                 //whereever there is a base in the mask, save that value is query and subject
54                 set<int>::iterator setit;
55                 for ( setit=h.begin() ; setit != h.end(); setit++ )  {
56                         tempQuery += q[*setit];
57                 }
58                 
59                 //save masked values
60                 seq->setAligned(tempQuery);
61                 seq->setUnaligned(tempQuery);
62         }
63         catch(exception& e) {
64                 errorOut(e, "DeCalculator", "runMask");
65                 exit(1);
66         }
67 }
68 //***************************************************************************************************************
69 //num is query's spot in querySeqs
70 void DeCalculator::trimSeqs(Sequence* query, Sequence* subject, map<int, int>& trim) {
71         try {
72                 
73                 string q = query->getAligned();
74                 string s = subject->getAligned();
75                 
76                 int front = 0;
77                 for (int i = 0; i < q.length(); i++) {
78 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
79                         if (isalpha(q[i]) && isalpha(s[i])) { front = i; break;  }
80                 }
81 //cout << endl << endl;         
82                 int back = 0;           
83                 for (int i = q.length(); i >= 0; i--) {
84 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
85                         if (isalpha(q[i]) && isalpha(s[i])) { back = i; break;  }
86                 }
87                 
88                 trim[front] = back;
89                 
90         }
91         catch(exception& e) {
92                 errorOut(e, "DeCalculator", "trimSeqs");
93                 exit(1);
94         }
95 }
96 //***************************************************************************************************************
97 vector<int>  DeCalculator::findWindows(Sequence* query, int front, int back, int& size, int increment) {
98         try {
99                 
100                 vector<int> win; 
101                 
102                 int cutoff = back - front;  //back - front 
103                         
104                 //if window is set to default
105                 if (size == 0) {  if (cutoff > 1200) {  size = 300; }
106                                                         else{  size = (cutoff / 4); }  } 
107                 else if (size > (cutoff / 4)) { 
108                                 mothurOut("You have selected to large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
109                                 size = (cutoff / 4); 
110                 }
111         
112         /*      string seq = query->getAligned().substr(front, cutoff);
113                         
114                 //count bases
115                 int numBases = 0;
116                 for (int l = 0; l < seq.length(); l++) {  if (isalpha(seq[l])) { numBases++; }  }
117 //cout << "num Bases = " << numBases << endl;                   
118                 //save start of seq
119                 win.push_back(front);
120 //cout << front << '\t';                
121                 //move ahead increment bases at a time until all bases are in a window
122                 int countBases = 0;
123                 int totalBases = 0;  //used to eliminate window of blanks at end of sequence
124                         
125                 seq = query->getAligned();
126                 for (int m = front; m < (back - size) ; m++) {
127                                 
128                         //count number of bases you see
129                         if (isalpha(seq[m])) { countBases++;  }
130                                 
131                         //if you have seen enough bases to make a new window
132                         if (countBases >= increment) {
133                                 //total bases is the number of bases in a window already.
134                                 totalBases += countBases;
135 //cout << "total bases = " << totalBases << endl;
136                                 win.push_back(m);  //save spot in alignment
137 //cout << m << '\t';
138                                 countBases = 0;                         //reset bases you've seen in this window
139                         }
140                                 
141                         //no need to continue if all your bases are in a window
142                         if (totalBases == numBases) {   break;  }
143                 }
144         
145
146                 //get last window if needed
147                 if (totalBases < numBases) {   win.push_back(back-size);  }
148 //cout << endl << endl;
149 */      
150                 
151                 //this follows wigeon, but we may want to consider that it chops off the end values if the sequence cannot be evenly divided into steps
152                 for (int m = front; m < (back - size) ; m+=increment) {  win.push_back(m);  }
153
154
155         
156                 return win;
157         
158         }
159         catch(exception& e) {
160                 errorOut(e, "DeCalculator", "findWindows");
161                 exit(1);
162         }
163 }
164
165 //***************************************************************************************************************
166 vector<float> DeCalculator::calcObserved(Sequence* query, Sequence* subject, vector<int> window, int size) {
167         try {
168                 
169                 vector<float> temp;     
170                 //int gaps = 0;         
171                 for (int m = 0; m < window.size(); m++) {
172                                                 
173                         string seqFrag = query->getAligned().substr(window[m], size);
174                         string seqFragsub = subject->getAligned().substr(window[m], size);
175                                 
176                         int diff = 0;
177                         for (int b = 0; b < seqFrag.length(); b++) {
178                                 //if at least one is a base and they are not equal
179                                 if( (isalpha(seqFrag[b]) || isalpha(seqFragsub[b])) && (seqFrag[b] != seqFragsub[b]) ) { diff++; }
180                                 
181                                 //ignore gaps
182                                 //if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
183                         }
184                
185                         //percentage of mismatched bases
186                         float dist;
187                         
188                         //if the whole fragment is 0 distance = 0
189                         //if ((seqFrag.length()-gaps) == 0)  { dist =  0.0; }
190                
191                         //percentage of mismatched bases
192                         //else {  dist = diff / (float) (seqFrag.length()-gaps) * 100;   } 
193                         
194                         dist = diff / (float) (seqFrag.length()) * 100; 
195                         
196                         temp.push_back(dist);
197                 }
198                         
199                 return temp;
200         }
201         catch(exception& e) {
202                 errorOut(e, "DeCalculator", "calcObserved");
203                 exit(1);
204         }
205 }
206 //***************************************************************************************************************
207 float DeCalculator::calcDist(Sequence* query, Sequence* subject, int front, int back) {
208         try {
209                 
210                 //so you only look at the trimmed part of the sequence
211                 int cutoff = back - front;  
212                 int gaps = 0;
213                         
214                 //from first startpoint with length back-front
215                 string seqFrag = query->getAligned().substr(front, cutoff);
216                 string seqFragsub = subject->getAligned().substr(front, cutoff);
217                                                                                                                 
218                 int diff = 0;
219                 for (int b = 0; b < seqFrag.length(); b++) {
220                         //ignore gaps
221                         if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
222                         if (seqFrag[b] != seqFragsub[b]) { diff++; }
223                 }
224                 
225                 //if the whole fragment is 0 distance = 0
226                 if ((seqFrag.length()-gaps) == 0)  { return 0.0; }
227                
228                 //percentage of mismatched bases
229                 float dist = diff / (float) (seqFrag.length()-gaps) * 100;       
230                                 
231                 return dist;
232         }
233         catch(exception& e) {
234                 errorOut(e, "DeCalculator", "calcDist");
235                 exit(1);
236         }
237 }
238
239 //***************************************************************************************************************
240 vector<float> DeCalculator::calcExpected(vector<float> qav, float coef) {
241         try {
242                 
243                 //for each window
244                 vector<float> queryExpected;
245                         
246                 for (int m = 0; m < qav.size(); m++) {          
247                                 
248                         float expected = qav[m] * coef;
249                                 
250                         queryExpected.push_back(expected);      
251                 }
252                         
253                 return queryExpected;
254                                 
255         }
256         catch(exception& e) {
257                 errorOut(e, "DeCalculator", "calcExpected");
258                 exit(1);
259         }
260 }
261 //***************************************************************************************************************
262 float DeCalculator::calcDE(vector<float> obs, vector<float> exp) {
263         try {
264                 
265                 //for each window
266                 float sum = 0.0;  //sum = sum from 1 to m of (oi-ei)^2
267                 int numZeros = 0;
268                 for (int m = 0; m < obs.size(); m++) {          
269                         
270                         //if (obs[m] != 0.0) {
271                                 sum += ((obs[m] - exp[m]) * (obs[m] - exp[m])); 
272                         //}else {  numZeros++;   }
273                         
274                 }
275                         
276                 float de = sqrt((sum / (obs.size() - 1 - numZeros)));
277                         
278                 return de;
279         }
280         catch(exception& e) {
281                 errorOut(e, "DeCalculator", "calcDE");
282                 exit(1);
283         }
284 }
285
286 //***************************************************************************************************************
287
288 vector<float> DeCalculator::calcFreq(vector<Sequence*> seqs, string filename) {
289         try {
290
291                 vector<float> prob;
292                 string freqfile = getRootName(filename) + "freq";
293                 ofstream outFreq;
294                 
295                 openOutputFile(freqfile, outFreq);
296                 
297                 string length = toString(seqs.size());  //if there are 5000 seqs in the template then set precision to 3
298                 int precision = length.length() - 1;
299                 
300                 //format output
301                 outFreq.setf(ios::fixed, ios::floatfield); outFreq.setf(ios::showpoint);
302                 
303                 //at each position in the sequence
304                 for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
305                         
306                         vector<int> freq;   freq.resize(4,0);
307                         int gaps = 0;
308                         
309                         //find the frequency of each nucleotide
310                         for (int j = 0; j < seqs.size(); j++) {
311                                 
312                                 char value = seqs[j]->getAligned()[i];
313                                 
314                                 if(toupper(value) == 'A')                                                                       {       freq[0]++;      }
315                                 else if(toupper(value) == 'T' || toupper(value) == 'U')         {       freq[1]++;      }
316                                 else if(toupper(value) == 'G')                                                          {       freq[2]++;      }
317                                 else if(toupper(value) == 'C')                                                          {       freq[3]++;      }
318                                 else { gaps++; }
319                         }
320                         
321                         //find base with highest frequency
322                         int highest = 0;
323                         for (int m = 0; m < freq.size(); m++) {   if (freq[m] > highest) {  highest = freq[m];  }               }
324                         
325                         float highFreq = highest / (float) (seqs.size());       
326                         
327                         float Pi;
328                         Pi =  (highFreq - 0.25) / 0.75; 
329                         
330                         //cannot have probability less than 0.
331                         if (Pi < 0) { Pi = 0.0; }
332                         
333                         //saves this for later
334                         outFreq << setprecision(precision) << i << '\t' << highFreq << endl;
335         
336                         if (h.count(i) > 0) {
337                                 prob.push_back(Pi); 
338                         }
339                 }
340                 
341                 outFreq.close();
342                 
343                 return prob;
344                                 
345         }
346         catch(exception& e) {
347                 errorOut(e, "DeCalculator", "calcFreq");
348                 exit(1);
349         }
350 }
351 //***************************************************************************************************************
352 vector<float>  DeCalculator::findQav(vector<int> window, int size, vector<float> probabilityProfile) {
353         try {
354                 vector<float>  averages; 
355                                 
356                 //for each window find average
357                 for (int m = 0; m < window.size(); m++) {
358                                 
359                         float average = 0.0;
360                                 
361                         //while you are in the window for this sequence
362                         int count = 0;
363                         for (int j = window[m]; j < (window[m]+size); j++) {   
364                                 average += probabilityProfile[j];
365                                 count++;
366                         }
367                                 
368                         average = average / count;
369         
370                         //save this windows average
371                         averages.push_back(average);
372                 }
373                                 
374                 return averages;
375         }
376         catch(exception& e) {
377                 errorOut(e, "DeCalculator", "findQav");
378                 exit(1);
379         }
380 }
381 //***************************************************************************************************************
382 //seqs have already been masked
383 vector< vector<quanMember> > DeCalculator::getQuantiles(vector<Sequence*> seqs, vector<int> windowSizesTemplate, int window, vector<float> probProfile, int increment, int start, int end) {
384         try {
385                 vector< vector<quanMember> > quan; 
386                 
387                 //percentage of mismatched pairs 1 to 100
388                 quan.resize(100);
389 //ofstream o;
390 //string out = "getQuantiles.out";
391 //openOutputFile(out, o);
392                 
393                 //for each sequence
394                 for(int i = start; i < end; i++){
395                 
396                         mothurOut("Processing sequence " + toString(i)); mothurOutEndLine();
397                         Sequence* query = new Sequence(seqs[i]->getName(), seqs[i]->getAligned());
398                         
399                         //compare to every other sequence in template
400                         for(int j = 0; j < i; j++){
401                                 
402                                 Sequence* subject = new Sequence(seqs[j]->getName(), seqs[j]->getAligned());
403                                 
404                                 map<int, int> trim;
405                                 map<int, int>::iterator it;
406                                 
407                                 trimSeqs(query, subject, trim);
408                                 
409                                 it = trim.begin();
410                                 int front = it->first; int back = it->second;
411                                 
412                                 //reset window for each new comparison
413                                 windowSizesTemplate[i] = window;
414                                 
415                                 vector<int> win = findWindows(query, front, back, windowSizesTemplate[i], increment);
416                                 
417                                 vector<float> obsi = calcObserved(query, subject, win, windowSizesTemplate[i]);
418                                 
419                                 vector<float> q = findQav(win, windowSizesTemplate[i], probProfile);
420                                                                         
421                                 float alpha = getCoef(obsi, q);
422                                                 
423                                 vector<float> exp = calcExpected(q, alpha);
424                                 
425                                 float de = calcDE(obsi, exp);
426                                                                 
427                                 float dist = calcDist(query, subject, front, back); 
428         //o << i << '\t' <<  j << '\t' << dist << '\t' << de << endl;                   
429                                 dist = ceil(dist);
430                                 
431                                 quanMember newScore(de, i, j);
432                                 
433                                 quan[dist].push_back(newScore);
434                                 
435                                 delete subject;
436                         }
437                         
438                         delete query;
439                 }
440
441                 return quan;
442                                                 
443         }
444         catch(exception& e) {
445                 errorOut(e, "DeCalculator", "getQuantiles");
446                 exit(1);
447         }
448 }
449 //********************************************************************************************************************
450 //sorts lowest to highest
451 inline bool compareQuanMembers(quanMember left, quanMember right){
452         return (left.score < right.score);      
453
454 //***************************************************************************************************************
455 //this was going to be used by pintail to increase the sensitivity of the chimera detection, but it wasn't quite right.  may want to revisit in the future...
456 void DeCalculator::removeObviousOutliers(vector< vector<quanMember> >& quantiles, int num) {
457         try {
458                                                 
459                 for (int i = 0; i < quantiles.size(); i++) {
460                 
461                         //find mean of this quantile score
462                         sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
463                         
464                         float high = quantiles[i][int(quantiles[i].size() * 0.99)].score;
465                         float low =  quantiles[i][int(quantiles[i].size() * 0.01)].score;
466                         
467                         vector<quanMember> temp;
468                         
469                         //look at each value in quantiles to see if it is an outlier
470                         for (int j = 0; j < quantiles[i].size(); j++) {
471                                 //is this score between 1 and 99%
472                                 if ((quantiles[i][j].score > low) && (quantiles[i][j].score < high)) {
473                                         temp.push_back(quantiles[i][j]);
474                                 }
475                         }
476                         
477                         quantiles[i] = temp;
478                 }
479
480 /*
481                 //find contributer with most offending score related to it
482                 int largestContrib = findLargestContrib(seen);
483         
484                 //while you still have guys to eliminate
485                 while (contributions.size() > 0) {
486                 
487                         mothurOut("Removing scores contributed by sequence " + toString(largestContrib) + " in your template file."); mothurOutEndLine();
488                         
489                         //remove from quantiles all scores that were made using this largestContrib
490                         for (int i = 0; i < quantiles.size(); i++) {
491 //cout << "before remove " << quantiles[i].size() << '\t';
492                                 removeContrib(largestContrib, quantiles[i]);
493 //cout << "after remove " << quantiles[i].size() << endl;
494                         }
495 //cout << count << " largest contrib = " << largestContrib << endl;  count++;
496                         //remove from contributions all scores that were made using this largestContrib
497                         removeContrib(largestContrib, contributions);
498                         
499                         //"erase" largestContrib
500                         seen[largestContrib] = -1;
501                         
502                         //get next largestContrib
503                         largestContrib = findLargestContrib(seen);
504                 }
505 ABOVE IS ATTEMPT #1             
506 **************************************************************************************************
507 BELOW IS ATTEMPT #2             
508                 vector<int> marked = returnObviousOutliers(quantiles, num);
509                 vector<int> copyMarked = marked;
510                 
511                 //find the 99% of marked
512                 sort(copyMarked.begin(), copyMarked.end());
513                 int high = copyMarked[int(copyMarked.size() * 0.99)];
514 cout << "high = " << high << endl;              
515                 
516                 for(int i = 0; i < marked.size(); i++) {
517                         if (marked[i] > high) { 
518                                 mothurOut("Removing scores contributed by sequence " + toString(marked[i]) + " in your template file."); mothurOutEndLine();
519                                 for (int i = 0; i < quantiles.size(); i++) {
520                                         removeContrib(marked[i], quantiles[i]);
521                                 }
522                         }
523
524                 }
525                 
526                 
527                 //adjust quantiles
528                 for (int i = 0; i < quantiles.size(); i++) {
529                         vector<float> temp;
530                         
531                         if (quantiles[i].size() == 0) {
532                                 //in case this is not a distance found in your template files
533                                 for (int g = 0; g < 6; g++) {
534                                         temp.push_back(0.0);
535                                 }
536                         }else{
537                                 
538                                 sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
539                                 
540                                 //save 10%
541                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)].score);
542                                 //save 25%
543                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)].score);
544                                 //save 50%
545                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)].score);
546                                 //save 75%
547                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)].score);
548                                 //save 95%
549                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)].score);
550                                 //save 99%
551                                 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)].score);
552                                 
553                         }
554                         
555                         quan[i] = temp;
556                         
557                 }
558 */
559                 
560         }
561         catch(exception& e) {
562                 errorOut(e, "DeCalculator", "removeObviousOutliers");
563                 exit(1);
564         }
565 }
566 //***************************************************************************************************************
567 //put quanMember in the vector based on how far they are from the 99% or 1%.  Biggest offenders in front.
568 /*vector<quanMember> DeCalculator::sortContrib(map<quanMember*, float> quan) {
569         try{
570                 
571                 vector<quanMember> newQuan;
572                 map<quanMember*, float>::iterator it;
573                 
574                 while (quan.size() > 0) {
575                         
576                          map<quanMember*, float>::iterator largest = quan.begin(); 
577                           
578                         //find biggest member
579                         for (it = quan.begin(); it != quan.end(); it++) {
580                                 if (it->second > largest->second) {  largest = it;  }
581                         }
582 cout << largest->second << '\t' << largest->first->score << '\t' << largest->first->member1 << '\t' << largest->first->member2 << endl;
583                         //save it 
584                         newQuan.push_back(*(largest->first));
585                 
586                         //erase from quan
587                         quan.erase(largest);
588                 }
589                 
590                 return newQuan;
591                 
592         }
593         catch(exception& e) {
594                 errorOut(e, "DeCalculator", "sortContrib");
595                 exit(1);
596         }
597 }
598
599 //***************************************************************************************************************
600 //used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used...
601 int DeCalculator::findLargestContrib(vector<int> seen) {
602         try{
603                 
604                 int largest = 0;
605                 
606                 int largestContribs;
607                 
608                 for (int i = 0; i < seen.size(); i++)  {  
609                         
610                         if (seen[i] > largest) {
611                                 largestContribs = i;
612                                 largest = seen[i];
613                         }
614                 }
615                 
616                 return largestContribs;
617                 
618         }
619         catch(exception& e) {
620                 errorOut(e, "DeCalculator", "findLargestContribs");
621                 exit(1);
622         }
623 }
624 //***************************************************************************************************************
625 void DeCalculator::removeContrib(int bad, vector<quanMember>& quan) {
626         try{
627         
628                 vector<quanMember> newQuan;
629                 for (int i = 0; i < quan.size(); i++)  {  
630                         if ((quan[i].member1 != bad) && (quan[i].member2 != bad) ) {  
631                                 newQuan.push_back(quan[i]);  
632                         }
633                 }
634                 
635                 quan = newQuan;
636                 
637         }
638         catch(exception& e) {
639                 errorOut(e, "DeCalculator", "removeContrib");
640                 exit(1);
641         }
642 }
643 */
644 //***************************************************************************************************************
645 float DeCalculator::findAverage(vector<float> myVector) {
646         try{
647                 
648                 float total = 0.0;
649                 for (int i = 0; i < myVector.size(); i++)  {  total += myVector[i];  }
650                 
651                 float average = total / (float) myVector.size();
652                 
653                 return average;
654                 
655         }
656         catch(exception& e) {
657                 errorOut(e, "DeCalculator", "findAverage");
658                 exit(1);
659         }
660 }
661
662 //***************************************************************************************************************
663 float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
664         try {
665         
666                 //find average prob for this seqs windows
667                 float probAverage = findAverage(qav);
668                                 
669                 //find observed average 
670                 float obsAverage = findAverage(obs);
671                         
672                 float coef = obsAverage / probAverage;
673                                                 
674                 return coef;
675         }
676         catch(exception& e) {
677                 errorOut(e, "DeCalculator", "getCoef");
678                 exit(1);
679         }
680 }
681 //***************************************************************************************************************
682 vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int numWanted) {
683         try {
684                 
685                 vector<Sequence*> seqsMatches;  
686                 vector<SeqDist> dists;
687                 
688                 Dist* distcalculator = new eachGapDist();
689                 
690                 Sequence query = *(querySeq);
691                 
692                 for(int j = 0; j < db.size(); j++){
693                         
694                         Sequence temp = *(db[j]);
695                         
696                         distcalculator->calcDist(query, temp);
697                         float dist = distcalculator->getDist();
698                         
699                         SeqDist subject;
700                         subject.seq = db[j];
701                         subject.dist = dist;
702                         
703                         dists.push_back(subject);
704                 }
705                 
706                 delete distcalculator;
707                 
708                 sort(dists.begin(), dists.end(), compareSeqDist);
709                 
710                 for (int i = 0; i < numWanted; i++) {
711                         Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
712                         seqsMatches.push_back(temp);
713                 }
714                 
715                 return seqsMatches;
716         }
717         catch(exception& e) {
718                 errorOut(e, "DeCalculator", "findClosest");
719                 exit(1);
720         }
721 }
722 /***************************************************************************************************************/
723 void DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
724         try {
725                 
726                 int frontPos = 0;  //should contain first position in all seqs that is not a gap character
727                 int rearPos = query->getAligned().length();
728                 
729                 //********find first position in topMatches that is a non gap character***********//
730                 //find first position all query seqs that is a non gap character
731                 for (int i = 0; i < topMatches.size(); i++) {
732                         
733                         string aligned = topMatches[i]->getAligned();
734                         int pos = 0;
735                         
736                         //find first spot in this seq
737                         for (int j = 0; j < aligned.length(); j++) {
738                                 if (isalpha(aligned[j])) {
739                                         pos = j;
740                                         break;
741                                 }
742                         }
743                         
744                         //save this spot if it is the farthest
745                         if (pos > frontPos) { frontPos = pos; }
746                 }
747                 
748                 
749                 string aligned = query->getAligned();
750                 int pos = 0;
751                         
752                 //find first position in query that is a non gap character
753                 for (int j = 0; j < aligned.length(); j++) {
754                         if (isalpha(aligned[j])) {
755                                 pos = j;
756                                 break;
757                         }
758                 }
759                 
760                 //save this spot if it is the farthest
761                 if (pos > frontPos) { frontPos = pos; }
762                 
763                 
764                 //********find last position in topMatches that is a non gap character***********//
765                 for (int i = 0; i < topMatches.size(); i++) {
766                         
767                         string aligned = topMatches[i]->getAligned();
768                         int pos = aligned.length();
769                         
770                         //find first spot in this seq
771                         for (int j = aligned.length()-1; j >= 0; j--) {
772                                 if (isalpha(aligned[j])) {
773                                         pos = j;
774                                         break;
775                                 }
776                         }
777                         
778                         //save this spot if it is the farthest
779                         if (pos < rearPos) { rearPos = pos; }
780                 }
781                 
782                 
783                 aligned = query->getAligned();
784                 pos = aligned.length();
785                 
786                 //find last position in query that is a non gap character
787                 for (int j = aligned.length()-1; j >= 0; j--) {
788                         if (isalpha(aligned[j])) {
789                                 pos = j;
790                                 break;
791                         }
792                 }
793                 
794                 //save this spot if it is the farthest
795                 if (pos < rearPos) { rearPos = pos; }
796
797                 //check to make sure that is not whole seq
798                 if ((rearPos - frontPos - 1) <= 0) {  mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1);  }
799 //cout << "front = " << frontPos << " rear = " << rearPos << endl;              
800                 //trim query
801                 string newAligned = query->getAligned();
802                 newAligned = newAligned.substr(frontPos, (rearPos-frontPos));
803                 query->setAligned(newAligned);
804                 
805                 //trim topMatches
806                 for (int i = 0; i < topMatches.size(); i++) {
807                         newAligned = topMatches[i]->getAligned();
808                         newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
809                         topMatches[i]->setAligned(newAligned);
810                 }
811                 
812         }
813         catch(exception& e) {
814                 errorOut(e, "DeCalculator", "trimSequences");
815                 exit(1);
816         }
817
818 }
819 //***************************************************************************************************************
820
821
822