5 * Created by Sarah Westcott on 7/22/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
13 #include "eachgapdist.h"
14 #include "ignoregaps.h"
15 #include "eachgapdist.h"
17 //***************************************************************************************************************
18 void DeCalculator::setMask(string ms) {
24 if (seqMask.length() != 0) {
25 //whereever there is a base in the mask, save that value is query and subject
26 for (int i = 0; i < seqMask.length(); i++) {
27 if (isalpha(seqMask[i])) {
35 for (int i = 0; i < alignLength; i++) {
43 m->errorOut(e, "DeCalculator", "setMask");
47 //***************************************************************************************************************
48 void DeCalculator::runMask(Sequence* seq) {
51 string q = seq->getAligned();
52 string tempQuery = "";
54 //whereever there is a base in the mask, save that value is query and subject
55 set<int>::iterator setit;
56 for ( setit=h.begin() ; setit != h.end(); setit++ ) {
57 tempQuery += q[*setit];
61 seq->setAligned(tempQuery);
62 seq->setUnaligned(tempQuery);
65 m->errorOut(e, "DeCalculator", "runMask");
69 //***************************************************************************************************************
70 //num is query's spot in querySeqs
71 void DeCalculator::trimSeqs(Sequence* query, Sequence* subject, map<int, int>& trim) {
74 string q = query->getAligned();
75 string s = subject->getAligned();
78 for (int i = 0; i < q.length(); i++) {
79 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
80 if (isalpha(q[i]) && isalpha(s[i])) { front = i; break; }
82 //cout << endl << endl;
84 for (int i = q.length(); i >= 0; i--) {
85 //cout << "query = " << q[i] << " subject = " << s[i] << endl;
86 if (isalpha(q[i]) && isalpha(s[i])) { back = i; break; }
93 m->errorOut(e, "DeCalculator", "trimSeqs");
97 //***************************************************************************************************************
98 vector<int> DeCalculator::findWindows(Sequence* query, int front, int back, int& size, int increment) {
103 int cutoff = back - front; //back - front
105 //if window is set to default
106 if (size == 0) { if (cutoff > 1200) { size = 300; }
107 else{ size = (cutoff / 4); } }
108 else if (size > (cutoff / 4)) {
109 m->mothurOut("You have selected too large a window size for sequence " + query->getName() + ". I will choose an appropriate window size."); m->mothurOutEndLine();
113 /* string seq = query->getAligned().substr(front, cutoff);
117 for (int l = 0; l < seq.length(); l++) { if (isalpha(seq[l])) { numBases++; } }
118 //cout << "num Bases = " << numBases << endl;
120 win.push_back(front);
121 //cout << front << '\t';
122 //move ahead increment bases at a time until all bases are in a window
124 int totalBases = 0; //used to eliminate window of blanks at end of sequence
126 seq = query->getAligned();
127 for (int m = front; m < (back - size) ; m++) {
129 //count number of bases you see
130 if (isalpha(seq[m])) { countBases++; }
132 //if you have seen enough bases to make a new window
133 if (countBases >= increment) {
134 //total bases is the number of bases in a window already.
135 totalBases += countBases;
136 //cout << "total bases = " << totalBases << endl;
137 win.push_back(m); //save spot in alignment
139 countBases = 0; //reset bases you've seen in this window
142 //no need to continue if all your bases are in a window
143 if (totalBases == numBases) { break; }
147 //get last window if needed
148 if (totalBases < numBases) { win.push_back(back-size); }
149 //cout << endl << endl;
152 //this follows wigeon, but we may want to consider that it chops off the end values if the sequence cannot be evenly divided into steps
153 for (int i = front; i < (back - size) ; i+=increment) { win.push_back(i); }
160 catch(exception& e) {
161 m->errorOut(e, "DeCalculator", "findWindows");
166 //***************************************************************************************************************
167 vector<float> DeCalculator::calcObserved(Sequence* query, Sequence* subject, vector<int> window, int size) {
172 for (int i = 0; i < window.size(); i++) {
174 string seqFrag = query->getAligned().substr(window[i], size);
175 string seqFragsub = subject->getAligned().substr(window[i], size);
178 for (int b = 0; b < seqFrag.length(); b++) {
179 //if at least one is a base and they are not equal
180 if( (isalpha(seqFrag[b]) || isalpha(seqFragsub[b])) && (seqFrag[b] != seqFragsub[b]) ) { diff++; }
183 //if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
186 //percentage of mismatched bases
189 //if the whole fragment is 0 distance = 0
190 //if ((seqFrag.length()-gaps) == 0) { dist = 0.0; }
192 //percentage of mismatched bases
193 //else { dist = diff / (float) (seqFrag.length()-gaps) * 100; }
195 dist = diff / (float) (seqFrag.length()) * 100;
197 temp.push_back(dist);
202 catch(exception& e) {
203 m->errorOut(e, "DeCalculator", "calcObserved");
207 //***************************************************************************************************************
208 float DeCalculator::calcDist(Sequence* query, Sequence* subject, int front, int back) {
211 //so you only look at the trimmed part of the sequence
212 int cutoff = back - front;
215 //from first startpoint with length back-front
216 string seqFrag = query->getAligned().substr(front, cutoff);
217 string seqFragsub = subject->getAligned().substr(front, cutoff);
220 for (int b = 0; b < seqFrag.length(); b++) {
222 if((!isalpha(seqFrag[b])) && (!isalpha(seqFragsub[b]))) { gaps++; }
223 if (seqFrag[b] != seqFragsub[b]) { diff++; }
226 //if the whole fragment is 0 distance = 0
227 if ((seqFrag.length()-gaps) == 0) { return 0.0; }
229 //percentage of mismatched bases
230 float dist = diff / (float) (seqFrag.length()-gaps) * 100;
234 catch(exception& e) {
235 m->errorOut(e, "DeCalculator", "calcDist");
240 //***************************************************************************************************************
241 vector<float> DeCalculator::calcExpected(vector<float> qav, float coef) {
245 vector<float> queryExpected;
247 for (int j = 0; j < qav.size(); j++) {
249 float expected = qav[j] * coef;
251 queryExpected.push_back(expected);
254 return queryExpected;
257 catch(exception& e) {
258 m->errorOut(e, "DeCalculator", "calcExpected");
262 //***************************************************************************************************************
263 float DeCalculator::calcDE(vector<float> obs, vector<float> exp) {
267 float sum = 0.0; //sum = sum from 1 to i of (oi-ei)^2
269 for (int j = 0; j < obs.size(); j++) {
271 //if (obs[j] != 0.0) {
272 sum += ((obs[j] - exp[j]) * (obs[j] - exp[j]));
273 //}else { numZeros++; }
277 float de = sqrt((sum / (obs.size() - 1 - numZeros)));
281 catch(exception& e) {
282 m->errorOut(e, "DeCalculator", "calcDE");
287 //***************************************************************************************************************
289 vector<float> DeCalculator::calcFreq(vector<Sequence*> seqs, string filename) {
293 string freqfile = m->getRootName(filename) + "freq";
296 m->openOutputFile(freqfile, outFreq);
298 outFreq << "#" << m->getVersion() << endl;
300 string length = toString(seqs.size()); //if there are 5000 seqs in the template then set precision to 3
301 int precision = length.length() - 1;
304 outFreq.setf(ios::fixed, ios::floatfield); outFreq.setf(ios::showpoint);
306 //at each position in the sequence
307 for (int i = 0; i < seqs[0]->getAligned().length(); i++) {
309 vector<int> freq; freq.resize(4,0);
312 //find the frequency of each nucleotide
313 for (int j = 0; j < seqs.size(); j++) {
315 char value = seqs[j]->getAligned()[i];
317 if(toupper(value) == 'A') { freq[0]++; }
318 else if(toupper(value) == 'T' || toupper(value) == 'U') { freq[1]++; }
319 else if(toupper(value) == 'G') { freq[2]++; }
320 else if(toupper(value) == 'C') { freq[3]++; }
324 //find base with highest frequency
326 for (int j = 0; j < freq.size(); j++) { if (freq[j] > highest) { highest = freq[j]; } }
328 float highFreq = highest / (float) (seqs.size());
331 Pi = (highFreq - 0.25) / 0.75;
333 //cannot have probability less than 0.
334 if (Pi < 0) { Pi = 0.0; }
336 //saves this for later
337 outFreq << setprecision(precision) << i << '\t' << highFreq << endl;
339 if (h.count(i) > 0) {
349 catch(exception& e) {
350 m->errorOut(e, "DeCalculator", "calcFreq");
354 //***************************************************************************************************************
355 vector<float> DeCalculator::findQav(vector<int> window, int size, vector<float> probabilityProfile) {
357 vector<float> averages;
359 //for each window find average
360 for (int i = 0; i < window.size(); i++) {
364 //while you are in the window for this sequence
366 for (int j = window[i]; j < (window[i]+size); j++) {
367 average += probabilityProfile[j];
371 average = average / count;
373 //save this windows average
374 averages.push_back(average);
379 catch(exception& e) {
380 m->errorOut(e, "DeCalculator", "findQav");
384 //***************************************************************************************************************
385 //seqs have already been masked
386 vector< vector<float> > DeCalculator::getQuantiles(vector<Sequence*> seqs, vector<int> windowSizesTemplate, int window, vector<float> probProfile, int increment, int start, int end) {
388 vector< vector<float> > quan;
390 //percentage of mismatched pairs 1 to 100
394 for(int i = start; i < end; i++){
396 m->mothurOut("Processing sequence " + toString(i)); m->mothurOutEndLine();
397 Sequence* query = new Sequence(seqs[i]->getName(), seqs[i]->getAligned());
399 //compare to every other sequence in template
400 for(int j = 0; j < i; j++){
402 Sequence* subject = new Sequence(seqs[j]->getName(), seqs[j]->getAligned());
404 if (m->control_pressed) { delete query; delete subject; return quan; }
407 map<int, int>::iterator it;
409 trimSeqs(query, subject, trim);
412 int front = it->first; int back = it->second;
414 //reset window for each new comparison
415 windowSizesTemplate[i] = window;
417 vector<int> win = findWindows(query, front, back, windowSizesTemplate[i], increment);
419 vector<float> obsi = calcObserved(query, subject, win, windowSizesTemplate[i]);
421 vector<float> q = findQav(win, windowSizesTemplate[i], probProfile);
423 float alpha = getCoef(obsi, q);
425 vector<float> exp = calcExpected(q, alpha);
427 float de = calcDE(obsi, exp);
429 float dist = calcDist(query, subject, front, back);
430 //cout << i << '\t' << j << '\t' << dist << '\t' << de << endl;
433 //quanMember newScore(de, i, j);
435 quan[dist].push_back(de);
447 catch(exception& e) {
448 m->errorOut(e, "DeCalculator", "getQuantiles");
452 //********************************************************************************************************************
453 //sorts lowest to highest
454 inline bool compareQuanMembers(quanMember left, quanMember right){
455 return (left.score < right.score);
457 //***************************************************************************************************************
458 //this was going to be used by pintail to increase the sensitivity of the chimera detection, but it wasn't quite right. may want to revisit in the future...
459 void DeCalculator::removeObviousOutliers(vector< vector<float> >& quantiles, int num) {
462 for (int i = 0; i < quantiles.size(); i++) {
464 //find mean of this quantile score
465 sort(quantiles[i].begin(), quantiles[i].end());
468 if (quantiles[i].size() != 0) {
469 float high = quantiles[i][int(quantiles[i].size() * 0.99)];
470 float low = quantiles[i][int(quantiles[i].size() * 0.01)];
472 //look at each value in quantiles to see if it is an outlier
473 for (int j = 0; j < quantiles[i].size(); j++) {
474 //is this score between 1 and 99%
475 if ((quantiles[i][j] > low) && (quantiles[i][j] < high)) {
476 temp.push_back(quantiles[i][j]);
484 //find contributer with most offending score related to it
485 int largestContrib = findLargestContrib(seen);
487 //while you still have guys to eliminate
488 while (contributions.size() > 0) {
490 m->mothurOut("Removing scores contributed by sequence " + toString(largestContrib) + " in your template file."); m->mothurOutEndLine();
492 //remove from quantiles all scores that were made using this largestContrib
493 for (int i = 0; i < quantiles.size(); i++) {
494 //cout << "before remove " << quantiles[i].size() << '\t';
495 removeContrib(largestContrib, quantiles[i]);
496 //cout << "after remove " << quantiles[i].size() << endl;
498 //cout << count << " largest contrib = " << largestContrib << endl; count++;
499 //remove from contributions all scores that were made using this largestContrib
500 removeContrib(largestContrib, contributions);
502 //"erase" largestContrib
503 seen[largestContrib] = -1;
505 //get next largestContrib
506 largestContrib = findLargestContrib(seen);
509 **************************************************************************************************
511 vector<int> marked = returnObviousOutliers(quantiles, num);
512 vector<int> copyMarked = marked;
514 //find the 99% of marked
515 sort(copyMarked.begin(), copyMarked.end());
516 int high = copyMarked[int(copyMarked.size() * 0.99)];
517 cout << "high = " << high << endl;
519 for(int i = 0; i < marked.size(); i++) {
520 if (marked[i] > high) {
521 m->mothurOut("Removing scores contributed by sequence " + toString(marked[i]) + " in your template file."); m->mothurOutEndLine();
522 for (int i = 0; i < quantiles.size(); i++) {
523 removeContrib(marked[i], quantiles[i]);
531 for (int i = 0; i < quantiles.size(); i++) {
534 if (quantiles[i].size() == 0) {
535 //in case this is not a distance found in your template files
536 for (int g = 0; g < 6; g++) {
541 sort(quantiles[i].begin(), quantiles[i].end(), compareQuanMembers);
544 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.10)].score);
546 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.25)].score);
548 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.5)].score);
550 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.75)].score);
552 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.95)].score);
554 temp.push_back(quantiles[i][int(quantiles[i].size() * 0.99)].score);
564 catch(exception& e) {
565 m->errorOut(e, "DeCalculator", "removeObviousOutliers");
569 //***************************************************************************************************************
570 //put quanMember in the vector based on how far they are from the 99% or 1%. Biggest offenders in front.
571 /*vector<quanMember> DeCalculator::sortContrib(map<quanMember*, float> quan) {
574 vector<quanMember> newQuan;
575 map<quanMember*, float>::iterator it;
577 while (quan.size() > 0) {
579 map<quanMember*, float>::iterator largest = quan.begin();
581 //find biggest member
582 for (it = quan.begin(); it != quan.end(); it++) {
583 if (it->second > largest->second) { largest = it; }
585 cout << largest->second << '\t' << largest->first->score << '\t' << largest->first->member1 << '\t' << largest->first->member2 << endl;
587 newQuan.push_back(*(largest->first));
596 catch(exception& e) {
597 m->errorOut(e, "DeCalculator", "sortContrib");
602 //***************************************************************************************************************
603 //used by removeObviousOutliers which was attempt to increase sensitivity of chimera detection...not currently used...
604 int DeCalculator::findLargestContrib(vector<int> seen) {
611 for (int i = 0; i < seen.size(); i++) {
613 if (seen[i] > largest) {
619 return largestContribs;
622 catch(exception& e) {
623 m->errorOut(e, "DeCalculator", "findLargestContribs");
627 //***************************************************************************************************************
628 void DeCalculator::removeContrib(int bad, vector<quanMember>& quan) {
631 vector<quanMember> newQuan;
632 for (int i = 0; i < quan.size(); i++) {
633 if ((quan[i].member1 != bad) && (quan[i].member2 != bad) ) {
634 newQuan.push_back(quan[i]);
641 catch(exception& e) {
642 m->errorOut(e, "DeCalculator", "removeContrib");
647 //***************************************************************************************************************
648 float DeCalculator::findAverage(vector<float> myVector) {
652 for (int i = 0; i < myVector.size(); i++) { total += myVector[i]; }
654 float average = total / (float) myVector.size();
659 catch(exception& e) {
660 m->errorOut(e, "DeCalculator", "findAverage");
665 //***************************************************************************************************************
666 float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
669 //find average prob for this seqs windows
670 float probAverage = findAverage(qav);
672 //find observed average
673 float obsAverage = findAverage(obs);
675 float coef = obsAverage / probAverage;
679 catch(exception& e) {
680 m->errorOut(e, "DeCalculator", "getCoef");
684 //***************************************************************************************************************
685 //gets closest matches to each end, since chimeras will most likely have different parents on each end
686 vector<Sequence> DeCalculator::findClosest(Sequence querySeq, vector<Sequence*>& thisTemplate, vector<Sequence*>& thisFilteredTemplate, int numWanted, int minSim) {
690 vector<Sequence> seqsMatches;
692 vector<SeqDist> distsLeft;
693 vector<SeqDist> distsRight;
695 Dist* distcalculator = new eachGapDist();
697 string queryUnAligned = querySeq.getUnaligned();
698 int numBases = int(queryUnAligned.length() * 0.33);
700 string leftQuery = ""; //first 1/3 of the sequence
701 string rightQuery = ""; //last 1/3 of the sequence
702 string queryAligned = querySeq.getAligned();
705 bool foundFirstBase = false;
708 int firstBaseSpot = 0;
709 for (int i = 0; i < queryAligned.length(); i++) {
711 if (isalpha(queryAligned[i])) {
713 if (!foundFirstBase) { foundFirstBase = true; firstBaseSpot = i; }
716 //eliminate opening .'s
717 if (foundFirstBase) { leftQuery += queryAligned[i]; }
719 if (baseCount >= numBases) { leftSpot = i; break; } //first 1/3
722 //right side - count through another 1/3, so you are at last third
725 for (int i = leftSpot; i < queryAligned.length(); i++) {
727 if (isalpha(queryAligned[i])) { baseCount++; }
729 if (baseCount > numBases + 1) { rightSpot = i; break; } //last 1/3
733 //find last position in query that is a non gap character
734 int lastBaseSpot = queryAligned.length()-1;
735 for (int j = queryAligned.length()-1; j >= 0; j--) {
736 if (isalpha(queryAligned[j])) {
741 rightQuery = queryAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //sequence from pos spot to end
743 Sequence queryLeft(querySeq.getName(), leftQuery);
744 Sequence queryRight(querySeq.getName(), rightQuery);
746 //cout << querySeq->getName() << '\t' << leftSpot << '\t' << rightSpot << '\t' << firstBaseSpot << '\t' << lastBaseSpot << endl;
747 //cout << queryUnAligned.length() << '\t' << queryLeft.getUnaligned().length() << '\t' << queryRight.getUnaligned().length() << endl;
748 for(int j = 0; j < thisFilteredTemplate.size(); j++){
750 string dbAligned = thisFilteredTemplate[j]->getAligned();
751 string leftDB = dbAligned.substr(firstBaseSpot, (leftSpot-firstBaseSpot+1)); //first 1/3 of the sequence
752 string rightDB = dbAligned.substr(rightSpot, (lastBaseSpot-rightSpot+1)); //last 1/3 of the sequence
754 Sequence dbLeft(thisFilteredTemplate[j]->getName(), leftDB);
755 Sequence dbRight(thisFilteredTemplate[j]->getName(), rightDB);
757 distcalculator->calcDist(queryLeft, dbLeft);
758 float distLeft = distcalculator->getDist();
760 distcalculator->calcDist(queryRight, dbRight);
761 float distRight = distcalculator->getDist();
764 subjectLeft.seq = NULL;
765 subjectLeft.dist = distLeft;
766 subjectLeft.index = j;
768 distsLeft.push_back(subjectLeft);
770 SeqDist subjectRight;
771 subjectRight.seq = NULL;
772 subjectRight.dist = distRight;
773 subjectRight.index = j;
775 distsRight.push_back(subjectRight);
779 delete distcalculator;
781 //sort by smallest distance
782 sort(distsRight.begin(), distsRight.end(), compareSeqDist);
783 sort(distsLeft.begin(), distsLeft.end(), compareSeqDist);
787 map<string, string> seen;
788 map<string, string>::iterator it;
790 vector<SeqDist> dists;
791 float lastRight = distsRight[0].dist;
792 float lastLeft = distsLeft[0].dist;
794 float maxDist = 1.0 - (minSim / 100.0);
796 for (int i = 0; i < numWanted+1; i++) {
797 if (m->control_pressed) { return seqsMatches; }
799 //add left if you havent already
800 it = seen.find(thisTemplate[distsLeft[i].index]->getName());
801 if (it == seen.end() && distsLeft[i].dist <= maxDist) {
802 dists.push_back(distsLeft[i]);
803 seen[thisTemplate[distsLeft[i].index]->getName()] = thisTemplate[distsLeft[i].index]->getName();
804 lastLeft = distsLeft[i].dist;
805 // cout << "loop-left\t" << db[distsLeft[i].index]->getName() << '\t' << distsLeft[i].dist << endl;
808 //add right if you havent already
809 it = seen.find(thisTemplate[distsRight[i].index]->getName());
810 if (it == seen.end() && distsRight[i].dist <= maxDist) {
811 dists.push_back(distsRight[i]);
812 seen[thisTemplate[distsRight[i].index]->getName()] = thisTemplate[distsRight[i].index]->getName();
813 lastRight = distsRight[i].dist;
814 // cout << "loop-right\t" << db[distsRight[i].index]->getName() << '\t' << distsRight[i].dist << endl;
817 if (i == numWanted) { break; }
821 //are we still above the minimum similarity cutoff
822 if ((lastLeft >= minSim) || (lastRight >= minSim)) {
823 //add in ties from left
825 while (i < distsLeft.size()) {
826 if (distsLeft[i].dist == lastLeft) { dists.push_back(distsLeft[i]); }
831 //add in ties from right
833 while (i < distsRight.size()) {
834 if (distsRight[i].dist == lastRight) { dists.push_back(distsRight[i]); }
840 //cout << numWanted << endl;
841 for (int i = 0; i < dists.size(); i++) {
842 // cout << db[dists[i].index]->getName() << '\t' << dists[i].dist << endl;
844 if ((thisTemplate[dists[i].index]->getName() != querySeq.getName()) && (((1.0-dists[i].dist)*100) >= minSim)) {
845 Sequence temp(thisTemplate[dists[i].index]->getName(), thisTemplate[dists[i].index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
846 //cout << querySeq->getName() << '\t' << thisTemplate[dists[i].index]->getName() << '\t' << dists[i].dist << endl;
847 seqsMatches.push_back(temp);
854 catch(exception& e) {
855 m->errorOut(e, "DeCalculator", "findClosest");
859 //***************************************************************************************************************
860 Sequence* DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db) {
865 Dist* distcalculator = new eachGapDist();
867 int smallest = 1000000;
869 for(int j = 0; j < db.size(); j++){
871 distcalculator->calcDist(*querySeq, *db[j]);
872 float dist = distcalculator->getDist();
874 if (dist < smallest) {
880 delete distcalculator;
882 seqsMatch = new Sequence(db[index]->getName(), db[index]->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
886 catch(exception& e) {
887 m->errorOut(e, "DeCalculator", "findClosest");
891 /***************************************************************************************************************/
892 map<int, int> DeCalculator::trimSeqs(Sequence& query, vector<Sequence>& topMatches) {
895 int frontPos = 0; //should contain first position in all seqs that is not a gap character
896 int rearPos = query.getAligned().length();
898 //********find first position in topMatches that is a non gap character***********//
899 //find first position all query seqs that is a non gap character
900 for (int i = 0; i < topMatches.size(); i++) {
902 string aligned = topMatches[i].getAligned();
905 //find first spot in this seq
906 for (int j = 0; j < aligned.length(); j++) {
907 if (isalpha(aligned[j])) {
913 //save this spot if it is the farthest
914 if (pos > frontPos) { frontPos = pos; }
918 string aligned = query.getAligned();
921 //find first position in query that is a non gap character
922 for (int j = 0; j < aligned.length(); j++) {
923 if (isalpha(aligned[j])) {
929 //save this spot if it is the farthest
930 if (pos > frontPos) { frontPos = pos; }
933 //********find last position in topMatches that is a non gap character***********//
934 for (int i = 0; i < topMatches.size(); i++) {
936 string aligned = topMatches[i].getAligned();
937 int pos = aligned.length();
939 //find first spot in this seq
940 for (int j = aligned.length()-1; j >= 0; j--) {
941 if (isalpha(aligned[j])) {
947 //save this spot if it is the farthest
948 if (pos < rearPos) { rearPos = pos; }
952 aligned = query.getAligned();
953 pos = aligned.length();
955 //find last position in query that is a non gap character
956 for (int j = aligned.length()-1; j >= 0; j--) {
957 if (isalpha(aligned[j])) {
963 //save this spot if it is the farthest
964 if (pos < rearPos) { rearPos = pos; }
966 map<int, int> trimmedPos;
967 //check to make sure that is not whole seq
968 if ((rearPos - frontPos - 1) <= 0) {
969 query.setAligned("");
971 for (int i = 0; i < topMatches.size(); i++) {
972 topMatches[i].setAligned("");
978 string newAligned = query.getAligned();
979 newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
980 query.setAligned(newAligned);
983 for (int i = 0; i < topMatches.size(); i++) {
984 newAligned = topMatches[i].getAligned();
985 newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
986 topMatches[i].setAligned(newAligned);
989 for (int i = 0; i < newAligned.length(); i++) {
990 trimmedPos[i] = i+frontPos;
995 catch(exception& e) {
996 m->errorOut(e, "DeCalculator", "trimSequences");
1001 //***************************************************************************************************************