]> git.donarmstrong.com Git - mothur.git/blobdiff - decalc.cpp
chimeracode
[mothur.git] / decalc.cpp
index bfaae000386c872584952b27b5dcc64f52a785ea..911efe268b6cd71817c28c7a8c008f850ff63e3e 100644 (file)
@@ -8,6 +8,10 @@
  */
 
 #include "decalc.h"
+#include "chimera.h"
+#include "dist.h"
+#include "eachgapdist.h"
+
 
 //***************************************************************************************************************
 void DeCalculator::setMask(string m) { 
@@ -101,7 +105,7 @@ vector<int>  DeCalculator::findWindows(Sequence* query, int front, int back, int
                if (size == 0) {  if (cutoff > 1200) {  size = 300; }
                                                        else{  size = (cutoff / 4); }  } 
                else if (size > (cutoff / 4)) { 
-                               mothurOut("You have selected to large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
+                               mothurOut("You have selected too large a window size for sequence " + query->getName() + ".  I will choose an appropriate window size."); mothurOutEndLine();
                                size = (cutoff / 4); 
                }
        
@@ -675,5 +679,167 @@ float DeCalculator::getCoef(vector<float> obs, vector<float> qav) {
        }
 }
 //***************************************************************************************************************
+//gets closest matches to each end, since chimeras will most likely have different parents on each end
+vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*> db, int numWanted) {
+       try {
+               
+               vector<Sequence*> seqsMatches;  
+               vector<SeqDist> dists;
+               
+               Dist* distcalculator = new eachGapDist();
+               
+               string queryAligned = querySeq->getAligned();
+               string leftQuery = queryAligned.substr(0, (queryAligned.length() / 3)); //first 1/3 of the sequence
+               string rightQuery = queryAligned.substr(((queryAligned.length() / 3)*2)); //last 1/3 of the sequence
+               
+               Sequence queryLeft(querySeq->getName(), leftQuery);
+               Sequence queryRight(querySeq->getName(), rightQuery);
+                               
+               for(int j = 0; j < db.size(); j++){
+                       
+                       string dbAligned = db[j]->getAligned();
+                       string leftDB = dbAligned.substr(0, (dbAligned.length() / 3)); //first 1/3 of the sequence
+                       string rightDB = dbAligned.substr(((dbAligned.length() / 3)*2)); //last 1/3 of the sequence
+                       
+                       Sequence dbLeft(db[j]->getName(), leftDB);
+                       Sequence dbRight(db[j]->getName(), rightDB);
+                       
+                       distcalculator->calcDist(queryLeft, dbLeft);
+                       float distLeft = distcalculator->getDist();
+                       
+                       distcalculator->calcDist(queryRight, dbRight);
+                       float distRight = distcalculator->getDist();
+                       
+                       float dist = min(distLeft, distRight); //keep smallest distance
+                       
+                       SeqDist subject;
+                       subject.seq = db[j];
+                       subject.dist = dist;
+                       
+                       dists.push_back(subject);
+               }
+               
+               delete distcalculator;
+               
+               sort(dists.begin(), dists.end(), compareSeqDist);
+               
+               for (int i = 0; i < numWanted; i++) {
+                       Sequence* temp = new Sequence(dists[i].seq->getName(), dists[i].seq->getAligned()); //have to make a copy so you can trim and filter without stepping on eachother.
+                       seqsMatches.push_back(temp);
+               }
+               
+               return seqsMatches;
+       }
+       catch(exception& e) {
+               errorOut(e, "DeCalculator", "findClosest");
+               exit(1);
+       }
+}
+/***************************************************************************************************************/
+map<int, int> DeCalculator::trimSeqs(Sequence* query, vector<Sequence*> topMatches) {
+       try {
+               
+               int frontPos = 0;  //should contain first position in all seqs that is not a gap character
+               int rearPos = query->getAligned().length();
+               
+               //********find first position in topMatches that is a non gap character***********//
+               //find first position all query seqs that is a non gap character
+               for (int i = 0; i < topMatches.size(); i++) {
+                       
+                       string aligned = topMatches[i]->getAligned();
+                       int pos = 0;
+                       
+                       //find first spot in this seq
+                       for (int j = 0; j < aligned.length(); j++) {
+                               if (isalpha(aligned[j])) {
+                                       pos = j;
+                                       break;
+                               }
+                       }
+                       
+                       //save this spot if it is the farthest
+                       if (pos > frontPos) { frontPos = pos; }
+               }
+               
+               
+               string aligned = query->getAligned();
+               int pos = 0;
+                       
+               //find first position in query that is a non gap character
+               for (int j = 0; j < aligned.length(); j++) {
+                       if (isalpha(aligned[j])) {
+                               pos = j;
+                               break;
+                       }
+               }
+               
+               //save this spot if it is the farthest
+               if (pos > frontPos) { frontPos = pos; }
+               
+               
+               //********find last position in topMatches that is a non gap character***********//
+               for (int i = 0; i < topMatches.size(); i++) {
+                       
+                       string aligned = topMatches[i]->getAligned();
+                       int pos = aligned.length();
+                       
+                       //find first spot in this seq
+                       for (int j = aligned.length()-1; j >= 0; j--) {
+                               if (isalpha(aligned[j])) {
+                                       pos = j;
+                                       break;
+                               }
+                       }
+                       
+                       //save this spot if it is the farthest
+                       if (pos < rearPos) { rearPos = pos; }
+               }
+               
+               
+               aligned = query->getAligned();
+               pos = aligned.length();
+               
+               //find last position in query that is a non gap character
+               for (int j = aligned.length()-1; j >= 0; j--) {
+                       if (isalpha(aligned[j])) {
+                               pos = j;
+                               break;
+                       }
+               }
+               
+               //save this spot if it is the farthest
+               if (pos < rearPos) { rearPos = pos; }
+
+               //check to make sure that is not whole seq
+               if ((rearPos - frontPos - 1) <= 0) {  mothurOut("Error, when I trim your sequences, the entire sequence is trimmed."); mothurOutEndLine(); exit(1);  }
+//cout << "front = " << frontPos << " rear = " << rearPos << endl;             
+               //trim query
+               string newAligned = query->getAligned();
+               newAligned = newAligned.substr(frontPos, (rearPos-frontPos));
+               query->setAligned(newAligned);
+               
+               //trim topMatches
+               for (int i = 0; i < topMatches.size(); i++) {
+                       newAligned = topMatches[i]->getAligned();
+                       newAligned = newAligned.substr(frontPos, (rearPos-frontPos+1));
+                       topMatches[i]->setAligned(newAligned);
+               }
+               
+               map<int, int> trimmedPos;
+               
+               for (int i = 0; i < newAligned.length(); i++) {
+                       trimmedPos[i] = i+frontPos;
+               }
+               
+               return trimmedPos;
+       }
+       catch(exception& e) {
+               errorOut(e, "DeCalculator", "trimSequences");
+               exit(1);
+       }
+
+}
+//***************************************************************************************************************
+