]> git.donarmstrong.com Git - mothur.git/blobdiff - maligner.cpp
tracking down chimera.slayer issue
[mothur.git] / maligner.cpp
index b3cb991299fbf29d595318e613f8654cc7f3426f..7d7145f4ca6bd9fac7a316745ace4432e4c26fe0 100644 (file)
@@ -106,7 +106,7 @@ string Maligner::chimeraMaligner(int chimeraPenalty, DeCalculator* decalc) {
 //             }
                
                if (trace.size() > 1) {         chimera = "yes";        }
-               else { chimera = "no";  }
+               else { chimera = "no";  return chimera; }
                
                int traceStart = trace[0].col;
                int traceEnd = trace[trace.size()-1].oldCol;    
@@ -628,25 +628,35 @@ vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
                string queryUnAligned = q->getUnaligned();
                string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
                string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
-
+               
                Sequence* queryLeft = new Sequence(q->getName()+"left", leftQuery);
                Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
-               
-               vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
+
                vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
-                       
+               vector<float> leftScores = databaseLeft->getMegaBlastSearchScores();
+               vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
+               vector<float> rightScores = databaseLeft->getMegaBlastSearchScores();
+
                //if ((tempIndexesRight.size() == 0) && (tempIndexesLeft.size() == 0))  {  m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to process sequence " + q->getName()); m->mothurOutEndLine(); return refResults; }
                
                vector<int> smaller;
+               vector<float> smallerScores;
                vector<int> larger;
+               vector<float> largerScores;
                
-               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft; }
-               else { smaller = tempIndexesLeft; larger = tempIndexesRight; } 
+               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; smallerScores = rightScores; larger = tempIndexesLeft; largerScores = leftScores; }
+               else { smaller = tempIndexesLeft; smallerScores = leftScores; larger = tempIndexesRight; largerScores = rightScores; } 
+               
+               //for (int i = 0; i < smaller.size(); i++) { cout << "smaller = " << smaller[i] << '\t' << smallerScores[i] << endl; }
+               //cout << endl;
+               //for (int i = 0; i < larger.size(); i++) { cout << "larger = " << larger[i] << '\t' << largerScores[i] << endl; }
                
                //merge results         
                map<int, int> seen;
                map<int, int>::iterator it;
-               
+               float lastSmaller = smallerScores[0];
+               float lastLarger = largerScores[0];
+               int lasti = 0;
                vector<int> mergedResults;
                for (int i = 0; i < smaller.size(); i++) {
                        //add left if you havent already
@@ -654,6 +664,7 @@ vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
                        if (it == seen.end()) {  
                                mergedResults.push_back(smaller[i]);
                                seen[smaller[i]] = smaller[i];
+                               lastSmaller = smallerScores[i];
                        }
 
                        //add right if you havent already
@@ -661,26 +672,74 @@ vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
                        if (it == seen.end()) {  
                                mergedResults.push_back(larger[i]);
                                seen[larger[i]] = larger[i];
+                               lastLarger = largerScores[i];
                        }
+                       
+                       lasti = i;
+                       if (mergedResults.size() > num) { break; }
                }
                
-               for (int i = smaller.size(); i < larger.size(); i++) {
-                       it = seen.find(larger[i]);
-                       if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
+               //save lasti for smaller ties below
+               lasti++;
+               int iSmaller = lasti;
+               
+               if (!(mergedResults.size() > num)) { //if we still need more results.  
+                       for (int i = smaller.size(); i < larger.size(); i++) {
+                               it = seen.find(larger[i]);
+                               if (it == seen.end()) {  
+                                       mergedResults.push_back(larger[i]);
+                                       seen[larger[i]] = larger[i];
+                                       lastLarger = largerScores[i];
+                               }
+                               
+                               lasti = i;
+                               if (mergedResults.size() > num) {  break; }
+                       }
+               }
+               
+               
+               //add in any ties from smaller
+               while (iSmaller < smaller.size()) {
+                       if (smallerScores[iSmaller] == lastSmaller) {
+                               it = seen.find(smaller[iSmaller]);
+                               
+                               if (it == seen.end()) {  
+                                       mergedResults.push_back(smaller[iSmaller]);
+                                       seen[smaller[iSmaller]] = smaller[iSmaller];
+                               }
                        }
+                       else { break; }
+                       iSmaller++;                     
                }
                
+               lasti++;
+               //add in any ties from larger
+               while (lasti < larger.size()) {
+                       if (largerScores[lasti] == lastLarger) { //is it a tie
+                               it = seen.find(larger[lasti]);
+                               
+                               if (it == seen.end()) {  //we haven't already seen it
+                                       mergedResults.push_back(larger[lasti]);
+                                       seen[larger[lasti]] = larger[lasti];
+                               }
+                       }
+                       else { break; }
+                       lasti++;                        
+               }
+               
+               numWanted = mergedResults.size();
+               
                if (mergedResults.size() < numWanted) { numWanted = mergedResults.size(); }
 //cout << q->getName() << " merged results size = " << mergedResults.size() << '\t' << "numwanted = " << numWanted <<  endl;           
                for (int i = 0; i < numWanted; i++) {
 //cout << db[mergedResults[i]]->getName()  << '\t' << mergedResults[i] << endl;        
+                       
                        if (db[mergedResults[i]]->getName() != q->getName()) { 
                                Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
                                refResults.push_back(temp);
                                indexes.push_back(mergedResults[i]);
                        }
+                       
 //cout << mergedResults[i] << endl;
                }
 //cout << "done " << q->getName()  << endl;