]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed ties issue in maligner getBlastSeqs
authorwestcott <westcott>
Mon, 25 Apr 2011 11:31:20 +0000 (11:31 +0000)
committerwestcott <westcott>
Mon, 25 Apr 2011 11:31:20 +0000 (11:31 +0000)
Mothur.xcodeproj/project.pbxproj
blastdb.cpp
blastdb.hpp
commandfactory.cpp
database.hpp
decalc.cpp
listseqscommand.h
maligner.cpp

index ab8268391b1d39181ee47645f582bd75f2e1e43f..217151be7313e5ec60611c20d104af1d7b57d617 100644 (file)
                                A7E9B68512D37EC400DA6239 /* chimerarealigner.h */,
                                A7E9B68812D37EC400DA6239 /* chimeraslayer.cpp */,
                                A7E9B68912D37EC400DA6239 /* chimeraslayer.h */,
+                               A7E9B6C212D37EC400DA6239 /* decalc.h */,
                                A7E9B6C112D37EC400DA6239 /* decalc.cpp */,
                                A7E9B74612D37EC400DA6239 /* maligner.h */,
-                               A7E9B6C212D37EC400DA6239 /* decalc.h */,
                                A7E9B74512D37EC400DA6239 /* maligner.cpp */,
                                A7E9B79312D37EC400DA6239 /* pintail.cpp */,
                                A7E9B79412D37EC400DA6239 /* pintail.h */,
index 14a1fa3a67368f5207a1c0df7a08ca5bab7fade9..026c4ca3054f0d9db16eb53ec1327115f67239d4 100644 (file)
@@ -146,6 +146,7 @@ vector<int> BlastDB::findClosestMegaBlast(Sequence* seq, int n) {
                        
                        m->gobble(m8FileHandle);
                        topMatches.push_back(templateAccession);
+                       megaScores.push_back(searchScore);
 //cout << templateAccession << endl;
                }
                m8FileHandle.close();
index 9398ba0362b9f501bc6684b508c6befcfe20b4d5..844f7f1fdc78abe8e3a1e38ca6045e1584a454db 100644 (file)
@@ -36,6 +36,7 @@ private:
        float gapExtend;
        float match;
        float misMatch;
+       
 };
 
 #endif
index fbafe572582194ca455074a4c4fab0887a21bf0c..63e0690f40cebb2b1ac0bf1b481a6848261c71bd 100644 (file)
@@ -184,7 +184,7 @@ CommandFactory::CommandFactory(){
        commands["trim.flows"]                  = "trim.flows";
        commands["list.seqs"]                   = "list.seqs";
        commands["get.seqs"]                    = "get.seqs";
-       commands["remove.seqs"]                 = "get.seqs";
+       commands["remove.seqs"]                 = "remove.seqs";
        commands["system"]                              = "system";
        commands["align.check"]                 = "align.check";
        commands["get.sharedseqs"]              = "get.sharedseqs";
index bbe01c456acbb9e66c56127d9bc23371e09b5ddf..a31075b4fddd46ec6d02d32186723a0608b7784d 100644 (file)
@@ -52,6 +52,7 @@ public:
        virtual vector<int> findClosestSequences(Sequence*, int) = 0;  // returns indexes of n closest sequences to query
        virtual vector<int> findClosestMegaBlast(Sequence*, int){return results;}
        virtual float getSearchScore();
+       virtual vector<float> getMegaBlastSearchScores() { return megaScores; } //assumes you already called findClosestMegaBlast
        virtual int getLongestBase(); 
        virtual void readKmerDB(ifstream&){};
        virtual void setNumSeqs(int i) {        numSeqs = i;    }
@@ -63,6 +64,7 @@ protected:
        int numSeqs, longest;
        float searchScore;
        vector<int> results;
+       vector<float> megaScores;
 };
 /**************************************************************************************************/
 #endif
index 1442bd8ec8865f0574d338675d5899332814d8a8..609b8959036cef53e97db2a439ae5b12c6373135 100644 (file)
@@ -821,7 +821,7 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
                
 //             cout << "lastLeft\t" << lastLeft << endl;
                
-               //add in dups
+               //add in sequences with same distance as last sequence added
                lasti++;
                int i = lasti;
                while (i < distsLeft.size()) {  
@@ -840,7 +840,7 @@ vector<Sequence*> DeCalculator::findClosest(Sequence* querySeq, vector<Sequence*
                }
                
 //             cout << "lastRight\t" << lastRight << endl;
-
+               //add in sequences with same distance as last sequence added
                i = lasti;
                while (i < distsRight.size()) {  
                        if (distsRight[i].dist == lastRight) {
index bd7beae73360da9453911e7d5b755135a00f4d54..be53d750ac0691938e314c883d689715bc8d11c7 100644 (file)
@@ -21,7 +21,7 @@ class ListSeqsCommand : public Command {
                ~ListSeqsCommand(){}
        
                vector<string> setParameters();
-               string getCommandName()                 { return "get.seqs";                            }
+               string getCommandName()                 { return "list.seqs";                           }
                string getCommandCategory()             { return "Sequence Processing";         }
                string getHelpString(); 
        
index 18f740911b2ccf342c4893cb00f629da95e7307b..4a00c1d47709d2137a2f79e875367b55bde1ed95 100644 (file)
@@ -633,20 +633,30 @@ vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
                Sequence* queryRight = new Sequence(q->getName()+"right", rightQuery);
 
                vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1);
+               vector<float> leftScores = databaseLeft->getMegaBlastSearchScores();
                vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1);
+               vector<float> rightScores = databaseLeft->getMegaBlastSearchScores();
 
                //if ((tempIndexesRight.size() == 0) && (tempIndexesLeft.size() == 0))  {  m->mothurOut("megablast returned " + toString(tempIndexesRight.size()) + " results for the right end, and " + toString(tempIndexesLeft.size()) + " for the left end. Needed " + toString(num+1) + ". Unable to process sequence " + q->getName()); m->mothurOutEndLine(); return refResults; }
                
                vector<int> smaller;
+               vector<float> smallerScores;
                vector<int> larger;
+               vector<float> largerScores;
                
-               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft; }
-               else { smaller = tempIndexesLeft; larger = tempIndexesRight; } 
+               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight; smallerScores = rightScores; larger = tempIndexesLeft; largerScores = leftScores; }
+               else { smaller = tempIndexesLeft; smallerScores = leftScores; larger = tempIndexesRight; largerScores = rightScores; } 
+               
+               //for (int i = 0; i < smaller.size(); i++) { cout << "smaller = " << smaller[i] << '\t' << smallerScores[i] << endl; }
+               //cout << endl;
+               //for (int i = 0; i < larger.size(); i++) { cout << "larger = " << larger[i] << '\t' << largerScores[i] << endl; }
                
                //merge results         
                map<int, int> seen;
                map<int, int>::iterator it;
-               
+               float lastSmaller = smallerScores[0];
+               float lastLarger = largerScores[0];
+               int lasti = 0;
                vector<int> mergedResults;
                for (int i = 0; i < smaller.size(); i++) {
                        //add left if you havent already
@@ -654,6 +664,7 @@ vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
                        if (it == seen.end()) {  
                                mergedResults.push_back(smaller[i]);
                                seen[smaller[i]] = smaller[i];
+                               lastSmaller = smallerScores[i];
                        }
 
                        //add right if you havent already
@@ -661,17 +672,63 @@ vector<Sequence*> Maligner::getBlastSeqs(Sequence* q, int num) {
                        if (it == seen.end()) {  
                                mergedResults.push_back(larger[i]);
                                seen[larger[i]] = larger[i];
+                               lastLarger = largerScores[i];
                        }
+                       
+                       lasti = i;
+                       if (mergedResults.size() > num) { break; }
                }
                
-               for (int i = smaller.size(); i < larger.size(); i++) {
-                       it = seen.find(larger[i]);
-                       if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
+               //save lasti for smaller ties below
+               lasti++;
+               int iSmaller = lasti;
+               
+               if (!(mergedResults.size() > num)) { //if we still need more results.  
+                       for (int i = smaller.size(); i < larger.size(); i++) {
+                               it = seen.find(larger[i]);
+                               if (it == seen.end()) {  
+                                       mergedResults.push_back(larger[i]);
+                                       seen[larger[i]] = larger[i];
+                                       lastLarger = largerScores[i];
+                               }
+                               
+                               lasti = i;
+                               if (mergedResults.size() > num) {  break; }
                        }
                }
                
+               
+               //add in any ties from smaller
+               while (iSmaller < smaller.size()) {
+                       if (smallerScores[iSmaller] == lastSmaller) {
+                               it = seen.find(smaller[iSmaller]);
+                               
+                               if (it == seen.end()) {  
+                                       mergedResults.push_back(smaller[iSmaller]);
+                                       seen[smaller[iSmaller]] = smaller[iSmaller];
+                               }
+                       }
+                       else { break; }
+                       iSmaller++;                     
+               }
+               
+               lasti++;
+               //add in any ties from larger
+               while (lasti < larger.size()) {
+                       if (largerScores[lasti] == lastLarger) { //is it a tie
+                               it = seen.find(larger[lasti]);
+                               
+                               if (it == seen.end()) {  //we haven't already seen it
+                                       mergedResults.push_back(larger[lasti]);
+                                       seen[larger[lasti]] = larger[lasti];
+                               }
+                       }
+                       else { break; }
+                       lasti++;                        
+               }
+               
+               numWanted = seen.size();
+               
                if (mergedResults.size() < numWanted) { numWanted = mergedResults.size(); }
 //cout << q->getName() << " merged results size = " << mergedResults.size() << '\t' << "numwanted = " << numWanted <<  endl;           
                for (int i = 0; i < numWanted; i++) {