]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayer.cpp
v 19.3
[mothur.git] / chimeraslayer.cpp
index eb2b06e5c0229fd269631d8f642f00639025830c..70208607a00373eac0e13cfa4eb639d9025be99d 100644 (file)
@@ -217,7 +217,7 @@ int ChimeraSlayer::doPrep() {
                }else if (searchMethod == "blast") {
                
                        //generate blastdb
-                       databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
+                       databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
 
                        for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
                        databaseLeft->generateDB();
@@ -545,6 +545,8 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef
                        
                        bool rightChimeric = false;
                        bool leftChimeric = false;
+
+                       cout << endl;
                        
                        if (chimeraFlag == "yes") {     
                                //which peice is chimeric or are both
@@ -771,7 +773,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                for (int i = 0; i < refSeqs.size(); i++) {  delete refSeqs[i];  }
                
                if (chimeraFlag == "yes") {
-
+                       
                        if (realign) {
                                vector<string> parents;
                                for (int i = 0; i < Results.size(); i++) {
@@ -782,7 +784,8 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                                realigner.reAlign(query, parents);
 
                        }
-
+                       
+//                     cout << query->getAligned() << endl;
                        //get sequence that were given from maligner results
                        vector<SeqDist> seqs;
                        map<string, float> removeDups;
@@ -790,16 +793,25 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        map<string, string> parentNameSeq;
                        map<string, string>::iterator itSeq;
                        for (int j = 0; j < Results.size(); j++) {
+
                                float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
                                //only add if you are not a duplicate
-                               itDup = removeDups.find(Results[j].parent);
-                               if (itDup == removeDups.end()) { //this is not duplicate
-                                       removeDups[Results[j].parent] = dist;
-                                       parentNameSeq[Results[j].parent] = Results[j].parentAligned;
-                               }else if (dist > itDup->second) { //is this a stronger number for this parent
-                                       removeDups[Results[j].parent] = dist;
-                                       parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+//                             cout << Results[j].parent << '\t' << Results[j].regionEnd << '\t' << Results[j].regionStart << '\t' << Results[j].regionEnd - Results[j].regionStart +1 << '\t' << Results[j].queryToParentLocal << '\t' << dist << endl;
+                               
+                               
+                               if(Results[j].queryToParentLocal >= 90){        //local match has to be over 90% similarity
+                               
+                                       itDup = removeDups.find(Results[j].parent);
+                                       if (itDup == removeDups.end()) { //this is not duplicate
+                                               removeDups[Results[j].parent] = dist;
+                                               parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+                                       }else if (dist > itDup->second) { //is this a stronger number for this parent
+                                               removeDups[Results[j].parent] = dist;
+                                               parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+                                       }
+                               
                                }
+                               
                        }
                        
                        for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
@@ -809,7 +821,6 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                                SeqDist member;
                                member.seq = seq;
                                member.dist = itDup->second;
-                               
                                seqs.push_back(member);
                        }
                        
@@ -827,8 +838,14 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        }
                
                        //put seqs into vector to send to slayer
+                       
+//                     cout << query->getAligned() << endl;
                        vector<Sequence*> seqsForSlayer;
-                       for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+                       for (int k = 0; k < seqs.size(); k++) {  
+//                             cout << seqs[k].seq->getAligned() << endl;
+                               seqsForSlayer.push_back(seqs[k].seq);   
+//                             cout << seqs[k].seq->getName() << endl;
+                       }
                        
                        if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
 
@@ -1038,58 +1055,82 @@ vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db
                
                vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
                vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
-               //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
-               vector<int> smaller;
-               vector<int> larger;
+                               
                
-               if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
-               else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
+               //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
+//             vector<int> smaller;
+//             vector<int> larger;
+//             
+//             if (tempIndexesRight.size() < tempIndexesLeft.size()) { smaller = tempIndexesRight;  larger = tempIndexesLeft;  }
+//             else { smaller = tempIndexesLeft;  larger = tempIndexesRight;  } 
                
                //merge results         
                map<int, int> seen;
                map<int, int>::iterator it;
                vector<int> mergedResults;
-               for (int i = 0; i < smaller.size(); i++) {
+               
+               int index = 0;
+//             for (int i = 0; i < smaller.size(); i++) {
+               while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
+                       
                        if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
        
                        //add left if you havent already
-                       it = seen.find(smaller[i]);
+                       it = seen.find(tempIndexesLeft[index]);
                        if (it == seen.end()) {  
-                               mergedResults.push_back(smaller[i]);
-                               seen[smaller[i]] = smaller[i];
+                               mergedResults.push_back(tempIndexesLeft[index]);
+                               seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
                        }
                        
                        //add right if you havent already
-                       it = seen.find(larger[i]);
+                       it = seen.find(tempIndexesRight[index]);
                        if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
+                               mergedResults.push_back(tempIndexesRight[index]);
+                               seen[tempIndexesRight[index]] = tempIndexesRight[index];
                        }
+                       index++;
                }
+
                
-               for (int i = smaller.size(); i < larger.size(); i++) {
+               for (int i = index; i < tempIndexesLeft.size(); i++) {
                        if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
                        
                        //add right if you havent already
-                       it = seen.find(larger[i]);
+                       it = seen.find(tempIndexesLeft[i]);
                        if (it == seen.end()) {  
-                               mergedResults.push_back(larger[i]);
-                               seen[larger[i]] = larger[i];
+                               mergedResults.push_back(tempIndexesLeft[i]);
+                               seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
                        }
                }
 
+               for (int i = index; i < tempIndexesRight.size(); i++) {
+                       if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
+                       
+                       //add right if you havent already
+                       it = seen.find(tempIndexesRight[i]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(tempIndexesRight[i]);
+                               seen[tempIndexesRight[i]] = tempIndexesRight[i];
+                       }
+               }
+               //string qname = q->getName().substr(0, q->getName().find_last_of('_'));        
+               //cout << qname << endl;        
+               
                for (int i = 0; i < mergedResults.size(); i++) {
-                       //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
+                       //cout << q->getName() << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
                        if (db[mergedResults[i]]->getName() != q->getName()) { 
                                Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
                                refResults.push_back(temp);
                                
                        }
                }
-                       
+               //cout << endl << endl;
+
                delete queryRight;
                delete queryLeft;
                
+               if (refResults.size() == 0) { m->mothurOut("[WARNING]: mothur found 0 potential parents, so we are not able to check " + q->getName() + ". This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); }
+               
                return refResults;
        }
        catch(exception& e) {
@@ -1116,17 +1157,44 @@ vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db,
                //merge results         
                map<int, int> seen;
                map<int, int>::iterator it;
-                       vector<int> mergedResults;
-               for (int i = 0; i < tempIndexesLeft.size(); i++) {
+               vector<int> mergedResults;
+               
+               int index = 0;
+               //              for (int i = 0; i < smaller.size(); i++) {
+               while(index < tempIndexesLeft.size() && index < tempIndexesRight.size()){
                        
                        if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
                        
                        //add left if you havent already
+                       it = seen.find(tempIndexesLeft[index]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(tempIndexesLeft[index]);
+                               seen[tempIndexesLeft[index]] = tempIndexesLeft[index];
+                       }
+                       
+                       //add right if you havent already
+                       it = seen.find(tempIndexesRight[index]);
+                       if (it == seen.end()) {  
+                               mergedResults.push_back(tempIndexesRight[index]);
+                               seen[tempIndexesRight[index]] = tempIndexesRight[index];
+                       }
+                       index++;
+               }
+               
+               
+               for (int i = index; i < tempIndexesLeft.size(); i++) {
+                       if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
+                       
+                       //add left if you havent already
                        it = seen.find(tempIndexesLeft[i]);
                        if (it == seen.end()) {  
                                mergedResults.push_back(tempIndexesLeft[i]);
                                seen[tempIndexesLeft[i]] = tempIndexesLeft[i];
                        }
+               }
+               
+               for (int i = index; i < tempIndexesRight.size(); i++) {
+                       if (m->control_pressed) { delete queryRight; delete queryLeft; return refResults; }
                        
                        //add right if you havent already
                        it = seen.find(tempIndexesRight[i]);
@@ -1136,17 +1204,15 @@ vector<Sequence*> ChimeraSlayer::getKmerSeqs(Sequence* q, vector<Sequence*>& db,
                        }
                }
                
-               //numWanted = mergedResults.size();
-                       
-               //cout << q->getName() << endl;         
-               
                for (int i = 0; i < mergedResults.size(); i++) {
-                       //cout << db[mergedResults[i]]->getName() << endl;      
+                       //cout << mergedResults[i]  << '\t' << db[mergedResults[i]]->getName() << endl; 
                        if (db[mergedResults[i]]->getName() != q->getName()) { 
                                Sequence* temp = new Sequence(db[mergedResults[i]]->getName(), db[mergedResults[i]]->getAligned());
                                refResults.push_back(temp);
+                               
                        }
                }
+
                //cout << endl;         
                delete queryRight;
                delete queryLeft;