git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayer.cpp
chimera.slayer debugging
[mothur.git] / chimeraslayer.cpp
index cd268d3d6f1a513e932940332bab8ed15c59ada1..1d22bdfe36f74194d2609c01b956cf0ab8ba67c8 100644 (file)
@@ -72,10 +72,18 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, in
                
                createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
                
-               //run filter on template
-               for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  break; }  runFilter(templateSeqs[i]);  }
-
-               
+               if (searchMethod == "distance") { 
+                       createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+                       
+                       //run filter on template, copying templateSeqs into filteredTemplateSeqs
+                       for (int i = 0; i < templateSeqs.size(); i++) {  
+                               if (m->control_pressed) {  break; }
+                               
+                               Sequence* newSeq = new Sequence(templateSeqs[i]->getName(), templateSeqs[i]->getAligned());
+                               runFilter(newSeq);  
+                               filteredTemplateSeqs.push_back(newSeq);
+                       }
+               }
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
@@ -209,7 +217,7 @@ int ChimeraSlayer::doPrep() {
                }else if (searchMethod == "blast") {
                
                        //generate blastdb
-                       databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+                       databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3);
 
                        for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
                        databaseLeft->generateDB();
@@ -316,7 +324,7 @@ vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q, vector<Sequence*>& use
                }else if (searchMethod == "blast") {
                        
                        //generate blastdb
-                       databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+                       databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3);
 
                        for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return userTemplate; }   databaseLeft->addSequence(*userTemplate[i]); }
                        databaseLeft->generateDB();
@@ -544,7 +552,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef
                                if (leftPiece.flag == "yes") { if ((leftPiece.results[0].bsa >= minBS) || (leftPiece.results[0].bsb >= minBS))  { leftChimeric = true;  } }
                                
                                if (rightChimeric || leftChimeric) {
-                                       cout << querySeq->getName() <<  "\tyes" << endl;
+//                                     cout << querySeq->getName() <<  "\tyes" << endl;
                                        outAccString += querySeq->getName() + "\n";
                                        results = true;
                                        
@@ -774,7 +782,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                                realigner.reAlign(query, parents);
 
                        }
-
+                       
                        //get sequence that were given from maligner results
                        vector<SeqDist> seqs;
                        map<string, float> removeDups;
@@ -784,14 +792,20 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        for (int j = 0; j < Results.size(); j++) {
                                float dist = (Results[j].regionEnd - Results[j].regionStart + 1) * Results[j].queryToParentLocal;
                                //only add if you are not a duplicate
-                               itDup = removeDups.find(Results[j].parent);
-                               if (itDup == removeDups.end()) { //this is not duplicate
-                                       removeDups[Results[j].parent] = dist;
-                                       parentNameSeq[Results[j].parent] = Results[j].parentAligned;
-                               }else if (dist > itDup->second) { //is this a stronger number for this parent
-                                       removeDups[Results[j].parent] = dist;
-                                       parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+
+                               if(Results[j].queryToParentLocal >= 90){        //local match has to be at least 90% similarity
+                               
+                                       itDup = removeDups.find(Results[j].parent);
+                                       if (itDup == removeDups.end()) { //this is not duplicate
+                                               removeDups[Results[j].parent] = dist;
+                                               parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+                                       }else if (dist > itDup->second) { //is this a stronger number for this parent
+                                               removeDups[Results[j].parent] = dist;
+                                               parentNameSeq[Results[j].parent] = Results[j].parentAligned;
+                                       }
+                               
                                }
+                               
                        }
                        
                        for (itDup = removeDups.begin(); itDup != removeDups.end(); itDup++) {
@@ -801,7 +815,6 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                                SeqDist member;
                                member.seq = seq;
                                member.dist = itDup->second;
-                               
                                seqs.push_back(member);
                        }
                        
@@ -819,8 +832,14 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        }
                
                        //put seqs into vector to send to slayer
+                       
+//                     cout << query->getAligned() << endl;
                        vector<Sequence*> seqsForSlayer;
-                       for (int k = 0; k < seqs.size(); k++) {  seqsForSlayer.push_back(seqs[k].seq);  }
+                       for (int k = 0; k < seqs.size(); k++) {  
+//                             cout << seqs[k].seq->getAligned() << endl;
+                               seqsForSlayer.push_back(seqs[k].seq);   
+                       
+                       }
                        
                        if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
 
@@ -835,7 +854,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        //free memory
                        for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
                }
-               
+               //cout << endl << endl;
                return 0;
        }
        catch(exception& e) {
@@ -1024,13 +1043,13 @@ vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db
                string queryUnAligned = q->getUnaligned();
                string leftQuery = queryUnAligned.substr(0, int(queryUnAligned.length() * 0.33)); //first 1/3 of the sequence
                string rightQuery = queryUnAligned.substr(int(queryUnAligned.length() * 0.66)); //last 1/3 of the sequence
-               
+//cout << "whole length = " << queryUnAligned.length() << '\t' << "left length = " << leftQuery.length() << '\t' << "right length = "<< rightQuery.length() << endl;   
                Sequence* queryLeft = new Sequence(q->getName(), leftQuery);
                Sequence* queryRight = new Sequence(q->getName(), rightQuery);
                
                vector<int> tempIndexesLeft = databaseLeft->findClosestMegaBlast(queryLeft, num+1, minSim);
                vector<int> tempIndexesRight = databaseLeft->findClosestMegaBlast(queryRight, num+1, minSim);
-               cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery   << " rightMatches = " << tempIndexesRight.size() << endl;
+               //cout << q->getName() << '\t' << leftQuery << '\t' << "leftMatches = " << tempIndexesLeft.size() << '\t' << rightQuery << " rightMatches = " << tempIndexesRight.size() << endl;
                vector<int> smaller;
                vector<int> larger;
                
@@ -1078,6 +1097,11 @@ vector<Sequence*> ChimeraSlayer::getBlastSeqs(Sequence* q, vector<Sequence*>& db
                                
                        }
                }
+               
+               
+//             for(int i=0;i<refResults.size();i++){
+//                     cout << refResults[i]->getName() << endl;
+//             }
                        
                delete queryRight;
                delete queryLeft;