git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayer.cpp
testing 1.13.0
[mothur.git] / chimeraslayer.cpp
index 6ca8c2945bb4600e6c7f518b1d7c4dfa1a529225..3497579167210514496ac904f6c9f19d17140b16 100644 (file)
 #include "chimeraslayer.h"
 #include "chimerarealigner.h"
 #include "kmerdb.hpp"
+#include "blastdb.hpp"
 
 //***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string mode, bool r, string f) : searchMethod(mode), realign(r), fastafile(f) {   
-       decalc = new DeCalculator();    
+// Sets up a slayer for the query fasta `file` against the template fasta `temp`:
+// reads the template sequences, records the run settings, allocates the
+// DeCalculator and then calls doPrep(). A std::exception raised along the way is
+// reported through m->errorOut and the process exits with status 1.
+ChimeraSlayer::ChimeraSlayer(string file, string temp, string mode, int k, int ms, int mms, int win, float div, 
+int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
+       try {
+               //input files; the template sequences are read right away
+               fastafile = file;
+               templateFileName = temp;
+               templateSeqs = readSeqs(temp);
+               
+               //template search settings ("kmer" and "blast" are the modes doPrep() builds a database for)
+               searchMethod = mode;
+               kmerSize = k;
+               
+               //settings handed to Maligner in getChimeras()
+               numWanted = numw;
+               match = ms;
+               misMatch = mms;
+               minCov = mincov;
+               
+               //settings handed to Slayer in getChimeras()
+               window = win;
+               increment = inc;
+               iters = it;
+               minSNP = minsnp;
+               
+               //divR and minSim go to both Maligner and Slayer; minBS and divR are the cutoffs print() tests
+               divR = div;
+               minSim = minsim;
+               minBS = minbs;
+               
+               parents = par;
+               realign = r;    //when set, getChimeras() runs a ChimeraReAligner pass
+               
+               decalc = new DeCalculator();
+               doPrep();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+               exit(1);
+       }
 }
 //***************************************************************************************************************
-void ChimeraSlayer::doPrep() {
+int ChimeraSlayer::doPrep() {
        try {
-       
+               
+               //read in all query seqs
+               vector<Sequence*> tempQuerySeqs = readSeqs(fastafile);
+               
+               vector<Sequence*> temp = templateSeqs;
+               for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
+               
+               createFilter(temp, 0.0); //just removed columns where all seqs have a gap
+               
+               for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
+               
+               if (m->control_pressed) {  return 0; } 
+               
+               //run filter on template
+               for (int i = 0; i < templateSeqs.size(); i++) {  if (m->control_pressed) {  return 0; }  runFilter(templateSeqs[i]);  }
+               
                string  kmerDBNameLeft;
                string  kmerDBNameRight;
-               
+       
                //generate the kmerdb to pass to maligner
                if (searchMethod == "kmer") { 
+                       string templatePath = m->hasPath(templateFileName);
+                       string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
+                       databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
+                               
+                       string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
+                       databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
+               #ifdef USE_MPI
+                       for (int i = 0; i < templateSeqs.size(); i++) {
+                                       
+                               if (m->control_pressed) { return 0; } 
+                                       
+                               string leftFrag = templateSeqs[i]->getUnaligned();
+                               leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
+                                       
+                               Sequence leftTemp(templateSeqs[i]->getName(), leftFrag);
+                               databaseLeft->addSequence(leftTemp);    
+                       }
+                       databaseLeft->generateDB();
+                       databaseLeft->setNumSeqs(templateSeqs.size());
+                       
+                       for (int i = 0; i < templateSeqs.size(); i++) {
+                               if (m->control_pressed) { return 0; } 
+                                       
+                               string rightFrag = templateSeqs[i]->getUnaligned();
+                               rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
+                                       
+                               Sequence rightTemp(templateSeqs[i]->getName(), rightFrag);
+                               databaseRight->addSequence(rightTemp);  
+                       }
+                       databaseRight->generateDB();
+                       databaseRight->setNumSeqs(templateSeqs.size());
+
+               #else   
                        //leftside
-                       string leftTemplateFileName = "left." + templateFileName;
-                       databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);                      
                        kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
+                       bool needToGenerateLeft = true;
+                       
+                       if(kmerFileTestLeft){   
+                               bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
+                               if (GoodFile) {  needToGenerateLeft = false;    }
+                       }
                        
-                       if(!kmerFileTestLeft){  
+                       if(needToGenerateLeft){ 
                        
                                for (int i = 0; i < templateSeqs.size(); i++) {
+                                       
+                                       if (m->control_pressed) { return 0; } 
+                                       
                                        string leftFrag = templateSeqs[i]->getUnaligned();
                                        leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
                                        
@@ -49,14 +132,20 @@ void ChimeraSlayer::doPrep() {
                        databaseLeft->setNumSeqs(templateSeqs.size());
                        
                        //rightside
-                       string rightTemplateFileName = "right." + templateFileName;
-                       databaseRight = new KmerDB(rightTemplateFileName, kmerSize);                    
                        kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestRight(kmerDBNameRight.c_str());
+                       bool needToGenerateRight = true;
+                       
+                       if(kmerFileTestRight){  
+                               bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
+                               if (GoodFile) {  needToGenerateRight = false;   }
+                       }
                        
-                       if(!kmerFileTestRight){ 
+                       if(needToGenerateRight){        
                        
                                for (int i = 0; i < templateSeqs.size(); i++) {
+                                       if (m->control_pressed) { return 0; } 
+                                       
                                        string rightFrag = templateSeqs[i]->getUnaligned();
                                        rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
                                        
@@ -71,54 +160,40 @@ void ChimeraSlayer::doPrep() {
                        kmerFileTestRight.close();
                        
                        databaseRight->setNumSeqs(templateSeqs.size());
-
-               }
+               #endif  
+               }else if (searchMethod == "blast") {
                
-               int start = time(NULL); 
-               //filter the sequences
-               //read in all query seqs
-               ifstream in; 
-               openInputFile(fastafile, in);
-               
-               vector<Sequence*> tempQuerySeqs;
-               while(!in.eof()){
-                       Sequence* s = new Sequence(in);
-                       gobble(in);
-                       
-                       if (s->getName() != "") { tempQuerySeqs.push_back(s); }
+                       //generate blastdb
+                       databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch);
+                       for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
+                       databaseLeft->generateDB();
+                       databaseLeft->setNumSeqs(templateSeqs.size());
                }
-               in.close();
                
-               vector<Sequence*> temp = templateSeqs;
-               for (int i = 0; i < tempQuerySeqs.size(); i++) {  temp.push_back(tempQuerySeqs[i]);  }
-                               
-               createFilter(temp, 0.0); //just removed columns where all seqs have a gap
-                               
-               for (int i = 0; i < tempQuerySeqs.size(); i++) { delete tempQuerySeqs[i];  }
-               
-               //run filter on template
-               for (int i = 0; i < templateSeqs.size(); i++) {  runFilter(templateSeqs[i]);  }
-               
-               mothurOutEndLine(); mothurOut("It took " + toString(time(NULL) - start) + " secs to filter.");  mothurOutEndLine();
+               return 0;
 
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "doprep");
+               m->errorOut(e, "ChimeraSlayer", "doprep");
                exit(1);
        }
 }
 //***************************************************************************************************************
-ChimeraSlayer::~ChimeraSlayer() {      delete decalc;  if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }    }
+// Deletes the DeCalculator and the template databases that belong to the
+// search method: "kmer" has a right and a left database, "blast" only a left one.
+ChimeraSlayer::~ChimeraSlayer() {      
+       delete decalc;
+       
+       const bool kmerSearch = (searchMethod == "kmer");
+       if (kmerSearch) {  delete databaseRight;  }
+       if (kmerSearch || searchMethod == "blast") {  delete databaseLeft;  }
+}
 //***************************************************************************************************************
+// Logs the bootstrap cutoff (minBS) through the MothurOut handle, then writes
+// the tab-separated column titles of the report to `out`.
 void ChimeraSlayer::printHeader(ostream& out) {
-       mothurOutEndLine();
-       mothurOut("Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.");
-       mothurOutEndLine();
+       const string notice = "Only reporting sequence supported by " + toString(minBS) + "% of bootstrapped results.";
+       
+       //the notice is framed by an end-of-line on each side
+       m->mothurOutEndLine();
+       m->mothurOut(notice);
+       m->mothurOutEndLine();
        
        out << "Name\tLeftParent\tRightParent\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
 }
 //***************************************************************************************************************
-void ChimeraSlayer::print(ostream& out, ostream& outAcc) {
+int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
        try {
                if (chimeraFlags == "yes") {
                        string chimeraFlag = "no";
@@ -129,38 +204,108 @@ void ChimeraSlayer::print(ostream& out, ostream& outAcc) {
                        
                        if (chimeraFlag == "yes") {     
                                if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
-                                       mothurOut(querySeq->getName() + "\tyes"); mothurOutEndLine();
+                                       m->mothurOut(querySeq->getName() + "\tyes"); m->mothurOutEndLine();
                                        outAcc << querySeq->getName() << endl;
                                }
                        }
                        
-                       printBlock(chimeraResults[0], out);
+                       printBlock(chimeraResults[0], chimeraFlag, out);
                        out << endl;
                }else {  out << querySeq->getName() << "\tno" << endl;  }
                
+               return 0;
+               
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "print");
+               m->errorOut(e, "ChimeraSlayer", "print");
                exit(1);
        }
 }
+#ifdef USE_MPI
+//***************************************************************************************************************
+// MPI variant of print(): appends this query's result line to the shared report
+// file `out` and, when the query passes the bootstrap/divR cutoffs, its name to
+// the shared accnos file `outAcc`. Both writes go through MPI_File_write_shared.
+// Returns 1 when a name was written to `outAcc`, 0 otherwise. A std::exception
+// is reported through m->errorOut and the process exits with status 1.
+int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
+       try {
+               MPI_Status status;
+               bool results = false;
+               string outAccString = "";
+               string outputString = "";
+               
+               if (chimeraFlags == "yes") {
+                       //the query is only reported as chimeric when one parent ordering clears both cutoffs
+                       string chimeraFlag = "no";
+                       if(  (chimeraResults[0].bsa >= minBS && chimeraResults[0].divr_qla_qrb >= divR)
+                          ||
+                          (chimeraResults[0].bsb >= minBS && chimeraResults[0].divr_qlb_qra >= divR) ) { chimeraFlag = "yes"; }
+                       
+                       
+                       if (chimeraFlag == "yes") {     
+                               if ((chimeraResults[0].bsa >= minBS) || (chimeraResults[0].bsb >= minBS)) {
+                                       cout << querySeq->getName() <<  "\tyes" << endl;
+                                       outAccString += querySeq->getName() + "\n";
+                                       results = true;
+                                       
+                                       //write to accnos file
+                                       int length = outAccString.length();
+                                       char* buf2 = new char[length];
+                                       memcpy(buf2, outAccString.c_str(), length);
+                               
+                                       MPI_File_write_shared(outAcc, buf2, length, MPI_CHAR, &status);
+                                       delete[] buf2;  //allocated with new[], so it must be released with delete[]
+                               }
+                       }
+                       
+                       outputString = getBlock(chimeraResults[0], chimeraFlag);
+                       outputString += "\n";
+               }else {  
+                       outputString += querySeq->getName() + "\tno\n";  
+               }
+               
+               //write to output file (one line per query, whichever branch built it)
+               int length = outputString.length();
+               char* buf = new char[length];
+               memcpy(buf, outputString.c_str(), length);
+                               
+               MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
+               delete[] buf;  //array form, matching new char[length]
+               
+               return results;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "print");
+               exit(1);
+       }
+}
+#endif
+
 //***************************************************************************************************************
 int ChimeraSlayer::getChimeras(Sequence* query) {
        try {
                chimeraFlags = "no";
-               
+
                //filter query
-               spotMap = runFilter(query);
+               spotMap = runFilter(query);     
                
                querySeq = query;
                
                //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
                maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
                slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
+       
+               if (m->control_pressed) {  return 0;  }
                
                string chimeraFlag = maligner->getResults(query, decalc);
+               if (m->control_pressed) {  return 0;  }
                vector<results> Results = maligner->getOutput();
-                               
+                       
                //found in testing realigning only made things worse
                if (realign) {
                        ChimeraReAligner realigner(templateSeqs, match, misMatch);
@@ -233,8 +378,11 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                                spotMap = decalc->getMaskMap();
                        }
                        
+                       if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
+                       
                        //send to slayer
                        chimeraFlags = slayer->getResults(query, seqsForSlayer);
+                       if (m->control_pressed) {  return 0;  }
                        chimeraResults = slayer->getOutput();
                        
                        //free memory
@@ -247,14 +395,14 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                return 0;
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "getChimeras");
+               m->errorOut(e, "ChimeraSlayer", "getChimeras");
                exit(1);
        }
 }
 //***************************************************************************************************************
-void ChimeraSlayer::printBlock(data_struct data, ostream& out){
+void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
        try {
-       //out << "Name\tParentA\tParentB\tDivQLAQRB\tPerIDQLAQRB\tBootStrapA\tDivQLBQRA\tPerIDQLBQRA\tBootStrapB\tFlag\tLeftWindow\tRightWindow\n";
+       //out << ":)\n";
                
                out << querySeq->getName() << '\t';
                out << data.parentA.getName() << "\t" << data.parentB.getName()  << '\t';
@@ -264,7 +412,7 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out){
                out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
                out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
                
-               out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
+               out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
                
                //out << "Similarity of parents: " << data.ab << endl;
                //out << "Similarity of query to parentA: " << data.qa << endl;
@@ -282,9 +430,30 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out){
 
        }
        catch(exception& e) {
-               errorOut(e, "ChimeraSlayer", "printBlock");
+               m->errorOut(e, "ChimeraSlayer", "printBlock");
                exit(1);
        }
 }
 //***************************************************************************************************************
+// Formats one result row as tab-separated text, in the column order printHeader()
+// announces: query name, both parent names, the divR / percent-identity /
+// bootstrap values for each parent ordering, the supplied flag, and the left and
+// right windows looked up through spotMap. Every field, including the last, is
+// followed by a tab; no newline is appended.
+// A std::exception is reported through m->errorOut and the process exits with status 1.
+string ChimeraSlayer::getBlock(data_struct data, string flag){
+       try {
+               const string tab = "\t";
+               
+               //query and its two parents
+               string row = querySeq->getName() + tab;
+               row += data.parentA.getName() + tab;
+               row += data.parentB.getName() + tab;
+               
+               //scores for the left-A/right-B ordering, then for left-B/right-A
+               row += toString(data.divr_qla_qrb) + tab + toString(data.qla_qrb) + tab + toString(data.bsa) + tab;
+               row += toString(data.divr_qlb_qra) + tab + toString(data.qlb_qra) + tab + toString(data.bsb) + tab;
+               
+               //window bounds are looked up in spotMap before being printed
+               const string leftWindow = toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]);
+               const string rightWindow = toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]);
+               
+               row += flag + tab + leftWindow + tab + rightWindow + tab;
+               
+               return row;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "getBlock");
+               exit(1);
+       }
+}
+//***************************************************************************************************************/