]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayer.cpp
Changed back to xcode build.
[mothur.git] / chimeraslayer.cpp
index 2c4f7fba4253c2cb4c2693bb3cb18e676c5d938b..78c9f9e9e24d0138b158791d7d3bce2f39eca026 100644 (file)
@@ -43,6 +43,98 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num
                exit(1);
        }
 }
+//***************************************************************************************************************
+ChimeraSlayer::ChimeraSlayer(string file, string temp, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, 
+                                                        int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
+       try {
+               fastafile = file; templateSeqs = readSeqs(fastafile);
+               templateFileName = temp; 
+               searchMethod = mode;
+               kmerSize = k;
+               match = ms;
+               misMatch = mms;
+               window = win;
+               divR = div;
+               minSim = minsim;
+               minCov = mincov;
+               minBS = minbs;
+               minSNP = minsnp;
+               parents = par;
+               iters = it;
+               increment = inc;
+               numWanted = numw;
+               realign = r; 
+               includeAbunds = abunds;
+               
+               //read name file and create nameMapRank
+               readNameFile(name);
+               
+               decalc = new DeCalculator();    
+               
+               createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
+               
+               //run filter on template
+               for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]);  }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+int ChimeraSlayer::readNameFile(string name) {
+       try {
+               ifstream in;
+               m->openInputFile(name, in);
+               
+               int maxRank = 0;
+               int minRank = 10000000;
+               
+               while(!in.eof()){
+                       
+                       if (m->control_pressed) { in.close(); return 0; }
+                       
+                       string thisname, repnames;
+                       
+                       in >> thisname;         m->gobble(in);          //read from first column
+                       in >> repnames;                 //read from second column
+                       
+                       map<string, vector<string> >::iterator it = nameMapRank.find(thisname);
+                       if (it == nameMapRank.end()) {
+                               
+                               vector<string> splitRepNames;
+                               m->splitAtComma(repnames, splitRepNames);
+                               
+                               nameMapRank[thisname] = splitRepNames;  
+                               
+                               if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); }
+                               if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); }
+                               
+                       }else{  m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine();  }
+                       
+                       m->gobble(in);
+               }
+               in.close();     
+               
+               //sanity check to make sure files match
+               for (int i = 0; i < templateSeqs.size(); i++) {
+                       map<string, vector<string> >::iterator it = nameMapRank.find(templateSeqs[i]->getName());
+                       
+                       if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true;  }
+               }
+               
+               if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true;  }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "readNameFile");
+               exit(1);
+       }
+}
+
 //***************************************************************************************************************
 int ChimeraSlayer::doPrep() {
        try {
@@ -67,10 +159,11 @@ int ChimeraSlayer::doPrep() {
        
                //generate the kmerdb to pass to maligner
                if (searchMethod == "kmer") { 
-                       string rightTemplateFileName = "right." + templateFileName;
+                       string templatePath = m->hasPath(templateFileName);
+                       string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
                        databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
                                
-                       string leftTemplateFileName = "left." + templateFileName;
+                       string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
                        databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
                #ifdef USE_MPI
                        for (int i = 0; i < templateSeqs.size(); i++) {
@@ -102,8 +195,14 @@ int ChimeraSlayer::doPrep() {
                        //leftside
                        kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
+                       bool needToGenerateLeft = true;
+                       
+                       if(kmerFileTestLeft){   
+                               bool GoodFile = m->checkReleaseVersion(kmerFileTestLeft, m->getVersion());
+                               if (GoodFile) {  needToGenerateLeft = false;    }
+                       }
                        
-                       if(!kmerFileTestLeft){  
+                       if(needToGenerateLeft){ 
                        
                                for (int i = 0; i < templateSeqs.size(); i++) {
                                        
@@ -127,8 +226,14 @@ int ChimeraSlayer::doPrep() {
                        //rightside
                        kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestRight(kmerDBNameRight.c_str());
+                       bool needToGenerateRight = true;
                        
-                       if(!kmerFileTestRight){ 
+                       if(kmerFileTestRight){  
+                               bool GoodFile = m->checkReleaseVersion(kmerFileTestRight, m->getVersion());
+                               if (GoodFile) {  needToGenerateRight = false;   }
+                       }
+                       
+                       if(needToGenerateRight){        
                        
                                for (int i = 0; i < templateSeqs.size(); i++) {
                                        if (m->control_pressed) { return 0; } 
@@ -165,11 +270,140 @@ int ChimeraSlayer::doPrep() {
                exit(1);
        }
 }
+//***************************************************************************************************************
+vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
+       try {
+               
+               vector<Sequence*> thisTemplate;
+               
+               int thisRank;
+               string thisName = q->getName();
+               map<string, vector<string> >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked
+               thisRank = (itRank->second).size();
+               
+               //create list of names we want to put into the template
+               set<string> namesToAdd;
+               for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
+                       if (itRank->first != thisName) {
+                               if (includeAbunds == "greaterequal") {
+                                       if ((itRank->second).size() >= thisRank) {
+                                               //you are more abundant than me or equal to my abundance
+                                               for (int i = 0; i < (itRank->second).size(); i++) {
+                                                       namesToAdd.insert((itRank->second)[i]);
+                                               }
+                                       }
+                               }else if (includeAbunds == "greater") {
+                                       if ((itRank->second).size() > thisRank) {
+                                               //you are more abundant than me
+                                               for (int i = 0; i < (itRank->second).size(); i++) {
+                                                       namesToAdd.insert((itRank->second)[i]);
+                                               }
+                                       }
+                               }else if (includeAbunds == "all") {
+                                       //add everyone
+                                       for (int i = 0; i < (itRank->second).size(); i++) {
+                                               namesToAdd.insert((itRank->second)[i]);
+                                       }
+                               }
+                       }
+               }
+               
+               for (int i = 0; i < templateSeqs.size(); i++) {  
+                       if (namesToAdd.count(templateSeqs[i]->getName()) != 0) { 
+                               thisTemplate.push_back(templateSeqs[i]);
+                       }
+               }
+               
+               string  kmerDBNameLeft;
+               string  kmerDBNameRight;
+               
+               //generate the kmerdb to pass to maligner
+               if (searchMethod == "kmer") { 
+                       string templatePath = m->hasPath(templateFileName);
+                       string rightTemplateFileName = templatePath + "right." + m->getRootName(m->getSimpleName(templateFileName));
+                       databaseRight = new KmerDB(rightTemplateFileName, kmerSize);
+                       
+                       string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
+                       databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
+#ifdef USE_MPI
+                       for (int i = 0; i < thisTemplate.size(); i++) {
+                               
+                               if (m->control_pressed) { return thisTemplate; } 
+                               
+                               string leftFrag = thisTemplate[i]->getUnaligned();
+                               leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
+                               
+                               Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+                               databaseLeft->addSequence(leftTemp);    
+                       }
+                       databaseLeft->generateDB();
+                       databaseLeft->setNumSeqs(thisTemplate.size());
+                       
+                       for (int i = 0; i < thisTemplate.size(); i++) {
+                               if (m->control_pressed) { return thisTemplate;  } 
+                               
+                               string rightFrag = thisTemplate[i]->getUnaligned();
+                               rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
+                               
+                               Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+                               databaseRight->addSequence(rightTemp);  
+                       }
+                       databaseRight->generateDB();
+                       databaseRight->setNumSeqs(thisTemplate.size());
+                       
+#else  
+                       
+                       
+                       for (int i = 0; i < thisTemplate.size(); i++) {
+                               
+                               if (m->control_pressed) { return thisTemplate; } 
+                               
+                               string leftFrag = thisTemplate[i]->getUnaligned();
+                               leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
+                               
+                               Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+                               databaseLeft->addSequence(leftTemp);    
+                       }
+                       databaseLeft->generateDB();
+                       databaseLeft->setNumSeqs(thisTemplate.size());
+                               
+                       for (int i = 0; i < thisTemplate.size(); i++) {
+                               if (m->control_pressed) { return thisTemplate; } 
+                                       
+                               string rightFrag = thisTemplate[i]->getUnaligned();
+                               rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
+                                       
+                               Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+                               databaseRight->addSequence(rightTemp);  
+                       }
+                       databaseRight->generateDB();
+                       databaseRight->setNumSeqs(thisTemplate.size());
+#endif 
+               }else if (searchMethod == "blast") {
+                       
+                       //generate blastdb
+                       databaseLeft = new BlastDB(-2.0, -1.0, match, misMatch);
+                       for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; }  databaseLeft->addSequence(*thisTemplate[i]);  }
+                       databaseLeft->generateDB();
+                       databaseLeft->setNumSeqs(thisTemplate.size());
+               }
+               
+               return thisTemplate;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayer", "getTemplate");
+               exit(1);
+       }
+}
+
 //***************************************************************************************************************
 ChimeraSlayer::~ChimeraSlayer() {      
        delete decalc;  
-       if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
-       else if (searchMethod == "blast") {  delete databaseLeft; }
+       if (templateFileName != "self") {
+               if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
+               else if (searchMethod == "blast") {  delete databaseLeft; }
+       }
 }
 //***************************************************************************************************************
 void ChimeraSlayer::printHeader(ostream& out) {
@@ -196,7 +430,7 @@ int ChimeraSlayer::print(ostream& out, ostream& outAcc) {
                                }
                        }
                        
-                       printBlock(chimeraResults[0], out);
+                       printBlock(chimeraResults[0], chimeraFlag, out);
                        out << endl;
                }else {  out << querySeq->getName() << "\tno" << endl;  }
                
@@ -240,7 +474,7 @@ int ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
                                }
                        }
                        
-                       outputString = getBlock(chimeraResults[0]);
+                       outputString = getBlock(chimeraResults[0], chimeraFlag);
                        outputString += "\n";
        //cout << outputString << endl;         
                        //write to output file
@@ -283,19 +517,33 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                
                querySeq = query;
                
+               //you must create a template
+               vector<Sequence*> thisTemplate;
+               if (templateFileName != "self") { thisTemplate = templateSeqs; }
+               else { thisTemplate = getTemplate(query); } //fills thistemplate and creates the databases
+               
+               if (m->control_pressed) {  return 0;  }
+               
+               if (thisTemplate.size() == 0) {  return 0; } //not chimeric
+               
                //referenceSeqs, numWanted, matchScore, misMatchPenalty, divR, minSimilarity
-               maligner = new Maligner(templateSeqs, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
-               slayer = new Slayer(window, increment, minSim, divR, iters, minSNP);
+               Maligner maligner(thisTemplate, numWanted, match, misMatch, divR, minSim, minCov, searchMethod, databaseLeft, databaseRight);
+               Slayer slayer(window, increment, minSim, divR, iters, minSNP);
                
+               if (templateFileName == "self") {
+                       if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
+                       else if (searchMethod == "blast") {  delete databaseLeft; }
+               }
+       
                if (m->control_pressed) {  return 0;  }
                
-               string chimeraFlag = maligner->getResults(query, decalc);
+               string chimeraFlag = maligner.getResults(query, decalc);
                if (m->control_pressed) {  return 0;  }
-               vector<results> Results = maligner->getOutput();
-                               
+               vector<results> Results = maligner.getOutput();
+                       
                //found in testing realigning only made things worse
                if (realign) {
-                       ChimeraReAligner realigner(templateSeqs, match, misMatch);
+                       ChimeraReAligner realigner(thisTemplate, match, misMatch);
                        realigner.reAlign(query, Results);
                }
 
@@ -368,16 +616,16 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                        if (m->control_pressed) {  for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }  return 0;  }
                        
                        //send to slayer
-                       chimeraFlags = slayer->getResults(query, seqsForSlayer);
+                       chimeraFlags = slayer.getResults(query, seqsForSlayer);
                        if (m->control_pressed) {  return 0;  }
-                       chimeraResults = slayer->getOutput();
+                       chimeraResults = slayer.getOutput();
                        
                        //free memory
                        for (int k = 0; k < seqs.size(); k++) {  delete seqs[k].seq;   }
                }
                
-               delete maligner;
-               delete slayer;
+               //delete maligner;
+               //delete slayer;
                
                return 0;
        }
@@ -387,7 +635,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
        }
 }
 //***************************************************************************************************************
-void ChimeraSlayer::printBlock(data_struct data, ostream& out){
+void ChimeraSlayer::printBlock(data_struct data, string flag, ostream& out){
        try {
        //out << ":)\n";
                
@@ -399,7 +647,7 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out){
                out << data.divr_qla_qrb << '\t' << data.qla_qrb << '\t' << data.bsa << '\t';
                out << data.divr_qlb_qra << '\t' << data.qlb_qra << '\t' << data.bsb << '\t';
                
-               out << "yes\t" << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
+               out << flag << '\t' << spotMap[data.winLStart] << "-" << spotMap[data.winLEnd] << '\t' << spotMap[data.winRStart] << "-" << spotMap[data.winREnd] << '\t';
                
                //out << "Similarity of parents: " << data.ab << endl;
                //out << "Similarity of query to parentA: " << data.qa << endl;
@@ -422,7 +670,7 @@ void ChimeraSlayer::printBlock(data_struct data, ostream& out){
        }
 }
 //***************************************************************************************************************
-string ChimeraSlayer::getBlock(data_struct data){
+string ChimeraSlayer::getBlock(data_struct data, string flag){
        try {
                
                string outputString = "";
@@ -433,7 +681,7 @@ string ChimeraSlayer::getBlock(data_struct data){
                outputString += toString(data.divr_qla_qrb) + "\t" + toString(data.qla_qrb) + "\t" + toString(data.bsa) + "\t";
                outputString += toString(data.divr_qlb_qra) + "\t" + toString(data.qlb_qra) + "\t" + toString(data.bsb) + "\t";
                
-               outputString += "yes\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
+               outputString += flag + "\t" + toString(spotMap[data.winLStart]) + "-" + toString(spotMap[data.winLEnd]) + "\t" + toString(spotMap[data.winRStart]) + "-" + toString(spotMap[data.winREnd]) + "\t";
                
                return outputString;
        }