]> git.donarmstrong.com Git - mothur.git/blobdiff - chimeraslayer.cpp
modified chimera.slayer template=self
[mothur.git] / chimeraslayer.cpp
index b334f909a7411d6dd00b3eda311740b7e93a80b8..8c9417ad82cd0341a88aa216a84add2a701791e3 100644 (file)
@@ -45,7 +45,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num
        }
 }
 //***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, 
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, int k, int ms, int mms, int win, float div, 
                                                         int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
        try {
                fastafile = file; templateSeqs = readSeqs(fastafile);
@@ -65,78 +65,21 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, s
                increment = inc;
                numWanted = numw;
                realign = r; 
-               includeAbunds = abunds;
                trimChimera = trim;
                
-               //read name file and create nameMapRank
-               readNameFile(name);
-               
                decalc = new DeCalculator();    
                
                createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
                
                //run filter on template
-               for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]);  }
-               
+               for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i];  } templateSeqs.clear();
+                
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
                exit(1);
        }
 }
-//***************************************************************************************************************
-int ChimeraSlayer::readNameFile(string name) {
-       try {
-               ifstream in;
-               m->openInputFile(name, in);
-               
-               int maxRank = 0;
-               int minRank = 10000000;
-               
-               while(!in.eof()){
-                       
-                       if (m->control_pressed) { in.close(); return 0; }
-                       
-                       string thisname, repnames;
-                       
-                       in >> thisname;         m->gobble(in);          //read from first column
-                       in >> repnames;                 //read from second column
-                       
-                       map<string, vector<string> >::iterator it = nameMapRank.find(thisname);
-                       if (it == nameMapRank.end()) {
-                               
-                               vector<string> splitRepNames;
-                               m->splitAtComma(repnames, splitRepNames);
-                               
-                               nameMapRank[thisname] = splitRepNames;  
-                               
-                               if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); }
-                               if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); }
-                               
-                       }else{  m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine();  }
-                       
-                       m->gobble(in);
-               }
-               in.close();     
-               
-               //sanity check to make sure files match
-               for (int i = 0; i < templateSeqs.size(); i++) {
-                       map<string, vector<string> >::iterator it = nameMapRank.find(templateSeqs[i]->getName());
-                       
-                       if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true;  }
-               }
-               
-               if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true;  }
-               
-               return 0;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSlayer", "readNameFile");
-               exit(1);
-       }
-}
-
 //***************************************************************************************************************
 int ChimeraSlayer::doPrep() {
        try {
@@ -259,6 +202,7 @@ int ChimeraSlayer::doPrep() {
                
                        //generate blastdb
                        databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
+
                        for (int i = 0; i < templateSeqs.size(); i++) {         databaseLeft->addSequence(*templateSeqs[i]);    }
                        databaseLeft->generateDB();
                        databaseLeft->setNumSeqs(templateSeqs.size());
@@ -273,49 +217,9 @@ int ChimeraSlayer::doPrep() {
        }
 }
 //***************************************************************************************************************
-vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
+int ChimeraSlayer::getTemplate(Sequence* q) {
        try {
                
-               vector<Sequence*> thisTemplate;
-               
-               int thisRank;
-               string thisName = q->getName();
-               map<string, vector<string> >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked
-               thisRank = (itRank->second).size();
-               
-               //create list of names we want to put into the template
-               set<string> namesToAdd;
-               for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
-                       if (itRank->first != thisName) {
-                               if (includeAbunds == "greaterequal") {
-                                       if ((itRank->second).size() >= thisRank) {
-                                               //you are more abundant than me or equal to my abundance
-                                               for (int i = 0; i < (itRank->second).size(); i++) {
-                                                       namesToAdd.insert((itRank->second)[i]);
-                                               }
-                                       }
-                               }else if (includeAbunds == "greater") {
-                                       if ((itRank->second).size() > thisRank) {
-                                               //you are more abundant than me
-                                               for (int i = 0; i < (itRank->second).size(); i++) {
-                                                       namesToAdd.insert((itRank->second)[i]);
-                                               }
-                                       }
-                               }else if (includeAbunds == "all") {
-                                       //add everyone
-                                       for (int i = 0; i < (itRank->second).size(); i++) {
-                                               namesToAdd.insert((itRank->second)[i]);
-                                       }
-                               }
-                       }
-               }
-               
-               for (int i = 0; i < templateSeqs.size(); i++) {  
-                       if (namesToAdd.count(templateSeqs[i]->getName()) != 0) { 
-                               thisTemplate.push_back(templateSeqs[i]);
-                       }
-               }
-               
                string  kmerDBNameLeft;
                string  kmerDBNameRight;
                
@@ -328,69 +232,70 @@ vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
                        string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
                        databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
 #ifdef USE_MPI
-                       for (int i = 0; i < thisTemplate.size(); i++) {
+                       for (int i = 0; i < userTemplate.size(); i++) {
                                
-                               if (m->control_pressed) { return thisTemplate; } 
+                               if (m->control_pressed) { return 0; } 
                                
-                               string leftFrag = thisTemplate[i]->getUnaligned();
+                               string leftFrag = userTemplate[i]->getUnaligned();
                                leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
                                
-                               Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+                               Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
                                databaseLeft->addSequence(leftTemp);    
                        }
                        databaseLeft->generateDB();
-                       databaseLeft->setNumSeqs(thisTemplate.size());
+                       databaseLeft->setNumSeqs(userTemplate.size());
                        
-                       for (int i = 0; i < thisTemplate.size(); i++) {
-                               if (m->control_pressed) { return thisTemplate;  } 
+                       for (int i = 0; i < userTemplate.size(); i++) {
+                               if (m->control_pressed) { return 0;  } 
                                
-                               string rightFrag = thisTemplate[i]->getUnaligned();
+                               string rightFrag = userTemplate[i]->getUnaligned();
                                rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
                                
-                               Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+                               Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
                                databaseRight->addSequence(rightTemp);  
                        }
                        databaseRight->generateDB();
-                       databaseRight->setNumSeqs(thisTemplate.size());
+                       databaseRight->setNumSeqs(userTemplate.size());
                        
 #else  
                        
                        
-                       for (int i = 0; i < thisTemplate.size(); i++) {
+                       for (int i = 0; i < userTemplate.size(); i++) {
                                
-                               if (m->control_pressed) { return thisTemplate; } 
+                               if (m->control_pressed) { return 0; } 
                                
-                               string leftFrag = thisTemplate[i]->getUnaligned();
+                               string leftFrag = userTemplate[i]->getUnaligned();
                                leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
                                
-                               Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+                               Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
                                databaseLeft->addSequence(leftTemp);    
                        }
                        databaseLeft->generateDB();
-                       databaseLeft->setNumSeqs(thisTemplate.size());
+                       databaseLeft->setNumSeqs(userTemplate.size());
                                
-                       for (int i = 0; i < thisTemplate.size(); i++) {
-                               if (m->control_pressed) { return thisTemplate; } 
+                       for (int i = 0; i < userTemplate.size(); i++) {
+                               if (m->control_pressed) { return 0; } 
                                        
-                               string rightFrag = thisTemplate[i]->getUnaligned();
+                               string rightFrag = userTemplate[i]->getUnaligned();
                                rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
                                        
-                               Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+                               Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
                                databaseRight->addSequence(rightTemp);  
                        }
                        databaseRight->generateDB();
-                       databaseRight->setNumSeqs(thisTemplate.size());
+                       databaseRight->setNumSeqs(userTemplate.size());
 #endif 
                }else if (searchMethod == "blast") {
                        
                        //generate blastdb
                        databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
-                       for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; }  databaseLeft->addSequence(*thisTemplate[i]);  }
+
+                       for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return 0; }  databaseLeft->addSequence(*userTemplate[i]);     }
                        databaseLeft->generateDB();
-                       databaseLeft->setNumSeqs(thisTemplate.size());
+                       databaseLeft->setNumSeqs(userTemplate.size());
                }
                
-               return thisTemplate;
+               return 0;
                
        }
        catch(exception& e) {
@@ -405,6 +310,12 @@ ChimeraSlayer::~ChimeraSlayer() {
        if (templateFileName != "self") {
                if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
                else if (searchMethod == "blast") {  delete databaseLeft; }
+       }else {
+               //delete userTemplate
+               for (int i = 0; i < userTemplate.size(); i++) {
+                       delete userTemplate[i];
+               }
+               userTemplate.clear();
        }
 }
 //***************************************************************************************************************
@@ -419,7 +330,7 @@ void ChimeraSlayer::printHeader(ostream& out) {
 Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
        try {
                Sequence* trim = NULL;
-               if (trimChimera) { trim = trimQuery; }
+               if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
                
                if (chimeraFlags == "yes") {
                        string chimeraFlag = "no";
@@ -446,13 +357,19 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
                                                }
                                                trim->setAligned(newAligned);
                                        }
-                                               
                                }
                        }
                        
                        printBlock(chimeraResults[0], chimeraFlag, out);
                        out << endl;
-               }else {  out << querySeq->getName() << "\tno" << endl;  }
+               }else {  
+                       out << querySeq->getName() << "\tno" << endl; 
+                       if (templateFileName == "self") {  
+                               Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
+               }
                
                return trim;
                
@@ -551,7 +468,14 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP
                        
                        printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
                        out << endl;
-               }else {  out << querySeq->getName() << "\tno" << endl;  }
+               }else {  
+                       out << querySeq->getName() << "\tno" << endl;  
+                       if (templateFileName == "self") {  
+                               Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
+               }
                
                return trim;
                
@@ -686,6 +610,12 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef
                                
                        MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
                        delete buf;
+                       
+                       if (template == "self") {  
+                               Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
                }
                
                
@@ -705,7 +635,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
                string outputString = "";
                
                Sequence* trim = NULL;
-               if (trimChimera) { trim = trimQuery; }
+               if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
                
                if (chimeraFlags == "yes") {
                        string chimeraFlag = "no";
@@ -764,6 +694,12 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
                        
                        MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
                        delete buf;
+                       
+                       if (template == "self") {  
+                               Sequence temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
                }
                
                return trim;
@@ -778,7 +714,9 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
 //***************************************************************************************************************
 int ChimeraSlayer::getChimeras(Sequence* query) {
        try {
-               if (trimChimera) { trimQuery = new Sequence(query->getName(), query->getAligned()); printResults.trimQuery = *trimQuery; }
+               
+               trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
+               printResults.trimQuery = trimQuery; 
                
                chimeraFlags = "no";
                printResults.flag = "no";
@@ -792,7 +730,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                //you must create a template
                vector<Sequence*> thisTemplate;
                if (templateFileName != "self") { thisTemplate = templateSeqs; }
-               else { thisTemplate = getTemplate(query); } //fills thistemplate and creates the databases
+               else { getTemplate(query);  thisTemplate = userTemplate; } //fills this template and creates the databases
                
                if (m->control_pressed) {  return 0;  }
                
@@ -810,10 +748,11 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                if (m->control_pressed) {  return 0;  }
 
                string chimeraFlag = maligner.getResults(query, decalc);
+               
                if (m->control_pressed) {  return 0;  }
+               
                vector<results> Results = maligner.getOutput();
        
-               //found in testing realigning only made things worse
                if (realign) {
                        ChimeraReAligner realigner(thisTemplate, match, misMatch);
                        realigner.reAlign(query, Results);