modified chimera.slayer template=self

author westcott <westcott>

Mon, 18 Apr 2011 17:55:59 +0000 (17:55 +0000)

committer westcott <westcott>

Mon, 18 Apr 2011 17:55:59 +0000 (17:55 +0000)
author westcott <westcott>
Mon, 18 Apr 2011 17:55:59 +0000 (17:55 +0000)
committer westcott <westcott>
Mon, 18 Apr 2011 17:55:59 +0000 (17:55 +0000)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index 6bc5e178281bbdc2a993986280a9a366864c07eb..d3b9cfc04051882ed9f2260dfd5143b6192714c9 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -1980,8 +1980,8 @@
                                 GCC_OPTIMIZATION_LEVEL = 3;
                                 GCC_PREPROCESSOR_DEFINITIONS = (
                                         "MOTHUR_FILES=\"\\\"../release\\\"\"",
-                                       "VERSION=\"\\\"1.18.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"4/11/2011\\\"\"",
+                                       "VERSION=\"\\\"1.18.1\\\"\"",
+                                       "RELEASE_DATE=\"\\\"4/15/2011\\\"\"",
                                 );
                                 GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                 GCC_WARN_ABOUT_RETURN_TYPE = YES;
@@ -2013,8 +2013,8 @@
                                 GCC_MODEL_TUNING = "";
                                 GCC_OPTIMIZATION_LEVEL = 3;
                                 GCC_PREPROCESSOR_DEFINITIONS = (
-                                       "VERSION=\"\\\"1.18.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"4/11/2011\\\"\"",
+                                       "VERSION=\"\\\"1.18.1\\\"\"",
+                                       "RELEASE_DATE=\"\\\"4/15/2011\\\"\"",
                                 );
                                 GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                 GCC_WARN_ABOUT_RETURN_TYPE = YES;
@@ -2036,7 +2036,7 @@
                                         "-lreadline",
                                 );
                                 PREBINDING = NO;
-                               SDKROOT = macosx10.5;
+                               SDKROOT = macosx10.6;
                         };
                         name = Release;
                 };
diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp

index 77a8695a579d64b250cf98aa3852cf4975e7f90f..8c9417ad82cd0341a88aa216a84add2a701791e3 100644 (file)
--- a/chimeraslayer.cpp
+++ b/chimeraslayer.cpp
@@ -45,7 +45,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num
         }
  }
  //***************************************************************************************************************
-ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, string abunds, int k, int ms, int mms, int win, float div, 
+ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, string mode, int k, int ms, int mms, int win, float div, 
                                                          int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
         try {
                 fastafile = file; templateSeqs = readSeqs(fastafile);
@@ -65,78 +65,21 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string name, s
                 increment = inc;
                 numWanted = numw;
                 realign = r; 
-               includeAbunds = abunds;
                 trimChimera = trim;
                 
-               //read name file and create nameMapRank
-               readNameFile(name);
-               
                 decalc = new DeCalculator();    
                 
                 createFilter(templateSeqs, 0.0); //just removed columns where all seqs have a gap
                 
                 //run filter on template
-               for (int i = 0; i < templateSeqs.size(); i++) { runFilter(templateSeqs[i]);  }
-               
+               for (int i = 0; i < templateSeqs.size(); i++) { delete templateSeqs[i];  } templateSeqs.clear();
+                
         }
         catch(exception& e) {
                 m->errorOut(e, "ChimeraSlayer", "ChimeraSlayer");
                 exit(1);
         }
  }
-//***************************************************************************************************************
-int ChimeraSlayer::readNameFile(string name) {
-       try {
-               ifstream in;
-               m->openInputFile(name, in);
-               
-               int maxRank = 0;
-               int minRank = 10000000;
-               
-               while(!in.eof()){
-                       
-                       if (m->control_pressed) { in.close(); return 0; }
-                       
-                       string thisname, repnames;
-                       
-                       in >> thisname;         m->gobble(in);          //read from first column
-                       in >> repnames;                 //read from second column
-                       
-                       map<string, vector<string> >::iterator it = nameMapRank.find(thisname);
-                       if (it == nameMapRank.end()) {
-                               
-                               vector<string> splitRepNames;
-                               m->splitAtComma(repnames, splitRepNames);
-                               
-                               nameMapRank[thisname] = splitRepNames;  
-                               
-                               if (splitRepNames.size() > maxRank) { maxRank = splitRepNames.size(); }
-                               if (splitRepNames.size() < minRank) { minRank = splitRepNames.size(); }
-                               
-                       }else{  m->mothurOut(thisname + " is already in namesfile. I will use first definition."); m->mothurOutEndLine();  }
-                       
-                       m->gobble(in);
-               }
-               in.close();     
-               
-               //sanity check to make sure files match
-               for (int i = 0; i < templateSeqs.size(); i++) {
-                       map<string, vector<string> >::iterator it = nameMapRank.find(templateSeqs[i]->getName());
-                       
-                       if (it == nameMapRank.end()) { m->mothurOut("[ERROR]: " + templateSeqs[i]->getName() + " is not in namesfile, but is in fastafile. Every name in fasta file must be in first column of names file."); m->mothurOutEndLine(); m->control_pressed = true;  }
-               }
-               
-               if (maxRank == minRank) { m->mothurOut("[ERROR]: all sequences in namesfile have the same abundance, aborting."); m->mothurOutEndLine(); m->control_pressed = true;  }
-               
-               return 0;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ChimeraSlayer", "readNameFile");
-               exit(1);
-       }
-}
-
  //***************************************************************************************************************
  int ChimeraSlayer::doPrep() {
         try {
@@ -274,49 +217,9 @@ int ChimeraSlayer::doPrep() {
         }
  }
  //***************************************************************************************************************
-vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
+int ChimeraSlayer::getTemplate(Sequence* q) {
         try {
                 
-               vector<Sequence*> thisTemplate;
-               
-               int thisRank;
-               string thisName = q->getName();
-               map<string, vector<string> >::iterator itRank = nameMapRank.find(thisName); // you will find it because we already sanity checked
-               thisRank = (itRank->second).size();
-               
-               //create list of names we want to put into the template
-               set<string> namesToAdd;
-               for (itRank = nameMapRank.begin(); itRank != nameMapRank.end(); itRank++) {
-                       if (itRank->first != thisName) {
-                               if (includeAbunds == "greaterequal") {
-                                       if ((itRank->second).size() >= thisRank) {
-                                               //you are more abundant than me or equal to my abundance
-                                               for (int i = 0; i < (itRank->second).size(); i++) {
-                                                       namesToAdd.insert((itRank->second)[i]);
-                                               }
-                                       }
-                               }else if (includeAbunds == "greater") {
-                                       if ((itRank->second).size() > thisRank) {
-                                               //you are more abundant than me
-                                               for (int i = 0; i < (itRank->second).size(); i++) {
-                                                       namesToAdd.insert((itRank->second)[i]);
-                                               }
-                                       }
-                               }else if (includeAbunds == "all") {
-                                       //add everyone
-                                       for (int i = 0; i < (itRank->second).size(); i++) {
-                                               namesToAdd.insert((itRank->second)[i]);
-                                       }
-                               }
-                       }
-               }
-               
-               for (int i = 0; i < templateSeqs.size(); i++) {  
-                       if (namesToAdd.count(templateSeqs[i]->getName()) != 0) { 
-                               thisTemplate.push_back(templateSeqs[i]);
-                       }
-               }
-               
                 string  kmerDBNameLeft;
                 string  kmerDBNameRight;
                 
@@ -329,70 +232,70 @@ vector<Sequence*> ChimeraSlayer::getTemplate(Sequence* q) {
                         string leftTemplateFileName = templatePath + "left." + m->getRootName(m->getSimpleName(templateFileName));
                         databaseLeft = new KmerDB(leftTemplateFileName, kmerSize);      
  #ifdef USE_MPI
-                       for (int i = 0; i < thisTemplate.size(); i++) {
+                       for (int i = 0; i < userTemplate.size(); i++) {
                                 
-                               if (m->control_pressed) { return thisTemplate; } 
+                               if (m->control_pressed) { return 0; } 
                                 
-                               string leftFrag = thisTemplate[i]->getUnaligned();
+                               string leftFrag = userTemplate[i]->getUnaligned();
                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
                                 
-                               Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+                               Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
                                 databaseLeft->addSequence(leftTemp);    
                         }
                         databaseLeft->generateDB();
-                       databaseLeft->setNumSeqs(thisTemplate.size());
+                       databaseLeft->setNumSeqs(userTemplate.size());
                         
-                       for (int i = 0; i < thisTemplate.size(); i++) {
-                               if (m->control_pressed) { return thisTemplate;  } 
+                       for (int i = 0; i < userTemplate.size(); i++) {
+                               if (m->control_pressed) { return 0;  } 
                                 
-                               string rightFrag = thisTemplate[i]->getUnaligned();
+                               string rightFrag = userTemplate[i]->getUnaligned();
                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
                                 
-                               Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+                               Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
                                 databaseRight->addSequence(rightTemp);  
                         }
                         databaseRight->generateDB();
-                       databaseRight->setNumSeqs(thisTemplate.size());
+                       databaseRight->setNumSeqs(userTemplate.size());
                         
  #else  
                         
                         
-                       for (int i = 0; i < thisTemplate.size(); i++) {
+                       for (int i = 0; i < userTemplate.size(); i++) {
                                 
-                               if (m->control_pressed) { return thisTemplate; } 
+                               if (m->control_pressed) { return 0; } 
                                 
-                               string leftFrag = thisTemplate[i]->getUnaligned();
+                               string leftFrag = userTemplate[i]->getUnaligned();
                                 leftFrag = leftFrag.substr(0, int(leftFrag.length() * 0.33));
                                 
-                               Sequence leftTemp(thisTemplate[i]->getName(), leftFrag);
+                               Sequence leftTemp(userTemplate[i]->getName(), leftFrag);
                                 databaseLeft->addSequence(leftTemp);    
                         }
                         databaseLeft->generateDB();
-                       databaseLeft->setNumSeqs(thisTemplate.size());
+                       databaseLeft->setNumSeqs(userTemplate.size());
                                 
-                       for (int i = 0; i < thisTemplate.size(); i++) {
-                               if (m->control_pressed) { return thisTemplate; } 
+                       for (int i = 0; i < userTemplate.size(); i++) {
+                               if (m->control_pressed) { return 0; } 
                                         
-                               string rightFrag = thisTemplate[i]->getUnaligned();
+                               string rightFrag = userTemplate[i]->getUnaligned();
                                 rightFrag = rightFrag.substr(int(rightFrag.length() * 0.66));
                                         
-                               Sequence rightTemp(thisTemplate[i]->getName(), rightFrag);
+                               Sequence rightTemp(userTemplate[i]->getName(), rightFrag);
                                 databaseRight->addSequence(rightTemp);  
                         }
                         databaseRight->generateDB();
-                       databaseRight->setNumSeqs(thisTemplate.size());
+                       databaseRight->setNumSeqs(userTemplate.size());
  #endif 
                 }else if (searchMethod == "blast") {
                         
                         //generate blastdb
                         databaseLeft = new BlastDB(-1.0, -1.0, 1, -3);
  
-                       for (int i = 0; i < thisTemplate.size(); i++) { if (m->control_pressed) { return thisTemplate; }  databaseLeft->addSequence(*thisTemplate[i]);  }
+                       for (int i = 0; i < userTemplate.size(); i++) { if (m->control_pressed) { return 0; }  databaseLeft->addSequence(*userTemplate[i]);     }
                         databaseLeft->generateDB();
-                       databaseLeft->setNumSeqs(thisTemplate.size());
+                       databaseLeft->setNumSeqs(userTemplate.size());
                 }
                 
-               return thisTemplate;
+               return 0;
                 
         }
         catch(exception& e) {
@@ -407,6 +310,12 @@ ChimeraSlayer::~ChimeraSlayer() {
         if (templateFileName != "self") {
                 if (searchMethod == "kmer") {  delete databaseRight;  delete databaseLeft;  }   
                 else if (searchMethod == "blast") {  delete databaseLeft; }
+       }else {
+               //delete userTemplate
+               for (int i = 0; i < userTemplate.size(); i++) {
+                       delete userTemplate[i];
+               }
+               userTemplate.clear();
         }
  }
  //***************************************************************************************************************
@@ -421,7 +330,7 @@ void ChimeraSlayer::printHeader(ostream& out) {
  Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
         try {
                 Sequence* trim = NULL;
-               if (trimChimera) { trim = trimQuery; }
+               if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
                 
                 if (chimeraFlags == "yes") {
                         string chimeraFlag = "no";
@@ -448,13 +357,19 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc) {
                                                 }
                                                 trim->setAligned(newAligned);
                                         }
-                                               
                                 }
                         }
                         
                         printBlock(chimeraResults[0], chimeraFlag, out);
                         out << endl;
-               }else {  out << querySeq->getName() << "\tno" << endl;  }
+               }else {  
+                       out << querySeq->getName() << "\tno" << endl; 
+                       if (templateFileName == "self") {  
+                               Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
+               }
                 
                 return trim;
                 
@@ -553,7 +468,14 @@ Sequence* ChimeraSlayer::print(ostream& out, ostream& outAcc, data_results leftP
                         
                         printBlock(leftPiece, rightPiece, leftChimeric, rightChimeric, chimeraFlag, out);
                         out << endl;
-               }else {  out << querySeq->getName() << "\tno" << endl;  }
+               }else {  
+                       out << querySeq->getName() << "\tno" << endl;  
+                       if (templateFileName == "self") {  
+                               Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
+               }
                 
                 return trim;
                 
@@ -688,6 +610,12 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc, data_results lef
                                 
                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
                         delete buf;
+                       
+                       if (templateFileName == "self") {  //template=self: userTemplate grows as queries are processed
+                               Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
                 }
                 
                 
@@ -707,7 +635,7 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
                 string outputString = "";
                 
                 Sequence* trim = NULL;
-               if (trimChimera) { trim = trimQuery; }
+               if (trimChimera) { trim = new Sequence(trimQuery.getName(), trimQuery.getAligned()); }
                 
                 if (chimeraFlags == "yes") {
                         string chimeraFlag = "no";
@@ -766,6 +694,12 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
                         
                         MPI_File_write_shared(out, buf, length, MPI_CHAR, &status);
                         delete buf;
+                       
+                       if (templateFileName == "self") {  //template=self: userTemplate grows as queries are processed
+                               Sequence* temp = new Sequence(trimQuery.getName(), trimQuery.getAligned());
+                               runFilter(temp);
+                               userTemplate.push_back(temp);
+                       }
                 }
                 
                 return trim;
@@ -780,10 +714,9 @@ Sequence* ChimeraSlayer::print(MPI_File& out, MPI_File& outAcc) {
  //***************************************************************************************************************
  int ChimeraSlayer::getChimeras(Sequence* query) {
         try {
-               if (trimChimera) {
-                       trimQuery = new Sequence(query->getName(), query->getAligned());
-                       printResults.trimQuery = *trimQuery; 
-               }
+               
+               trimQuery.setName(query->getName()); trimQuery.setAligned(query->getAligned());
+               printResults.trimQuery = trimQuery; 
                 
                 chimeraFlags = "no";
                 printResults.flag = "no";
@@ -797,7 +730,7 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                 //you must create a template
                 vector<Sequence*> thisTemplate;
                 if (templateFileName != "self") { thisTemplate = templateSeqs; }
-               else { thisTemplate = getTemplate(query); } //fills this template and creates the databases
+               else { getTemplate(query);  thisTemplate = userTemplate; } //fills this template and creates the databases
                 
                 if (m->control_pressed) {  return 0;  }
                 
diff --git a/chimeraslayer.h b/chimeraslayer.h

index 3e76297b038b9eeafab2c04e5428a2091814a07d..cac96eb158a721ac5fe3d5c68a82d61979fd99e8 100644 (file)
--- a/chimeraslayer.h
+++ b/chimeraslayer.h
@@ -23,7 +23,7 @@ class ChimeraSlayer : public Chimera {
         
         public:
                 ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
-               ChimeraSlayer(string, string, bool, string, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
+               ChimeraSlayer(string, string, bool, string, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool);
  
                 ~ChimeraSlayer();
                 
@@ -41,16 +41,18 @@ class ChimeraSlayer : public Chimera {
                 
         private:
                 Sequence* querySeq;
-               Sequence* trimQuery;
+               Sequence trimQuery;
                 DeCalculator* decalc;
                 map<int, int>  spotMap;
                 Database* databaseRight;
                 Database* databaseLeft;
-               map<string, vector<string> > nameMapRank;  //sequence name to rank so you can construct a template of the abundant sequences if the user uses itself as template
+               vector<Sequence*> userTemplate;  //when template=self, the query file is sorted from most abundant to least abundant
+                                                                                //userTemplate grows as the query file is processed by adding sequences that are not chimeric
+               set<string> namesOfChimericSeqs; //only used when template=self
                 
                 vector<data_struct>  chimeraResults;
                 data_results printResults;
-               string chimeraFlags, searchMethod, fastafile, includeAbunds;
+               string chimeraFlags, searchMethod, fastafile;
                 bool realign, trimChimera;
                 int window, numWanted, kmerSize, match, misMatch, minSim, minCov, minBS, minSNP, parents, iters, increment;
                 float divR;
@@ -59,8 +61,8 @@ class ChimeraSlayer : public Chimera {
                 void printBlock(data_results, data_results, bool, bool, string, ostream&);
                 string getBlock(data_struct, string);
                 string getBlock(data_results, data_results, bool, bool, string);
-               int readNameFile(string);
-               vector<Sequence*> getTemplate(Sequence*);
+               //int readNameFile(string);
+               int getTemplate(Sequence*);
                 
  };
  
diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp

index 2e897988a6d5538dbf7ef4e0ebd26564aa174fd8..1a9c1fb5ca3d56896e605803bd98d472428f9f01 100644 (file)
--- a/chimeraslayercommand.cpp
+++ b/chimeraslayercommand.cpp
@@ -26,7 +26,6 @@ vector<string> ChimeraSlayerCommand::setParameters(){
                 CommandParameter pminsnp("minsnp", "Number", "", "100", "", "", "",false,false); parameters.push_back(pminsnp);
                 CommandParameter pminbs("minbs", "Number", "", "90", "", "", "",false,false); parameters.push_back(pminbs);
                 CommandParameter psearch("search", "Multiple", "kmer-blast-distance", "distance", "", "", "",false,false); parameters.push_back(psearch);
-               CommandParameter pinclude("include", "Multiple", "greater-greaterequal-all", "greater", "", "", "",false,false); parameters.push_back(pinclude);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                 CommandParameter prealign("realign", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(prealign);
                 CommandParameter ptrim("trim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptrim);
@@ -59,7 +58,6 @@ string ChimeraSlayerCommand::getHelpString(){
                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
-               helpString += "The include parameter is used when template=self and allows you to choose which sequences will make up the \"template\". Options are greater, greaterequal and all, default=greater, meaning sequences with greater abundance than the query sequence. \n";
                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
  #ifdef USE_MPI
                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
@@ -282,9 +280,6 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                         m->setProcessors(temp);
                         convert(temp, processors);
                         
-                       includeAbunds = validParameter.validFile(parameters, "include", false);         if (includeAbunds == "not found") { includeAbunds = "greater"; }
-                       if ((includeAbunds != "greater") && (includeAbunds != "greaterequal") && (includeAbunds != "all")) { includeAbunds = "greater"; m->mothurOut("Invalid include setting. options are greater, greaterequal or all. using greater."); m->mothurOutEndLine(); }
-                       
                         temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
                         convert(temp, ksize);
                                                 
@@ -359,10 +354,11 @@ int ChimeraSlayerCommand::execute(){
                         if (templatefile != "self") { //you want to run slayer with a refernce template
                                 chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);     
                         }else {
+                               if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; }
+                               string nameFile = "";
                                 if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one
-                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFileNames[s], search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);    
+                                       nameFile = nameFileNames[s];
                                 }else {
-                                       
                                         m->mothurOutEndLine(); m->mothurOut("No namesfile given, running unique.seqs command to generate one."); m->mothurOutEndLine(); m->mothurOutEndLine();
                                         
                                         //use unique.seqs to create new name and fastafile
@@ -379,11 +375,18 @@ int ChimeraSlayerCommand::execute(){
                                         
                                         m->mothurOut("/******************************************/"); m->mothurOutEndLine(); 
                                         
-                                       string nameFile = filenames["name"][0];
+                                       nameFile = filenames["name"][0];
                                         fastaFileNames[s] = filenames["fasta"][0];
-                       
-                                       chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFile, search, includeAbunds, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);    
                                 }
+                               
+                               //sort fastafile by abundance, returns new sorted fastafile name
+                               m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); 
+                               fastaFileNames[s] = sortFastaFile(fastaFileNames[s], nameFile);
+                               m->mothurOut("Done."); m->mothurOutEndLine();
+                               
+                               if (m->control_pressed) {  for (int j = 0; j < outputNames.size(); j++) {       remove(outputNames[j].c_str()); }  return 0;    }
+
+                               chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, nameFile, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign);   
                         }
                                 
                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it                               
@@ -935,6 +938,57 @@ int ChimeraSlayerCommand::divideInHalf(Sequence querySeq, string& leftQuery, str
                 exit(1);
         }
  }
+/**************************************************************************************************/
+
+string ChimeraSlayerCommand::sortFastaFile(string fastaFile, string nameFile) { //rewrites fastaFile in place ordered by compareSeqPriorityNodes; returns fastaFile, or "" on interrupt/error
+       try {
+               
+               //read through fastafile and store info, keyed by sequence name -> aligned sequence
+               map<string, string> seqs;
+               ifstream in;
+               m->openInputFile(fastaFile, in);
+               
+               while (!in.eof()) {
+                       
+                       if (m->control_pressed) { in.close(); return ""; }
+                       
+                       Sequence seq(in); m->gobble(in);
+                       seqs[seq.getName()] = seq.getAligned(); //a repeated sequence name overwrites the earlier entry
+               }
+               
+               in.close();
+               
+               //read namefile, filling nameMapCount with one node per namefile line whose first column is found in seqs
+               vector<seqPriorityNode> nameMapCount;
+               int error = m->readNames(nameFile, nameMapCount, seqs);
+               
+               if (m->control_pressed) { return ""; }
+               
+               if (error == 1) { m->control_pressed = true; return ""; } //readNames already printed the error; raise control_pressed so the caller's check aborts
+               if (seqs.size() != nameMapCount.size()) { m->mothurOut( "The number of sequences in your fastafile does not match the number of sequences in your namefile, aborting."); m->mothurOutEndLine(); m->control_pressed = true; return ""; }
+
+               sort(nameMapCount.begin(), nameMapCount.end(), compareSeqPriorityNodes); //highest numIdentical first
+               
+               string newFasta = fastaFile + ".temp";
+               ofstream out;
+               m->openOutputFile(newFasta, out);
+               
+               //print new file in order of decreasing numIdentical, one ">name" line followed by its sequence
+               for (int i = 0; i < nameMapCount.size(); i++) {
+                       out << ">" << nameMapCount[i].name << endl << nameMapCount[i].seq << endl;
+               }
+               out.close();
+               
+               rename(newFasta.c_str(), fastaFile.c_str()); //replace the input fastafile with the sorted copy; NOTE(review): rename's return value is not checked
+                               
+               return fastaFile;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ChimeraSlayerCommand", "sortFastaFile");
+               exit(1);
+       }
+}
  
  /**************************************************************************************************/
  
diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h

index 02ebe722a522d763c309bab0ea2215f0ceac6f85..6ca0310d109c117cf5205ac0725e79338540676a 100644 (file)
--- a/chimeraslayercommand.h
+++ b/chimeraslayercommand.h
@@ -14,7 +14,6 @@
  #include "command.hpp"
  #include "chimera.h"
  
-
  /***********************************************************/
  
  class ChimeraSlayerCommand : public Command {
@@ -45,13 +44,14 @@ private:
         int driver(linePair*, string, string, string, string);
         int createProcesses(string, string, string, string);
         int divideInHalf(Sequence, string&, string&);
+       string sortFastaFile(string, string);
                 
         #ifdef USE_MPI
         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
         #endif
  
         bool abort, realign, trim, trimera;
-       string fastafile, templatefile, outputDir, search, namefile, includeAbunds;
+       string fastafile, templatefile, outputDir, search, namefile;
         int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
         float divR;
         Chimera* chimera;
diff --git a/mothur.h b/mothur.h

index 190254a96e4c7d7830f8405035933efda4b003ea..53074eb1e146f28af23a87e6888d57bfaa5184fa 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -125,6 +125,15 @@ struct distlinePair {
         int end;
         
  };
+/************************************************************/
+struct seqPriorityNode { //a named sequence plus the count that compareSeqPriorityNodes orders it by
+       int numIdentical; //sort key; NOTE(review): left uninitialized by the default constructor
+       string seq; //sequence text
+       string name; //sequence name
+       seqPriorityNode() {}
+       seqPriorityNode(int n, string s, string nm) : numIdentical(n), seq(s), name(nm) {} //arguments are (numIdentical, seq, name)
+       ~seqPriorityNode() {}
+};
  /***************************************************************/
  struct spearmanRank {
         string name;
@@ -138,6 +147,11 @@ inline bool compareSpearman(spearmanRank left, spearmanRank right){
         return (left.score > right.score);      
  } 
  //********************************************************************************************************************
+//sorts highest to lowest by numIdentical; seq and name are not compared
+inline bool compareSeqPriorityNodes(seqPriorityNode left, seqPriorityNode right){ //NOTE(review): both nodes are taken by value, so every comparison copies their strings
+       return (left.numIdentical > right.numIdentical);        
+} 
+//********************************************************************************************************************
  //sorts lowest to highest
  inline bool compareSpearmanReverse(spearmanRank left, spearmanRank right){
         return (left.score < right.score);      
diff --git a/mothurout.cpp b/mothurout.cpp

index 8fde2fd105d2b100715cc244a6d9a16bbd9eb353..65296221759fa879e591bb0967b0f20872106adc 100644 (file)
--- a/mothurout.cpp
+++ b/mothurout.cpp
@@ -1217,6 +1217,42 @@ map<string, int> MothurOut::readNames(string namefile) {
                 exit(1);
         }
  }
+/**********************************************************************************************************************/
+int MothurOut::readNames(string namefile, vector<seqPriorityNode>& nameVector, map<string, string>& fastamap) { //appends a node to nameVector for each namefile record found in fastamap; returns 1 if any first-column name is missing from fastamap, else 0
+       try {
+               int error = 0;
+               
+               //open input file
+               ifstream in;
+               openInputFile(namefile, in);
+               
+               while (!in.eof()) {
+                       if (control_pressed) { break; } //stop reading on interrupt; nodes appended so far stay in nameVector
+                       
+                       string firstCol, secondCol;
+                       in >> firstCol >> secondCol; gobble(in); //each record is two whitespace-separated columns
+                       
+                       int num = getNumNames(secondCol); //presumably the number of names listed in the second column - confirm against getNumNames
+                       
+                       map<string, string>::iterator it = fastamap.find(firstCol);
+                       if (it == fastamap.end()) {
+                               error = 1; //record the failure but keep reading, so every missing name is reported
+                               mothurOut("[ERROR]: " + firstCol + " is not in your fastafile, but is in your namesfile, please correct."); mothurOutEndLine();
+                       }else {
+                               seqPriorityNode temp(num, it->second, firstCol); //(numIdentical, seq, name)
+                               nameVector.push_back(temp);
+                       }
+               }
+               in.close();
+               
+               return error;
+               
+       }
+       catch(exception& e) {
+               errorOut(e, "MothurOut", "readNames");
+               exit(1);
+       }
+}
  
  /***********************************************************************/
  
diff --git a/mothurout.h b/mothurout.h

index 8138f28e46115b8d527270a80d904e68ebbc322a..9446bf9bd5c2b3c150b02944c7d8f87aeef95dca 100644 (file)
--- a/mothurout.h
+++ b/mothurout.h
@@ -65,6 +65,7 @@ class MothurOut {
                 void gobble(istream&);
                 void gobble(istringstream&);
                 map<string, int> readNames(string);
+               int readNames(string, vector<seqPriorityNode>&, map<string, string>&);
                 
                 //searchs and checks
                 bool checkReleaseVersion(ifstream&, string);
author	westcott <westcott>
	Mon, 18 Apr 2011 17:55:59 +0000 (17:55 +0000)
committer	westcott <westcott>
	Mon, 18 Apr 2011 17:55:59 +0000 (17:55 +0000)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
chimeraslayer.cpp		patch \| blob \| history
chimeraslayer.h		patch \| blob \| history
chimeraslayercommand.cpp		patch \| blob \| history
chimeraslayercommand.h		patch \| blob \| history
mothur.h		patch \| blob \| history
mothurout.cpp		patch \| blob \| history
mothurout.h		patch \| blob \| history