fixed bug with shhh.flow from file path name in write functions, added "smart" featur...

author westcott <westcott>

Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)

committer westcott <westcott>

Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)
author westcott <westcott>
Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)
committer westcott <westcott>
Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)
diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj

index c7c7cee1ad0c4a9ad5b1e9bfb02ba09c6db0b3fb..99454a9b47e4150e39ab5d35da76ec5e93ff5244 100644 (file)
--- a/Mothur.xcodeproj/project.pbxproj
+++ b/Mothur.xcodeproj/project.pbxproj
@@ -2316,8 +2316,8 @@
                                 GCC_OPTIMIZATION_LEVEL = 3;
                                 GCC_PREPROCESSOR_DEFINITIONS = (
                                         "MOTHUR_FILES=\"\\\"../release\\\"\"",
-                                       "VERSION=\"\\\"1.22.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"10/12/2011\\\"\"",
+                                       "VERSION=\"\\\"1.23.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"1/9/2012\\\"\"",
                                 );
                                 "GCC_VERSION[arch=*]" = "";
                                 GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
diff --git a/bayesian.cpp b/bayesian.cpp

index b1f2c4cf422656bb603f1691b67b79b57305490e..f7ea6e4351868a20a191169b995e94faff6fa053 100644 (file)
--- a/bayesian.cpp
+++ b/bayesian.cpp
@@ -12,12 +12,13 @@
  #include "phylosummary.h"
  #include "referencedb.h"
  /**************************************************************************************************/
-Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid) : 
-Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
+Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f) : 
+Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
         try {
                 ReferenceDB* rdb = ReferenceDB::getInstance();
                 
                 threadID = tid;
+               flip = f;
                 string baseName = tempFile;
                         
                 if (baseName == "saved") { baseName = rdb->getSavedReference(); }
@@ -78,13 +79,14 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                         if (tfile == "saved") { 
                                 m->mothurOutEndLine();  m->mothurOut("Using probabilties from " + rdb->getSavedTaxonomy() + " that are saved in memory...    ");        cout.flush();; 
                                 wordGenusProb = rdb->wordGenusProb;
+                               WordPairDiffArr = rdb->WordPairDiffArr;
                         }else {
                                 m->mothurOut("Reading template probabilities...     "); cout.flush();
                                 readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
                         }       
                         
                         //save probabilities
-                       if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
+                       if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
                 }else{
                 
                         //create search database and names vector
@@ -108,11 +110,12 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                         
                                 //initialze probabilities
                                 wordGenusProb.resize(numKmers);
+                               WordPairDiffArr.resize(numKmers);
                         //cout << numKmers << '\t' << genusNodes.size() << endl;
                                 for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
                         //cout << numKmers << '\t' << genusNodes.size() << endl;        
-                               //ofstream out;
-                               //ofstream out2;
+                               ofstream out;
+                               ofstream out2;
                                 
                                 #ifdef USE_MPI
                                         int pid;
@@ -122,17 +125,17 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                 #endif
  
                                 
-                               //m->openOutputFile(probFileName, out);
+                               m->openOutputFile(probFileName, out);
                                 
                                 //output mothur version
-                               //out << "#" << m->getVersion() << endl;
+                               out << "#" << m->getVersion() << endl;
                                 
-                               //out << numKmers << endl;
+                               out << numKmers << endl;
                                 
-                               //m->openOutputFile(probFileName2, out2);
+                               m->openOutputFile(probFileName2, out2);
                                 
                                 //output mothur version
-                               //out2 << "#" << m->getVersion() << endl;
+                               out2 << "#" << m->getVersion() << endl;
                                 
                                 #ifdef USE_MPI
                                         }
@@ -149,7 +152,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                                 if (pid == 0) {  
                                         #endif
  
-                                       //out << i << '\t';
+                                       out << i << '\t';
                                         
                                         #ifdef USE_MPI
                                                 }
@@ -168,7 +171,9 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                         
                                         //probabilityInTemplate = (# of seqs with that word in template + 0.50) / (total number of seqs in template + 1);
                                         float probabilityInTemplate = (seqsWithWordi.size() + 0.50) / (float) (names.size() + 1);
-                                       
+                                       diffPair tempProb(log(probabilityInTemplate), 0.0);
+                                       WordPairDiffArr[i] = tempProb;
+                                               
                                         int numNotZero = 0;
                                         for (int k = 0; k < genusNodes.size(); k++) {
                                                 //probabilityInThisTaxonomy = (# of seqs with that word in this taxonomy + probabilityInTemplate) / (total number of seqs in this taxonomy + 1);
@@ -184,7 +189,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                                                 if (pid == 0) {  
                                                         #endif
  
-                                                       //out << k << '\t' << wordGenusProb[i][k] << '\t'; 
+                                                       out << k << '\t' << wordGenusProb[i][k] << '\t' ; 
                                                         
                                                         #ifdef USE_MPI
                                                                 }
@@ -200,8 +205,8 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                                 if (pid == 0) {  
                                         #endif
                                         
-                                       //out << endl;
-                                       //out2 << probabilityInTemplate << '\t' << numNotZero << endl;
+                                       out << endl;
+                                       out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl;
                                         
                                         #ifdef USE_MPI
                                                 }
@@ -214,8 +219,8 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                         if (pid == 0) {  
                                 #endif
                                 
-                               //out.close();
-                               //out2.close();
+                               out.close();
+                               out2.close();
                                 
                                 #ifdef USE_MPI
                                         }
@@ -228,10 +233,15 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                 phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
                                 
                                 //save probabilities
-                               if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
+                               if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
                         }
                 }
-       
+               
+               generateWordPairDiffArr();
+               
+               //save probabilities
+               if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
+               
                 m->mothurOut("DONE."); m->mothurOutEndLine();
                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); m->mothurOutEndLine();
         }
@@ -258,13 +268,13 @@ string Bayesian::getTaxonomy(Sequence* seq) {
         try {
                 string tax = "";
                 Kmer kmer(kmerSize);
+               flipped = false;
                 
                 //get words contained in query
                 //getKmerString returns a string where the index in the string is hte kmer number 
                 //and the character at that index can be converted to be the number of times that kmer was seen
-               
                 string queryKmerString = kmer.getKmerString(seq->getUnaligned()); 
-
+               
                 vector<int> queryKmers;
                 for (int i = 0; i < queryKmerString.length()-1; i++) {  // the -1 is to ignore any kmer with an N in it
                         if (queryKmerString[i] != '!') { //this kmer is in the query
@@ -272,7 +282,22 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                         }
                 }
                 
-               if (queryKmers.size() == 0) {  m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); return "bad seq"; }
+               //if user wants to test reverse compliment and its reversed use that instead
+               if (flip) {     
+                       if (isReversed(queryKmers)) { 
+                               flipped = true;
+                               seq->reverseComplement(); 
+                               queryKmerString = kmer.getKmerString(seq->getUnaligned()); 
+                               queryKmers.clear();
+                               for (int i = 0; i < queryKmerString.length()-1; i++) {  // the -1 is to ignore any kmer with an N in it
+                                       if (queryKmerString[i] != '!') { //this kmer is in the query
+                                               queryKmers.push_back(i);
+                                       }
+                               }
+                       }  
+               }
+               
+               if (queryKmers.size() == 0) {  m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); simpleTax = "unknown;";  return "unknown;"; }
                 
                 
                 int index = getMostProbableTaxonomy(queryKmers);
@@ -283,7 +308,7 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                 int numToSelect = queryKmers.size() / 8;
         
                 tax = bootstrapResults(queryKmers, index, numToSelect);
-                               
+               
                 return tax;     
         }
         catch(exception& e) {
@@ -366,7 +391,8 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                                 seqTax = phyloTree->get(seqTax.parent);
                 }
                 
-               if (confidenceTax == "") { confidenceTax = "unclassified;"; simpleTax = "unclassified;"; }
+               if (confidenceTax == "") { confidenceTax = "unknown;"; simpleTax = "unknown;";  }
+       
                 return confidenceTax;
                 
         }
@@ -412,6 +438,46 @@ int Bayesian::getMostProbableTaxonomy(vector<int> queryKmer) {
                 exit(1);
         }
  }
+//********************************************************************************************************************
+//if it is more probable that the reverse compliment kmers are in the template, then we assume the sequence is reversed.
+bool Bayesian::isReversed(vector<int>& queryKmers){
+       try{
+               bool reversed = false;
+               float prob = 0;
+               float reverseProb = 0;
+                
+        for (int i = 0; i < queryKmers.size(); i++){
+            int kmer = queryKmers[i];
+            if (kmer >= 0){
+                prob += WordPairDiffArr[kmer].prob;
+                               reverseProb += WordPairDiffArr[kmer].reverseProb;
+            }
+        }
+               
+        if (reverseProb > prob){ reversed = true; }
+       
+               return reversed;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bayesian", "isReversed");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+int Bayesian::generateWordPairDiffArr(){
+       try{
+               Kmer kmer(kmerSize);
+               for (int i = 0; i < WordPairDiffArr.size(); i++) {
+                       int reversedWord = kmer.getReverseKmerNumber(i);
+                       WordPairDiffArr[i].reverseProb = WordPairDiffArr[reversedWord].prob;
+               }
+               
+               return 0;
+       }catch(exception& e) {
+               m->errorOut(e, "Bayesian", "generateWordPairDiffArr");
+               exit(1);
+       }
+}
  /*************************************************************************************************
  map<string, int> Bayesian::parseTaxMap(string newTax) {
         try{
@@ -515,7 +581,8 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                         int kmer, name;  
                         vector<int> numbers; numbers.resize(numKmers);
                         float prob;
-                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);    
+                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+                       WordPairDiffArr.resize(numKmers);
                         
                         //read version
                         length = positions[1] - positions[0];
@@ -537,7 +604,10 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                                 delete buf4;
  
                                 istringstream iss (tempBuf,istringstream::in);
-                               iss >> zeroCountProb[i] >> numbers[i];  
+                               float probTemp;
+                               iss >> zeroCountProb[i] >> numbers[i] >> probTemp; 
+                               WordPairDiffArr[i].prob = tempProb;
+
                         }
                         
                         MPI_File_close(&inMPI);
@@ -585,13 +655,16 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                         int kmer, name, count;  count = 0;
                         vector<int> num; num.resize(numKmers);
                         float prob;
-                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);            
+                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);    
+                       WordPairDiffArr.resize(numKmers);
                         
                         //read version
                         string line2 = m->getline(inNum); m->gobble(inNum);
+                       float probTemp;
                 //cout << threadID << '\t' << line2 << '\t' << this << endl;    
                         while (inNum) {
-                               inNum >> zeroCountProb[count] >> num[count];  
+                               inNum >> zeroCountProb[count] >> num[count] >> probTemp; 
+                               WordPairDiffArr[count].prob = probTemp;
                                 count++;
                                 m->gobble(inNum);
                                 //cout << threadID << '\t' << count << endl;
diff --git a/bayesian.h b/bayesian.h

index 1cf5145132eab009b9d807f69de655fc26d59a40..7c884337495dbb1e0213d1171a54ca20e0eb932c 100644 (file)
--- a/bayesian.h
+++ b/bayesian.h
@@ -18,7 +18,7 @@
  class Bayesian : public Classify {
         
  public:
-       Bayesian(string, string, string, int, int, int, int);
+       Bayesian(string, string, string, int, int, int, int, bool);
         ~Bayesian();
         
         string getTaxonomy(Sequence*);
@@ -30,12 +30,17 @@ private:
         vector<int> genusTotals;
         vector<int> genusNodes;  //indexes in phyloTree where genus' are located
         
+       vector<diffPair> WordPairDiffArr; 
+       
         int kmerSize, numKmers, confidenceThreshold, iters;
         
         string bootstrapResults(vector<int>, int, int);
         int getMostProbableTaxonomy(vector<int>);
         void readProbFile(ifstream&, ifstream&, string, string);
         bool checkReleaseDate(ifstream&, ifstream&, ifstream&, ifstream&);
+       bool isReversed(vector<int>&);
+       vector<int> createWordIndexArr(Sequence*);
+       int generateWordPairDiffArr();
         
  };
  
diff --git a/binsequencecommand.cpp b/binsequencecommand.cpp

index 4977cad1f769690e300013e89c936eefb4228e84..2117daff4f2558e26bcf0008666f87bee0180f4d 100644 (file)
--- a/binsequencecommand.cpp
+++ b/binsequencecommand.cpp
@@ -167,7 +167,7 @@ BinSeqCommand::BinSeqCommand(string option) {
                         }
                         
                         namesfile = validParameter.validFile(parameters, "name", true);
-                       if (namesfile == "not open") { abort = true; }  
+                       if (namesfile == "not open") { namesfile = ""; abort = true; }  
                         else if (namesfile == "not found") { namesfile = ""; }
                         else {  m->setNameFile(namesfile); }
  
@@ -176,6 +176,11 @@ BinSeqCommand::BinSeqCommand(string option) {
                         else if (groupfile == "not found") { groupfile = ""; }
                         else { m->setGroupFile(groupfile); }
                         
+                       if (namesfile == ""){
+                               vector<string> files; files.push_back(fastafile); 
+                               parser.getNameFile(files);
+                       }
+                       
                 }
         }
         catch(exception& e) {
diff --git a/classify.cpp b/classify.cpp

index c92d9fdc6526788df19d9e6f487ffb641bbaca44..3bf0d572f88d0ad7ffd76989d71f10807e8cc659 100644 (file)
--- a/classify.cpp
+++ b/classify.cpp
@@ -61,7 +61,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                         names.push_back(temp.getName());
                                         database->addSequence(temp);    
                                 }
-//                             database->generateDB();
+                               database->generateDB();
                         }else if ((method == "kmer") && (!needToGenerate)) {    
                                 ifstream kmerFileTest(kmerDBName.c_str());
                                 database->readKmerDB(kmerFileTest);     
@@ -150,7 +150,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                         }
                                 }
                                 
-//                             database->generateDB();
+                               database->generateDB();
                                 MPI_File_close(&inMPI);
                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                 #else
@@ -200,7 +200,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                 }
                                 fastaFile.close();
  
-//                             database->generateDB();
+                               database->generateDB();
                                 
                         }else if ((method == "kmer") && (!needToGenerate)) {    
                                 ifstream kmerFileTest(kmerDBName.c_str());
@@ -223,9 +223,9 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                         database->setNumSeqs(names.size());
                         
                         //sanity check
-                       //bool okay = phyloTree->ErrorCheck(names);
+                       bool okay = phyloTree->ErrorCheck(names);
                         
-                       //if (!okay) { m->control_pressed = true; }
+                       if (!okay) { m->control_pressed = true; }
                         
                         m->mothurOut("DONE."); m->mothurOutEndLine();
                         m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
@@ -238,7 +238,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
         }
  }
  /**************************************************************************************************/
-Classify::Classify() {         m = MothurOut::getInstance();   database = NULL;        }
+Classify::Classify() {         m = MothurOut::getInstance();   database = NULL;        flipped=false; }
  /**************************************************************************************************/
  
  int Classify::readTaxonomy(string file) {
diff --git a/classify.h b/classify.h

index 2e209f562108121e8a95b3c4470252537813baed..4e0354782e5063e2a52268e88a1412b40db3c28e 100644 (file)
--- a/classify.h
+++ b/classify.h
@@ -30,6 +30,7 @@ public:
         virtual ~Classify(){};
         virtual string getTaxonomy(Sequence*) = 0;
         virtual string getSimpleTax()  { return simpleTax;      }
+       virtual bool getFlipped()  { return flipped;    }
         virtual void generateDatabaseAndNames(string, string, string, int, float, float, float, float);
         virtual void setDistName(string s) {} //for knn, so if distance method is selected with knn you can create the smallest distance file in the right place.
         
@@ -45,6 +46,7 @@ protected:
         string taxFile, templateFile, simpleTax;
         vector<string> names;
         int threadID;
+       bool flip, flipped;
         
         int readTaxonomy(string);
         vector<string> parseTax(string);
diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp

index e28961b74888ee5bdefb9eb022fd11afe328e07f..c8896375b016ef6c45462351a18e8e520bfe75c2 100644 (file)
--- a/classifyotucommand.cpp
+++ b/classifyotucommand.cpp
@@ -182,7 +182,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                         else if (refTaxonomy == "not open") { abort = true; }
         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }   
+                       if (namefile == "not open") { namefile = ""; abort = true; }    
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
@@ -214,6 +214,11 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                         
                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
                         
+                       if (namefile == ""){
+                               vector<string> files; files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
         }
         catch(exception& e) {
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp

index 9ee7b5e946a7f5819cbac94bab7efe8c0cb71aef..328cd58f1f32bc265de857de4ede5aa557c2cd94 100644 (file)
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -27,6 +27,7 @@ vector<string> ClassifySeqsCommand::setParameters(){
                 CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
                 CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
                 CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
+               //CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
                 CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
                 CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
@@ -69,6 +70,7 @@ string ClassifySeqsCommand::getHelpString(){
                 helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n";
                 helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
                 helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n";
+               //helpString += "The flip parameter allows you shut off mothur's   The default is T.\n";
                 helpString += "The classify.seqs command should be in the following format: \n";
                 helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
                 helpString += "Example classify.seqs(fasta=amazon.fasta, reference=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n";
@@ -89,6 +91,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(){
                 setParameters();
                 vector<string> tempOutNames;
                 outputTypes["taxonomy"] = tempOutNames;
+               outputTypes["accnos"] = tempOutNames;
                 outputTypes["taxsummary"] = tempOutNames;
                 outputTypes["matchdist"] = tempOutNames;
         }
@@ -126,6 +129,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                         outputTypes["taxonomy"] = tempOutNames;
                         outputTypes["taxsummary"] = tempOutNames;
                         outputTypes["matchdist"] = tempOutNames;
+                       outputTypes["accnos"] = tempOutNames;
                         
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -440,15 +444,24 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                         temp = validParameter.validFile(parameters, "probs", false);            if (temp == "not found"){       temp = "true";                  }
                         probs = m->isTrue(temp);
                         
+                       //temp = validParameter.validFile(parameters, "flip", false);                   if (temp == "not found"){       temp = "T";                             }
+                       //flip = m->isTrue(temp); 
+                       flip = true;
+                       
                         temp = validParameter.validFile(parameters, "iters", false);            if (temp == "not found") { temp = "100";                        }
                         m->mothurConvert(temp, iters); 
  
-
                         
                         if ((method == "bayesian") && (search != "kmer"))  { 
                                 m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
                                 search = "kmer";
                         }
+                       
+                       if (namefileNames.size() == 0){
+                               vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]); 
+                               parser.getNameFile(files);
+                       }
+                       
                 }
                 
         }
@@ -470,12 +483,12 @@ int ClassifySeqsCommand::execute(){
         try {
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                 
-               if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());           }
+               if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);             }
                 else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
                 else {
                         m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
                         m->mothurOutEndLine();
-                       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());   
+                       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);     
                 }
                 
                 if (m->control_pressed) { delete classify; return 0; }
@@ -494,6 +507,7 @@ int ClassifySeqsCommand::execute(){
                 
                         if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
                         string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
+                       string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
                         string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
                         string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
                         
@@ -503,6 +517,7 @@ int ClassifySeqsCommand::execute(){
                         }
                         
                         outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
+                       outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
                         outputNames.push_back(taxSummary);      outputTypes["taxsummary"].push_back(taxSummary);
                         
                         int start = time(NULL);
@@ -521,6 +536,7 @@ int ClassifySeqsCommand::execute(){
                                 MPI_File inMPI;
                                 MPI_File outMPINewTax;
                                 MPI_File outMPITempTax;
+                               MPI_File outMPIAcc;
                                                         
                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
                                 int inMode=MPI_MODE_RDONLY; 
@@ -530,6 +546,9 @@ int ClassifySeqsCommand::execute(){
                                 
                                 char outTempTax[1024];
                                 strcpy(outTempTax, tempTaxonomyFile.c_str());
+                       
+                               char outAcc[1024];
+                               strcpy(outAcc, newaccnosFile.c_str());
                                 
                                 char inFileName[1024];
                                 strcpy(inFileName, fastaFileNames[s].c_str());
@@ -537,8 +556,9 @@ int ClassifySeqsCommand::execute(){
                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
                                 MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax);
                                 MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax);
+                               MPI_File_open(MPI_COMM_WORLD, outAcc, outMode, MPI_INFO_NULL, &outMPIAcc);
                                 
-                               if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify;  return 0;  }
+                               if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);  MPI_File_close(&outMPIAcc);   MPI_File_close(&outMPITempTax);  delete classify;  return 0;  }
                                 
                                 if (pid == 0) { //you are the root process 
                                         
@@ -557,9 +577,9 @@ int ClassifySeqsCommand::execute(){
                                         
                                 
                                         //align your part
-                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, outMPIAcc, MPIPos);
                                         
-                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
+                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax); MPI_File_close(&outMPIAcc);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
                                         
                                         for (int i = 1; i < processors; i++) {
                                                 int done;
@@ -577,9 +597,9 @@ int ClassifySeqsCommand::execute(){
                                         
                                         
                                         //align your part
-                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, outMPIAcc, MPIPos);
                                         
-                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
+                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);  MPI_File_close(&outMPIAcc);  MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
  
                                         int done = 0;
                                         MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
@@ -589,6 +609,7 @@ int ClassifySeqsCommand::execute(){
                                 MPI_File_close(&inMPI);
                                 MPI_File_close(&outMPINewTax);
                                 MPI_File_close(&outMPITempTax);
+                               MPI_File_close(&outMPIAcc); 
                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                 
  #else
@@ -613,16 +634,19 @@ int ClassifySeqsCommand::execute(){
                         }
  #endif
                         if(processors == 1){
-                               numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+                               numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]);
                         }else{
-                               numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); 
+                               numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); 
                         }
  #endif
+                       
+                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); }
  
                 m->mothurOutEndLine();
                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
                 start = time(NULL);
-
+               
+               
  
                 #ifdef USE_MPI  
                         if (pid == 0) {  //this part does not need to be paralellized
@@ -744,6 +768,12 @@ int ClassifySeqsCommand::execute(){
                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                 }
                 
+               current = "";
+               itTypes = outputTypes.find("accnos");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+               }
+               
                 delete classify;
                 
                 return 0;
@@ -785,7 +815,7 @@ string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) {
  
  /**************************************************************************************************/
  
-int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
+int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string accnos, string filename) {
         try {
                 
                 int num = 0;
@@ -802,7 +832,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                 process++;
                         }else if (pid == 0){
-                               num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
+                               num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", filename);
  
                                 //pass numSeqs to parent
                                 ofstream out;
@@ -820,7 +850,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                 }
                 
                 //parent does its part
-               num = driver(lines[0], taxFileName, tempTaxFile, filename);
+               num = driver(lines[0], taxFileName, tempTaxFile, accnos, filename);
                 
                 //force parent to wait until all the processes are done
                 for (int i=0;i<processIDS.size();i++) { 
@@ -851,7 +881,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                         string extension = "";
                         if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                         
-                       classifyData* tempclass = new classifyData(probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i);
+                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flipThreshold);
                         pDataArray.push_back(tempclass);
                         
                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
@@ -861,7 +891,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                 }
                 
                 //parent does its part
-               num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", filename);
+               num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", accnos + toString(processors-1) + ".temp", filename);
                 processIDS.push_back((processors-1));
                 
                 //Wait until all threads have terminated.
@@ -879,8 +909,10 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                 for(int i=0;i<processIDS.size();i++){
                         appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
                         appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
+                       appendTaxFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
                         m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
                         m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
+                       m->mothurRemove((m->getFullPathName(accnos) + toString(processIDS[i]) + ".temp"));
                 }
                 
                 return num;
@@ -917,13 +949,16 @@ void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
  
  //**********************************************************************************************************************
  
-int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
+int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){
         try {
                 ofstream outTax;
                 m->openOutputFile(taxFName, outTax);
                 
                 ofstream outTaxSimple;
                 m->openOutputFile(tempTFName, outTaxSimple);
+               
+               ofstream outAcc;
+               m->openOutputFile(accnos, outAcc);
         
                 ifstream inFASTA;
                 m->openInputFile(filename, inFASTA);
@@ -936,7 +971,11 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                 int count = 0;
                 
                 while (!done) {
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { 
+                               inFASTA.close();
+                               outTax.close();
+                               outTaxSimple.close();
+                               outAcc.close(); return 0; }
                 
                         Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
                         
@@ -945,17 +984,20 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                                 taxonomy = classify->getTaxonomy(candidateSeq);
                                 
                                 if (m->control_pressed) { delete candidateSeq; return 0; }
-
-                               if (taxonomy != "bad seq") {
-                                       //output confidence scores or not
-                                       if (probs) {
-                                               outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
-                                       }else{
-                                               outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
-                                       }
-                                       
-                                       outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+                               
+                               if (taxonomy == "unknown;") { m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); m->mothurOutEndLine(); }
+                               
+                               //output confidence scores or not
+                               if (probs) {
+                                       outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+                               }else{
+                                       outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
                                 }
+                               
+                               if (classify->getFlipped()) { outAcc << candidateSeq->getName() << endl; }
+                               
+                               outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+                               
                                 count++;
                         }
                         delete candidateSeq;
@@ -977,6 +1019,7 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                 inFASTA.close();
                 outTax.close();
                 outTaxSimple.close();
+               outAcc.close();
                 
                 return count;
         }
@@ -987,10 +1030,11 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
  }
  //**********************************************************************************************************************
  #ifdef USE_MPI
-int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector<unsigned long long>& MPIPos){
+int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, MPI_File& accFile, vector<unsigned long long>& MPIPos){
         try {
                 MPI_Status statusNew; 
                 MPI_Status statusTemp; 
+               MPI_Status statusAcc; 
                 MPI_Status status; 
                 
                 int pid;
@@ -1018,29 +1062,40 @@ int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File
                         if (candidateSeq->getName() != "") {
                                 taxonomy = classify->getTaxonomy(candidateSeq);
                                 
-                               if (taxonomy != "bad seq") {
-                                       //output confidence scores or not
-                                       if (probs) {
-                                               outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
-                                       }else{
-                                               outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
-                                       }
-                                       
-                                       int length = outputString.length();
-                                       char* buf2 = new char[length];
-                                       memcpy(buf2, outputString.c_str(), length);
+                               if (taxonomy == "unknown;") { m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); m->mothurOutEndLine(); }
                                 
-                                       MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
-                                       delete buf2;
-
+                               //output confidence scores or not
+                               if (probs) {
+                                       outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
+                               }else{
                                         outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
-                                       length = outputString.length();
-                                       char* buf = new char[length];
-                                       memcpy(buf, outputString.c_str(), length);
+                               }
+                               
+                               int length = outputString.length();
+                               char* buf2 = new char[length];
+                               memcpy(buf2, outputString.c_str(), length);
+                               
+                               MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
+                               delete buf2;
+                               
+                               outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
+                               length = outputString.length();
+                               char* buf = new char[length];
+                               memcpy(buf, outputString.c_str(), length);
                                 
-                                       MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
-                                       delete buf;
+                               MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
+                               delete buf;
+                               
+                               if (classify->getFlipped()) { 
+                                       outputString =  candidateSeq->getName() + "\n";
+                                       length = outputString.length();
+                                       char* buf3 = new char[length];
+                                       memcpy(buf3, outputString.c_str(), length);
+                                       
+                                       MPI_File_write_shared(accFile, buf3, length, MPI_CHAR, &statusAcc);
+                                       delete buf3;
                                 }
+                               
                         }                               
                         delete candidateSeq;
                         
diff --git a/classifyseqscommand.h b/classifyseqscommand.h

index 0e21a203caaa0c4a8166d7ecb08a685cd36f3169..0bf4a9154412080d28fa065ab33789279832fbeb 100644 (file)
--- a/classifyseqscommand.h
+++ b/classifyseqscommand.h
@@ -72,16 +72,16 @@ private:
         string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
         int processors, kmerSize, numWanted, cutoff, iters;
         float match, misMatch, gapOpen, gapExtend;
-       bool abort, probs, save;
+       bool abort, probs, save, flip;
         
-       int driver(linePair*, string, string, string);
+       int driver(linePair*, string, string, string, string);
         void appendTaxFiles(string, string);
-       int createProcesses(string, string, string); 
+       int createProcesses(string, string, string, string); 
         string addUnclassifieds(string, int);
         
         int MPIReadNamesFile(string);
         #ifdef USE_MPI
-       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
         #endif
  };
  
@@ -93,16 +93,17 @@ struct classifyData {
         string taxFName; 
         string tempTFName; 
         string filename;
-       string search, taxonomyFileName, templateFileName, method;
+       string search, taxonomyFileName, templateFileName, method, accnos;
         unsigned long long start;
         unsigned long long end;
         MothurOut* m;
         float match, misMatch, gapOpen, gapExtend;
         int count, kmerSize, threadID, cutoff, iters, numWanted;
-       bool probs;
+       bool probs, flip;
          
         classifyData(){}
-       classifyData(bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid) {
+       classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli) {
+               accnos = acc;
                 taxonomyFileName = tx;
                 templateFileName = te;
                 taxFName = a;
@@ -124,6 +125,7 @@ struct classifyData {
                 threadID = tid;
                 probs = p;
                 count = 0;
+               flip = fli;
         }
  };
  
@@ -141,6 +143,9 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                 ofstream outTaxSimple;
                 pDataArray->m->openOutputFile(pDataArray->tempTFName, outTaxSimple);
                 
+               ofstream outAcc;
+               pDataArray->m->openOutputFile(pDataArray->accnos, outAcc);
+               
                 ifstream inFASTA;
                 pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
                 
@@ -157,12 +162,12 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                 
                 //make classify
                 Classify* myclassify;
-               if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID);           }
-               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);                           }
+               if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip);         }
+               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID, pDataArray->flipThreshold);                                }
                 else {
                         pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
                         pDataArray->m->mothurOutEndLine();
-                       myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID);   
+                       myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); 
                 }
                 
                 if (pDataArray->m->control_pressed) { delete myclassify; return 0; }
@@ -180,16 +185,19 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                                 
                                 if (pDataArray->m->control_pressed) { delete candidateSeq; return 0; }
                                 
-                               if (taxonomy != "bad seq") {
-                                       //output confidence scores or not
-                                       if (pDataArray->probs) {
-                                               outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
-                                       }else{
-                                               outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
-                                       }
-                                       
-                                       outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
+                               if (taxonomy == "unknown;") { pDataArray->m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); pDataArray->m->mothurOutEndLine(); }
+
+                               //output confidence scores or not
+                               if (pDataArray->probs) {
+                                       outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+                               }else{
+                                       outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
                                 }
+                                       
+                               outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
+                                       
+                               if (myclassify->getFlipped()) { outAcc << candidateSeq->getName() << endl; }
+                               
                                 count++;
                         }
                         delete candidateSeq;
diff --git a/clusterfragmentscommand.cpp b/clusterfragmentscommand.cpp

index ad1bd81b4a9d4554751cfbbc5eb836e107e7b53d..772113baf83e9fbbd057794103f46ffa21c21fda 100644 (file)
--- a/clusterfragmentscommand.cpp
+++ b/clusterfragmentscommand.cpp
@@ -147,7 +147,7 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                         // ...at some point should added some additional type checking...
                         namefile = validParameter.validFile(parameters, "name", true);
                         if (namefile == "not found") { namefile =  "";  }
-                       else if (namefile == "not open") { abort = true; }      
+                       else if (namefile == "not open") { namefile = ""; abort = true; }       
                         else {  readNameFile(); m->setNameFile(namefile); }
                         
                         string temp;
@@ -157,6 +157,11 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                         temp = validParameter.validFile(parameters, "percent", false);          if (temp == "not found"){       temp = "0";                             }
                         m->mothurConvert(temp, percent);
                         
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
                                 
         }
diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp

index 787d84aa3ef40042f3e2358283648ba63e8eb098..34caf654124886f9cd23638a149b2b3487ca53e2 100644 (file)
--- a/clustersplitcommand.cpp
+++ b/clustersplitcommand.cpp
@@ -201,7 +201,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                         else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
                         
                         taxFile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxFile == "not open") { abort = true; }    
+                       if (taxFile == "not open") { taxFile = ""; abort = true; }      
                         else if (taxFile == "not found") { taxFile = ""; }
                         else {  m->setTaxonomyFile(taxFile); }
                         
diff --git a/consensusseqscommand.cpp b/consensusseqscommand.cpp

index 4f8c53bc9b8cd70fab82a13cd8507a8e5719a07a..223e5dbb86600907fafaafdf5af48ee665adfc5b 100644 (file)
--- a/consensusseqscommand.cpp
+++ b/consensusseqscommand.cpp
@@ -142,7 +142,7 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                         }else { m->setFastaFile(fastafile); }   
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
@@ -163,7 +163,11 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                         
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
-
+                       
+                       if (namefile == ""){
+                               vector<string> files; files.push_back(fastafile); 
+                               parser.getNameFile(files);
+                       }
                 }
         }
         catch(exception& e) {
diff --git a/database.hpp b/database.hpp

index 580c3f2d4587ba80b8286d5b46968d6faa3818c3..b2817a779dc1b95070ef77fbe6e8bf5b11159a3c 100644 (file)
--- a/database.hpp
+++ b/database.hpp
@@ -58,6 +58,7 @@ public:
         virtual void readKmerDB(ifstream&){};
         virtual void setNumSeqs(int i) {        numSeqs = i;    }
         virtual vector<int> getSequencesWithKmer(int){ vector<int> filler; return filler; };  
+       virtual int getReversed(int) { return 0; } 
         virtual int getMaxKmer(){       return 1;       }
         
  protected:
diff --git a/deconvolutecommand.cpp b/deconvolutecommand.cpp

index 3fa622f01ff2fc334f5cbb9d79851a9ca7f8b30c..3541e00d064233c4da10619789291222290b33f1 100644 (file)
--- a/deconvolutecommand.cpp
+++ b/deconvolutecommand.cpp
@@ -125,9 +125,15 @@ DeconvoluteCommand::DeconvoluteCommand(string option)  {
                         }
                         
                         oldNameMapFName = validParameter.validFile(parameters, "name", true);
-                       if (oldNameMapFName == "not open") { abort = true; }
+                       if (oldNameMapFName == "not open") { oldNameMapFName = ""; abort = true; }
                         else if (oldNameMapFName == "not found"){       oldNameMapFName = "";   }
                         else { m->setNameFile(oldNameMapFName); }
+                       
+                       if (oldNameMapFName == "") {
+                               vector<string> files; files.push_back(inFastaName);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
  
         }
diff --git a/getgroupscommand.cpp b/getgroupscommand.cpp

index 0c42b7dad8281b1d9edd49550c3df95ae773c7e8..432894b06fe9d5ffa6982fdaaa49474a44c403e0 100644 (file)
--- a/getgroupscommand.cpp
+++ b/getgroupscommand.cpp
@@ -16,12 +16,12 @@
  //**********************************************************************************************************************
  vector<string> GetGroupsCommand::setParameters(){      
         try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
-               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
+               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -180,12 +180,12 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                         else { m->setAccnosFile(accnosfile); }
                         
                         fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                         else if (fastafile == "not found") {  fastafile = "";  }
                         else { m->setFastaFile(fastafile); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") {  namefile = "";  }  
                         else { m->setNameFile(namefile); }
                         
@@ -200,7 +200,7 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                         else { m->setListFile(listfile); }
                         
                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                         else if (taxfile == "not found") {  taxfile = "";  }
                         else { m->setTaxonomyFile(taxfile); }
                         
@@ -253,6 +253,10 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                         if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
                         if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
  
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
                 }
                 
         }
diff --git a/getlineagecommand.cpp b/getlineagecommand.cpp

index b4e41e5fb60f48823528907df5804ff05c01c419..c2be580bb73313c91322e903c87027dc5da405d9 100644 (file)
--- a/getlineagecommand.cpp
+++ b/getlineagecommand.cpp
@@ -168,12 +168,12 @@ GetLineageCommand::GetLineageCommand(string option)  {
                         
                         //check for required parameters                 
                         fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                         else if (fastafile == "not found") {  fastafile = "";  }
                         else { m->setFastaFile(fastafile); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") {  namefile = "";  }  
                         else { m->setNameFile(namefile); }
                         
@@ -192,7 +192,7 @@ GetLineageCommand::GetLineageCommand(string option)  {
                         else { m->setListFile(listfile); }
                         
                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                         else if (taxfile == "not found") {                              
                                 taxfile = m->getTaxonomyFile(); 
                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
@@ -217,6 +217,11 @@ GetLineageCommand::GetLineageCommand(string option)  {
                         m->splitAtChar(taxons, listOfTaxons, '-');
                         
                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+               
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/getseqscommand.cpp b/getseqscommand.cpp

index ffe49b48810d7916e5324efd9480c87e8322ec92..9bf188db3d25fa4ac16b82973a2802c3b1e6ad75 100644 (file)
--- a/getseqscommand.cpp
+++ b/getseqscommand.cpp
@@ -206,12 +206,12 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                         if (accnosfile2 == "not found") { accnosfile2 = ""; }
                         
                         fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                         else if (fastafile == "not found") {  fastafile = "";  }
                         else { m->setFastaFile(fastafile); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") {  namefile = "";  }  
                         else { m->setNameFile(namefile); }
                         
@@ -230,7 +230,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                         else { m->setListFile(listfile); }
                         
                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                         else if (taxfile == "not found") {  taxfile = "";  }
                         else { m->setTaxonomyFile(taxfile); }
                         
@@ -249,6 +249,11 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                         dups = m->isTrue(temp);
                         
                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+               
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/groupmap.h b/groupmap.h

index 99891a46c5cf998c565bffb8012350ebedacc8fa..29c174134eee5689e5210775dbde2f220010d551 100644 (file)
--- a/groupmap.h
+++ b/groupmap.h
@@ -2,7 +2,7 @@
  #define GROUPMAP_H
  /*
   *  groupmap.h
- *  Dotur
+ *  Mothur
   *
   *  Created by Sarah Westcott on 12/1/08.
   *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
diff --git a/kmer.cpp b/kmer.cpp

index 6ee84d3da3e8917c8c02d9c2d37deb68e08397b5..50574f4a5ee0709daf2ebb67b8f0e49971e20ef0 100644 (file)
--- a/kmer.cpp
+++ b/kmer.cpp
@@ -131,6 +131,27 @@ string Kmer::getKmerBases(int kmerNumber){
         }
         return kmer;
  }
+/**************************************************************************************************/
+
+int Kmer::getReverseKmerNumber(int kmerNumber){
+               
+       string kmerString = getKmerBases(kmerNumber);
+       
+       //get Reverse
+       string reverse = "";
+       for(int i=kmerString.length()-1;i>=0;i--){
+               if(kmerString[i] == 'A')                {       reverse += 'T'; }
+               else if(kmerString[i] == 'T'){  reverse += 'A'; }
+               else if(kmerString[i] == 'G'){  reverse += 'C'; }
+               else if(kmerString[i] == 'C'){  reverse += 'G'; }
+               else                                            {       reverse += 'N'; }
+       }
+       
+       int reverseNumber = getKmerNumber(reverse, 0);
+       
+       return reverseNumber;
+       
+}
  
  /**************************************************************************************************/
  
diff --git a/kmer.hpp b/kmer.hpp

index d5b67d66d173cb24c179569d44886f2591ea2530..9a3c5a0091b868b7f61ae8043546f39b27e212cb 100644 (file)
--- a/kmer.hpp
+++ b/kmer.hpp
@@ -21,6 +21,7 @@ public:
         string getKmerString(string);
         int getKmerNumber(string, int);
         string getKmerBases(int);
+       int getReverseKmerNumber(int);
         vector< map<int, int> > getKmerCounts(string sequence);  //for use in chimeraCheck
         
  private:
diff --git a/kmerdb.cpp b/kmerdb.cpp

index 2703e16d481ceccd6019ff642b97432ba5cf819f..9a5d23500be955b459413beee989601f6c0ec78f 100644 (file)
--- a/kmerdb.cpp
+++ b/kmerdb.cpp
@@ -57,6 +57,8 @@ KmerDB::~KmerDB(){}
  
  vector<int> KmerDB::findClosestSequences(Sequence* candidateSeq, int num){
         try {
+               if (num > numSeqs) { m->mothurOut("[WARNING]: you requested " + toString(num) + " closest sequences, but the template only contains " + toString(numSeqs) + ", adjusting."); m->mothurOutEndLine(); num = numSeqs; }
+               
                 vector<int> topMatches;
                 Kmer kmer(kmerSize);
                 searchScore = 0;
@@ -215,6 +217,20 @@ int KmerDB::getCount(int kmer) {
         }       
  }
  /**************************************************************************************************/
+int KmerDB::getReversed(int kmerNumber) {
+       try {
+               Kmer kmer(kmerSize);
+               
+               if (kmerNumber < 0) { return 0; }  //if user gives negative number
+               else if (kmerNumber > maxKmer) {        return 0;       }  //or a kmer that is bigger than maxkmer
+               else {  return kmer.getReverseKmerNumber(kmerNumber);   }  // kmer is in vector range
+       }
+       catch(exception& e) {
+               m->errorOut(e, "KmerDB", "getReversed");
+               exit(1);
+       }       
+}
+/**************************************************************************************************/
  vector<int> KmerDB::getSequencesWithKmer(int kmer) {
         try {
                 
diff --git a/kmerdb.hpp b/kmerdb.hpp

index 62d4836a066f9bc10cf1a041e21cb07e1e0a1f84..4ae00b91311677488c35961186dfbf072ba005f3 100644 (file)
--- a/kmerdb.hpp
+++ b/kmerdb.hpp
@@ -36,6 +36,7 @@ public:
         void readKmerDB(ifstream&);
         int getCount(int);  //returns number of sequences with that kmer number
         vector<int> getSequencesWithKmer(int);  //returns vector of sequences that contain kmer passed in
+       int getReversed(int);  //returns reverse compliment kmerNumber 
         int getMaxKmer() { return maxKmer; }
         
  private:
diff --git a/knn.cpp b/knn.cpp

index 6053b6e47a1f16c1398c2c183cd40eef0aefeda0..837fa6d18d4f1aed6cf34e5861942b4230c30dd1 100644 (file)
--- a/knn.cpp
+++ b/knn.cpp
@@ -72,11 +72,11 @@ string Knn::getTaxonomy(Sequence* seq) {
                 }
                 
                 if (closestNames.size() == 0) {
-                       m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); m->mothurOutEndLine();
-                       tax = "bad seq";
+                       m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine();
+                       tax = "unknown;";
                 }else{
                         tax = findCommonTaxonomy(closestNames);
-                       if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); tax = "bad seq"; }
+                       if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; }
                 }
                 
                 simpleTax = tax;
@@ -90,7 +90,7 @@ string Knn::getTaxonomy(Sequence* seq) {
  /**************************************************************************************************/
  string Knn::findCommonTaxonomy(vector<string> closest)  {
         try {
-               vector< vector<string> > taxons;  //taxon[0] = vector of taxonomy info for closest[0].
+               /*vector< vector<string> > taxons;  //taxon[0] = vector of taxonomy info for closest[0].
                                                                                 //so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
                                                                                 //taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria....
                                                                                 
@@ -101,6 +101,7 @@ string Knn::findCommonTaxonomy(vector<string> closest)  {
                         if (m->control_pressed) { return "control"; }
                 
                         string tax = taxonomy[closest[i]];  //we know its there since we checked in getTaxonomy
+                       cout << tax << endl;
                 
                         taxons[i] = parseTax(tax);
                 
@@ -128,9 +129,54 @@ string Knn::findCommonTaxonomy(vector<string> closest)  {
                                 }
                                 break;
                         }
+               }*/
+               
+               string conTax;
+               
+               //create a tree containing sequences from this bin
+               PhyloTree* p = new PhyloTree();
+               
+               for (int i = 0; i < closest.size(); i++) {
+                       p->addSeqToTree(closest[i], taxonomy[closest[i]]);
                 }
-       
-               return common;
+               
+               //build tree
+               p->assignHeirarchyIDs(0);
+               
+               TaxNode currentNode = p->get(0);
+               
+               //at each level
+               while (currentNode.children.size() != 0) { //you still have more to explore
+                       
+                       TaxNode bestChild;
+                       int bestChildSize = 0;
+                       
+                       //go through children
+                       for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
+                               
+                               TaxNode temp = p->get(itChild->second);
+                               
+                               //select child with largest accessions - most seqs assigned to it
+                               if (temp.accessions.size() > bestChildSize) {
+                                       bestChild = p->get(itChild->second);
+                                       bestChildSize = temp.accessions.size();
+                               }
+                               
+                       }
+                       
+                       if (bestChildSize == closest.size()) { //if yes, add it
+                               conTax += bestChild.name + ";";
+                       }else{ //if no, quit
+                               break;
+                       }
+                       
+                       //move down a level
+                       currentNode = bestChild;
+               }
+               
+               delete p;       
+               
+               return conTax;
         }
         catch(exception& e) {
                 m->errorOut(e, "Knn", "findCommonTaxonomy");
diff --git a/makefile b/makefile

index e2e66c152650226dda8bde8f43d93a5e660c2895..476070eb2fbb654d054238c45ee335b2ce8fe0eb 100644 (file)
--- a/makefile
+++ b/makefile
@@ -32,6 +32,7 @@ ifeq  ($(strip $(64BIT_VERSION)),yes)
         #if you using cygwin to build Windows the following line
         #CXX = x86_64-w64-mingw32-g++
         #CC = x86_64-w64-mingw32-g++
+       #FORTAN_COMPILER = x86_64-w64-mingw32-gfortran
         #TARGET_ARCH += -m64 -static
  
         #if you are a linux user use the following line
diff --git a/memchi2.cpp b/memchi2.cpp

index 595b54f664d61e1fcd75e537d48b2e055fde2237..f08b4b8561d9177fbd6f0a5a8f3337b95312e362 100644 (file)
--- a/memchi2.cpp
+++ b/memchi2.cpp
@@ -17,7 +17,7 @@ EstOutput MemChi2::getValues(vector<SharedRAbundVector*> shared) {
                 int nonZeroA = 0;
                 int nonZeroB = 0;
                 int totalOtus = shared[0]->getNumBins();
-               int totalGroups = shared.size();
+               //int totalGroups = shared.size();
                 
                 //for each otu
                 for (int i = 0; i < shared[0]->getNumBins(); i++) {
diff --git a/mergegroupscommand.cpp b/mergegroupscommand.cpp

index af2af96b4dc6ae122fd0b9383d9486b3c776234e..e30558cbe69d3069135e0432f2eda7f9623095b5 100644 (file)
--- a/mergegroupscommand.cpp
+++ b/mergegroupscommand.cpp
@@ -13,7 +13,8 @@
  //**********************************************************************************************************************
  vector<string> MergeGroupsCommand::setParameters(){    
         try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pgroup);
                 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
@@ -33,12 +34,12 @@ vector<string> MergeGroupsCommand::setParameters(){
  string MergeGroupsCommand::getHelpString(){    
         try {
                 string helpString = "";
-               helpString += "The merge.groups command reads a shared file and a design file and merges the groups in the shared file that are in the same grouping in the design file.\n";
+               helpString += "The merge.groups command reads a shared or group file and a design file and merges the groups that are in the same grouping in the design file.\n";
                 helpString += "The merge.groups command outputs a .shared file. \n";
-               helpString += "The merge.groups command parameters are shared, groups, label and design.  The design and shared parameter are required.\n";
+               helpString += "The merge.groups command parameters are shared, group, groups, label and design.  The design parameter is required.\n";
                 helpString += "The design parameter allows you to assign your groups to sets. It is required. \n";
                 helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
-               helpString += "The groups parameter allows you to specify which of the groups in your shared you would like included. The group names are separated by dashes.\n";
+               helpString += "The groups parameter allows you to specify which of the groups in your shared or group file you would like included. The group names are separated by dashes.\n";
                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
                 helpString += "The merge.groups command should be in the following format: merge.groups(design=yourDesignFile, shared=yourSharedFile).\n";
                 helpString += "Example merge.groups(design=temp.design, groups=A-B-C, shared=temp.shared).\n";
@@ -58,6 +59,7 @@ MergeGroupsCommand::MergeGroupsCommand(){
                 setParameters();
                 vector<string> tempOutNames;
                 outputTypes["shared"] = tempOutNames;
+               outputTypes["group"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "MergeGroupsCommand", "MetaStatsCommand");
@@ -92,6 +94,7 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                         //initialize outputTypes
                         vector<string> tempOutNames;
                         outputTypes["shared"] = tempOutNames;
+                       outputTypes["group"] = tempOutNames;
                         
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
@@ -116,6 +119,15 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                                         //if the user has not given a path then, add inputdir. else leave path alone.
                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                 }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
                         }
                         
                         //check for required parameters
@@ -128,15 +140,15 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                                 else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
                         }else { m->setDesignFile(designfile); } 
                         
-                       //make sure the user has already run the read.otu command
                         sharedfile = validParameter.validFile(parameters, "shared", true);
-                       if (sharedfile == "not open") { abort = true; }
-                       else if (sharedfile == "not found") {                           
-                               //if there is a current shared file, use it
-                               sharedfile = m->getSharedFile(); 
-                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
-                       }else { m->setSharedFile(sharedfile); } 
+                       if (sharedfile == "not open") { abort = true; sharedfile = ""; }
+                       else if (sharedfile == "not found") {  sharedfile = ""; }
+                       else { m->setSharedFile(sharedfile); }  
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { abort = true; groupfile = ""; }
+                       else if (groupfile == "not found") {  groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }    
                         
                         //check for optional parameter and set defaults
                         // ...at some point should added some additional type checking...
@@ -151,6 +163,19 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                         if (groups == "not found") { groups = "all";  }
                         m->splitAtDash(groups, Groups);
                         m->setGroups(Groups);
+                       
+                       if ((sharedfile == "") && (groupfile == "")) { 
+                               //give priority to group, then shared
+                               groupfile = m->getGroupFile(); 
+                               if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       sharedfile = m->getSharedFile(); 
+                                       if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                       }
+                               }
+                       }
                 }
                 
         }
@@ -165,17 +190,106 @@ int MergeGroupsCommand::execute(){
         try {
                 
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+       
+               designMap = new GroupMap(designfile);
+               designMap->readDesignMap();
+               
+               if (groupfile != "") { processGroupFile(designMap); }
+               if (sharedfile != "") { processSharedFile(designMap); }
+
+               //reset groups parameter
+               m->clearGroups();  
+               delete designMap;
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0;}
+               
+               
+               //set shared file as new current sharedfile
+               string current = "";
+               itTypes = outputTypes.find("shared");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
+               }
+               
+               itTypes = outputTypes.find("group");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeGroupsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+       try {
+               
+               map<string, SharedRAbundVector> merged;
+               map<string, SharedRAbundVector>::iterator it;
+               
+               for (int i = 0; i < thisLookUp.size(); i++) {
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       //what grouping does this group belong to
+                       string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
+                       if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
+                       
+                       else {
+                               //do we already have a member of this grouping?
+                               it = merged.find(grouping);
+                               
+                               if (it == merged.end()) { //nope, so create it
+                                       merged[grouping] = *thisLookUp[i];
+                                       merged[grouping].setGroup(grouping);
+                               }else { //yes, merge it
+                                       
+                                       for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
+                                               int abund = (it->second).getAbundance(j);
+                                               abund += thisLookUp[i]->getAbundance(j);
+                                               
+                                               (it->second).set(j, abund, grouping);
+                                       }
+                               }
+                       }
+               }
+               
+               //print new file
+               for (it = merged.begin(); it != merged.end(); it++) {
+                       out << (it->second).getLabel() << '\t' << it->first << '\t';
+                       (it->second).print(out);
+               }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeGroupsCommand", "process");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
+       try {
                 
-               if (outputDir == "") {  outputDir += m->hasPath(sharedfile);  }
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" +  m->getExtension(sharedfile);
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" +  m->getExtension(sharedfile);
                 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
                 
                 ofstream out;
                 m->openOutputFile(outputFileName, out);
                 
-               designMap = new GroupMap(designfile);
-               designMap->readDesignMap();
-               
                 InputData input(sharedfile, "sharedfile");
                 lookup = input.getSharedRAbundVectors();
                 string lastLabel = lookup[0]->getLabel();
@@ -256,78 +370,68 @@ int MergeGroupsCommand::execute(){
                 }
                 
                 out.close();
-               //reset groups parameter
-               m->clearGroups();  
-               delete designMap;
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0;}
-               
-               
-               //set shared file as new current sharedfile
-               string current = "";
-               itTypes = outputTypes.find("shared");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
-               }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
                 
+                               
                 return 0;
+               
         }
         catch(exception& e) {
-               m->errorOut(e, "MergeGroupsCommand", "execute");
+               m->errorOut(e, "MergeGroupsCommand", "processSharedFile");
                 exit(1);
         }
  }
  //**********************************************************************************************************************
  
-int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+int MergeGroupsCommand::processGroupFile(GroupMap*& designMap){
         try {
                 
-               map<string, SharedRAbundVector> merged;
-               map<string, SharedRAbundVector>::iterator it;
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "merge" +  m->getExtension(groupfile);
+               outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
                 
-               for (int i = 0; i < thisLookUp.size(); i++) {
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               //read groupfile
+               GroupMap groupMap(groupfile);
+               groupMap.readMap();
+               
+               //fill Groups - checks for "all" and for any typo groups
+               SharedUtil* util = new SharedUtil();
+               vector<string> nameGroups = groupMap.getNamesOfGroups();
+               util->setGroups(Groups, nameGroups);
+               delete util;
+               
+               vector<string> namesOfSeqs = groupMap.getNamesSeqs();
+               bool error = false;
+               
+               for (int i = 0; i < namesOfSeqs.size(); i++) {
                         
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { break; }
                         
-                       //what grouping does this group belong to
-                       string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
-                       if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
+                       string thisGroup = groupMap.getGroup(namesOfSeqs[i]);
                         
-                       else {
-                               //do we already have a member of this grouping?
-                               it = merged.find(grouping);
+                       //are you in a group the user wants
+                       if (m->inUsersGroups(thisGroup, Groups)) {
+                               string thisGrouping = designMap->getGroup(thisGroup);
                                 
-                               if (it == merged.end()) { //nope, so create it
-                                       merged[grouping] = *thisLookUp[i];
-                                       merged[grouping].setGroup(grouping);
-                               }else { //yes, merge it
-                                       
-                                       for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
-                                               int abund = (it->second).getAbundance(j);
-                                               abund += thisLookUp[i]->getAbundance(j);
-                                               
-                                               (it->second).set(j, abund, grouping);
-                                       }
+                               if (thisGrouping == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct."); m->mothurOutEndLine();  error = true; }
+                               else {
+                                       out << namesOfSeqs[i] << '\t' << thisGrouping << endl;
                                 }
                         }
                 }
                 
-               //print new file
-               for (it = merged.begin(); it != merged.end(); it++) {
-                       out << (it->second).getLabel() << '\t' << it->first << '\t';
-                       (it->second).print(out);
-               }
+               if (error) { m->control_pressed = true; }
+
+               out.close();
                 
                 return 0;
                 
         }
         catch(exception& e) {
-               m->errorOut(e, "MergeGroupsCommand", "process");
+               m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
                 exit(1);
         }
  }
diff --git a/mergegroupscommand.h b/mergegroupscommand.h

index 7907dfe85f556682d1cc796323314a2f0c8d7b67..7f216ebed7653a2647c4770e4ce17e5e6d881e95 100644 (file)
--- a/mergegroupscommand.h
+++ b/mergegroupscommand.h
@@ -38,10 +38,12 @@ private:
         
         bool abort, allLines, pickedGroups;
         set<string> labels; //holds labels to be used
-       string groups, label, outputDir, inputDir, designfile, sharedfile;
+       string groups, label, outputDir, inputDir, designfile, sharedfile, groupfile;
         vector<string> Groups, outputNames;
                 
         int process(vector<SharedRAbundVector*>&, ofstream&);
+       int processSharedFile(GroupMap*&);
+       int processGroupFile(GroupMap*&);
  };
  
  #endif
diff --git a/mothur.h b/mothur.h

index 3b7c4599329aede91be3f6325d27df1accf77c03..50344e24bfea85e8b7c4b829b5136e3e63d34fc7 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -106,6 +106,19 @@ struct ThreadNode {
         IntNode* right;
  };
  
+struct diffPair {
+       float   prob;
+       float   reverseProb;
+       
+       diffPair() {
+               prob = 0; reverseProb = 0;
+       }
+       diffPair(float p, float rp) {
+               prob = p;
+               reverseProb = rp;
+       }
+};
+
  /************************************************************/
  struct clusterNode {
         int numSeq;
diff --git a/mothurout.cpp b/mothurout.cpp

index 9dcdc80e761f1483059297dbcd996a0a06b43e5b..20a7b5235029b6d7908ca0f23f119c3c38d095f1 100644 (file)
--- a/mothurout.cpp
+++ b/mothurout.cpp
@@ -2152,7 +2152,7 @@ int MothurOut::removeConfidences(string& tax) {
                                 int pos2 = taxon.find_last_of(')');
                                 if (pos2 != -1) {
                                         string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
-                                       if (isContainingOnlyDigits(confidenceScore)) {
+                                       if (isNumeric1(confidenceScore)) {
                                                 taxon = taxon.substr(0, pos); //rip off confidence 
                                         }
                                 }
diff --git a/optionparser.cpp b/optionparser.cpp

index 71714f184e644105041910933a5bf877539deb24..1e8c4451538683d4725ed60e535a5a86a8a0ea79 100644 (file)
--- a/optionparser.cpp
+++ b/optionparser.cpp
@@ -104,7 +104,7 @@ map<string, string> OptionParser::getParameters() {
                                 }
                         }else{ it++; }
                 }
-       
+               
                 return parameters;      
         }
         catch(exception& e) {
@@ -113,4 +113,54 @@ map<string, string> OptionParser::getParameters() {
         }
  }
  
+/***********************************************************************/
+//pass a vector of filenames that may match the current namefile.  
+//this function will look at each one, if the rootnames match, mothur will warn 
+//the user that they may have neglected to provide a namefile.
+//stops when it finds a match.
+bool OptionParser::getNameFile(vector<string> files) { 
+       try {
+               string namefile = m->getNameFile();
+               bool match = false;
+               
+               if (namefile != "") {
+                       string temp = m->getRootName(m->getSimpleName(namefile));
+                       vector<string> rootName;
+                       m->splitAtChar(temp, rootName, '.');
+                       
+                       for (int i = 0; i < files.size(); i++) {
+                               temp = m->getRootName(m->getSimpleName(files[i]));
+                               vector<string> root;
+                               m->splitAtChar(temp, root, '.');
+                               
+                               int smallest = rootName.size();
+                               if (root.size() < smallest) { smallest = root.size(); }
+                               
+                               int numMatches = 0;
+                               for(int j = 0; j < smallest; j++) {
+                                       if (root[j] == rootName[j]) { numMatches++; }
+                               }
+                               
+                               if (smallest > 0) {
+                                       if ((numMatches >= (smallest-2)) && (root[0] == rootName[0])) {
+                                               m->mothurOut("[WARNING]: This command can take a namefile and you did not provide one. The current namefile is " + namefile + " which seems to match " + files[i] + ".");
+                                               m->mothurOutEndLine();
+                                               match = true;
+                                               break;
+                                       }
+                               }
+                       }
+                       
+               }
+               
+               
+               return match;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "OptionParser", "getNameFile");
+               exit(1);
+       }
+}
+
+                               
  /***********************************************************************/
diff --git a/optionparser.h b/optionparser.h

index 7707e739ce15d7e0dd6657bc08a288222dfbdbb0..facd1f85182cc526a6db01d85968d9231afcfc88 100644 (file)
--- a/optionparser.h
+++ b/optionparser.h
@@ -14,7 +14,7 @@
  
  #include "mothur.h"
  #include "mothurout.h"
-
+#include "command.hpp"
  
  /***********************************************************************/
  
@@ -23,6 +23,7 @@ public:
         OptionParser(string);
         ~OptionParser() {}
         map<string, string> getParameters();
+       bool getNameFile(vector<string>);
  private:
         map<string, string> parameters;
         MothurOut* m;
diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp

index 9cba70b9d2772edba281cd5f03459d1fdc32a966..9509a20af2528392c2df8590aea61fef6a7665b4 100644 (file)
--- a/parsefastaqcommand.cpp
+++ b/parsefastaqcommand.cpp
@@ -14,6 +14,8 @@
  vector<string> ParseFastaQCommand::setParameters(){    
         try {
                 CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfastq);
+               CommandParameter pfasta("fasta", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
+               CommandParameter pqual("qfile", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pqual);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                 
@@ -104,6 +106,15 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                         
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
+                       
+                       string temp;
+                       temp = validParameter.validFile(parameters, "fasta", false);    if(temp == "not found"){        temp = "T";     }
+                       fasta = m->isTrue(temp); 
+
+                       temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
+                       qual = m->isTrue(temp); 
+                       
+                       if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
  
                 }               
         }
@@ -122,13 +133,16 @@ int ParseFastaQCommand::execute(){
                 string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "fasta";
                 string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "qual";
                 ofstream outFasta, outQual;
-               m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);
-               m->openOutputFile(qualFile, outQual);   outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);
+               
+               if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
+               if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
                 
                 ifstream in;
                 m->openInputFile(fastaQFile, in);
                 
                 while (!in.eof()) {
+                       
+                       if (m->control_pressed) { break; }
                 
                         //read sequence name
                         string name = m->getline(in); m->gobble(in);
@@ -147,27 +161,27 @@ int ParseFastaQCommand::execute(){
                         else { name2 = name2.substr(1);  }
                         
                         //read quality scores
-                       string qual = m->getline(in); m->gobble(in);
-                       if (qual == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
+                       string quality = m->getline(in); m->gobble(in);
+                       if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
                         
                         //sanity check sequence length and number of quality scores match
                         if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
-                       if (qual.length() != sequence.length()) { m->mothurOut("[ERROR]: lengths do not match. read " + toString(sequence.length()) + " characters for fasta and " + toString(qual.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-                       //convert quality scores
-                       vector<int> qualScores = convertQual(qual);
+                       if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: lengths do not match. read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                         
                         //print sequence info to files
-                       outFasta << ">" << name << endl << sequence << endl;
+                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
                         
-                       outQual << ">" << name << endl;
-                       for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
-                       outQual << endl;
+                       if (qual) { 
+                               vector<int> qualScores = convertQual(quality);
+                               outQual << ">" << name << endl;
+                               for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
+                               outQual << endl;
+                       }
                 }
                 
                 in.close();
-               outFasta.close();
-               outQual.close();
+               if (fasta)      { outFasta.close();     }
+               if (qual)       { outQual.close();      }
                 
                 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
                 
diff --git a/parsefastaqcommand.h b/parsefastaqcommand.h

index 60524566b2a4f02e4571c38d0602980e676d8138..6feabce848c09c21b2e276df8361ac4d78667f22 100644 (file)
--- a/parsefastaqcommand.h
+++ b/parsefastaqcommand.h
@@ -34,7 +34,7 @@ private:
  
         vector<string> outputNames;     
         string outputDir, fastaQFile;
-       bool abort;
+       bool abort, fasta, qual;
         
         vector<int> convertQual(string);
  };
diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp

index ccf29526f5e803049371c452bcdcf7ddad10da48..2d46efc9efebcfbe7807292cb814cf58686f9ac4 100644 (file)
--- a/parsimonycommand.cpp
+++ b/parsimonycommand.cpp
@@ -139,7 +139,7 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                         if (randomtree == "")  {
                                 //check for required parameters
                                 treefile = validParameter.validFile(parameters, "tree", true);
-                               if (treefile == "not open") { abort = true; }
+                               if (treefile == "not open") { treefile = ""; abort = true; }
                                 else if (treefile == "not found") {                             //if there is a current design file, use it
                                         treefile = m->getTreeFile(); 
                                         if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
@@ -153,7 +153,7 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                                 else { m->setGroupFile(groupfile); }
                                 
                                 namefile = validParameter.validFile(parameters, "name", true);
-                               if (namefile == "not open") { abort = true; }
+                               if (namefile == "not open") { namefile = ""; abort = true; }
                                 else if (namefile == "not found") { namefile = ""; }
                                 else { m->setNameFile(namefile); }
                         }
@@ -177,6 +177,11 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
                         
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
  
         }
diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp

index 69c7b304623e44878cdafb8925fbffd9f45cd718..2b15d11d29580d1455ffea983788ab0715fe194b 100644 (file)
--- a/phylodiversitycommand.cpp
+++ b/phylodiversitycommand.cpp
@@ -144,7 +144,7 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                         
                         //check for required parameters
                         treefile = validParameter.validFile(parameters, "tree", true);
-                       if (treefile == "not open") { abort = true; }
+                       if (treefile == "not open") { treefile = ""; abort = true; }
                         else if (treefile == "not found") {                             
                                 //if there is a current design file, use it
                                 treefile = m->getTreeFile(); 
@@ -159,7 +159,7 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                         else { m->setGroupFile(groupfile); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
@@ -197,6 +197,11 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                         }
                         
                         if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
                 }
                 
         }
diff --git a/phylosummary.cpp b/phylosummary.cpp

index 22e8e75ce7047e00b135483f16b566eb506a71fb..7d9fcac2558da8f50653a11ddafd997b58c1c286 100644 (file)
--- a/phylosummary.cpp
+++ b/phylosummary.cpp
@@ -121,6 +121,7 @@ string PhyloSummary::getNextTaxon(string& heirarchy){
  
  int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
         try {
+                               
                 numSeqs++;
                 
                 map<string, int>::iterator childPointer;
diff --git a/phylotree.cpp b/phylotree.cpp

index e430fb9e85622dcd1d425534857fe37dff600da9..dba1e3b56951f8ae1c484ace22187a9b65053d35 100644 (file)
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -20,6 +20,7 @@ PhyloTree::PhyloTree(){
                 tree[0].heirarchyID = "0";
                 maxLevel = 0;
                 calcTotals = true;
+               addSeqToTree("unknown", "unknown;");
         }
         catch(exception& e) {
                 m->errorOut(e, "PhyloTree", "PhyloTree");
@@ -127,6 +128,7 @@ PhyloTree::PhyloTree(string tfile){
                 maxLevel = 0;
                 calcTotals = true;
                 string name, tax;
+               addSeqToTree("unknown", "unknown;");
  
                 
                 #ifdef USE_MPI
@@ -232,7 +234,6 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
  
  int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
         try {
-                       
                 numSeqs++;
                 
                 map<string, int>::iterator childPointer;
@@ -375,7 +376,7 @@ void PhyloTree::binUnclassified(string file){
                 map<string, int>::iterator childPointer;
                 
                 vector<TaxNode> copy = tree;
-                       
+               
                 //fill out tree
                 fillOutTree(0, copy);
         
@@ -484,16 +485,16 @@ string PhyloTree::getFullTaxonomy(string seqName) {
  
  void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
         try {
-       
+               
                 //output mothur version
                 out << "#" << m->getVersion() << endl;
                 
                 out << copy.size() << endl;
                 
                 out << maxLevel << endl;
-               
+                               
                 for (int i = 0; i < copy.size(); i++) {
-       
+                               
                         out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t';
                         
                         map<string,int>::iterator it;
@@ -606,6 +607,7 @@ bool PhyloTree::ErrorCheck(vector<string> templateFileNames){
         try {
         
                 bool okay = true;
+               templateFileNames.push_back("unknown");
                 
                 map<string, int>::iterator itFind;
                 map<string, int> taxonomyFileNames = name2Taxonomy;
diff --git a/phylotypecommand.cpp b/phylotypecommand.cpp

index 00f960bbbab8d07ecbee08dc01b78907c60c0185..1d6c391de69c83a86971873577a0df0b21efdbcd 100644 (file)
--- a/phylotypecommand.cpp
+++ b/phylotypecommand.cpp
@@ -128,11 +128,11 @@ PhylotypeCommand::PhylotypeCommand(string option)  {
                                         m->mothurOut("No valid current files. taxonomy is a required parameter."); m->mothurOutEndLine(); 
                                         abort = true; 
                                 }
-                       }else if (taxonomyFileName == "not open") { abort = true; }     
+                       }else if (taxonomyFileName == "not open") { taxonomyFileName = ""; abort = true; }      
                         else { m->setTaxonomyFile(taxonomyFileName); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") { namefile = ""; }
                         else { readNamesFile(); m->setNameFile(namefile); }     
                         
@@ -153,6 +153,11 @@ PhylotypeCommand::PhylotypeCommand(string option)  {
                                 else { allLines = 1;  }
                         }
                         
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(taxonomyFileName);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
         }
         catch(exception& e) {
diff --git a/preclustercommand.cpp b/preclustercommand.cpp

index 21de0344e7c856bb83105abe9b51cf8fe5b4a8ec..582da493e4dc5cdf5267d65606181d1594f56a87 100644 (file)
--- a/preclustercommand.cpp
+++ b/preclustercommand.cpp
@@ -147,7 +147,7 @@ PreClusterCommand::PreClusterCommand(string option) {
                         // ...at some point should added some additional type checking...
                         namefile = validParameter.validFile(parameters, "name", true);
                         if (namefile == "not found") { namefile =  "";  }
-                       else if (namefile == "not open") { abort = true; }      
+                       else if (namefile == "not open") { namefile = ""; abort = true; }       
                         else {  m->setNameFile(namefile); }
                         
                         groupfile = validParameter.validFile(parameters, "group", true);
@@ -162,7 +162,10 @@ PreClusterCommand::PreClusterCommand(string option) {
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
                         
-                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                 }
                                 
         }
diff --git a/referencedb.cpp b/referencedb.cpp

index fb930efcfd881561f27a5889f9da65aaaebd5f02..36400c1324380cda8e5570c028ac48aa4beaa2c9 100644 (file)
--- a/referencedb.cpp
+++ b/referencedb.cpp
@@ -22,6 +22,7 @@ void ReferenceDB::clearMemory()  {
         setSavedReference("");
         for(int i = 0; i < wordGenusProb.size(); i++) { wordGenusProb[i].clear(); }
         wordGenusProb.clear();
+       WordPairDiffArr.clear();
         setSavedTaxonomy("");
  }
  /*******************************************************
diff --git a/referencedb.h b/referencedb.h

index 2f292472cd35da2161099438c3f6ff79418039d1..5262e80d010a29da6896ada17bcfd7091dce7fec 100644 (file)
--- a/referencedb.h
+++ b/referencedb.h
@@ -26,6 +26,7 @@ class ReferenceDB {
                 bool save;
                 vector<Sequence> referenceSeqs;
                 vector< vector<float> > wordGenusProb;
+               vector<diffPair> WordPairDiffArr;
         
                 string getSavedReference()                      { return referencefile;         }
                 void setSavedReference(string p)        { referencefile = p;            }
diff --git a/removegroupscommand.cpp b/removegroupscommand.cpp

index e5a3bb2edc7fc97dacb89393ef3a2968b64247b4..f35da2e46588829480a5540cdcd1148fd54e31bf 100644 (file)
--- a/removegroupscommand.cpp
+++ b/removegroupscommand.cpp
@@ -16,12 +16,12 @@
  //**********************************************************************************************************************
  vector<string> RemoveGroupsCommand::setParameters(){   
         try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
-               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
+               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -253,6 +253,12 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                         
                         if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
                         if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+               
                 }
                 
         }
diff --git a/removelineagecommand.cpp b/removelineagecommand.cpp

index f55158fc259ace27c87d5a4fcca10e7c55539f13..9fec2496068950b4863448fe500b17cbc957670a 100644 (file)
--- a/removelineagecommand.cpp
+++ b/removelineagecommand.cpp
@@ -169,12 +169,12 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                         
                         //check for required parameters                 
                         fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                         else if (fastafile == "not found") {  fastafile = "";  }        
                         else { m->setFastaFile(fastafile); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") {  namefile = "";  }  
                         else { m->setNameFile(namefile); }
                         
@@ -193,7 +193,7 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                         else { m->setListFile(listfile); }
                         
                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                         else if (taxfile == "not found") {              
                                 taxfile = m->getTaxonomyFile(); 
                                 if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
@@ -220,7 +220,12 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                 
                         if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
-
+                       
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
  
         }
diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp

index 59c9b348ba1af9b719f35ae12a249265c7fe9662..2abe9ef78a8c76354d377087fa7a23deb27b4d25 100644 (file)
--- a/removeseqscommand.cpp
+++ b/removeseqscommand.cpp
@@ -194,12 +194,12 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                         }else { m->setAccnosFile(accnosfile); } 
                         
                         fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                         else if (fastafile == "not found") {  fastafile = "";  }        
                         else { m->setFastaFile(fastafile); }
                                                                    
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") {  namefile = "";  }  
                         else { m->setNameFile(namefile); } 
                                                                    
@@ -237,6 +237,10 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                         
                         if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
                         
+                       if ((fastafile != "") && (namefile == "")) {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp

index 16618f74cc8e251c7d28d10ca37fcd2e7b09d017..41067396c6f5333faeaf3346afd9b865ad351531 100644 (file)
--- a/screenseqscommand.cpp
+++ b/screenseqscommand.cpp
@@ -260,6 +260,11 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                         
                         temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){       temp = "90";                            }
                         m->mothurConvert(temp, criteria); 
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/secondarystructurecommand.cpp b/secondarystructurecommand.cpp

index fe80b7810e716264152835060f25d1f2e45c1fed..24142e0cf4aa6174a8006f1e0a7a85c82f10ab24 100644 (file)
--- a/secondarystructurecommand.cpp
+++ b/secondarystructurecommand.cpp
@@ -122,7 +122,7 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                         else if (mapfile == "not found") {  mapfile = "";  m->mothurOut("You must provide an map file."); m->mothurOutEndLine(); abort = true; }        
                         
                         fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                         else if (fastafile == "not found") {                            
                                 fastafile = m->getFastaFile(); 
                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
@@ -139,7 +139,11 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                                 outputDir = ""; 
                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
                         }
-
+                       
+                       if ((namefile == "") && (fastafile != "")){
+                               vector<string> files; files.push_back(fastafile); 
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/seqerrorcommand.cpp b/seqerrorcommand.cpp

index 9ac7c5bcef6fee9c8e01b7838a3560f3478c5fcc..d8ebe50fb11859367256a28286dec9789cc70d36 100644 (file)
--- a/seqerrorcommand.cpp
+++ b/seqerrorcommand.cpp
@@ -176,7 +176,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                 if (queryFileName != "") { m->mothurOut("Using " + queryFileName + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
                                 else {  m->mothurOut("You have no current fasta file and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
                         }
-                       else if (queryFileName == "not open") { abort = true; } 
+                       else if (queryFileName == "not open") { queryFileName = ""; abort = true; }     
                         else { m->setFastaFile(queryFileName); }
                         
                         referenceFileName = validParameter.validFile(parameters, "reference", true);
@@ -246,6 +246,11 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
  
                         substitutionMatrix.resize(6);
                         for(int i=0;i<6;i++){   substitutionMatrix[i].resize(6,0);      }
+                       
+                       if ((namesFileName == "") && (queryFileName != "")){
+                               vector<string> files; files.push_back(queryFileName); 
+                               parser.getNameFile(files);
+                       }
                 }
         }
         catch(exception& e) {
diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp

index 93950d1f6222c10af735808d87130213b6664017..e8f73ca10875f8321f624adfb62ee02f7baf2770 100644 (file)
--- a/seqsummarycommand.cpp
+++ b/seqsummarycommand.cpp
@@ -132,8 +132,12 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
-
-
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
         }
         catch(exception& e) {
diff --git a/sequence.cpp b/sequence.cpp

index 162d3be9e69d0f86f7e4f2122c3c914b613392c3..090ee14c1c838098aaaeb1a831ec01e3272ed512 100644 (file)
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -604,7 +604,6 @@ void Sequence::padFromPos(int end){
  bool Sequence::getIsAligned(){
         return isAligned;
  }
-
  //********************************************************************************************************************
  
  void Sequence::reverseComplement(){
diff --git a/setdircommand.cpp b/setdircommand.cpp

index 17bad60d2f358b41d4dce9eeafc6a155ad069df7..67a1f59f8388dbfec55ae7b552f02640b5d6b93f 100644 (file)
--- a/setdircommand.cpp
+++ b/setdircommand.cpp
@@ -202,8 +202,19 @@ int SetDirectoryCommand::execute(){
                                 if (lastChar != "\\") { tempdefault += "\\"; }  
                         #endif
                         
-                       m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();  
-                       m->setDefaultPath(tempdefault);
+                       //test to make sure directory exists
+                       tempdefault = m->getFullPathName(tempdefault);
+                       string inTemp = tempdefault + tag + "temp";
+                       ofstream in;
+                       in.open(inTemp.c_str(), ios::trunc);
+                       if(!in) {
+                               m->mothurOut(tempdefault + " directory does not exist or is not writable."); m->mothurOutEndLine(); 
+                       }else{
+                               in.close();
+                               m->mothurRemove(inTemp);
+                               m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();  
+                               m->setDefaultPath(tempdefault);
+                       }
                 }
  
                 return 0;
diff --git a/shhhercommand.cpp b/shhhercommand.cpp

index 40a4c8499a599207b56831c91e6f0f41c023eb2c..537821134edebcf9dcad70f8c1f38be0da39b121 100644 (file)
--- a/shhhercommand.cpp
+++ b/shhhercommand.cpp
@@ -23,7 +23,7 @@ vector<string> ShhherCommand::setParameters(){
         try {
                 CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
                 CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
-               CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plookup);
+               CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
                 CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                 CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
@@ -2152,7 +2152,7 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
         try {
                 string thisOutputDir = outputDir;
                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string qualityFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.qual";
+               string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.qual";
  
                 ofstream qualityFile;
                 m->openOutputFile(qualityFileName, qualityFile);
@@ -2259,7 +2259,7 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
         try {
                 string thisOutputDir = outputDir;
                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fastaFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.fasta";
+               string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.fasta";
                 ofstream fastaFile;
                 m->openOutputFile(fastaFileName, fastaFile);
                 
@@ -2307,7 +2307,7 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
         try {
                 string thisOutputDir = outputDir;
                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string nameFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+               string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.names";
                 ofstream nameFile;
                 m->openOutputFile(nameFileName, nameFile);
                 
@@ -2345,7 +2345,7 @@ void ShhherCommand::writeGroups(){
         try {
                 string thisOutputDir = outputDir;
                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fileRoot = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.'));
+               string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
                 string groupFileName = fileRoot + ".shhh.groups";
                 ofstream groupFile;
                 m->openOutputFile(groupFileName, groupFile);
@@ -2370,7 +2370,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
         try {
                 string thisOutputDir = outputDir;
                 if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string otuCountsFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.counts";
+               string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.counts";
                 ofstream otuCountsFile;
                 m->openOutputFile(otuCountsFileName, otuCountsFile);
                 
diff --git a/shhhseqscommand.cpp b/shhhseqscommand.cpp

index 6a339b880eb54eedaa7be7af3c297ed0089beb10..518e78d6386521dcb4ef57700c1ffb903ae02a13 100644 (file)
--- a/shhhseqscommand.cpp
+++ b/shhhseqscommand.cpp
@@ -165,6 +165,11 @@ ShhhSeqsCommand::ShhhSeqsCommand(string option) {
                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                 }
         }
         catch(exception& e) {
diff --git a/splitabundcommand.cpp b/splitabundcommand.cpp

index 989ef596168ff1894009fd8c625edc881197ef87..8131fa9195f6b8d939b27891605dd862d618964b 100644 (file)
--- a/splitabundcommand.cpp
+++ b/splitabundcommand.cpp
@@ -216,7 +216,6 @@ SplitAbundCommand::SplitAbundCommand(string option)  {
                         m->mothurConvert(temp, cutoff); 
  
                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
-
                 }
  
         }
diff --git a/splitgroupscommand.cpp b/splitgroupscommand.cpp

index 113439c9220429f4d9466a72a579c5aa2caa10ae..26716554decad148623563d4b120148325b7477a 100644 (file)
--- a/splitgroupscommand.cpp
+++ b/splitgroupscommand.cpp
@@ -125,7 +125,7 @@ SplitGroupCommand::SplitGroupCommand(string option)  {
  
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") { namefile = ""; }    
                         else { m->setNameFile(namefile); }
                 
@@ -151,6 +151,11 @@ SplitGroupCommand::SplitGroupCommand(string option)  {
                                                 
                         //if the user changes the output directory command factory will send this info to us in the output parameter 
                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(groupfile);      }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp

index fe9e93423969796187dd59452f7e3a3496d4ee57..c352feb099a83879bae9e1aca0f35d5bfe56e691 100644 (file)
--- a/subsamplecommand.cpp
+++ b/subsamplecommand.cpp
@@ -255,6 +255,10 @@ SubSampleCommand::SubSampleCommand(string option) {
                         if ((groupfile != "") && ((fastafile != "") && (listfile != ""))) { 
                                 m->mothurOut("A new group file can only be made from the subsample of a listfile or fastafile, not both. Please correct."); m->mothurOutEndLine(); abort = true; }
                         
+                       if ((fastafile != "") && (namefile == "")) {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/summaryqualcommand.cpp b/summaryqualcommand.cpp

index 421a8f2ad36c64e218aa68a4eb6fd8dc651362d0..2a40e0862ba0b0444db035246cdcb95409fe4923 100644 (file)
--- a/summaryqualcommand.cpp
+++ b/summaryqualcommand.cpp
@@ -111,7 +111,7 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                         
                         //check for required parameters
                         qualfile = validParameter.validFile(parameters, "qfile", true);
-                       if (qualfile == "not open") { abort = true; }
+                       if (qualfile == "not open") { qualfile = ""; abort = true; }
                         else if (qualfile == "not found") {                             
                                 qualfile = m->getQualFile(); 
                                 if (qualfile != "") { m->mothurOut("Using " + qualfile + " as input file for the qfile parameter."); m->mothurOutEndLine(); }
@@ -131,7 +131,12 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                         
                         string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                         m->setProcessors(temp);
-                       m->mothurConvert(temp, processors);                     
+                       m->mothurConvert(temp, processors);     
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(qualfile);
+                               parser.getNameFile(files);
+                       }
                 }
         }
         catch(exception& e) {
diff --git a/summarytaxcommand.cpp b/summarytaxcommand.cpp

index a19986aba92ffeae80d3786c2322dc3d7d5bb461..ffa304102b95570669e4e977a9f4859b18c24c71 100644 (file)
--- a/summarytaxcommand.cpp
+++ b/summarytaxcommand.cpp
@@ -157,6 +157,12 @@ SummaryTaxCommand::SummaryTaxCommand(string option)  {
                                 outputDir = ""; 
                                 outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it 
                         }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                 }
         }
         catch(exception& e) {
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp

index 213e241ccfc6ee8feeb6c34ed9b589343c7c71d1..dd3427b9a9733285e5e793f78cabe5bdc3ed1cf3 100644 (file)
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -292,6 +292,11 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                 m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
                                 abort = true;
                         }
+                       
+                       if (nameFile == "") {
+                               vector<string> files; files.push_back(fastaFile);
+                               parser.getNameFile(files);
+                       }
                 }
  
         }
diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp

index 047d06e27fadff92f647f75875f038f4bf8b04fd..a404f79222ac83cf41d42c04ceb614df1e01dceb 100644 (file)
--- a/unifracunweightedcommand.cpp
+++ b/unifracunweightedcommand.cpp
@@ -155,7 +155,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                         else { m->setGroupFile(groupfile); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
@@ -198,6 +198,11 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                                 m->splitAtDash(groups, Groups);
                                 m->setGroups(Groups);
                         }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
                 }
                 
         }
diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp

index b007db1f6eddc2d8e6119add616803b1d58526a2..f9dc450444b242555710f91dc3567f96e69e6d3d 100644 (file)
--- a/unifracweightedcommand.cpp
+++ b/unifracweightedcommand.cpp
@@ -141,7 +141,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         
                         //check for required parameters
                         treefile = validParameter.validFile(parameters, "tree", true);
-                       if (treefile == "not open") { abort = true; }
+                       if (treefile == "not open") { treefile = ""; abort = true; }
                         else if (treefile == "not found") {                             //if there is a current design file, use it
                                 treefile = m->getTreeFile(); 
                                 if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
@@ -155,7 +155,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         else { m->setGroupFile(groupfile); }
                         
                         namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                         else if (namefile == "not found") { namefile = ""; }
                         else { m->setNameFile(namefile); }
                         
@@ -192,6 +192,11 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                         m->mothurConvert(temp, processors);
                         
                         if (!random) {  iters = 0;  } //turn off random calcs
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
                 }
author	westcott <westcott>
	Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)
committer	westcott <westcott>
	Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)
Mothur.xcodeproj/project.pbxproj		patch \| blob \| history
bayesian.cpp		patch \| blob \| history
bayesian.h		patch \| blob \| history
binsequencecommand.cpp		patch \| blob \| history
classify.cpp		patch \| blob \| history
classify.h		patch \| blob \| history
classifyotucommand.cpp		patch \| blob \| history
classifyseqscommand.cpp		patch \| blob \| history
classifyseqscommand.h		patch \| blob \| history
clusterfragmentscommand.cpp		patch \| blob \| history
clustersplitcommand.cpp		patch \| blob \| history
consensusseqscommand.cpp		patch \| blob \| history
database.hpp		patch \| blob \| history
deconvolutecommand.cpp		patch \| blob \| history
getgroupscommand.cpp		patch \| blob \| history
getlineagecommand.cpp		patch \| blob \| history
getseqscommand.cpp		patch \| blob \| history
groupmap.h		patch \| blob \| history
kmer.cpp		patch \| blob \| history
kmer.hpp		patch \| blob \| history
kmerdb.cpp		patch \| blob \| history
kmerdb.hpp		patch \| blob \| history
knn.cpp		patch \| blob \| history
makefile		patch \| blob \| history
memchi2.cpp		patch \| blob \| history
mergegroupscommand.cpp		patch \| blob \| history
mergegroupscommand.h		patch \| blob \| history
mothur.h		patch \| blob \| history
mothurout.cpp		patch \| blob \| history
optionparser.cpp		patch \| blob \| history
optionparser.h		patch \| blob \| history
parsefastaqcommand.cpp		patch \| blob \| history
parsefastaqcommand.h		patch \| blob \| history
parsimonycommand.cpp		patch \| blob \| history
phylodiversitycommand.cpp		patch \| blob \| history
phylosummary.cpp		patch \| blob \| history
phylotree.cpp		patch \| blob \| history
phylotypecommand.cpp		patch \| blob \| history
preclustercommand.cpp		patch \| blob \| history
referencedb.cpp		patch \| blob \| history
referencedb.h		patch \| blob \| history
removegroupscommand.cpp		patch \| blob \| history
removelineagecommand.cpp		patch \| blob \| history
removeseqscommand.cpp		patch \| blob \| history
screenseqscommand.cpp		patch \| blob \| history
secondarystructurecommand.cpp		patch \| blob \| history
seqerrorcommand.cpp		patch \| blob \| history
seqsummarycommand.cpp		patch \| blob \| history
sequence.cpp		patch \| blob \| history
setdircommand.cpp		patch \| blob \| history
shhhercommand.cpp		patch \| blob \| history
shhhseqscommand.cpp		patch \| blob \| history
splitabundcommand.cpp		patch \| blob \| history
splitgroupscommand.cpp		patch \| blob \| history
subsamplecommand.cpp		patch \| blob \| history
summaryqualcommand.cpp		patch \| blob \| history
summarytaxcommand.cpp		patch \| blob \| history
trimseqscommand.cpp		patch \| blob \| history
unifracunweightedcommand.cpp		patch \| blob \| history
unifracweightedcommand.cpp		patch \| blob \| history