]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed bug with shhh.flow from file path name in write functions, added "smart" featur...
authorwestcott <westcott>
Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)
committerwestcott <westcott>
Thu, 19 Jan 2012 12:44:14 +0000 (12:44 +0000)
60 files changed:
Mothur.xcodeproj/project.pbxproj
bayesian.cpp
bayesian.h
binsequencecommand.cpp
classify.cpp
classify.h
classifyotucommand.cpp
classifyseqscommand.cpp
classifyseqscommand.h
clusterfragmentscommand.cpp
clustersplitcommand.cpp
consensusseqscommand.cpp
database.hpp
deconvolutecommand.cpp
getgroupscommand.cpp
getlineagecommand.cpp
getseqscommand.cpp
groupmap.h
kmer.cpp
kmer.hpp
kmerdb.cpp
kmerdb.hpp
knn.cpp
makefile
memchi2.cpp
mergegroupscommand.cpp
mergegroupscommand.h
mothur.h
mothurout.cpp
optionparser.cpp
optionparser.h
parsefastaqcommand.cpp
parsefastaqcommand.h
parsimonycommand.cpp
phylodiversitycommand.cpp
phylosummary.cpp
phylotree.cpp
phylotypecommand.cpp
preclustercommand.cpp
referencedb.cpp
referencedb.h
removegroupscommand.cpp
removelineagecommand.cpp
removeseqscommand.cpp
screenseqscommand.cpp
secondarystructurecommand.cpp
seqerrorcommand.cpp
seqsummarycommand.cpp
sequence.cpp
setdircommand.cpp
shhhercommand.cpp
shhhseqscommand.cpp
splitabundcommand.cpp
splitgroupscommand.cpp
subsamplecommand.cpp
summaryqualcommand.cpp
summarytaxcommand.cpp
trimseqscommand.cpp
unifracunweightedcommand.cpp
unifracweightedcommand.cpp

index c7c7cee1ad0c4a9ad5b1e9bfb02ba09c6db0b3fb..99454a9b47e4150e39ab5d35da76ec5e93ff5244 100644 (file)
                                GCC_OPTIMIZATION_LEVEL = 3;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
-                                       "VERSION=\"\\\"1.22.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"10/12/2011\\\"\"",
+                                       "VERSION=\"\\\"1.23.0\\\"\"",
+                                       "RELEASE_DATE=\"\\\"1/9/2012\\\"\"",
                                );
                                "GCC_VERSION[arch=*]" = "";
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
index b1f2c4cf422656bb603f1691b67b79b57305490e..f7ea6e4351868a20a191169b995e94faff6fa053 100644 (file)
 #include "phylosummary.h"
 #include "referencedb.h"
 /**************************************************************************************************/
-Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid) : 
-Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
+Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f) : 
+Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
        try {
                ReferenceDB* rdb = ReferenceDB::getInstance();
                
                threadID = tid;
+               flip = f;
                string baseName = tempFile;
                        
                if (baseName == "saved") { baseName = rdb->getSavedReference(); }
@@ -78,13 +79,14 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                        if (tfile == "saved") { 
                                m->mothurOutEndLine();  m->mothurOut("Using probabilties from " + rdb->getSavedTaxonomy() + " that are saved in memory...    ");        cout.flush();; 
                                wordGenusProb = rdb->wordGenusProb;
+                               WordPairDiffArr = rdb->WordPairDiffArr;
                        }else {
                                m->mothurOut("Reading template probabilities...     "); cout.flush();
                                readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
                        }       
                        
                        //save probabilities
-                       if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
+                       if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
                }else{
                
                        //create search database and names vector
@@ -108,11 +110,12 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                        
                                //initialze probabilities
                                wordGenusProb.resize(numKmers);
+                               WordPairDiffArr.resize(numKmers);
                        //cout << numKmers << '\t' << genusNodes.size() << endl;
                                for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
                        //cout << numKmers << '\t' << genusNodes.size() << endl;        
-                               //ofstream out;
-                               //ofstream out2;
+                               ofstream out;
+                               ofstream out2;
                                
                                #ifdef USE_MPI
                                        int pid;
@@ -122,17 +125,17 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                #endif
 
                                
-                               //m->openOutputFile(probFileName, out);
+                               m->openOutputFile(probFileName, out);
                                
                                //output mothur version
-                               //out << "#" << m->getVersion() << endl;
+                               out << "#" << m->getVersion() << endl;
                                
-                               //out << numKmers << endl;
+                               out << numKmers << endl;
                                
-                               //m->openOutputFile(probFileName2, out2);
+                               m->openOutputFile(probFileName2, out2);
                                
                                //output mothur version
-                               //out2 << "#" << m->getVersion() << endl;
+                               out2 << "#" << m->getVersion() << endl;
                                
                                #ifdef USE_MPI
                                        }
@@ -149,7 +152,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                                if (pid == 0) {  
                                        #endif
 
-                                       //out << i << '\t';
+                                       out << i << '\t';
                                        
                                        #ifdef USE_MPI
                                                }
@@ -168,7 +171,9 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                        
                                        //probabilityInTemplate = (# of seqs with that word in template + 0.50) / (total number of seqs in template + 1);
                                        float probabilityInTemplate = (seqsWithWordi.size() + 0.50) / (float) (names.size() + 1);
-                                       
+                                       diffPair tempProb(log(probabilityInTemplate), 0.0);
+                                       WordPairDiffArr[i] = tempProb;
+                                               
                                        int numNotZero = 0;
                                        for (int k = 0; k < genusNodes.size(); k++) {
                                                //probabilityInThisTaxonomy = (# of seqs with that word in this taxonomy + probabilityInTemplate) / (total number of seqs in this taxonomy + 1);
@@ -184,7 +189,7 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                                                if (pid == 0) {  
                                                        #endif
 
-                                                       //out << k << '\t' << wordGenusProb[i][k] << '\t'
+                                                       out << k << '\t' << wordGenusProb[i][k] << '\t' 
                                                        
                                                        #ifdef USE_MPI
                                                                }
@@ -200,8 +205,8 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                                if (pid == 0) {  
                                        #endif
                                        
-                                       //out << endl;
-                                       //out2 << probabilityInTemplate << '\t' << numNotZero << endl;
+                                       out << endl;
+                                       out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl;
                                        
                                        #ifdef USE_MPI
                                                }
@@ -214,8 +219,8 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                        if (pid == 0) {  
                                #endif
                                
-                               //out.close();
-                               //out2.close();
+                               out.close();
+                               out2.close();
                                
                                #ifdef USE_MPI
                                        }
@@ -228,10 +233,15 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
                                
                                //save probabilities
-                               if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
+                               if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
                        }
                }
-       
+               
+               generateWordPairDiffArr();
+               
+               //save probabilities
+               if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
+               
                m->mothurOut("DONE."); m->mothurOutEndLine();
                m->mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); m->mothurOutEndLine();
        }
@@ -258,13 +268,13 @@ string Bayesian::getTaxonomy(Sequence* seq) {
        try {
                string tax = "";
                Kmer kmer(kmerSize);
+               flipped = false;
                
                //get words contained in query
                //getKmerString returns a string where the index in the string is hte kmer number 
                //and the character at that index can be converted to be the number of times that kmer was seen
-               
                string queryKmerString = kmer.getKmerString(seq->getUnaligned()); 
-
+               
                vector<int> queryKmers;
                for (int i = 0; i < queryKmerString.length()-1; i++) {  // the -1 is to ignore any kmer with an N in it
                        if (queryKmerString[i] != '!') { //this kmer is in the query
@@ -272,7 +282,22 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                        }
                }
                
-               if (queryKmers.size() == 0) {  m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); return "bad seq"; }
+               //if user wants to test reverse compliment and its reversed use that instead
+               if (flip) {     
+                       if (isReversed(queryKmers)) { 
+                               flipped = true;
+                               seq->reverseComplement(); 
+                               queryKmerString = kmer.getKmerString(seq->getUnaligned()); 
+                               queryKmers.clear();
+                               for (int i = 0; i < queryKmerString.length()-1; i++) {  // the -1 is to ignore any kmer with an N in it
+                                       if (queryKmerString[i] != '!') { //this kmer is in the query
+                                               queryKmers.push_back(i);
+                                       }
+                               }
+                       }  
+               }
+               
+               if (queryKmers.size() == 0) {  m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); simpleTax = "unknown;";  return "unknown;"; }
                
                
                int index = getMostProbableTaxonomy(queryKmers);
@@ -283,7 +308,7 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                int numToSelect = queryKmers.size() / 8;
        
                tax = bootstrapResults(queryKmers, index, numToSelect);
-                               
+               
                return tax;     
        }
        catch(exception& e) {
@@ -366,7 +391,8 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                                seqTax = phyloTree->get(seqTax.parent);
                }
                
-               if (confidenceTax == "") { confidenceTax = "unclassified;"; simpleTax = "unclassified;"; }
+               if (confidenceTax == "") { confidenceTax = "unknown;"; simpleTax = "unknown;";  }
+       
                return confidenceTax;
                
        }
@@ -412,6 +438,46 @@ int Bayesian::getMostProbableTaxonomy(vector<int> queryKmer) {
                exit(1);
        }
 }
+//********************************************************************************************************************
+//Decides whether a query looks reverse complemented. For every non-negative kmer number in queryKmers the
+//stored prob and reverseProb values from WordPairDiffArr are summed separately; the query is reported as
+//reversed only when the reverseProb total is strictly greater than the prob total.
+bool Bayesian::isReversed(vector<int>& queryKmers){
+       try{
+               float forwardTotal = 0;
+               float reverseTotal = 0;
+               
+               for (int idx = 0; idx < queryKmers.size(); idx++){
+                       const int word = queryKmers[idx];
+                       if (word < 0) { continue; } //negative kmer numbers contribute nothing to either total
+                       
+                       forwardTotal += WordPairDiffArr[word].prob;
+                       reverseTotal += WordPairDiffArr[word].reverseProb;
+               }
+               
+               //a tie is not treated as reversed
+               return (reverseTotal > forwardTotal);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bayesian", "isReversed");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+//Fills in the reverseProb member of every WordPairDiffArr entry: entry i receives the prob stored at the
+//index that Kmer::getReverseKmerNumber(i) returns. Always returns 0.
+int Bayesian::generateWordPairDiffArr(){
+       try{
+               Kmer kmer(kmerSize);
+               int word = 0;
+               while (word < WordPairDiffArr.size()) {
+                       //look up the partner word first, then copy its prob across
+                       int partner = kmer.getReverseKmerNumber(word);
+                       WordPairDiffArr[word].reverseProb = WordPairDiffArr[partner].prob;
+                       word++;
+               }
+               
+               return 0;
+       }catch(exception& e) {
+               m->errorOut(e, "Bayesian", "generateWordPairDiffArr");
+               exit(1);
+       }
+}
 /*************************************************************************************************
 map<string, int> Bayesian::parseTaxMap(string newTax) {
        try{
@@ -515,7 +581,8 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        int kmer, name;  
                        vector<int> numbers; numbers.resize(numKmers);
                        float prob;
-                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);    
+                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+                       WordPairDiffArr.resize(numKmers);
                        
                        //read version
                        length = positions[1] - positions[0];
@@ -537,7 +604,10 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                                delete buf4;
 
                                istringstream iss (tempBuf,istringstream::in);
-                               iss >> zeroCountProb[i] >> numbers[i];  
+                               //the third value on each line is stored as this kmer's prob, matching the non-MPI reader;
+                               //the value extracted here is probTemp (the previous code assigned an undeclared name, tempProb)
+                               float probTemp;
+                               iss >> zeroCountProb[i] >> numbers[i] >> probTemp; 
+                               WordPairDiffArr[i].prob = probTemp;
+
                        }
                        
                        MPI_File_close(&inMPI);
@@ -585,13 +655,16 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        int kmer, name, count;  count = 0;
                        vector<int> num; num.resize(numKmers);
                        float prob;
-                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);            
+                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);    
+                       WordPairDiffArr.resize(numKmers);
                        
                        //read version
                        string line2 = m->getline(inNum); m->gobble(inNum);
+                       float probTemp;
                //cout << threadID << '\t' << line2 << '\t' << this << endl;    
                        while (inNum) {
-                               inNum >> zeroCountProb[count] >> num[count];  
+                               inNum >> zeroCountProb[count] >> num[count] >> probTemp; 
+                               WordPairDiffArr[count].prob = probTemp;
                                count++;
                                m->gobble(inNum);
                                //cout << threadID << '\t' << count << endl;
index 1cf5145132eab009b9d807f69de655fc26d59a40..7c884337495dbb1e0213d1171a54ca20e0eb932c 100644 (file)
@@ -18,7 +18,7 @@
 class Bayesian : public Classify {
        
 public:
-       Bayesian(string, string, string, int, int, int, int);
+       Bayesian(string, string, string, int, int, int, int, bool);
        ~Bayesian();
        
        string getTaxonomy(Sequence*);
@@ -30,12 +30,17 @@ private:
        vector<int> genusTotals;
        vector<int> genusNodes;  //indexes in phyloTree where genus' are located
        
+       vector<diffPair> WordPairDiffArr; 
+       
        int kmerSize, numKmers, confidenceThreshold, iters;
        
        string bootstrapResults(vector<int>, int, int);
        int getMostProbableTaxonomy(vector<int>);
        void readProbFile(ifstream&, ifstream&, string, string);
        bool checkReleaseDate(ifstream&, ifstream&, ifstream&, ifstream&);
+       bool isReversed(vector<int>&);
+       vector<int> createWordIndexArr(Sequence*);
+       int generateWordPairDiffArr();
        
 };
 
index 4977cad1f769690e300013e89c936eefb4228e84..2117daff4f2558e26bcf0008666f87bee0180f4d 100644 (file)
@@ -167,7 +167,7 @@ BinSeqCommand::BinSeqCommand(string option) {
                        }
                        
                        namesfile = validParameter.validFile(parameters, "name", true);
-                       if (namesfile == "not open") { abort = true; }  
+                       if (namesfile == "not open") { namesfile = ""; abort = true; }  
                        else if (namesfile == "not found") { namesfile = ""; }
                        else {  m->setNameFile(namesfile); }
 
@@ -176,6 +176,11 @@ BinSeqCommand::BinSeqCommand(string option) {
                        else if (groupfile == "not found") { groupfile = ""; }
                        else { m->setGroupFile(groupfile); }
                        
+                       if (namesfile == ""){
+                               vector<string> files; files.push_back(fastafile); 
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
index c92d9fdc6526788df19d9e6f487ffb641bbaca44..3bf0d572f88d0ad7ffd76989d71f10807e8cc659 100644 (file)
@@ -61,7 +61,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                        names.push_back(temp.getName());
                                        database->addSequence(temp);    
                                }
-//                             database->generateDB();
+                               database->generateDB();
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
                                database->readKmerDB(kmerFileTest);     
@@ -150,7 +150,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                        }
                                }
                                
-//                             database->generateDB();
+                               database->generateDB();
                                MPI_File_close(&inMPI);
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
@@ -200,7 +200,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                                }
                                fastaFile.close();
 
-//                             database->generateDB();
+                               database->generateDB();
                                
                        }else if ((method == "kmer") && (!needToGenerate)) {    
                                ifstream kmerFileTest(kmerDBName.c_str());
@@ -223,9 +223,9 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                        database->setNumSeqs(names.size());
                        
                        //sanity check
-                       //bool okay = phyloTree->ErrorCheck(names);
+                       bool okay = phyloTree->ErrorCheck(names);
                        
-                       //if (!okay) { m->control_pressed = true; }
+                       if (!okay) { m->control_pressed = true; }
                        
                        m->mothurOut("DONE."); m->mothurOutEndLine();
                        m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
@@ -238,7 +238,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
        }
 }
 /**************************************************************************************************/
-Classify::Classify() {         m = MothurOut::getInstance();   database = NULL;        }
+Classify::Classify() {         m = MothurOut::getInstance();   database = NULL;        flipped=false; }
 /**************************************************************************************************/
 
 int Classify::readTaxonomy(string file) {
index 2e209f562108121e8a95b3c4470252537813baed..4e0354782e5063e2a52268e88a1412b40db3c28e 100644 (file)
@@ -30,6 +30,7 @@ public:
        virtual ~Classify(){};
        virtual string getTaxonomy(Sequence*) = 0;
        virtual string getSimpleTax()  { return simpleTax;      }
+       virtual bool getFlipped()  { return flipped;    }
        virtual void generateDatabaseAndNames(string, string, string, int, float, float, float, float);
        virtual void setDistName(string s) {} //for knn, so if distance method is selected with knn you can create the smallest distance file in the right place.
        
@@ -45,6 +46,7 @@ protected:
        string taxFile, templateFile, simpleTax;
        vector<string> names;
        int threadID;
+       bool flip, flipped;
        
        int readTaxonomy(string);
        vector<string> parseTax(string);
index e28961b74888ee5bdefb9eb022fd11afe328e07f..c8896375b016ef6c45462351a18e8e520bfe75c2 100644 (file)
@@ -182,7 +182,7 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        else if (refTaxonomy == "not open") { abort = true; }
        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }   
+                       if (namefile == "not open") { namefile = ""; abort = true; }    
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
@@ -214,6 +214,11 @@ ClassifyOtuCommand::ClassifyOtuCommand(string option)  {
                        
                        if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
                        
+                       if (namefile == ""){
+                               vector<string> files; files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
index 9ee7b5e946a7f5819cbac94bab7efe8c0cb71aef..328cd58f1f32bc265de857de4ede5aa557c2cd94 100644 (file)
@@ -27,6 +27,7 @@ vector<string> ClassifySeqsCommand::setParameters(){
                CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
                CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
                CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
+               //CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
                CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
                CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
                CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
@@ -69,6 +70,7 @@ string ClassifySeqsCommand::getHelpString(){
                helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy.  The default is 0.\n";
                helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
                helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method.  The default is 100.\n";
+               //helpString += "The flip parameter allows you shut off mothur's   The default is T.\n";
                helpString += "The classify.seqs command should be in the following format: \n";
                helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
                helpString += "Example classify.seqs(fasta=amazon.fasta, reference=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n";
@@ -89,6 +91,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(){
                setParameters();
                vector<string> tempOutNames;
                outputTypes["taxonomy"] = tempOutNames;
+               outputTypes["accnos"] = tempOutNames;
                outputTypes["taxsummary"] = tempOutNames;
                outputTypes["matchdist"] = tempOutNames;
        }
@@ -126,6 +129,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                        outputTypes["taxonomy"] = tempOutNames;
                        outputTypes["taxsummary"] = tempOutNames;
                        outputTypes["matchdist"] = tempOutNames;
+                       outputTypes["accnos"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
@@ -440,15 +444,24 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "probs", false);            if (temp == "not found"){       temp = "true";                  }
                        probs = m->isTrue(temp);
                        
+                       //temp = validParameter.validFile(parameters, "flip", false);                   if (temp == "not found"){       temp = "T";                             }
+                       //flip = m->isTrue(temp); 
+                       flip = true;
+                       
                        temp = validParameter.validFile(parameters, "iters", false);            if (temp == "not found") { temp = "100";                        }
                        m->mothurConvert(temp, iters); 
 
-
                        
                        if ((method == "bayesian") && (search != "kmer"))  { 
                                m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
                                search = "kmer";
                        }
+                       
+                       if (namefileNames.size() == 0){
+                               vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]); 
+                               parser.getNameFile(files);
+                       }
+                       
                }
                
        }
@@ -470,12 +483,12 @@ int ClassifySeqsCommand::execute(){
        try {
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
-               if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());           }
+               if(method == "bayesian"){       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);             }
                else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
                else {
                        m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
                        m->mothurOutEndLine();
-                       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());   
+                       classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);     
                }
                
                if (m->control_pressed) { delete classify; return 0; }
@@ -494,6 +507,7 @@ int ClassifySeqsCommand::execute(){
                
                        if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
                        string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
+                       string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
                        string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
                        string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
                        
@@ -503,6 +517,7 @@ int ClassifySeqsCommand::execute(){
                        }
                        
                        outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
+                       outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
                        outputNames.push_back(taxSummary);      outputTypes["taxsummary"].push_back(taxSummary);
                        
                        int start = time(NULL);
@@ -521,6 +536,7 @@ int ClassifySeqsCommand::execute(){
                                MPI_File inMPI;
                                MPI_File outMPINewTax;
                                MPI_File outMPITempTax;
+                               MPI_File outMPIAcc;
                                                        
                                int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
                                int inMode=MPI_MODE_RDONLY; 
@@ -530,6 +546,9 @@ int ClassifySeqsCommand::execute(){
                                
                                char outTempTax[1024];
                                strcpy(outTempTax, tempTaxonomyFile.c_str());
+                       
+                               char outAcc[1024];
+                               strcpy(outAcc, newaccnosFile.c_str());
                                
                                char inFileName[1024];
                                strcpy(inFileName, fastaFileNames[s].c_str());
@@ -537,8 +556,9 @@ int ClassifySeqsCommand::execute(){
                                MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
                                MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax);
                                MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax);
+                               MPI_File_open(MPI_COMM_WORLD, outAcc, outMode, MPI_INFO_NULL, &outMPIAcc);
                                
-                               if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify;  return 0;  }
+                               if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);  MPI_File_close(&outMPIAcc);   MPI_File_close(&outMPITempTax);  delete classify;  return 0;  }
                                
                                if (pid == 0) { //you are the root process 
                                        
@@ -557,9 +577,9 @@ int ClassifySeqsCommand::execute(){
                                        
                                
                                        //align your part
-                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, outMPIAcc, MPIPos);
                                        
-                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
+                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax); MPI_File_close(&outMPIAcc);   MPI_File_close(&outMPITempTax);  for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]);        } delete classify; return 0;  }
                                        
                                        for (int i = 1; i < processors; i++) {
                                                int done;
@@ -577,9 +597,9 @@ int ClassifySeqsCommand::execute(){
                                        
                                        
                                        //align your part
-                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+                                       driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, outMPIAcc, MPIPos);
                                        
-                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);   MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
+                                       if (m->control_pressed) {  outputTypes.clear(); MPI_File_close(&inMPI);  MPI_File_close(&outMPINewTax);  MPI_File_close(&outMPIAcc);  MPI_File_close(&outMPITempTax);  delete classify; return 0;  }
 
                                        int done = 0;
                                        MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD); 
@@ -589,6 +609,7 @@ int ClassifySeqsCommand::execute(){
                                MPI_File_close(&inMPI);
                                MPI_File_close(&outMPINewTax);
                                MPI_File_close(&outMPITempTax);
+                               MPI_File_close(&outMPIAcc); 
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
@@ -613,16 +634,19 @@ int ClassifySeqsCommand::execute(){
                        }
 #endif
                        if(processors == 1){
-                               numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+                               numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]);
                        }else{
-                               numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); 
+                               numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]); 
                        }
 #endif
+                       
+                       if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); }
 
                m->mothurOutEndLine();
                m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
                start = time(NULL);
-
+               
+               
 
                #ifdef USE_MPI  
                        if (pid == 0) {  //this part does not need to be paralellized
@@ -744,6 +768,12 @@ int ClassifySeqsCommand::execute(){
                        if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
                }
                
+               current = "";
+               itTypes = outputTypes.find("accnos");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+               }
+               
                delete classify;
                
                return 0;
@@ -785,7 +815,7 @@ string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) {
 
 /**************************************************************************************************/
 
-int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
+int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string accnos, string filename) {
        try {
                
                int num = 0;
@@ -802,7 +832,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
+                               num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", filename);
 
                                //pass numSeqs to parent
                                ofstream out;
@@ -820,7 +850,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                }
                
                //parent does its part
-               num = driver(lines[0], taxFileName, tempTaxFile, filename);
+               num = driver(lines[0], taxFileName, tempTaxFile, accnos, filename);
                
                //force parent to wait until all the processes are done
                for (int i=0;i<processIDS.size();i++) { 
@@ -851,7 +881,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                        string extension = "";
                        if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
                        
-                       classifyData* tempclass = new classifyData(probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i);
+                       classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flipThreshold);
                        pDataArray.push_back(tempclass);
                        
                        //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
@@ -861,7 +891,7 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                }
                
                //parent does its part
-               num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", filename);
+               num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", accnos + toString(processors-1) + ".temp", filename);
                processIDS.push_back((processors-1));
                
                //Wait until all threads have terminated.
@@ -879,8 +909,10 @@ int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile,
                for(int i=0;i<processIDS.size();i++){
                        appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
                        appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
+                       appendTaxFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
                        m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
                        m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
+                       m->mothurRemove((m->getFullPathName(accnos) + toString(processIDS[i]) + ".temp"));
                }
                
                return num;
@@ -917,13 +949,16 @@ void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
 
 //**********************************************************************************************************************
 
-int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
+int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){
        try {
                ofstream outTax;
                m->openOutputFile(taxFName, outTax);
                
                ofstream outTaxSimple;
                m->openOutputFile(tempTFName, outTaxSimple);
+               
+               ofstream outAcc;
+               m->openOutputFile(accnos, outAcc);
        
                ifstream inFASTA;
                m->openInputFile(filename, inFASTA);
@@ -936,7 +971,11 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                int count = 0;
                
                while (!done) {
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { 
+                               inFASTA.close();
+                               outTax.close();
+                               outTaxSimple.close();
+                               outAcc.close(); return 0; }
                
                        Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
                        
@@ -945,17 +984,20 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                                taxonomy = classify->getTaxonomy(candidateSeq);
                                
                                if (m->control_pressed) { delete candidateSeq; return 0; }
-
-                               if (taxonomy != "bad seq") {
-                                       //output confidence scores or not
-                                       if (probs) {
-                                               outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
-                                       }else{
-                                               outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
-                                       }
-                                       
-                                       outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+                               
+                               if (taxonomy == "unknown;") { m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); m->mothurOutEndLine(); }
+                               
+                               //output confidence scores or not
+                               if (probs) {
+                                       outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+                               }else{
+                                       outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
                                }
+                               
+                               if (classify->getFlipped()) { outAcc << candidateSeq->getName() << endl; }
+                               
+                               outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+                               
                                count++;
                        }
                        delete candidateSeq;
@@ -977,6 +1019,7 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
                inFASTA.close();
                outTax.close();
                outTaxSimple.close();
+               outAcc.close();
                
                return count;
        }
@@ -987,10 +1030,11 @@ int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempT
 }
 //**********************************************************************************************************************
 #ifdef USE_MPI
-int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector<unsigned long long>& MPIPos){
+int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, MPI_File& accFile, vector<unsigned long long>& MPIPos){
        try {
                MPI_Status statusNew; 
                MPI_Status statusTemp; 
+               MPI_Status statusAcc; 
                MPI_Status status; 
                
                int pid;
@@ -1018,29 +1062,40 @@ int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File
                        if (candidateSeq->getName() != "") {
                                taxonomy = classify->getTaxonomy(candidateSeq);
                                
-                               if (taxonomy != "bad seq") {
-                                       //output confidence scores or not
-                                       if (probs) {
-                                               outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
-                                       }else{
-                                               outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
-                                       }
-                                       
-                                       int length = outputString.length();
-                                       char* buf2 = new char[length];
-                                       memcpy(buf2, outputString.c_str(), length);
+                               if (taxonomy == "unknown;") { m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); m->mothurOutEndLine(); }
                                
-                                       MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
-                                       delete buf2;
-
+                               //output confidence scores or not
+                               if (probs) {
+                                       outputString =  candidateSeq->getName() + "\t" + taxonomy + "\n";
+                               }else{
                                        outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
-                                       length = outputString.length();
-                                       char* buf = new char[length];
-                                       memcpy(buf, outputString.c_str(), length);
+                               }
+                               
+                               int length = outputString.length();
+                               char* buf2 = new char[length];
+                               memcpy(buf2, outputString.c_str(), length);
+                               
+                               MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
+                               delete buf2;
+                               
+                               outputString =  candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
+                               length = outputString.length();
+                               char* buf = new char[length];
+                               memcpy(buf, outputString.c_str(), length);
                                
-                                       MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
-                                       delete buf;
+                               MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
+                               delete buf;
+                               
+                               if (classify->getFlipped()) { 
+                                       outputString =  candidateSeq->getName() + "\n";
+                                       length = outputString.length();
+                                       char* buf3 = new char[length];
+                                       memcpy(buf3, outputString.c_str(), length);
+                                       
+                                       MPI_File_write_shared(accFile, buf3, length, MPI_CHAR, &statusAcc);
+                                       delete buf3;
                                }
+                               
                        }                               
                        delete candidateSeq;
                        
index 0e21a203caaa0c4a8166d7ecb08a685cd36f3169..0bf4a9154412080d28fa065ab33789279832fbeb 100644 (file)
@@ -72,16 +72,16 @@ private:
        string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
        int processors, kmerSize, numWanted, cutoff, iters;
        float match, misMatch, gapOpen, gapExtend;
-       bool abort, probs, save;
+       bool abort, probs, save, flip;
        
-       int driver(linePair*, string, string, string);
+       int driver(linePair*, string, string, string, string);
        void appendTaxFiles(string, string);
-       int createProcesses(string, string, string); 
+       int createProcesses(string, string, string, string); 
        string addUnclassifieds(string, int);
        
        int MPIReadNamesFile(string);
        #ifdef USE_MPI
-       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
        #endif
 };
 
@@ -93,16 +93,17 @@ struct classifyData {
        string taxFName; 
        string tempTFName; 
        string filename;
-       string search, taxonomyFileName, templateFileName, method;
+       string search, taxonomyFileName, templateFileName, method, accnos;
        unsigned long long start;
        unsigned long long end;
        MothurOut* m;
        float match, misMatch, gapOpen, gapExtend;
        int count, kmerSize, threadID, cutoff, iters, numWanted;
-       bool probs;
+       bool probs, flip;
         
        classifyData(){} // default ctor: assigns nothing; the built-in-typed members declared above (start, end, m, match..., count..., probs, flip) have no initializers here, so callers must set them before use
-       classifyData(bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid) {
+       classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli) {
+               accnos = acc;
                taxonomyFileName = tx;
                templateFileName = te;
                taxFName = a;
@@ -124,6 +125,7 @@ struct classifyData {
                threadID = tid;
                probs = p;
                count = 0;
+               flip = fli;
        }
 };
 
@@ -141,6 +143,9 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                ofstream outTaxSimple;
                pDataArray->m->openOutputFile(pDataArray->tempTFName, outTaxSimple);
                
+               ofstream outAcc;
+               pDataArray->m->openOutputFile(pDataArray->accnos, outAcc);
+               
                ifstream inFASTA;
                pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
                
@@ -157,12 +162,12 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                
                //make classify
                Classify* myclassify;
-               if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID);           }
-               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID);                           }
+               if(pDataArray->method == "bayesian"){   myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip);         }
+               else if(pDataArray->method == "knn"){   myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID, pDataArray->flipThreshold);                                }
                else {
                        pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
                        pDataArray->m->mothurOutEndLine();
-                       myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID);   
+                       myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); 
                }
                
                if (pDataArray->m->control_pressed) { delete myclassify; return 0; }
@@ -180,16 +185,19 @@ static DWORD WINAPI MyClassThreadFunction(LPVOID lpParam){
                                
                                if (pDataArray->m->control_pressed) { delete candidateSeq; return 0; }
                                
-                               if (taxonomy != "bad seq") {
-                                       //output confidence scores or not
-                                       if (pDataArray->probs) {
-                                               outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
-                                       }else{
-                                               outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
-                                       }
-                                       
-                                       outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
+                               if (taxonomy == "unknown;") { pDataArray->m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); pDataArray->m->mothurOutEndLine(); }
+
+                               //output confidence scores or not
+                               if (pDataArray->probs) {
+                                       outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+                               }else{
+                                       outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
                                }
+                                       
+                               outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
+                                       
+                               if (myclassify->getFlipped()) { outAcc << candidateSeq->getName() << endl; }
+                               
                                count++;
                        }
                        delete candidateSeq;
index ad1bd81b4a9d4554751cfbbc5eb836e107e7b53d..772113baf83e9fbbd057794103f46ffa21c21fda 100644 (file)
@@ -147,7 +147,7 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                        // ...at some point should added some additional type checking...
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not found") { namefile =  "";  }
-                       else if (namefile == "not open") { abort = true; }      
+                       else if (namefile == "not open") { namefile = ""; abort = true; }       
                        else {  readNameFile(); m->setNameFile(namefile); }
                        
                        string temp;
@@ -157,6 +157,11 @@ ClusterFragmentsCommand::ClusterFragmentsCommand(string option) {
                        temp = validParameter.validFile(parameters, "percent", false);          if (temp == "not found"){       temp = "0";                             }
                        m->mothurConvert(temp, percent);
                        
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
                                
        }
index 787d84aa3ef40042f3e2358283648ba63e8eb098..34caf654124886f9cd23638a149b2b3487ca53e2 100644 (file)
@@ -201,7 +201,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
                        
                        taxFile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxFile == "not open") { abort = true; }    
+                       if (taxFile == "not open") { taxFile = ""; abort = true; }      
                        else if (taxFile == "not found") { taxFile = ""; }
                        else {  m->setTaxonomyFile(taxFile); }
                        
index 4f8c53bc9b8cd70fab82a13cd8507a8e5719a07a..223e5dbb86600907fafaafdf5af48ee665adfc5b 100644 (file)
@@ -142,7 +142,7 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                        }else { m->setFastaFile(fastafile); }   
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
@@ -163,7 +163,11 @@ ConsensusSeqsCommand::ConsensusSeqsCommand(string option)  {
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(fastafile);      }
-
+                       
+                       if (namefile == ""){
+                               vector<string> files; files.push_back(fastafile); 
+                               parser.getNameFile(files);
+                       }
                }
        }
        catch(exception& e) {
index 580c3f2d4587ba80b8286d5b46968d6faa3818c3..b2817a779dc1b95070ef77fbe6e8bf5b11159a3c 100644 (file)
@@ -58,6 +58,7 @@ public:
        virtual void readKmerDB(ifstream&){}; // default implementation: ignores the stream and does nothing
        virtual void setNumSeqs(int i) {        numSeqs = i;    } // stores i in the numSeqs member
        virtual vector<int> getSequencesWithKmer(int){ vector<int> filler; return filler; };  // default implementation: returns an empty vector regardless of the argument
+       virtual int getReversed(int) { return 0; } // default implementation added by this change: returns 0 for any argument
        virtual int getMaxKmer(){       return 1;       } // default implementation: always returns 1
        
 protected:
index 3fa622f01ff2fc334f5cbb9d79851a9ca7f8b30c..3541e00d064233c4da10619789291222290b33f1 100644 (file)
@@ -125,9 +125,15 @@ DeconvoluteCommand::DeconvoluteCommand(string option)  {
                        }
                        
                        oldNameMapFName = validParameter.validFile(parameters, "name", true);
-                       if (oldNameMapFName == "not open") { abort = true; }
+                       if (oldNameMapFName == "not open") { oldNameMapFName = ""; abort = true; }
                        else if (oldNameMapFName == "not found"){       oldNameMapFName = "";   }
                        else { m->setNameFile(oldNameMapFName); }
+                       
+                       if (oldNameMapFName == "") {
+                               vector<string> files; files.push_back(inFastaName);
+                               parser.getNameFile(files);
+                       }
+                       
                }
 
        }
index 0c42b7dad8281b1d9edd49550c3df95ae773c7e8..432894b06fe9d5ffa6982fdaaa49474a44c403e0 100644 (file)
 //**********************************************************************************************************************
 vector<string> GetGroupsCommand::setParameters(){      
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
-               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
+               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
                CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -180,12 +180,12 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                        else { m->setAccnosFile(accnosfile); }
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  }
                        else { m->setFastaFile(fastafile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") {  namefile = "";  }  
                        else { m->setNameFile(namefile); }
                        
@@ -200,7 +200,7 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                        else { m->setListFile(listfile); }
                        
                        taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
                        else { m->setTaxonomyFile(taxfile); }
                        
@@ -253,6 +253,10 @@ GetGroupsCommand::GetGroupsCommand(string option)  {
                        if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
                        if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
 
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
                }
                
        }
index b4e41e5fb60f48823528907df5804ff05c01c419..c2be580bb73313c91322e903c87027dc5da405d9 100644 (file)
@@ -168,12 +168,12 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        
                        //check for required parameters                 
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  }
                        else { m->setFastaFile(fastafile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") {  namefile = "";  }  
                        else { m->setNameFile(namefile); }
                        
@@ -192,7 +192,7 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        else { m->setListFile(listfile); }
                        
                        taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                        else if (taxfile == "not found") {                              
                                taxfile = m->getTaxonomyFile(); 
                                if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
@@ -217,6 +217,11 @@ GetLineageCommand::GetLineageCommand(string option)  {
                        m->splitAtChar(taxons, listOfTaxons, '-');
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+               
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index ffe49b48810d7916e5324efd9480c87e8322ec92..9bf188db3d25fa4ac16b82973a2802c3b1e6ad75 100644 (file)
@@ -206,12 +206,12 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        if (accnosfile2 == "not found") { accnosfile2 = ""; }
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  }
                        else { m->setFastaFile(fastafile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") {  namefile = "";  }  
                        else { m->setNameFile(namefile); }
                        
@@ -230,7 +230,7 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        else { m->setListFile(listfile); }
                        
                        taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                        else if (taxfile == "not found") {  taxfile = "";  }
                        else { m->setTaxonomyFile(taxfile); }
                        
@@ -249,6 +249,11 @@ GetSeqsCommand::GetSeqsCommand(string option)  {
                        dups = m->isTrue(temp);
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+               
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index 99891a46c5cf998c565bffb8012350ebedacc8fa..29c174134eee5689e5210775dbde2f220010d551 100644 (file)
@@ -2,7 +2,7 @@
 #define GROUPMAP_H
 /*
  *  groupmap.h
- *  Dotur
+ *  Mothur
  *
  *  Created by Sarah Westcott on 12/1/08.
  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
index 6ee84d3da3e8917c8c02d9c2d37deb68e08397b5..50574f4a5ee0709daf2ebb67b8f0e49971e20ef0 100644 (file)
--- a/kmer.cpp
+++ b/kmer.cpp
@@ -131,6 +131,27 @@ string Kmer::getKmerBases(int kmerNumber){
        }
        return kmer;
 }
+/**************************************************************************************************/
+
+int Kmer::getReverseKmerNumber(int kmerNumber){
+       //Returns the kmer number of the reverse complement of the kmer identified by kmerNumber.
+       string kmerString = getKmerBases(kmerNumber);  //decode the number into its string of bases
+       
+       //walk the bases from last to first, appending each one's complement (A<->T, G<->C); any other character becomes 'N'
+       string reverse = "";
+       for(int i=kmerString.length()-1;i>=0;i--){
+               if(kmerString[i] == 'A')                {       reverse += 'T'; }
+               else if(kmerString[i] == 'T'){  reverse += 'A'; }
+               else if(kmerString[i] == 'G'){  reverse += 'C'; }
+               else if(kmerString[i] == 'C'){  reverse += 'G'; }
+               else                                            {       reverse += 'N'; }
+       }
+       //convert the reverse-complement string back into a kmer number (second argument 0 is presumably the start position in the string - confirm against getKmerNumber)
+       int reverseNumber = getKmerNumber(reverse, 0);
+       
+       return reverseNumber;
+       
+}
 
 /**************************************************************************************************/
 
index d5b67d66d173cb24c179569d44886f2591ea2530..9a3c5a0091b868b7f61ae8043546f39b27e212cb 100644 (file)
--- a/kmer.hpp
+++ b/kmer.hpp
@@ -21,6 +21,7 @@ public:
        string getKmerString(string);
        int getKmerNumber(string, int);
        string getKmerBases(int);
+       int getReverseKmerNumber(int);  //returns the kmer number of the reverse complement of the given kmer number
        vector< map<int, int> > getKmerCounts(string sequence);  //for use in chimeraCheck
        
 private:
index 2703e16d481ceccd6019ff642b97432ba5cf819f..9a5d23500be955b459413beee989601f6c0ec78f 100644 (file)
@@ -57,6 +57,8 @@ KmerDB::~KmerDB(){}
 
 vector<int> KmerDB::findClosestSequences(Sequence* candidateSeq, int num){
        try {
+               if (num > numSeqs) { m->mothurOut("[WARNING]: you requested " + toString(num) + " closest sequences, but the template only contains " + toString(numSeqs) + ", adjusting."); m->mothurOutEndLine(); num = numSeqs; }
+               
                vector<int> topMatches;
                Kmer kmer(kmerSize);
                searchScore = 0;
@@ -215,6 +217,20 @@ int KmerDB::getCount(int kmer) {
        }       
 }
 /**************************************************************************************************/
+int KmerDB::getReversed(int kmerNumber) {  //returns the reverse complement kmer number for kmerNumber, or 0 when kmerNumber is below 0 or above maxKmer
+       try {
+               Kmer kmer(kmerSize);
+               //NOTE(review): out-of-range input yields 0 rather than an error; if 0 is itself a valid kmer number, callers cannot tell the two cases apart - confirm
+               if (kmerNumber < 0) { return 0; }  //if user gives negative number
+               else if (kmerNumber > maxKmer) {        return 0;       }  //or a kmer that is bigger than maxkmer
+               else {  return kmer.getReverseKmerNumber(kmerNumber);   }  //in range: delegate the conversion to Kmer
+       }
+       catch(exception& e) {
+               m->errorOut(e, "KmerDB", "getReversed");  //report the exception with class and function name
+               exit(1);  //then terminate the process with status 1
+       }       
+}
+/**************************************************************************************************/
 vector<int> KmerDB::getSequencesWithKmer(int kmer) {
        try {
                
index 62d4836a066f9bc10cf1a041e21cb07e1e0a1f84..4ae00b91311677488c35961186dfbf072ba005f3 100644 (file)
@@ -36,6 +36,7 @@ public:
        void readKmerDB(ifstream&);
        int getCount(int);  //returns number of sequences with that kmer number
        vector<int> getSequencesWithKmer(int);  //returns vector of sequences that contain kmer passed in
+       int getReversed(int);  //returns reverse complement kmerNumber, or 0 if the given kmerNumber is out of range
        int getMaxKmer() { return maxKmer; }
        
 private:
diff --git a/knn.cpp b/knn.cpp
index 6053b6e47a1f16c1398c2c183cd40eef0aefeda0..837fa6d18d4f1aed6cf34e5861942b4230c30dd1 100644 (file)
--- a/knn.cpp
+++ b/knn.cpp
@@ -72,11 +72,11 @@ string Knn::getTaxonomy(Sequence* seq) {
                }
                
                if (closestNames.size() == 0) {
-                       m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); m->mothurOutEndLine();
-                       tax = "bad seq";
+                       m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine();
+                       tax = "unknown;";
                }else{
                        tax = findCommonTaxonomy(closestNames);
-                       if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); tax = "bad seq"; }
+                       if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; }
                }
                
                simpleTax = tax;
@@ -90,7 +90,7 @@ string Knn::getTaxonomy(Sequence* seq) {
 /**************************************************************************************************/
 string Knn::findCommonTaxonomy(vector<string> closest)  {
        try {
-               vector< vector<string> > taxons;  //taxon[0] = vector of taxonomy info for closest[0].
+               /*vector< vector<string> > taxons;  //taxon[0] = vector of taxonomy info for closest[0].
                                                                                //so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
                                                                                //taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria....
                                                                                
@@ -101,6 +101,7 @@ string Knn::findCommonTaxonomy(vector<string> closest)  {
                        if (m->control_pressed) { return "control"; }
                
                        string tax = taxonomy[closest[i]];  //we know its there since we checked in getTaxonomy
+                       cout << tax << endl;
                
                        taxons[i] = parseTax(tax);
                
@@ -128,9 +129,54 @@ string Knn::findCommonTaxonomy(vector<string> closest)  {
                                }
                                break;
                        }
+               }*/
+               
+               string conTax;
+               
+               //create a tree containing sequences from this bin
+               PhyloTree* p = new PhyloTree();
+               
+               for (int i = 0; i < closest.size(); i++) {
+                       p->addSeqToTree(closest[i], taxonomy[closest[i]]);
                }
-       
-               return common;
+               
+               //build tree
+               p->assignHeirarchyIDs(0);
+               
+               TaxNode currentNode = p->get(0);
+               
+               //at each level
+               while (currentNode.children.size() != 0) { //you still have more to explore
+                       
+                       TaxNode bestChild;
+                       int bestChildSize = 0;
+                       
+                       //go through children
+                       for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
+                               
+                               TaxNode temp = p->get(itChild->second);
+                               
+                               //select child with largest accessions - most seqs assigned to it
+                               if (temp.accessions.size() > bestChildSize) {
+                                       bestChild = p->get(itChild->second);
+                                       bestChildSize = temp.accessions.size();
+                               }
+                               
+                       }
+                       
+                       if (bestChildSize == closest.size()) { //if yes, add it
+                               conTax += bestChild.name + ";";
+                       }else{ //if no, quit
+                               break;
+                       }
+                       
+                       //move down a level
+                       currentNode = bestChild;
+               }
+               
+               delete p;       
+               
+               return conTax;
        }
        catch(exception& e) {
                m->errorOut(e, "Knn", "findCommonTaxonomy");
index e2e66c152650226dda8bde8f43d93a5e660c2895..476070eb2fbb654d054238c45ee335b2ce8fe0eb 100644 (file)
--- a/makefile
+++ b/makefile
@@ -32,6 +32,7 @@ ifeq  ($(strip $(64BIT_VERSION)),yes)
        #if you using cygwin to build Windows the following line
        #CXX = x86_64-w64-mingw32-g++
        #CC = x86_64-w64-mingw32-g++
+       #FORTAN_COMPILER = x86_64-w64-mingw32-gfortran
        #TARGET_ARCH += -m64 -static
 
        #if you are a linux user use the following line
index 595b54f664d61e1fcd75e537d48b2e055fde2237..f08b4b8561d9177fbd6f0a5a8f3337b95312e362 100644 (file)
@@ -17,7 +17,7 @@ EstOutput MemChi2::getValues(vector<SharedRAbundVector*> shared) {
                int nonZeroA = 0;
                int nonZeroB = 0;
                int totalOtus = shared[0]->getNumBins();
-               int totalGroups = shared.size();
+               //int totalGroups = shared.size();
                
                //for each otu
                for (int i = 0; i < shared[0]->getNumBins(); i++) {
index af2af96b4dc6ae122fd0b9383d9486b3c776234e..e30558cbe69d3069135e0432f2eda7f9623095b5 100644 (file)
@@ -13,7 +13,8 @@
 //**********************************************************************************************************************
 vector<string> MergeGroupsCommand::setParameters(){    
        try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pgroup);
                CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
                CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
@@ -33,12 +34,12 @@ vector<string> MergeGroupsCommand::setParameters(){
 string MergeGroupsCommand::getHelpString(){    
        try {
                string helpString = "";
-               helpString += "The merge.groups command reads a shared file and a design file and merges the groups in the shared file that are in the same grouping in the design file.\n";
+               helpString += "The merge.groups command reads a shared or group file and a design file and merges the groups that are in the same grouping in the design file.\n";
                helpString += "The merge.groups command outputs a .shared file. \n";
-               helpString += "The merge.groups command parameters are shared, groups, label and design.  The design and shared parameter are required.\n";
+               helpString += "The merge.groups command parameters are shared, group, groups, label and design.  The design parameter is required.\n";
                helpString += "The design parameter allows you to assign your groups to sets. It is required. \n";
                helpString += "The design file looks like the group file.  It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
-               helpString += "The groups parameter allows you to specify which of the groups in your shared you would like included. The group names are separated by dashes.\n";
+               helpString += "The groups parameter allows you to specify which of the groups in your shared or group file you would like included. The group names are separated by dashes.\n";
                helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
                helpString += "The merge.groups command should be in the following format: merge.groups(design=yourDesignFile, shared=yourSharedFile).\n";
                helpString += "Example merge.groups(design=temp.design, groups=A-B-C, shared=temp.shared).\n";
@@ -58,6 +59,7 @@ MergeGroupsCommand::MergeGroupsCommand(){
                setParameters();
                vector<string> tempOutNames;
                outputTypes["shared"] = tempOutNames;
+               outputTypes["group"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "MergeGroupsCommand", "MetaStatsCommand");
@@ -92,6 +94,7 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["shared"] = tempOutNames;
+                       outputTypes["group"] = tempOutNames;
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
@@ -116,6 +119,15 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["shared"] = inputDir + it->second;           }
                                }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
                        }
                        
                        //check for required parameters
@@ -128,15 +140,15 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                                else {  m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }else { m->setDesignFile(designfile); } 
                        
-                       //make sure the user has already run the read.otu command
                        sharedfile = validParameter.validFile(parameters, "shared", true);
-                       if (sharedfile == "not open") { abort = true; }
-                       else if (sharedfile == "not found") {                           
-                               //if there is a current shared file, use it
-                               sharedfile = m->getSharedFile(); 
-                               if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
-                               else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
-                       }else { m->setSharedFile(sharedfile); } 
+                       if (sharedfile == "not open") { abort = true; sharedfile = ""; }
+                       else if (sharedfile == "not found") {  sharedfile = ""; }
+                       else { m->setSharedFile(sharedfile); }  
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { abort = true; groupfile = ""; }
+                       else if (groupfile == "not found") {  groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }    
                        
                        //check for optional parameter and set defaults
                        // ...at some point should added some additional type checking...
@@ -151,6 +163,19 @@ MergeGroupsCommand::MergeGroupsCommand(string option) {
                        if (groups == "not found") { groups = "all";  }
                        m->splitAtDash(groups, Groups);
                        m->setGroups(Groups);
+                       
+                       if ((sharedfile == "") && (groupfile == "")) { 
+                               //give priority to group, then shared
+                               groupfile = m->getGroupFile(); 
+                               if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       sharedfile = m->getSharedFile(); 
+                                       if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+                                       else { 
+                                               m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+                                       }
+                               }
+                       }
                }
                
        }
@@ -165,17 +190,106 @@ int MergeGroupsCommand::execute(){
        try {
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+       
+               designMap = new GroupMap(designfile);
+               designMap->readDesignMap();
+               
+               if (groupfile != "") { processGroupFile(designMap); }
+               if (sharedfile != "") { processSharedFile(designMap); }
+
+               //reset groups parameter
+               m->clearGroups();  
+               delete designMap;
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0;}
+               
+               
+               //set shared file as new current sharedfile
+               string current = "";
+               itTypes = outputTypes.find("shared");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
+               }
+               
+               itTypes = outputTypes.find("group");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
+               }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeGroupsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+       try {
+               //Merges the vectors in thisLookUp by the grouping designMap assigns to each group, then writes the merged vectors to out. Always returns 0.
+               map<string, SharedRAbundVector> merged;  //grouping name -> merged abundance vector
+               map<string, SharedRAbundVector>::iterator it;
+               
+               for (int i = 0; i < thisLookUp.size(); i++) {
+                       
+                       if (m->control_pressed) { return 0; }  //stop early; nothing has been written to out at this point
+                       
+                       //what grouping does this group belong to
+                       string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
+                       if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
+                       //NOTE(review): the "NOTFOUND" value assigned above is never read; a group missing from the design file is just skipped
+                       else {
+                               //do we already have a member of this grouping?
+                               it = merged.find(grouping);
+                               
+                               if (it == merged.end()) { //nope, so create it
+                                       merged[grouping] = *thisLookUp[i];  //start from a copy of this group's vector
+                                       merged[grouping].setGroup(grouping);  //relabel the copy with the grouping name
+                               }else { //yes, merge it
+                                       //add this group's abundance to the stored total, bin by bin
+                                       for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
+                                               int abund = (it->second).getAbundance(j);
+                                               abund += thisLookUp[i]->getAbundance(j);
+                                               
+                                               (it->second).set(j, abund, grouping);
+                                       }
+                               }
+                       }
+               }
+               
+               //write each merged grouping: label, tab, grouping name, tab, then the vector's own print() output
+               for (it = merged.begin(); it != merged.end(); it++) {
+                       out << (it->second).getLabel() << '\t' << it->first << '\t';
+                       (it->second).print(out);
+               }
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MergeGroupsCommand", "process");  //report the exception with class and function name
+               exit(1);  //then terminate the process with status 1
+       }
+}
+//**********************************************************************************************************************
+
+int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
+       try {
                
-               if (outputDir == "") {  outputDir += m->hasPath(sharedfile);  }
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" +  m->getExtension(sharedfile);
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" +  m->getExtension(sharedfile);
                outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
                ofstream out;
                m->openOutputFile(outputFileName, out);
                
-               designMap = new GroupMap(designfile);
-               designMap->readDesignMap();
-               
                InputData input(sharedfile, "sharedfile");
                lookup = input.getSharedRAbundVectors();
                string lastLabel = lookup[0]->getLabel();
@@ -256,78 +370,68 @@ int MergeGroupsCommand::execute(){
                }
                
                out.close();
-               //reset groups parameter
-               m->clearGroups();  
-               delete designMap;
-               
-               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0;}
-               
-               
-               //set shared file as new current sharedfile
-               string current = "";
-               itTypes = outputTypes.find("shared");
-               if (itTypes != outputTypes.end()) {
-                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
-               }
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
-               m->mothurOutEndLine();
                
+                               
                return 0;
+               
        }
        catch(exception& e) {
-               m->errorOut(e, "MergeGroupsCommand", "execute");
+               m->errorOut(e, "MergeGroupsCommand", "processSharedFile");
                exit(1);
        }
 }
 //**********************************************************************************************************************
 
-int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+int MergeGroupsCommand::processGroupFile(GroupMap*& designMap){
        try {
                
-               map<string, SharedRAbundVector> merged;
-               map<string, SharedRAbundVector>::iterator it;
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "merge" +  m->getExtension(groupfile);
+               outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
                
-               for (int i = 0; i < thisLookUp.size(); i++) {
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               //read groupfile
+               GroupMap groupMap(groupfile);
+               groupMap.readMap();
+               
+               //fill Groups - checks for "all" and for any typo groups
+               SharedUtil* util = new SharedUtil();
+               vector<string> nameGroups = groupMap.getNamesOfGroups();
+               util->setGroups(Groups, nameGroups);
+               delete util;
+               
+               vector<string> namesOfSeqs = groupMap.getNamesSeqs();
+               bool error = false;
+               
+               for (int i = 0; i < namesOfSeqs.size(); i++) {
                        
-                       if (m->control_pressed) { return 0; }
+                       if (m->control_pressed) { break; }
                        
-                       //what grouping does this group belong to
-                       string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
-                       if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
+                       string thisGroup = groupMap.getGroup(namesOfSeqs[i]);
                        
-                       else {
-                               //do we already have a member of this grouping?
-                               it = merged.find(grouping);
+                       //are you in a group the user wants
+                       if (m->inUsersGroups(thisGroup, Groups)) {
+                               string thisGrouping = designMap->getGroup(thisGroup);
                                
-                               if (it == merged.end()) { //nope, so create it
-                                       merged[grouping] = *thisLookUp[i];
-                                       merged[grouping].setGroup(grouping);
-                               }else { //yes, merge it
-                                       
-                                       for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
-                                               int abund = (it->second).getAbundance(j);
-                                               abund += thisLookUp[i]->getAbundance(j);
-                                               
-                                               (it->second).set(j, abund, grouping);
-                                       }
+                               if (thisGrouping == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct."); m->mothurOutEndLine();  error = true; }
+                               else {
+                                       out << namesOfSeqs[i] << '\t' << thisGrouping << endl;
                                }
                        }
                }
                
-               //print new file
-               for (it = merged.begin(); it != merged.end(); it++) {
-                       out << (it->second).getLabel() << '\t' << it->first << '\t';
-                       (it->second).print(out);
-               }
+               if (error) { m->control_pressed = true; }
+
+               out.close();
                
                return 0;
                
        }
        catch(exception& e) {
-               m->errorOut(e, "MergeGroupsCommand", "process");
+               m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
                exit(1);
        }
 }
index 7907dfe85f556682d1cc796323314a2f0c8d7b67..7f216ebed7653a2647c4770e4ce17e5e6d881e95 100644 (file)
@@ -38,10 +38,12 @@ private:
        
        bool abort, allLines, pickedGroups;
        set<string> labels; //holds labels to be used
-       string groups, label, outputDir, inputDir, designfile, sharedfile;
+       string groups, label, outputDir, inputDir, designfile, sharedfile, groupfile;
        vector<string> Groups, outputNames;
                
        int process(vector<SharedRAbundVector*>&, ofstream&);
+       int processSharedFile(GroupMap*&);
+       int processGroupFile(GroupMap*&);
 };
 
 #endif
index 3b7c4599329aede91be3f6325d27df1accf77c03..50344e24bfea85e8b7c4b829b5136e3e63d34fc7 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -106,6 +106,19 @@ struct ThreadNode {
        IntNode* right;
 };
 
+// Bundles two floats, prob and reverseProb.
+// Can be built zeroed or from an explicit pair of values.
+struct diffPair {
+       float   prob;
+       float   reverseProb;
+       
+       // both values start at zero
+       diffPair() : prob(0), reverseProb(0) {}
+       
+       // takes prob first, then reverseProb
+       diffPair(float p, float rp) : prob(p), reverseProb(rp) {}
+};
+
 /************************************************************/
 struct clusterNode {
        int numSeq;
index 9dcdc80e761f1483059297dbcd996a0a06b43e5b..20a7b5235029b6d7908ca0f23f119c3c38d095f1 100644 (file)
@@ -2152,7 +2152,7 @@ int MothurOut::removeConfidences(string& tax) {
                                int pos2 = taxon.find_last_of(')');
                                if (pos2 != -1) {
                                        string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
-                                       if (isContainingOnlyDigits(confidenceScore)) {
+                                       if (isNumeric1(confidenceScore)) {
                                                taxon = taxon.substr(0, pos); //rip off confidence 
                                        }
                                }
index 71714f184e644105041910933a5bf877539deb24..1e8c4451538683d4725ed60e535a5a86a8a0ea79 100644 (file)
@@ -104,7 +104,7 @@ map<string, string> OptionParser::getParameters() {
                                }
                        }else{ it++; }
                }
-       
+               
                return parameters;      
        }
        catch(exception& e) {
@@ -113,4 +113,54 @@ map<string, string> OptionParser::getParameters() {
        }
 }
 
+/***********************************************************************/
+// Warns when the current namefile looks like it belongs with one of the given files.
+// Both names are reduced with getRootName(getSimpleName(...)) and split at '.';
+// the pieces are then compared position by position, and the first file that
+// matches triggers the warning and ends the search.
+// Returns true when such a match was found, false otherwise.
+bool OptionParser::getNameFile(vector<string> files) { 
+       try {
+               string namefile = m->getNameFile();
+               
+               //no current namefile means there is nothing to compare against
+               if (namefile == "") { return false; }
+               
+               vector<string> namePieces;
+               string shortened = m->getRootName(m->getSimpleName(namefile));
+               m->splitAtChar(shortened, namePieces, '.');
+               
+               bool found = false;
+               for (int f = 0; (f < files.size()) && (!found); f++) {
+                       vector<string> filePieces;
+                       shortened = m->getRootName(m->getSimpleName(files[f]));
+                       m->splitAtChar(shortened, filePieces, '.');
+                       
+                       //only the positions both names have can be compared
+                       int shared = namePieces.size();
+                       if (filePieces.size() < shared) { shared = filePieces.size(); }
+                       if (shared <= 0) { continue; }
+                       
+                       int same = 0;
+                       for (int p = 0; p < shared; p++) {
+                               if (filePieces[p] == namePieces[p]) { same++; }
+                       }
+                       
+                       //up to two pieces may differ, but the first piece has to agree
+                       if ((same < (shared-2)) || (filePieces[0] != namePieces[0])) { continue; }
+                       
+                       m->mothurOut("[WARNING]: This command can take a namefile and you did not provide one. The current namefile is " + namefile + " which seems to match " + files[f] + ".");
+                       m->mothurOutEndLine();
+                       found = true;
+               }
+               
+               return found;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "OptionParser", "getNameFile");
+               exit(1);
+       }
+}
+
+                               
 /***********************************************************************/
index 7707e739ce15d7e0dd6657bc08a288222dfbdbb0..facd1f85182cc526a6db01d85968d9231afcfc88 100644 (file)
@@ -14,7 +14,7 @@
 
 #include "mothur.h"
 #include "mothurout.h"
-
+#include "command.hpp"
 
 /***********************************************************************/
 
@@ -23,6 +23,7 @@ public:
        OptionParser(string);
        ~OptionParser() {}
        map<string, string> getParameters();
+       bool getNameFile(vector<string>);
 private:
        map<string, string> parameters;
        MothurOut* m;
index 9cba70b9d2772edba281cd5f03459d1fdc32a966..9509a20af2528392c2df8590aea61fef6a7665b4 100644 (file)
@@ -14,6 +14,8 @@
 vector<string> ParseFastaQCommand::setParameters(){    
        try {
                CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfastq);
+               CommandParameter pfasta("fasta", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
+               CommandParameter pqual("qfile", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pqual);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -104,6 +106,15 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
+                       
+                       string temp;
+                       temp = validParameter.validFile(parameters, "fasta", false);    if(temp == "not found"){        temp = "T";     }
+                       fasta = m->isTrue(temp); 
+
+                       temp = validParameter.validFile(parameters, "qfile", false);    if(temp == "not found"){        temp = "T";     }
+                       qual = m->isTrue(temp); 
+                       
+                       if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
 
                }               
        }
@@ -122,13 +133,16 @@ int ParseFastaQCommand::execute(){
                string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "fasta";
                string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "qual";
                ofstream outFasta, outQual;
-               m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);
-               m->openOutputFile(qualFile, outQual);   outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);
+               
+               if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
+               if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
                
                ifstream in;
                m->openInputFile(fastaQFile, in);
                
                while (!in.eof()) {
+                       
+                       if (m->control_pressed) { break; }
                
                        //read sequence name
                        string name = m->getline(in); m->gobble(in);
@@ -147,27 +161,27 @@ int ParseFastaQCommand::execute(){
                        else { name2 = name2.substr(1);  }
                        
                        //read quality scores
-                       string qual = m->getline(in); m->gobble(in);
-                       if (qual == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
+                       string quality = m->getline(in); m->gobble(in);
+                       if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        
                        //sanity check sequence length and number of quality scores match
                        if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
-                       if (qual.length() != sequence.length()) { m->mothurOut("[ERROR]: lengths do not match. read " + toString(sequence.length()) + " characters for fasta and " + toString(qual.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-                       //convert quality scores
-                       vector<int> qualScores = convertQual(qual);
+                       if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: lengths do not match. read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
                        
                        //print sequence info to files
-                       outFasta << ">" << name << endl << sequence << endl;
+                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
                        
-                       outQual << ">" << name << endl;
-                       for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
-                       outQual << endl;
+                       if (qual) { 
+                               vector<int> qualScores = convertQual(quality);
+                               outQual << ">" << name << endl;
+                               for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
+                               outQual << endl;
+                       }
                }
                
                in.close();
-               outFasta.close();
-               outQual.close();
+               if (fasta)      { outFasta.close();     }
+               if (qual)       { outQual.close();      }
                
                if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
                
index 60524566b2a4f02e4571c38d0602980e676d8138..6feabce848c09c21b2e276df8361ac4d78667f22 100644 (file)
@@ -34,7 +34,7 @@ private:
 
        vector<string> outputNames;     
        string outputDir, fastaQFile;
-       bool abort;
+       bool abort, fasta, qual;
        
        vector<int> convertQual(string);
 };
index ccf29526f5e803049371c452bcdcf7ddad10da48..2d46efc9efebcfbe7807292cb814cf58686f9ac4 100644 (file)
@@ -139,7 +139,7 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                        if (randomtree == "")  {
                                //check for required parameters
                                treefile = validParameter.validFile(parameters, "tree", true);
-                               if (treefile == "not open") { abort = true; }
+                               if (treefile == "not open") { treefile = ""; abort = true; }
                                else if (treefile == "not found") {                             //if there is a current design file, use it
                                        treefile = m->getTreeFile(); 
                                        if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
@@ -153,7 +153,7 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                                else { m->setGroupFile(groupfile); }
                                
                                namefile = validParameter.validFile(parameters, "name", true);
-                               if (namefile == "not open") { abort = true; }
+                               if (namefile == "not open") { namefile = ""; abort = true; }
                                else if (namefile == "not found") { namefile = ""; }
                                else { m->setNameFile(namefile); }
                        }
@@ -177,6 +177,11 @@ ParsimonyCommand::ParsimonyCommand(string option)  {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
                        
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
 
        }
index 69c7b304623e44878cdafb8925fbffd9f45cd718..2b15d11d29580d1455ffea983788ab0715fe194b 100644 (file)
@@ -144,7 +144,7 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                        
                        //check for required parameters
                        treefile = validParameter.validFile(parameters, "tree", true);
-                       if (treefile == "not open") { abort = true; }
+                       if (treefile == "not open") { treefile = ""; abort = true; }
                        else if (treefile == "not found") {                             
                                //if there is a current tree file, use it
                                treefile = m->getTreeFile(); 
@@ -159,7 +159,7 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                        else { m->setGroupFile(groupfile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
@@ -197,6 +197,11 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option)  {
                        }
                        
                        if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
                }
                
        }
index 22e8e75ce7047e00b135483f16b566eb506a71fb..7d9fcac2558da8f50653a11ddafd997b58c1c286 100644 (file)
@@ -121,6 +121,7 @@ string PhyloSummary::getNextTaxon(string& heirarchy){
 
 int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
        try {
+                               
                numSeqs++;
                
                map<string, int>::iterator childPointer;
index e430fb9e85622dcd1d425534857fe37dff600da9..dba1e3b56951f8ae1c484ace22187a9b65053d35 100644 (file)
@@ -20,6 +20,7 @@ PhyloTree::PhyloTree(){
                tree[0].heirarchyID = "0";
                maxLevel = 0;
                calcTotals = true;
+               addSeqToTree("unknown", "unknown;");
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "PhyloTree");
@@ -127,6 +128,7 @@ PhyloTree::PhyloTree(string tfile){
                maxLevel = 0;
                calcTotals = true;
                string name, tax;
+               addSeqToTree("unknown", "unknown;");
 
                
                #ifdef USE_MPI
@@ -232,7 +234,6 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
 
 int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
        try {
-                       
                numSeqs++;
                
                map<string, int>::iterator childPointer;
@@ -375,7 +376,7 @@ void PhyloTree::binUnclassified(string file){
                map<string, int>::iterator childPointer;
                
                vector<TaxNode> copy = tree;
-                       
+               
                //fill out tree
                fillOutTree(0, copy);
        
@@ -484,16 +485,16 @@ string PhyloTree::getFullTaxonomy(string seqName) {
 
 void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
        try {
-       
+               
                //output mothur version
                out << "#" << m->getVersion() << endl;
                
                out << copy.size() << endl;
                
                out << maxLevel << endl;
-               
+                               
                for (int i = 0; i < copy.size(); i++) {
-       
+                               
                        out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t';
                        
                        map<string,int>::iterator it;
@@ -606,6 +607,7 @@ bool PhyloTree::ErrorCheck(vector<string> templateFileNames){
        try {
        
                bool okay = true;
+               templateFileNames.push_back("unknown");
                
                map<string, int>::iterator itFind;
                map<string, int> taxonomyFileNames = name2Taxonomy;
index 00f960bbbab8d07ecbee08dc01b78907c60c0185..1d6c391de69c83a86971873577a0df0b21efdbcd 100644 (file)
@@ -128,11 +128,11 @@ PhylotypeCommand::PhylotypeCommand(string option)  {
                                        m->mothurOut("No valid current files. taxonomy is a required parameter."); m->mothurOutEndLine(); 
                                        abort = true; 
                                }
-                       }else if (taxonomyFileName == "not open") { abort = true; }     
+                       }else if (taxonomyFileName == "not open") { taxonomyFileName = ""; abort = true; }      
                        else { m->setTaxonomyFile(taxonomyFileName); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }
                        else { readNamesFile(); m->setNameFile(namefile); }     
                        
@@ -153,6 +153,11 @@ PhylotypeCommand::PhylotypeCommand(string option)  {
                                else { allLines = 1;  }
                        }
                        
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(taxonomyFileName);
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
index 21de0344e7c856bb83105abe9b51cf8fe5b4a8ec..582da493e4dc5cdf5267d65606181d1594f56a87 100644 (file)
@@ -147,7 +147,7 @@ PreClusterCommand::PreClusterCommand(string option) {
                        // ...at some point should add some additional type checking...
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not found") { namefile =  "";  }
-                       else if (namefile == "not open") { abort = true; }      
+                       else if (namefile == "not open") { namefile = ""; abort = true; }       
                        else {  m->setNameFile(namefile); }
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
@@ -162,7 +162,10 @@ PreClusterCommand::PreClusterCommand(string option) {
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
                        
-                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                }
                                
        }
index fb930efcfd881561f27a5889f9da65aaaebd5f02..36400c1324380cda8e5570c028ac48aa4beaa2c9 100644 (file)
@@ -22,6 +22,7 @@ void ReferenceDB::clearMemory()  {
        setSavedReference("");
        for(int i = 0; i < wordGenusProb.size(); i++) { wordGenusProb[i].clear(); }
        wordGenusProb.clear();
+       WordPairDiffArr.clear();
        setSavedTaxonomy("");
 }
 /*******************************************************
index 2f292472cd35da2161099438c3f6ff79418039d1..5262e80d010a29da6896ada17bcfd7091dce7fec 100644 (file)
@@ -26,6 +26,7 @@ class ReferenceDB {
                bool save;
                vector<Sequence> referenceSeqs;
                vector< vector<float> > wordGenusProb;
+               vector<diffPair> WordPairDiffArr;
        
                string getSavedReference()                      { return referencefile;         }
                void setSavedReference(string p)        { referencefile = p;            }
index e5a3bb2edc7fc97dacb89393ef3a2968b64247b4..f35da2e46588829480a5540cdcd1148fd54e31bf 100644 (file)
 //**********************************************************************************************************************
 vector<string> RemoveGroupsCommand::setParameters(){   
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
-               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
+               CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
                CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
                CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
@@ -253,6 +253,12 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
                        if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
+                       
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+               
                }
                
        }
index f55158fc259ace27c87d5a4fcca10e7c55539f13..9fec2496068950b4863448fe500b17cbc957670a 100644 (file)
@@ -169,12 +169,12 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        
                        //check for required parameters                 
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  }        
                        else { m->setFastaFile(fastafile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") {  namefile = "";  }  
                        else { m->setNameFile(namefile); }
                        
@@ -193,7 +193,7 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        else { m->setListFile(listfile); }
                        
                        taxfile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxfile == "not open") { abort = true; }
+                       if (taxfile == "not open") { taxfile = ""; abort = true; }
                        else if (taxfile == "not found") {              
                                taxfile = m->getTaxonomyFile(); 
                                if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
@@ -220,7 +220,12 @@ RemoveLineageCommand::RemoveLineageCommand(string option)  {
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
                
                        if ((usedDups != "") && (namefile == "")) {  m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine();  abort = true; }                       
-
+                       
+                       if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+                               vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
 
        }
index 59c9b348ba1af9b719f35ae12a249265c7fe9662..2abe9ef78a8c76354d377087fa7a23deb27b4d25 100644 (file)
@@ -194,12 +194,12 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        }else { m->setAccnosFile(accnosfile); } 
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  }        
                        else { m->setFastaFile(fastafile); }
                                                                   
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") {  namefile = "";  }  
                        else { m->setNameFile(namefile); } 
                                                                   
@@ -237,6 +237,10 @@ RemoveSeqsCommand::RemoveSeqsCommand(string option)  {
                        
                        if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
                        
+                       if ((fastafile != "") && (namefile == "")) {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index 16618f74cc8e251c7d28d10ca37fcd2e7b09d017..41067396c6f5333faeaf3346afd9b865ad351531 100644 (file)
@@ -260,6 +260,11 @@ ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){       temp = "90";                            }
                        m->mothurConvert(temp, criteria); 
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index fe80b7810e716264152835060f25d1f2e45c1fed..24142e0cf4aa6174a8006f1e0a7a85c82f10ab24 100644 (file)
@@ -122,7 +122,7 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                        else if (mapfile == "not found") {  mapfile = "";  m->mothurOut("You must provide an map file."); m->mothurOutEndLine(); abort = true; }        
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastafile == "not open") { abort = true; }
+                       if (fastafile == "not open") { fastafile = ""; abort = true; }
                        else if (fastafile == "not found") {                            
                                fastafile = m->getFastaFile(); 
                                if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
@@ -139,7 +139,11 @@ AlignCheckCommand::AlignCheckCommand(string option)  {
                                outputDir = ""; 
                                outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
                        }
-
+                       
+                       if ((namefile == "") && (fastafile != "")){
+                               vector<string> files; files.push_back(fastafile); 
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index 9ac7c5bcef6fee9c8e01b7838a3560f3478c5fcc..d8ebe50fb11859367256a28286dec9789cc70d36 100644 (file)
@@ -176,7 +176,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                                if (queryFileName != "") { m->mothurOut("Using " + queryFileName + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current fasta file and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
                        }
-                       else if (queryFileName == "not open") { abort = true; } 
+                       else if (queryFileName == "not open") { queryFileName = ""; abort = true; }     
                        else { m->setFastaFile(queryFileName); }
                        
                        referenceFileName = validParameter.validFile(parameters, "reference", true);
@@ -246,6 +246,11 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
 
                        substitutionMatrix.resize(6);
                        for(int i=0;i<6;i++){   substitutionMatrix[i].resize(6,0);      }
+                       
+                       if ((namesFileName == "") && (queryFileName != "")){
+                               vector<string> files; files.push_back(queryFileName); 
+                               parser.getNameFile(files);
+                       }
                }
        }
        catch(exception& e) {
index 93950d1f6222c10af735808d87130213b6664017..e8f73ca10875f8321f624adfb62ee02f7baf2770 100644 (file)
@@ -132,8 +132,12 @@ SeqSummaryCommand::SeqSummaryCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
-
-
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
index 162d3be9e69d0f86f7e4f2122c3c914b613392c3..090ee14c1c838098aaaeb1a831ec01e3272ed512 100644 (file)
@@ -604,7 +604,6 @@ void Sequence::padFromPos(int end){
 bool Sequence::getIsAligned(){
        return isAligned;
 }
-
 //********************************************************************************************************************
 
 void Sequence::reverseComplement(){
index 17bad60d2f358b41d4dce9eeafc6a155ad069df7..67a1f59f8388dbfec55ae7b552f02640b5d6b93f 100644 (file)
@@ -202,8 +202,19 @@ int SetDirectoryCommand::execute(){
                                if (lastChar != "\\") { tempdefault += "\\"; }  
                        #endif
                        
-                       m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();  
-                       m->setDefaultPath(tempdefault);
+                       //test to make sure directory exists
+                       tempdefault = m->getFullPathName(tempdefault);
+                       string inTemp = tempdefault + tag + "temp";
+                       ofstream in;
+                       in.open(inTemp.c_str(), ios::trunc);
+                       if(!in) {
+                               m->mothurOut(tempdefault + " directory does not exist or is not writable."); m->mothurOutEndLine(); 
+                       }else{
+                               in.close();
+                               m->mothurRemove(inTemp);
+                               m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();  
+                               m->setDefaultPath(tempdefault);
+                       }
                }
 
                return 0;
index 40a4c8499a599207b56831c91e6f0f41c023eb2c..537821134edebcf9dcad70f8c1f38be0da39b121 100644 (file)
@@ -23,7 +23,7 @@ vector<string> ShhherCommand::setParameters(){
        try {
                CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
                CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
-               CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plookup);
+               CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
                CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
@@ -2152,7 +2152,7 @@ void ShhherCommand::writeQualities(vector<int> otuCounts){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string qualityFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.qual";
+               string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.qual";
 
                ofstream qualityFile;
                m->openOutputFile(qualityFileName, qualityFile);
@@ -2259,7 +2259,7 @@ void ShhherCommand::writeSequences(vector<int> otuCounts){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fastaFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.fasta";
+               string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.fasta";
                ofstream fastaFile;
                m->openOutputFile(fastaFileName, fastaFile);
                
@@ -2307,7 +2307,7 @@ void ShhherCommand::writeNames(vector<int> otuCounts){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string nameFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+               string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.names";
                ofstream nameFile;
                m->openOutputFile(nameFileName, nameFile);
                
@@ -2345,7 +2345,7 @@ void ShhherCommand::writeGroups(){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string fileRoot = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.'));
+               string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
                string groupFileName = fileRoot + ".shhh.groups";
                ofstream groupFile;
                m->openOutputFile(groupFileName, groupFile);
@@ -2370,7 +2370,7 @@ void ShhherCommand::writeClusters(vector<int> otuCounts){
        try {
                string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(flowFileName);  }
-               string otuCountsFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.counts";
+               string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.counts";
                ofstream otuCountsFile;
                m->openOutputFile(otuCountsFileName, otuCountsFile);
                
index 6a339b880eb54eedaa7be7af3c297ed0089beb10..518e78d6386521dcb4ef57700c1ffb903ae02a13 100644 (file)
@@ -165,6 +165,11 @@ ShhhSeqsCommand::ShhhSeqsCommand(string option) {
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        m->mothurConvert(temp, processors);
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                }
        }
        catch(exception& e) {
index 989ef596168ff1894009fd8c625edc881197ef87..8131fa9195f6b8d939b27891605dd862d618964b 100644 (file)
@@ -216,7 +216,6 @@ SplitAbundCommand::SplitAbundCommand(string option)  {
                        m->mothurConvert(temp, cutoff); 
 
                        if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
-
                }
 
        }
index 113439c9220429f4d9466a72a579c5aa2caa10ae..26716554decad148623563d4b120148325b7477a 100644 (file)
@@ -125,7 +125,7 @@ SplitGroupCommand::SplitGroupCommand(string option)  {
 
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }    
                        else { m->setNameFile(namefile); }
                
@@ -151,6 +151,11 @@ SplitGroupCommand::SplitGroupCommand(string option)  {
                                                
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(groupfile);      }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index fe9e93423969796187dd59452f7e3a3496d4ee57..c352feb099a83879bae9e1aca0f35d5bfe56e691 100644 (file)
@@ -255,6 +255,10 @@ SubSampleCommand::SubSampleCommand(string option) {
                        if ((groupfile != "") && ((fastafile != "") && (listfile != ""))) { 
                                m->mothurOut("A new group file can only be made from the subsample of a listfile or fastafile, not both. Please correct."); m->mothurOutEndLine(); abort = true; }
                        
+                       if ((fastafile != "") && (namefile == "")) {
+                               vector<string> files; files.push_back(fastafile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index 421a8f2ad36c64e218aa68a4eb6fd8dc651362d0..2a40e0862ba0b0444db035246cdcb95409fe4923 100644 (file)
@@ -111,7 +111,7 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                        
                        //check for required parameters
                        qualfile = validParameter.validFile(parameters, "qfile", true);
-                       if (qualfile == "not open") { abort = true; }
+                       if (qualfile == "not open") { qualfile = ""; abort = true; }
                        else if (qualfile == "not found") {                             
                                qualfile = m->getQualFile(); 
                                if (qualfile != "") { m->mothurOut("Using " + qualfile + " as input file for the qfile parameter."); m->mothurOutEndLine(); }
@@ -131,7 +131,12 @@ SummaryQualCommand::SummaryQualCommand(string option)  {
                        
                        string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       m->mothurConvert(temp, processors);                     
+                       m->mothurConvert(temp, processors);     
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(qualfile);
+                               parser.getNameFile(files);
+                       }
                }
        }
        catch(exception& e) {
index a19986aba92ffeae80d3786c2322dc3d7d5bb461..ffa304102b95570669e4e977a9f4859b18c24c71 100644 (file)
@@ -157,6 +157,12 @@ SummaryTaxCommand::SummaryTaxCommand(string option)  {
                                outputDir = ""; 
                                outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it 
                        }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(taxfile);
+                               parser.getNameFile(files);
+                       }
+                       
                }
        }
        catch(exception& e) {
index 213e241ccfc6ee8feeb6c34ed9b589343c7c71d1..dd3427b9a9733285e5e793f78cabe5bdc3ed1cf3 100644 (file)
@@ -292,6 +292,11 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
                                abort = true;
                        }
+                       
+                       if (nameFile == "") {
+                               vector<string> files; files.push_back(fastaFile);
+                               parser.getNameFile(files);
+                       }
                }
 
        }
index 047d06e27fadff92f647f75875f038f4bf8b04fd..a404f79222ac83cf41d42c04ceb614df1e01dceb 100644 (file)
@@ -155,7 +155,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                        else { m->setGroupFile(groupfile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
@@ -198,6 +198,11 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                                m->splitAtDash(groups, Groups);
                                m->setGroups(Groups);
                        }
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
                }
                
        }
index b007db1f6eddc2d8e6119add616803b1d58526a2..f9dc450444b242555710f91dc3567f96e69e6d3d 100644 (file)
@@ -141,7 +141,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                        
                        //check for required parameters
                        treefile = validParameter.validFile(parameters, "tree", true);
-                       if (treefile == "not open") { abort = true; }
+                       if (treefile == "not open") { treefile = ""; abort = true; }
                        else if (treefile == "not found") {                             //if there is a current design file, use it
                                treefile = m->getTreeFile(); 
                                if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
@@ -155,7 +155,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                        else { m->setGroupFile(groupfile); }
                        
                        namefile = validParameter.validFile(parameters, "name", true);
-                       if (namefile == "not open") { abort = true; }
+                       if (namefile == "not open") { namefile = ""; abort = true; }
                        else if (namefile == "not found") { namefile = ""; }
                        else { m->setNameFile(namefile); }
                        
@@ -192,6 +192,11 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                        m->mothurConvert(temp, processors);
                        
                        if (!random) {  iters = 0;  } //turn off random calcs
+                       
+                       if (namefile == "") {
+                               vector<string> files; files.push_back(treefile);
+                               parser.getNameFile(files);
+                       }
                }