added topdown parameter to pre.cluster. added more debugging output to bayesian...

author Sarah Westcott <mothur.westcott@gmail.com>

Wed, 16 Jan 2013 16:30:18 +0000 (11:30 -0500)

committer Sarah Westcott <mothur.westcott@gmail.com>

Wed, 16 Jan 2013 16:30:18 +0000 (11:30 -0500)
author Sarah Westcott <mothur.westcott@gmail.com>
Wed, 16 Jan 2013 16:30:18 +0000 (11:30 -0500)
committer Sarah Westcott <mothur.westcott@gmail.com>
Wed, 16 Jan 2013 16:30:18 +0000 (11:30 -0500)
diff --git a/bayesian.cpp b/bayesian.cpp

index cf70010e76a6b9cba77b8ade2fcd7d18713391d3..6eaab6f2f974f8e1036561c2f76d9587d6dd0399 100644 (file)
--- a/bayesian.cpp
+++ b/bayesian.cpp
@@ -308,7 +308,11 @@ string Bayesian::getTaxonomy(Sequence* seq) {
                 //bootstrap - to set confidenceScore
                 int numToSelect = queryKmers.size() / 8;
         
+        if (m->debug) {  m->mothurOut(seq->getName() + "\t"); }
+        
                 tax = bootstrapResults(queryKmers, index, numToSelect);
+        
+        if (m->debug) {  m->mothurOut("\n"); }
                 
                 return tax;     
         }
@@ -374,6 +378,7 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                 int seqTaxIndex = tax;
                 TaxNode seqTax = phyloTree->get(tax);
                 
+        
                 while (seqTax.level != 0) { //while you are not at the root
                                         
                                 itBoot2 = confidenceScores.find(seqTaxIndex); //is this a classification we already have a count on
@@ -383,11 +388,13 @@ string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
                                         confidence = itBoot2->second;
                                 }
                                 
+                if (m->debug) { m->mothurOut(seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");"); }
+            
                                 if (((confidence/(float)iters) * 100) >= confidenceThreshold) {
                                         confidenceTax = seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");" + confidenceTax;
                                         simpleTax = seqTax.name + ";" + simpleTax;
                                 }
-                               
+            
                                 seqTaxIndex = seqTax.parent;
                                 seqTax = phyloTree->get(seqTax.parent);
                 }
diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp

index ce7ce456d87c5c8c6e5a137cd661b02710e0977c..9a25582ddad078665bbc3f7e098dc4cbcf047097 100644 (file)
--- a/chimerauchimecommand.cpp
+++ b/chimerauchimecommand.cpp
@@ -23,6 +23,7 @@ vector<string> ChimeraUchimeCommand::setParameters(){
          CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount);
                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter pstrand("strand", "String", "", "", "", "", "","",false,false); parameters.push_back(pstrand);
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 CommandParameter pabskew("abskew", "Number", "", "1.9", "", "", "","",false,false); parameters.push_back(pabskew);
@@ -61,7 +62,7 @@ string ChimeraUchimeCommand::getHelpString(){
                 string helpString = "";
                 helpString += "The chimera.uchime command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
                 helpString += "This command is a wrapper for uchime written by Robert C. Edgar.\n";
-               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl and queryfact.\n";
+               helpString += "The chimera.uchime command parameters are fasta, name, count, reference, processors, dereplicate, abskew, chimealns, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, skipgaps, skipgaps2, minlen, maxlen, ucl, strand and queryfact.\n";
                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
                 helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
          helpString += "The count parameter allows you to provide a count file, if you are using template=self. \n";
@@ -543,6 +544,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                         maxp = validParameter.validFile(parameters, "maxp", false);                                             if (maxp == "not found")                        { useMaxp = false; maxp = "2";                                          }       else{ useMaxp = true;                   }
                         minlen = validParameter.validFile(parameters, "minlen", false);                                 if (minlen == "not found")                      { useMinlen = false; minlen = "10";                                     }       else{ useMinlen = true;                 }
                         maxlen = validParameter.validFile(parameters, "maxlen", false);                                 if (maxlen == "not found")                      { useMaxlen = false; maxlen = "10000";                          }       else{ useMaxlen = true;                 }
+            
+            strand = validParameter.validFile(parameters, "strand", false);    if (strand == "not found")      {  strand = ""; }
                         
                         temp = validParameter.validFile(parameters, "ucl", false);                                              if (temp == "not found") { temp = "f"; }
                         ucl = m->isTrue(temp);
@@ -1232,6 +1235,15 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc
                         *tempa = '\0'; strncat(tempa, alns.c_str(), alns.length());
                         cPara.push_back(tempa);
                 }
+        
+        if (strand != "") {
+                       char* tempA = new char[9]; 
+                       *tempA = '\0'; strncat(tempA, "--strand", 8);
+                       cPara.push_back(tempA);
+                       char* tempa = new char[strand.length()+1];
+                       *tempa = '\0'; strncat(tempa, strand.c_str(), strand.length());
+                       cPara.push_back(tempa);
+               }
                 
                 if (useAbskew) {
                         char* tempskew = new char[9];
@@ -1631,7 +1643,7 @@ int ChimeraUchimeCommand::createProcesses(string outputFileName, string filename
                         
                         uchimeData* tempUchime = new uchimeData(outputFileName+extension, uchimeLocation, templatefile, files[i], "", "", "", accnos+extension, alns+extension, dummy, m, 0, 0,  i);
                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                         
                         pDataArray.push_back(tempUchime);
                         processIDS.push_back(i);
@@ -1763,7 +1775,7 @@ int ChimeraUchimeCommand::createProcessesGroups(string outputFName, string filen
                         
                         uchimeData* tempUchime = new uchimeData(outputFName+extension, uchimeLocation, templatefile, filename+extension, fastaFile, nameFile, groupFile, accnos+extension, alns+extension, groups, m, lines[i].start, lines[i].end,  i);
                         tempUchime->setBooleans(useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount);
-                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract);
+                       tempUchime->setVariables(abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand);
                         
                         pDataArray.push_back(tempUchime);
                         processIDS.push_back(i);
diff --git a/chimerauchimecommand.h b/chimerauchimecommand.h

index 735c97d1269848b94ac5a01926f93e08d1ccd5ef..8861e19ffc4f2cca99fd50f38efd3d1f9798e2af 100644 (file)
--- a/chimerauchimecommand.h
+++ b/chimerauchimecommand.h
@@ -49,7 +49,7 @@ private:
         int createProcesses(string, string, string, string, int&);
                 
         bool abort, useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount, hasName, dups;
-       string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation;
+       string fastafile, groupfile, templatefile, outputDir, namefile, countfile, abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, uchimeLocation, strand;
         int processors;
         
         SequenceParser* sparser;
@@ -87,7 +87,7 @@ struct uchimeData {
         int threadID, count, numChimeras;
         vector<string> groups;
         bool useAbskew, chimealns, useMinH, useMindiv, useXn, useDn, useXa, useChunks, useMinchunk, useIdsmoothwindow, useMinsmoothid, useMaxp, skipgaps, skipgaps2, useMinlen, useMaxlen, ucl, useQueryfract, hasCount;
-       string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract;
+       string abskew, minh, mindiv, xn, dn, xa, chunks, minchunk, idsmoothwindow, minsmoothid, maxp, minlen, maxlen, queryfract, strand;
         
         uchimeData(){}
         uchimeData(string o, string uloc, string t, string file, string f, string n, string g, string ac,  string al, vector<string> gr, MothurOut* mout, int st, int en, int tid) {
@@ -130,10 +130,11 @@ struct uchimeData {
          hasCount = hc;
         }
         
-       void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac) {
+       void setVariables(string abske, string min, string mindi, string x, string d, string xa2, string chunk, string minchun, string idsmoothwindo, string minsmoothi, string max, string minle, string maxle, string queryfrac, string stra) {
                 abskew = abske;
                 minh = min;
                 mindiv = mindi;
+        strand = stra;
                 xn = x;
                 dn = d;
                 xa = xa2;
@@ -243,6 +244,15 @@ static DWORD WINAPI MyUchimeThreadFunction(LPVOID lpParam){
                                 cPara.push_back(tempa);
                         }
                         
+            if (pDataArray->strand != "") {
+                char* tempA = new char[9]; 
+                *tempA = '\0'; strncat(tempA, "--strand", 8);
+                cPara.push_back(tempA);
+                char* tempa = new char[pDataArray->strand.length()+1];
+                *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
+                cPara.push_back(tempa);
+            }
+            
                         if (pDataArray->useAbskew) {
                                 char* tempskew = new char[9];
                                 *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
@@ -589,6 +599,15 @@ static DWORD WINAPI MyUchimeSeqsThreadFunction(LPVOID lpParam){
                         cPara.push_back(tempa);
                 }
                 
+        if (pDataArray->strand != "") {
+            char* tempA = new char[9]; 
+            *tempA = '\0'; strncat(tempA, "--strand", 8);
+            cPara.push_back(tempA);
+            char* tempa = new char[pDataArray->strand.length()+1];
+            *tempa = '\0'; strncat(tempa, pDataArray->strand.c_str(), pDataArray->strand.length());
+            cPara.push_back(tempa);
+        }
+        
                 if (pDataArray->useAbskew) {
                         char* tempskew = new char[9];
                         *tempskew = '\0'; strncat(tempskew, "--abskew", 8);
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp

index 89668d9e7bd122577d9be167f590a96a6aedbf22..6ec2d3886a218405f8132a03fe1c59e0b8c29c20 100644 (file)
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -610,7 +610,7 @@ int ClassifySeqsCommand::execute(){
         try {
                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
          
-        string outputMethodTag = method + ".";
+        string outputMethodTag = method;
                 if(method == "wang"){   classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip, writeShortcuts);     }
                 else if(method == "knn"){       classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand());                               }
          else if(method == "zap"){      
diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp

index 6d3c86ff1fa2efb62d46618f372706fd117c002a..0e6798f39039ad9245e7e12131ac5722cdc34804 100644 (file)
--- a/clustersplitcommand.cpp
+++ b/clustersplitcommand.cpp
@@ -447,11 +447,9 @@ int ClusterSplitCommand::execute(){
                 
                 m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
                 estart = time(NULL);
-                
+              
          if (!runCluster) { 
-#ifdef USE_MPI
-    }
-#endif 
+
                  m->mothurOutEndLine();
                  m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                  for (int i = 0; i < distName.size(); i++) {    m->mothurOut(distName[i].begin()->first); m->mothurOutEndLine(); m->mothurOut(distName[i].begin()->second); m->mothurOutEndLine();      }
@@ -459,7 +457,7 @@ int ClusterSplitCommand::execute(){
                  return 0;
                  
          }
-                
+   
                 //****************** break up files between processes and cluster each file set ******************************//
         #ifdef USE_MPI
                         ////you are process 0 from above////
diff --git a/commandfactory.cpp b/commandfactory.cpp

index a691509ab0ae00e5f19ae52a8bffe6801fc26a31..c8421d29c513f65bfe8684fe10a09e8c439d7915 100644 (file)
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -892,6 +892,41 @@ void CommandFactory::printCommands(ostream& out) {
         }
  }
  /***********************************************************************/
+void CommandFactory::printCommandsCategories(ostream& out) {
+    try {
+        map<string, string> commands = getListCommands();
+        map<string, string>::iterator it;
+
+        map<string, string> categories;
+        map<string, string>::iterator itCat;
+        //loop through each command outputting info
+        for (it = commands.begin(); it != commands.end(); it++) {
+    
+            Command* thisCommand = getCommand(it->first);
+    
+            //don't add hidden commands
+            if (thisCommand->getCommandCategory() != "Hidden") {
+                itCat = categories.find(thisCommand->getCommandCategory());
+                if (itCat == categories.end()) {
+                    categories[thisCommand->getCommandCategory()] = thisCommand->getCommandName();
+                }else {
+                    categories[thisCommand->getCommandCategory()] += ", " + thisCommand->getCommandName();
+                }
+            }
+        }
+        
+        for (itCat = categories.begin(); itCat != categories.end(); itCat++) {
+            out << itCat->first << " commmands include: " << itCat->second << endl;
+        }
+
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CommandFactory", "printCommandsCategories");
+               exit(1);
+       }
+}      
+
+/***********************************************************************/
  
  
  
diff --git a/commandfactory.hpp b/commandfactory.hpp

index e95db8016a4149c9c2defdbb9e7933ff6ee3905a..36e8f462b146fc7846ee6cf9ead1eb42e9757a36 100644 (file)
--- a/commandfactory.hpp
+++ b/commandfactory.hpp
@@ -26,6 +26,7 @@ public:
         bool isValidCommand(string);\r
         bool isValidCommand(string, string);\r
         void printCommands(ostream&);\r
+    void printCommandsCategories(ostream&);\r
         void setOutputDirectory(string o)               {       outputDir = o;  m->setOutputDir(o);     }\r
         void setInputDirectory(string i)                {       inputDir = i;           }\r
         void setLogfileName(string n, bool a)   {       logFileName = n;  append = a;           }\r
diff --git a/filtersharedcommand.cpp b/filtersharedcommand.cpp

index 413a9a432532b12987a8fa8eb65948692cec9b22..4d1c301edf9566b8101e46320e71dc84f8479b67 100644 (file)
--- a/filtersharedcommand.cpp
+++ b/filtersharedcommand.cpp
@@ -405,22 +405,8 @@ int FilterSharedCommand::processShared(vector<SharedRAbundVector*>& thislookup)
                  for (int j = 0; j < rareCounts.size(); j++) { //add "rare" OTU to the filtered lookup
                      filteredLookup[j]->push_back(rareCounts[j], thislookup[j]->getGroup());
                  }
-                
-                //create new label
-                string oldLastLabel = saveBinLabels[saveBinLabels.size()-1];
-                string tag = "";
-                string otuNumber = "";
-                for (int i = 0;i < oldLastLabel.length(); i++){
-                    //add numbers
-                    if( oldLastLabel[i]>47 && oldLastLabel[i]<58){ otuNumber += oldLastLabel[i];  }
-                    else { tag += oldLastLabel[i]; }
-                }
-                
-                int oldLastBin;
-                m->mothurConvert(otuNumber, oldLastBin);
-                oldLastBin++;
-                string newLabel = tag + toString(oldLastBin);
-                filteredLabels.push_back(newLabel);
+                //create label for rare OTUs
+                filteredLabels.push_back("rareOTUs");
              }
          }
          
diff --git a/parsefastaqcommand.h b/parsefastaqcommand.h

index b4d90e542a98f329dd166a93c54d57852e0f54dd..cb86bd66c0ae8597f605006893fd70371d16b5fc 100644 (file)
--- a/parsefastaqcommand.h
+++ b/parsefastaqcommand.h
@@ -21,7 +21,7 @@ public:
         ~ParseFastaQCommand() {}
         
         vector<string> setParameters();
-       string getCommandName()                 { return "parse.fastq";         }
+       string getCommandName()                 { return "fastq.info";          }
         string getCommandCategory()             { return "Sequence Processing"; }
         
         string getHelpString(); 
diff --git a/preclustercommand.cpp b/preclustercommand.cpp

index 0c7895cfd3c4b5c1988977f102d5523bfe20f9e0..19616d8b0e4ce5d46e6753bbc58b776d1b5dd656 100644 (file)
--- a/preclustercommand.cpp
+++ b/preclustercommand.cpp
@@ -19,6 +19,8 @@ vector<string> PreClusterCommand::setParameters(){
                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
                 CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pdiffs);
                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+        CommandParameter ptopdown("topdown", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(ptopdown);
+
                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                 
@@ -37,11 +39,12 @@ string PreClusterCommand::getHelpString(){
                 string helpString = "";
                 helpString += "The pre.cluster command groups sequences that are within a given number of base mismatches.\n";
                 helpString += "The pre.cluster command outputs a new fasta and name file.\n";
-               helpString += "The pre.cluster command parameters are fasta, name, group, count, processors and diffs. The fasta parameter is required. \n";
+               helpString += "The pre.cluster command parameters are fasta, name, group, count, topdown, processors and diffs. The fasta parameter is required. \n";
                 helpString += "The name parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
                 helpString += "The group parameter allows you to provide a group file so you can cluster by group. \n";
          helpString += "The count parameter allows you to provide a count file so you can cluster by group. \n";
                 helpString += "The diffs parameter allows you to specify maximum number of mismatched bases allowed between sequences in a grouping. The default is 1.\n";
+        helpString += "The topdown parameter allows you to specify whether to cluster from largest abundance to smallest or smallest to largest.  Default=T, meanging largest to smallest.\n";
                 helpString += "The pre.cluster command should be in the following format: \n";
                 helpString += "pre.cluster(fasta=yourFastaFile, names=yourNamesFile, diffs=yourMaxDiffs) \n";
                 helpString += "Example pre.cluster(fasta=amazon.fasta, diffs=2).\n";
@@ -210,6 +213,9 @@ PreClusterCommand::PreClusterCommand(string option) {
                         m->setProcessors(temp);
                         m->mothurConvert(temp, processors);
                         
+            temp = validParameter.validFile(parameters, "topdown", false);             if(temp == "not found"){  temp = "T"; }
+                       topdown = m->isTrue(temp);
+            
              if (countfile == "") {
                  if (namefile == "") {
                      vector<string> files; files.push_back(fastafile);
@@ -440,7 +446,7 @@ int PreClusterCommand::createProcessesGroups(string newFName, string newNName, s
                         // Allocate memory for thread data.
                         string extension = toString(i) + ".temp";
                         
-                       preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, countfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, i);
+                       preClusterData* tempPreCluster = new preClusterData(fastafile, namefile, groupfile, countfile, (newFName+extension), (newNName+extension), newMFile, groups, m, lines[i].start, lines[i].end, diffs, topdown, i);
                         pDataArray.push_back(tempPreCluster);
                         processIDS.push_back(i);
                         
@@ -545,7 +551,8 @@ int PreClusterCommand::process(string newMapFile){
                 m->openOutputFile(newMapFile, out);
                 
                 //sort seqs by number of identical seqs
-               sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
+        if (topdown) { sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityTopDown);  }
+        else {  sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityDownTop);  }
                 
                 int count = 0;
                 int numSeqs = alignSeqs.size();
diff --git a/preclustercommand.h b/preclustercommand.h

index 56bf1f36deaaea02b1357b1d229d87ad96bcafaa..e9568860e30bdc7fbf77b946838c99447e0eec5f 100644 (file)
--- a/preclustercommand.h
+++ b/preclustercommand.h
@@ -29,13 +29,21 @@ struct seqPNode {
         ~seqPNode() {}
  };
  /************************************************************/
-inline bool comparePriority(seqPNode first, seqPNode second) {  
+inline bool comparePriorityTopDown(seqPNode first, seqPNode second) {  
      if (first.numIdentical > second.numIdentical) { return true;  }
      else if (first.numIdentical == second.numIdentical) { 
          if (first.seq.getName() > second.seq.getName()) { return true; }
      }
      return false; 
  }
+/************************************************************/
+inline bool comparePriorityDownTop(seqPNode first, seqPNode second) {  
+    if (first.numIdentical < second.numIdentical) { return true;  }
+    else if (first.numIdentical == second.numIdentical) { 
+        if (first.seq.getName() > second.seq.getName()) { return true; }
+    }
+    return false; 
+}
  //************************************************************/
  
  class PreClusterCommand : public Command {
@@ -71,7 +79,7 @@ private:
      CountTable ct;
      
         int diffs, length, processors;
-       bool abort, bygroup;
+       bool abort, bygroup, topdown;
         string fastafile, namefile, outputDir, groupfile, countfile;
         vector<seqPNode> alignSeqs; //maps the number of identical seqs to a sequence
         map<string, string> names; //represents the names file first column maps to second column
@@ -107,9 +115,10 @@ struct preClusterData {
         int diffs, threadID;
         vector<string> groups;
         vector<string> mapFileNames;
+    bool topdown;
         
         preClusterData(){}
-       preClusterData(string f, string n, string g, string c, string nff,  string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, int tid) {
+       preClusterData(string f, string n, string g, string c, string nff,  string nnf, string nmf, vector<string> gr, MothurOut* mout, int st, int en, int d, bool td, int tid) {
                 fastafile = f;
                 namefile = n;
                 groupfile = g;
@@ -123,6 +132,7 @@ struct preClusterData {
                 threadID = tid;
                 groups = gr;
          countfile = c;
+        topdown = td;
         }
  };
  
@@ -235,9 +245,10 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                         pDataArray->m->openOutputFile(pDataArray->newMName+pDataArray->groups[k]+".map", out);
                         pDataArray->mapFileNames.push_back(pDataArray->newMName+pDataArray->groups[k]+".map");
                         
-                       //sort seqs by number of identical seqs
-                       sort(alignSeqs.begin(), alignSeqs.end(), comparePriority);
-                       
+            //sort seqs by number of identical seqs
+            if (pDataArray->topdown) { sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityTopDown);  }
+            else {  sort(alignSeqs.begin(), alignSeqs.end(), comparePriorityDownTop);  }
+            
                         int count = 0;
                         
                         //think about running through twice...
diff --git a/refchimeratest.cpp b/refchimeratest.cpp

index c618e788386550f40cf83c6a328342022e47f243..701fb9eb6a711fd82108d6694790b02992bc6816 100644 (file)
--- a/refchimeratest.cpp
+++ b/refchimeratest.cpp
@@ -128,7 +128,7 @@ int RefChimeraTest::analyzeUnalignedQuery(string queryName, string querySeq, ofs
      
      for(int i=0;i<numRefSeqs;i++){
          double length = 0;
-        int diffs = alignQueryToReferences(querySeq, referenceSeqs[i], queryAlign[i], refAlign[i], length);
+        double diffs = alignQueryToReferences(querySeq, referenceSeqs[i], queryAlign[i], refAlign[i], length);
          if(diffs < bestRefDiffs){
              bestRefDiffs = diffs;
              bestRefLength = length;
@@ -324,7 +324,7 @@ double RefChimeraTest::alignQueryToReferences(string query, string reference, st
                         
                 int end = refLength - 1;
          int maxRow = 0;
-        double maxRowValue = -100000000000;
+        double maxRowValue = -2147483647;
          for(int i=0;i<queryLength;i++){
              if(alignMatrix[i][end] > maxRowValue){
                  maxRow = i;
@@ -334,7 +334,7 @@ double RefChimeraTest::alignQueryToReferences(string query, string reference, st
          
          end = queryLength - 1;
          int maxColumn = 0;
-        double maxColumnValue = -100000000000;
+        double maxColumnValue = -2147483647;
  
          for(int j=0;j<refLength;j++){
              if(alignMatrix[end][j] > maxColumnValue){
diff --git a/splitgroupscommand.h b/splitgroupscommand.h

index b251fe55bc4fd010582c82297e8b757a0f17113e..ec22b8fedaec8752dd1b9161c6476b0223fc157e 100644 (file)
--- a/splitgroupscommand.h
+++ b/splitgroupscommand.h
@@ -28,7 +28,7 @@ public:
         ~SplitGroupCommand() {}
         
         vector<string> setParameters();
-       string getCommandName()                 { return "split.group";                         }
+       string getCommandName()                 { return "split.groups";                                }
         string getCommandCategory()             { return "Sequence Processing";         }
         
         string getHelpString(); 
diff --git a/trimflowscommand.cpp b/trimflowscommand.cpp

index 44121b97dfff0bb0c6611878c7e1c04e046709ea..e9bd081605b4211a26084334122ee12da83b949b 100644 (file)
--- a/trimflowscommand.cpp
+++ b/trimflowscommand.cpp
@@ -14,7 +14,7 @@
  //**********************************************************************************************************************
  vector<string> TrimFlowsCommand::setParameters(){      
         try {
-               CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow",false,true,true); parameters.push_back(pflow);
+               CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow);
                 CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos);
                 CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
                 CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
@@ -63,7 +63,7 @@ string TrimFlowsCommand::getOutputPattern(string type) {
          
          if (type == "flow") {  pattern = "[filename],[tag],flow"; } 
          else if (type == "fasta") {  pattern = "[filename],flow.fasta"; } 
-        else if (type == "file") {  pattern = "[filename],[tag],flow.files"; }
+        else if (type == "file") {  pattern = "[filename],flow.files"; }
          else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
          
          return pattern;
@@ -306,7 +306,6 @@ int TrimFlowsCommand::execute(){
                 
                 if(allFiles){
                         set<string> namesAlreadyProcessed;
-            variables["[tag]"] = "";
                         flowFilesFileName = getOutputFileName("file",variables);
                         m->openOutputFile(flowFilesFileName, output);
  
@@ -342,7 +341,6 @@ int TrimFlowsCommand::execute(){
                         output.close();
                 }
                 else{
-            variables["[tag]"] = "";
                         flowFilesFileName = getOutputFileName("file",variables);
                         m->openOutputFile(flowFilesFileName, output);
author	Sarah Westcott <mothur.westcott@gmail.com>
	Wed, 16 Jan 2013 16:30:18 +0000 (11:30 -0500)
committer	Sarah Westcott <mothur.westcott@gmail.com>
	Wed, 16 Jan 2013 16:30:18 +0000 (11:30 -0500)
bayesian.cpp		patch \| blob \| history
chimerauchimecommand.cpp		patch \| blob \| history
chimerauchimecommand.h		patch \| blob \| history
classifyseqscommand.cpp		patch \| blob \| history
clustersplitcommand.cpp		patch \| blob \| history
commandfactory.cpp		patch \| blob \| history
commandfactory.hpp		patch \| blob \| history
filtersharedcommand.cpp		patch \| blob \| history
parsefastaqcommand.h		patch \| blob \| history
preclustercommand.cpp		patch \| blob \| history
preclustercommand.h		patch \| blob \| history
refchimeratest.cpp		patch \| blob \| history
splitgroupscommand.h		patch \| blob \| history
trimflowscommand.cpp		patch \| blob \| history