]> git.donarmstrong.com Git - mothur.git/blobdiff - clustersplitcommand.cpp
added otu.association command. added calcSpearman, calcKendall and calcPearson functi...
[mothur.git] / clustersplitcommand.cpp
index dc913d8c8895522a39b5ee0bd22be15a82eb7a74..34caf654124886f9cd23638a149b2b3487ca53e2 100644 (file)
@@ -24,13 +24,13 @@ vector<string> ClusterSplitCommand::setParameters(){
                CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
                CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
                CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
-               CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
+               CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel);
                CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
                CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
                CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
                CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff);
                CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
                CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
                CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
@@ -60,12 +60,12 @@ string ClusterSplitCommand::getHelpString(){
                helpString += "The phylip and column parameter allow you to enter your distance file. \n";
                helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
                helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
-               helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
+               helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n";
                helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
                helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
                helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
                helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
-               helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
+               helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n";
                helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n";
 #ifdef USE_MPI
                helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
@@ -201,7 +201,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        else { distfile = fastafile;  splitmethod = "fasta";  m->setFastaFile(fastafile); }
                        
                        taxFile = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxFile == "not open") { abort = true; }    
+                       if (taxFile == "not open") { taxFile = ""; abort = true; }      
                        else if (taxFile == "not found") { taxFile = ""; }
                        else {  m->setTaxonomyFile(taxFile); }
                        
@@ -264,7 +264,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        if (temp == "not found") { temp = "100"; }
                        //saves precision legnth for formatting below
                        length = temp.length();
-                       convert(temp, precision); 
+                       m->mothurConvert(temp, precision); 
                        
                        temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
                        hard = m->isTrue(temp);
@@ -274,7 +274,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
                        
                        temp = validParameter.validFile(parameters, "splitmethod", false);      
                        if (splitmethod != "fasta") {
@@ -282,12 +282,12 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                else {  splitmethod = temp; }
                        }
                        
-                       temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "10"; }
-                       convert(temp, cutoff); 
+                       temp = validParameter.validFile(parameters, "cutoff", false);           if (temp == "not found")  { temp = "0.25"; }
+                       m->mothurConvert(temp, cutoff); 
                        cutoff += (5 / (precision * 10.0));  
                        
-                       temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "1"; }
-                       convert(temp, taxLevelCutoff); 
+                       temp = validParameter.validFile(parameters, "taxlevel", false);         if (temp == "not found")  { temp = "3"; }
+                       m->mothurConvert(temp, taxLevelCutoff); 
                        
                        method = validParameter.validFile(parameters, "method", false);         if (method == "not found") { method = "average"; }
                        
@@ -564,14 +564,14 @@ int ClusterSplitCommand::execute(){
                                        listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
                                }else{
                                        
-                                       cout << processors << '\t' << distName.size() << endl;
+                                       //cout << processors << '\t' << distName.size() << endl;
                                        vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
                                        dividedNames.resize(processors);
                                        
                                        //for each file group figure out which process will complete it
                                        //want to divide the load intelligently so the big files are spread between processes
                                        for (int i = 0; i < distName.size(); i++) { 
-                                               cout << i << endl;
+                                               //cout << i << endl;
                                                int processToAssign = (i+1) % processors; 
                                                if (processToAssign == 0) { processToAssign = processors; }
                                                
@@ -580,7 +580,7 @@ int ClusterSplitCommand::execute(){
                                        
                                        //not lets reverse the order of ever other process, so we balance big files running with little ones
                                        for (int i = 0; i < processors; i++) {
-                                               cout << i << endl;
+                                               //cout << i << endl;
                                                int remainder = ((i+1) % processors);
                                                if (remainder) {  reverse(dividedNames[i].begin(), dividedNames[i].end());  }
                                        }