X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=clustersplitcommand.cpp;h=34caf654124886f9cd23638a149b2b3487ca53e2;hb=8dd3c225255d7084e3aff8740aa4f1f1cabb367a;hp=dc913d8c8895522a39b5ee0bd22be15a82eb7a74;hpb=55386dddad84cc1140d736cabaf4dd0ae16f2e01;p=mothur.git diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index dc913d8..34caf65 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -24,13 +24,13 @@ vector ClusterSplitCommand::setParameters(){ CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta); CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname); CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn); - CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel); + CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel); CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod); CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge); CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund); CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); - CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff); + CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff); CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision); CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod); CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard); @@ -60,12 +60,12 @@ string ClusterSplitCommand::getHelpString(){ helpString += "The phylip and column parameter allow you to enter your distance file. \n"; helpString += "The fasta parameter allows you to enter your aligned fasta file. \n"; helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n"; - helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n"; + helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n"; helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n"; helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n"; helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n"; helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n"; - helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n"; + helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the first taxon in each list. \n"; helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n"; #ifdef USE_MPI helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n"; @@ -201,7 +201,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { else { distfile = fastafile; splitmethod = "fasta"; m->setFastaFile(fastafile); } taxFile = validParameter.validFile(parameters, "taxonomy", true); - if (taxFile == "not open") { abort = true; } + if (taxFile == "not open") { taxFile = ""; abort = true; } else if (taxFile == "not found") { taxFile = ""; } else { m->setTaxonomyFile(taxFile); } @@ -264,7 +264,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { if (temp == "not found") { temp = "100"; } //saves precision legnth for formatting below length = temp.length(); - convert(temp, precision); + m->mothurConvert(temp, precision); temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; } hard = m->isTrue(temp); @@ -274,7 +274,7 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } m->setProcessors(temp); - convert(temp, processors); + m->mothurConvert(temp, processors); temp = validParameter.validFile(parameters, "splitmethod", false); if (splitmethod != "fasta") { @@ -282,12 +282,12 @@ ClusterSplitCommand::ClusterSplitCommand(string option) { else { splitmethod = temp; } } - temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; } - convert(temp, cutoff); + temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.25"; } + m->mothurConvert(temp, cutoff); cutoff += (5 / (precision * 10.0)); - temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "1"; } - convert(temp, taxLevelCutoff); + temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "3"; } + m->mothurConvert(temp, taxLevelCutoff); method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "average"; } @@ -564,14 +564,14 @@ int ClusterSplitCommand::execute(){ listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files }else{ - cout << processors << '\t' << distName.size() << endl; + //cout << processors << '\t' << distName.size() << endl; vector < vector < map > > dividedNames; //distNames[1] = vector of filenames for process 1... dividedNames.resize(processors); //for each file group figure out which process will complete it //want to divide the load intelligently so the big files are spread between processes for (int i = 0; i < distName.size(); i++) { - cout << i << endl; + //cout << i << endl; int processToAssign = (i+1) % processors; if (processToAssign == 0) { processToAssign = processors; } @@ -580,7 +580,7 @@ int ClusterSplitCommand::execute(){ //not lets reverse the order of ever other process, so we balance big files running with little ones for (int i = 0; i < processors; i++) { - cout << i << endl; + //cout << i << endl; int remainder = ((i+1) % processors); if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); } }