CommandParameter pfasta("fasta", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "FastaTaxName",false,false); parameters.push_back(pfasta);
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName-FastaTaxName",false,false); parameters.push_back(pname);
CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumnFasta", "PhylipColumnFasta", "ColumnName",false,false); parameters.push_back(pcolumn);
- CommandParameter ptaxlevel("taxlevel", "Number", "", "1", "", "", "",false,false); parameters.push_back(ptaxlevel);
+ CommandParameter ptaxlevel("taxlevel", "Number", "", "3", "", "", "",false,false); parameters.push_back(ptaxlevel);
CommandParameter psplitmethod("splitmethod", "Multiple", "classify-fasta-distance", "distance", "", "", "",false,false); parameters.push_back(psplitmethod);
CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
+ CommandParameter pcutoff("cutoff", "Number", "", "0.25", "", "", "",false,false); parameters.push_back(pcutoff);
CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
- CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "furthest", "", "", "",false,false); parameters.push_back(pmethod);
- CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
+ CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
+ CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
helpString += "The phylip and column parameter allow you to enter your distance file. \n";
helpString += "The fasta parameter allows you to enter your aligned fasta file. \n";
helpString += "The name parameter allows you to enter your name file and is required if your distance file is in column format. \n";
- helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 10.0. \n";
+ helpString += "The cutoff parameter allow you to set the distance you want to cluster to, default is 0.25. \n";
helpString += "The precision parameter allows you specify the precision of the precision of the distances outputted, default=100, meaning 2 decimal places. \n";
- helpString += "The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n";
+ helpString += "The method allows you to specify what clustering algorythm you want to use, default=average, option furthest, nearest, or average. \n";
helpString += "The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n";
helpString += "The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n";
- helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n";
+ helpString += "The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=3, meaning use the third taxon in each list. \n"; //keep this wording in sync with the "3" default on ptaxlevel and the "not found" fallback for taxlevel
helpString += "The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n";
#ifdef USE_MPI
helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
vector<string> myArray = setParameters();
phylipfile = validParameter.validFile(parameters, "phylip", true);
if (phylipfile == "not open") { abort = true; }
else if (phylipfile == "not found") { phylipfile = ""; }
- else { distfile = phylipfile; format = "phylip"; }
+ else { distfile = phylipfile; format = "phylip"; m->setPhylipFile(phylipfile); }
columnfile = validParameter.validFile(parameters, "column", true);
if (columnfile == "not open") { abort = true; }
else if (columnfile == "not found") { columnfile = ""; }
- else { distfile = columnfile; format = "column"; }
+ else { distfile = columnfile; format = "column"; m->setColumnFile(columnfile); }
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not open") { abort = true; }
- else if (namefile == "not found") { namefile = ""; }
+ else if (namefile == "not found") { namefile = ""; }
+ else { m->setNameFile(namefile); }
fastafile = validParameter.validFile(parameters, "fasta", true);
if (fastafile == "not open") { abort = true; }
else if (fastafile == "not found") { fastafile = ""; }
- else { distfile = fastafile; splitmethod = "fasta"; }
+ else { distfile = fastafile; splitmethod = "fasta"; m->setFastaFile(fastafile); }
taxFile = validParameter.validFile(parameters, "taxonomy", true);
if (taxFile == "not open") { abort = true; }
else if (taxFile == "not found") { taxFile = ""; }
+ else { m->setTaxonomyFile(taxFile); }
if ((phylipfile == "") && (columnfile == "") && (fastafile == "")) {
//is there a current file available for any of these?
length = temp.length();
convert(temp, precision);
- temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
+ temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
hard = m->isTrue(temp);
temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; }
else { splitmethod = temp; }
}
- temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "10"; }
+ temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.25"; }
convert(temp, cutoff);
cutoff += (5 / (precision * 10.0));
- temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "1"; }
+ temp = validParameter.validFile(parameters, "taxlevel", false); if (temp == "not found") { temp = "3"; }
convert(temp, taxLevelCutoff);
- method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "furthest"; }
+ method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "average"; }
if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
MPI_Barrier(MPI_COMM_WORLD);
#else
-
+
+ //sanity check
+ if (processors > distName.size()) { processors = distName.size(); }
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
}else{
+
vector < vector < map<string, string> > > dividedNames; //distNames[1] = vector of filenames for process 1...
dividedNames.resize(processors); //one (initially empty) list of files per process
//for each file group figure out which process will complete it
//want to divide the load intelligently so the big files are spread between processes
for (int i = 0; i < distName.size(); i++) {
//map file index i onto a 1-based process number in the range 1..processors, cycling as i grows
int processToAssign = (i+1) % processors;
if (processToAssign == 0) { processToAssign = processors; } //a remainder of 0 means the last process
//now let's reverse the order of every other process, so we balance big files running with little ones
//NOTE(review): (i+1) % processors is non-zero for every i except the last one, so this reverses all lists but the final one rather than alternating lists - confirm whether (i+1) % 2 was intended
for (int i = 0; i < processors; i++) {
int remainder = ((i+1) % processors);
if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); }
}
listFileNames.push_back(tempName);
}
in.close();
- remove((toString(processIDS[i]) + ".temp").c_str());
+ m->mothurRemove((toString(processIDS[i]) + ".temp"));
//get labels
filename = toString(processIDS[i]) + ".temp.labels";
if (labels.count(tempName) == 0) { labels.insert(tempName); }
}
in2.close();
- remove((toString(processIDS[i]) + ".temp.labels").c_str());
+ m->mothurRemove((toString(processIDS[i]) + ".temp.labels"));
}
}
#else
listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files
#endif
#endif
- if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); } return 0; }
if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); }
ListVector* listSingle;
map<float, int> labelBins = completeListFile(listFileNames, singletonName, labels, listSingle); //returns map of label to numBins
- if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
mergeLists(listFileNames, labelBins, listSingle);
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to merge."); m->mothurOutEndLine();
listSingle->push_back(secondCol);
}
in.close();
- remove(singleton.c_str());
+ m->mothurRemove(singleton);
numSingleBins = listSingle->getNumBins();
}else{ listSingle = NULL; numSingleBins = 0; }
for (int k = 0; k < listNames.size(); k++) {
if (m->control_pressed) {
- if (listSingle != NULL) { delete listSingle; listSingle = NULL; remove(singleton.c_str()); }
- for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); }
+ if (listSingle != NULL) { delete listSingle; listSingle = NULL; m->mothurRemove(singleton); }
+ for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]); }
return labelBin;
}
delete input;
outFilled.close();
- remove(listNames[k].c_str());
+ m->mothurRemove(listNames[k]);
rename(filledInList.c_str(), listNames[k].c_str());
}
//get the list info from each file
for (int k = 0; k < listNames.size(); k++) {
- if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); } delete rabund; return 0; }
+ if (m->control_pressed) { if (listSingle != NULL) { delete listSingle; } for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]); } delete rabund; return 0; }
InputData* input = new InputData(listNames[k], "list");
ListVector* list = input->getListVector(thisLabel);
if (listSingle != NULL) { delete listSingle; }
- for (int i = 0; i < listNames.size(); i++) { remove(listNames[i].c_str()); }
+ for (int i = 0; i < listNames.size(); i++) { m->mothurRemove(listNames[i]); }
return 0;
}
if (m->control_pressed) { //clean up
delete matrix; delete list; delete cluster; delete rabund;
listFile.close();
- for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); }
+ for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); }
listFileNames.clear(); return listFileNames;
}
listFile.close();
if (m->control_pressed) { //clean up
- for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); }
+ for (int i = 0; i < listFileNames.size(); i++) { m->mothurRemove(listFileNames[i]); }
listFileNames.clear(); return listFileNames;
}
- remove(thisDistFile.c_str());
- remove(thisNamefile.c_str());
+ m->mothurRemove(thisDistFile);
+ m->mothurRemove(thisNamefile);
if (saveCutoff != cutoff) {
if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); }
string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist";
- remove(outputFileName.c_str());
+ m->mothurRemove(outputFileName);
for (int i = 0; i < distNames.size(); i++) {