#include "readmatrix.hpp"
#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getValidParameters(){ // Returns the names of all 17 parameters this command accepts, as a vector of strings.
+ try {
+ string AlignArray[] = {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
+ vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); // element count = total array bytes / bytes per string; copies the array into the vector
+ return myArray;
+ }
+ catch(exception& e) { // any std::exception is reported through m->errorOut and then the process exits with status 1
+ m->errorOut(e, "ClusterSplitCommand", "getValidParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+ClusterSplitCommand::ClusterSplitCommand(){ // Default constructor: sets the abort/help flags and registers the output type keys; reads no options.
+ try {
+ abort = true; calledHelp = true; // with both flags set, execute() returns 0 at its first check without doing any clustering
+ vector<string> tempOutNames; // empty list used as the initial value for every output type
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+ outputTypes["column"] = tempOutNames; // "column" is the type under which createMergedDistanceFile() registers the merged distance file
+ }
+ catch(exception& e) { // any std::exception is reported through m->errorOut and then the process exits with status 1
+ m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getRequiredParameters(){ // Returns the required-parameter list: "fasta", "phylip", "column" and the token "or".
+ try {
+ string Array[] = {"fasta","phylip","column","or"}; // NOTE(review): "or" is not an entry of getValidParameters(); presumably a marker meaning any one of the listed inputs suffices - confirm against the caller
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // element count = total array bytes / bytes per string
+ return myArray;
+ }
+ catch(exception& e) { // any std::exception is reported through m->errorOut and then the process exits with status 1
+ m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getRequiredFiles(){ // Returns the list of required files, which is always empty for this command.
+ try {
+ vector<string> myArray; // intentionally left empty: nothing is added before it is returned
+ return myArray;
+ }
+ catch(exception& e) { // any std::exception is reported through m->errorOut and then the process exits with status 1
+ m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
ClusterSplitCommand::ClusterSplitCommand(string option) {
try{
globaldata = GlobalData::getInstance();
- abort = false;
+ abort = false; calledHelp = false;
format = "";
//allow user to run help
- if(option == "help") { help(); abort = true; }
+ if(option == "help") { help(); abort = true; calledHelp = true; }
else {
//valid paramters for this command
}
}
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+ outputTypes["column"] = tempOutNames;
+
globaldata->newRead();
//if the user changes the output directory command factory will send this info to us in the output parameter
m->mothurOut("The method allows you to specify what clustering algorythm you want to use, default=furthest, option furthest, nearest, or average. \n");
m->mothurOut("The splitmethod parameter allows you to specify how you want to split your distance file before you cluster, default=distance, options distance, classify or fasta. \n");
m->mothurOut("The taxonomy parameter allows you to enter the taxonomy file for your sequences, this is only valid if you are using splitmethod=classify. Be sure your taxonomy file does not include the probability scores. \n");
- m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1. \n");
+ m->mothurOut("The taxlevel parameter allows you to specify the taxonomy level you want to use to split the distance file, default=1, meaning use the first taxon in each list. \n");
m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n");
#ifdef USE_MPI
m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
int ClusterSplitCommand::execute(){
try {
- if (abort == true) { return 0; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
time_t estart;
vector<string> listFileNames;
SplitMatrix* split;
if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); }
else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); }
- else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors, outputDir); }
+ else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir); }
else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; }
split->split();
vector< map<string, string> > distName = split->getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size
delete split;
+ //output a merged distance file
+ if (splitmethod == "fasta") { createMergedDistanceFile(distName); }
+
+
if (m->control_pressed) { return 0; }
m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
//for each file group figure out which process will complete it
//want to divide the load intelligently so the big files are spread between processes
- int count = 1;
for (int i = 0; i < distName.size(); i++) {
int processToAssign = (i+1) % processors;
if (processToAssign == 0) { processToAssign = processors; }
//for each file group figure out which process will complete it
//want to divide the load intelligently so the big files are spread between processes
- int count = 1;
for (int i = 0; i < distName.size(); i++) {
int processToAssign = (i+1) % processors;
if (processToAssign == 0) { processToAssign = processors; }
m->openOutputFile(fileroot+ tag + ".rabund", outRabund);
m->openOutputFile(fileroot+ tag + ".list", outList);
- outputNames.push_back(fileroot+ tag + ".sabund");
- outputNames.push_back(fileroot+ tag + ".rabund");
- outputNames.push_back(fileroot+ tag + ".list");
+ outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+ outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
+ outputNames.push_back(fileroot+ tag + ".list"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
map<float, int>::iterator itLabel;
outLabels.close();
exit(0);
- }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
}
//force parent to wait until all the processes are done
m->openOutputFile(fileroot+ tag + ".list", listFile);
listFileNames.push_back(fileroot+ tag + ".list");
-
- time_t estart = time(NULL);
-
+
float previousDist = 0.00000;
float rndPreviousDist = 0.00000;
}
+//**********************************************************************************************************************
+int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) { // Appends the first-key distance file of every map in distNames into one "<fasta root>dist" file and registers it as a "column" output.
+ try{ // Always returns 0, including when interrupted; a std::exception is reported via m->errorOut and exits with status 1.
+ // distNames is taken by value; only the first key of each map is read (execute() obtains these maps from SplitMatrix::getDistanceFiles()).
+#ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out which MPI rank this process is
+ // NOTE(review): the guard below runs the merge on every rank EXCEPT 0; if only one process should write the file this looks inverted - confirm intent.
+ if (pid != 0) {
+#endif
+ // Output location: the user-supplied outputDir, or the value of m->hasPath(fastafile) when outputDir is empty.
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); } // presumably hasPath yields the directory part of fastafile - confirm
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist"; // presumably getRootName keeps the trailing '.', giving "<root>.dist" - confirm
+ remove(outputFileName.c_str()); // delete any file already at that path; the return value is ignored, so a missing file is not treated as an error
+ // The file is removed first presumably because appendFiles adds to existing content - confirm.
+ // Merge loop: one append per entry of distNames, in vector order.
+ for (int i = 0; i < distNames.size(); i++) {
+ if (m->control_pressed) { return 0; } // interrupted: stop early; the partially written file stays on disk and is not registered as output
+ // NOTE(review): begin() is dereferenced without checking that the map is non-empty - confirm every entry holds at least one pair.
+ string thisDistFile = distNames[i].begin()->first; // the key of the map's first pair is used as the distance file name
+ // The mapped value (the matching name file, per the comment in execute()) is not used here.
+ m->appendFiles(thisDistFile, outputFileName);
+ }
+ // Record the merged file both under the "column" output type and in the overall output list.
+ outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName);
+ // The brace below closes the MPI-only rank guard opened above.
+#ifdef USE_MPI
+ }
+#endif
+ // Reached by every process, whether or not it performed the merge.
+ return 0;
+
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
+ exit(1);
+ }
+}
//**********************************************************************************************************************