#include "readmatrix.hpp"
#include "inputdata.h"
+
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getValidParameters(){ // Returns a vector holding the option names listed below (the parameters this command treats as valid).
+ try {
+ string AlignArray[] = {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
+ vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); // element count = total array bytes / bytes per string; the iterator-range constructor copies every entry
+ return myArray;
+ }
+ catch(exception& e) { // any std exception is passed to m->errorOut with this class/function name, then the process exits with status 1
+ m->errorOut(e, "ClusterSplitCommand", "getValidParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+ClusterSplitCommand::ClusterSplitCommand(){ // Default constructor: marks the command as aborted/help-only and registers its output type keys.
+ try {
+ abort = true; calledHelp = true; // with both flags set, execute() returns 0 straight away (see the abort check at the top of execute)
+ vector<string> tempOutNames; // empty list used as the initial value for every output type
+ outputTypes["list"] = tempOutNames; // create the four keys up front so each maps to an empty vector
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+ outputTypes["column"] = tempOutNames;
+ }
+ catch(exception& e) { // report via m->errorOut with this class/function name, then exit with status 1
+ m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getRequiredParameters(){ // Returns a vector holding the four entries listed below.
+ try {
+ string Array[] = {"fasta","phylip","column","or"}; // NOTE(review): "or" is presumably a marker meaning any one of the listed inputs is enough -- confirm against the code that consumes this list
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // element count = total array bytes / bytes per string
+ return myArray;
+ }
+ catch(exception& e) { // report via m->errorOut with this class/function name, then exit with status 1
+ m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getRequiredFiles(){ // Returns an empty vector: this command lists no required files.
+ try {
+ vector<string> myArray; // intentionally left empty
+ return myArray;
+ }
+ catch(exception& e) { // report via m->errorOut with this class/function name, then exit with status 1
+ m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
//This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
ClusterSplitCommand::ClusterSplitCommand(string option) {
try{
globaldata = GlobalData::getInstance();
- abort = false;
+ abort = false; calledHelp = false;
format = "";
//allow user to run help
- if(option == "help") { help(); abort = true; }
+ if(option == "help") { help(); abort = true; calledHelp = true; }
else {
//valid paramters for this command
}
}
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+ outputTypes["column"] = tempOutNames;
+
globaldata->newRead();
//if the user changes the output directory command factory will send this info to us in the output parameter
int ClusterSplitCommand::execute(){
try {
- if (abort == true) { return 0; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
time_t estart;
vector<string> listFileNames;
SplitMatrix* split;
if (splitmethod == "distance") { split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large); }
else if (splitmethod == "classify") { split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large); }
- else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors, outputDir); }
+ else if (splitmethod == "fasta") { split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir); }
else { m->mothurOut("Not a valid splitting method. Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0; }
split->split();
vector< map<string, string> > distName = split->getDistanceFiles(); //returns map of distance files -> namefile sorted by distance file size
delete split;
+ //output a merged distance file
+ if (splitmethod == "fasta") { createMergedDistanceFile(distName); }
+
+
if (m->control_pressed) { return 0; }
m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
//for each file group figure out which process will complete it
//want to divide the load intelligently so the big files are spread between processes
- int count = 1;
for (int i = 0; i < distName.size(); i++) {
int processToAssign = (i+1) % processors;
if (processToAssign == 0) { processToAssign = processors; }
//for each file group figure out which process will complete it
//want to divide the load intelligently so the big files are spread between processes
- int count = 1;
for (int i = 0; i < distName.size(); i++) {
int processToAssign = (i+1) % processors;
if (processToAssign == 0) { processToAssign = processors; }
m->openOutputFile(fileroot+ tag + ".rabund", outRabund);
m->openOutputFile(fileroot+ tag + ".list", outList);
- outputNames.push_back(fileroot+ tag + ".sabund");
- outputNames.push_back(fileroot+ tag + ".rabund");
- outputNames.push_back(fileroot+ tag + ".list");
+ outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+ outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
+ outputNames.push_back(fileroot+ tag + ".list"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
map<float, int>::iterator itLabel;
outLabels.close();
exit(0);
- }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
}
//force parent to wait until all the processes are done
m->openOutputFile(fileroot+ tag + ".list", listFile);
listFileNames.push_back(fileroot+ tag + ".list");
-
- time_t estart = time(NULL);
-
+
float previousDist = 0.00000;
float rndPreviousDist = 0.00000;
}
+//**********************************************************************************************************************
+int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) { // Passes the first key of every map in distNames to m->appendFiles to build one merged file, then records that file as a "column" output. Always returns 0.
+ try{
+
+#ifdef USE_MPI
+ int pid;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are: pid receives this process's rank in MPI_COMM_WORLD
+
+ if (pid != 0) { // NOTE(review): in MPI builds the merge runs only on ranks other than 0 -- confirm this is intended rather than pid == 0
+#endif
+
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); } // no output directory set: use the value m->hasPath gives for the fasta file
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist"; // output directory + root name of the fasta file + "dist"
+ remove(outputFileName.c_str()); // drop any existing file of that name before appending; the return value is ignored
+
+
+ for (int i = 0; i < distNames.size(); i++) {
+ if (m->control_pressed) { return 0; } // stop early with 0; the merged file is not added to outputTypes/outputNames in this case
+
+ string thisDistFile = distNames[i].begin()->first; // key of the first map entry; NOTE(review): begin() is dereferenced with no empty check -- assumes every map has at least one entry
+
+ m->appendFiles(thisDistFile, outputFileName); // presumably appends thisDistFile's contents to the merged file -- helper is defined elsewhere
+ }
+
+ outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); // record the merged file under the "column" type and in the overall output list
+
+#ifdef USE_MPI
+ }
+#endif
+
+ return 0;
+
+
+ }
+ catch(exception& e) { // report via m->errorOut with this class/function name, then exit with status 1
+ m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile");
+ exit(1);
+ }
+}
//**********************************************************************************************************************