]> git.donarmstrong.com Git - mothur.git/blobdiff - clustersplitcommand.cpp
modified reportfile class
[mothur.git] / clustersplitcommand.cpp
index 10579a3b209c3f5b15b1590d44d2ab99f90326b1..cb3fc40aaa6e42a0e802d0f47ec3f8897cc4fa61 100644 (file)
 #include "readmatrix.hpp"
 #include "inputdata.h"
 
+
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getValidParameters(){      // Returns the parameter-name strings listed below as a vector<string>.
+       try {
+               string AlignArray[] =  {"fasta","phylip","column","name","cutoff","precision","method","splitmethod","taxonomy","taxlevel","large","showabund","timing","hard","processors","outputdir","inputdir"};
+               vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); // sizeof ratio is the element count, so the range covers the whole array
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "getValidParameters"); // report the exception tagged with class and function name
+               exit(1); // the process is terminated after the error is reported
+       }
+}
+//**********************************************************************************************************************
+ClusterSplitCommand::ClusterSplitCommand(){    // Default constructor: flags the object as aborted/help-only and registers the output types with empty file lists.
+       try {
+               abort = true; calledHelp = true; // with both flags set, execute() returns 0 immediately (see its abort check)
+               vector<string> tempOutNames; // empty list used to seed every outputTypes entry
+               outputTypes["list"] = tempOutNames;
+               outputTypes["rabund"] = tempOutNames;
+               outputTypes["sabund"] = tempOutNames;
+               outputTypes["column"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "ClusterSplitCommand"); // report the exception tagged with class and function name
+               exit(1); // the process is terminated after the error is reported
+       }
+}
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getRequiredParameters(){   // Returns the strings "fasta", "phylip", "column" and "or" as a vector<string>.
+       try {
+               string Array[] =  {"fasta","phylip","column","or"}; // NOTE(review): "or" is presumably a marker meaning any one of the preceding names suffices - confirm against the consumer of this list
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // sizeof ratio is the element count, so the range covers the whole array
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "getRequiredParameters"); // report the exception tagged with class and function name
+               exit(1); // the process is terminated after the error is reported
+       }
+}
+//**********************************************************************************************************************
+vector<string> ClusterSplitCommand::getRequiredFiles(){        // Returns an empty vector<string>; no file names are listed for this command.
+       try {
+               vector<string> myArray; // intentionally left empty
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "getRequiredFiles"); // report the exception tagged with class and function name
+               exit(1); // the process is terminated after the error is reported
+       }
+}
 //**********************************************************************************************************************
 //This function checks to make sure the cluster command has no errors and then clusters based on the method chosen.
 ClusterSplitCommand::ClusterSplitCommand(string option)  {
        try{
                globaldata = GlobalData::getInstance();
-               abort = false;
+               abort = false; calledHelp = false;   
                format = "";
                
                //allow user to run help
-               if(option == "help") { help(); abort = true; }
+               if(option == "help") { help(); abort = true; calledHelp = true; }
                
                else {
                        //valid paramters for this command
@@ -44,6 +95,13 @@ ClusterSplitCommand::ClusterSplitCommand(string option)  {
                                }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["list"] = tempOutNames;
+                       outputTypes["rabund"] = tempOutNames;
+                       outputTypes["sabund"] = tempOutNames;
+                       outputTypes["column"] = tempOutNames;
+                       
                        globaldata->newRead();
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -231,7 +289,7 @@ ClusterSplitCommand::~ClusterSplitCommand(){}
 int ClusterSplitCommand::execute(){
        try {
        
-               if (abort == true) {    return 0;       }
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                time_t estart;
                vector<string> listFileNames;
@@ -292,7 +350,7 @@ int ClusterSplitCommand::execute(){
                SplitMatrix* split;
                if (splitmethod == "distance")                  {       split = new SplitMatrix(distfile, namefile, taxFile, cutoff, splitmethod, large);                                                       }
                else if (splitmethod == "classify")             {       split = new SplitMatrix(distfile, namefile, taxFile, taxLevelCutoff, splitmethod, large);                                       }
-               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, splitmethod, processors, outputDir);      }
+               else if (splitmethod == "fasta")                {       split = new SplitMatrix(fastafile, namefile, taxFile, taxLevelCutoff, cutoff, splitmethod, processors, outputDir);      }
                else { m->mothurOut("Not a valid splitting method.  Valid splitting algorithms are distance, classify or fasta."); m->mothurOutEndLine(); return 0;             }
                
                split->split();
@@ -303,6 +361,10 @@ int ClusterSplitCommand::execute(){
                vector< map<string, string> > distName = split->getDistanceFiles();  //returns map of distance files -> namefile sorted by distance file size
                delete split;
                
+               //output a merged distance file
+               if (splitmethod == "fasta")             { createMergedDistanceFile(distName); }
+                       
+                               
                if (m->control_pressed) { return 0; }
                
                m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to split the distance file."); m->mothurOutEndLine();
@@ -317,7 +379,6 @@ int ClusterSplitCommand::execute(){
                                        
                        //for each file group figure out which process will complete it
                        //want to divide the load intelligently so the big files are spread between processes
-                       int count = 1;
                        for (int i = 0; i < distName.size(); i++) { 
                                int processToAssign = (i+1) % processors; 
                                if (processToAssign == 0) { processToAssign = processors; }
@@ -475,7 +536,6 @@ int ClusterSplitCommand::execute(){
                                        
                                        //for each file group figure out which process will complete it
                                        //want to divide the load intelligently so the big files are spread between processes
-                                       int count = 1;
                                        for (int i = 0; i < distName.size(); i++) { 
                                                int processToAssign = (i+1) % processors; 
                                                if (processToAssign == 0) { processToAssign = processors; }
@@ -696,9 +756,9 @@ int ClusterSplitCommand::mergeLists(vector<string> listNames, map<float, int> us
                m->openOutputFile(fileroot+ tag + ".rabund",    outRabund);
                m->openOutputFile(fileroot+ tag + ".list",              outList);
                                
-               outputNames.push_back(fileroot+ tag + ".sabund");
-               outputNames.push_back(fileroot+ tag + ".rabund");
-               outputNames.push_back(fileroot+ tag + ".list");
+               outputNames.push_back(fileroot+ tag + ".sabund");  outputTypes["list"].push_back(fileroot+ tag + ".list");
+               outputNames.push_back(fileroot+ tag + ".rabund");  outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
+               outputNames.push_back(fileroot+ tag + ".list");    outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
                
                map<float, int>::iterator itLabel;
 
@@ -828,7 +888,11 @@ int ClusterSplitCommand::createProcesses(vector < vector < map<string, string> >
                                outLabels.close();
 
                                exit(0);
-                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
                }
                
                //force parent to wait until all the processes are done
@@ -924,9 +988,7 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
                        m->openOutputFile(fileroot+ tag + ".list",      listFile);
                
                        listFileNames.push_back(fileroot+ tag + ".list");
-               
-                       time_t estart = time(NULL);
-                       
+                               
                        float previousDist = 0.00000;
                        float rndPreviousDist = 0.00000;
                        
@@ -1016,5 +1078,45 @@ vector<string> ClusterSplitCommand::cluster(vector< map<string, string> > distNa
 
 
 }
+//**********************************************************************************************************************
 
+int ClusterSplitCommand::createMergedDistanceFile(vector< map<string, string> > distNames) { // Appends the first-key file of every distNames entry into one output file and records it as a "column" output; always returns 0.
+       try{
+               
+#ifdef USE_MPI
+               int pid;
+               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+               
+               if (pid != 0) { // NOTE(review): under MPI the merge runs only on ranks other than 0 - confirm this is intended and not an inverted pid == 0 check
+#endif
+               
+               string thisOutputDir = outputDir;
+               if (outputDir == "") { thisOutputDir = m->hasPath(fastafile); } // no output directory set: use m->hasPath(fastafile) instead (presumably the fasta file's directory - confirm)
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "dist"; // name is built from the fasta file's simple/root name plus "dist"
+               remove(outputFileName.c_str()); // delete any existing file of that name before the appends below
+               
+               
+               for (int i = 0; i < distNames.size(); i++) {
+                       if (m->control_pressed) {  return 0; } // early return: the output file is not recorded in outputTypes/outputNames in this case
+                       
+                       string thisDistFile = distNames[i].begin()->first; // key of the first map entry; the caller describes each map as distance file -> namefile
+                       
+                       m->appendFiles(thisDistFile, outputFileName);
+               }       
+                       
+               outputTypes["column"].push_back(outputFileName); outputNames.push_back(outputFileName); // register the merged file under the "column" type and in the overall output list
+                       
+#ifdef USE_MPI
+               }
+#endif
+                               
+               return 0;       
+               
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClusterSplitCommand", "createMergedDistanceFile"); // report the exception tagged with class and function name
+               exit(1); // the process is terminated after the error is reported
+       }
+}
 //**********************************************************************************************************************