]> git.donarmstrong.com Git - mothur.git/commitdiff
paralellized summary.shared
authorwestcott <westcott>
Wed, 15 Sep 2010 09:51:33 +0000 (09:51 +0000)
committerwestcott <westcott>
Wed, 15 Sep 2010 09:51:33 +0000 (09:51 +0000)
heatmapcommand.cpp
makefile
pcacommand.cpp
summarycommand.cpp
summarysharedcommand.cpp
summarysharedcommand.h

index c6345328e6d5ac5b0e577d03a797d35448446227..fb8e0216530623ea11375784eb898ddfe20ecc58 100644 (file)
@@ -109,7 +109,7 @@ void HeatMapCommand::help(){
                m->mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and are also separated by dashes.\n");
                m->mothurOut("The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n");
                m->mothurOut("The heatmap.bin command should be in the following format: heatmap.bin(groups=yourGroups, sorted=yourSorted, label=yourLabels).\n");
-               m->mothurOut("Example heatmap.bin(groups=A-B-C, sorted=F, scale=log10).\n");
+               m->mothurOut("Example heatmap.bin(groups=A-B-C, sorted=none, scale=log10).\n");
                m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
                m->mothurOut("The default value for scale is log10; your other options are log2 and linear.\n");
                m->mothurOut("The heatmap.bin command outputs a .svg file for each label you specify.\n");
index 278a3adc659ac0d8e6d5afd9815ad0adac7dafd6..6f194bb349eacfcdf23424a950db3cf387f17d1e 100644 (file)
--- a/makefile
+++ b/makefile
@@ -13,7 +13,7 @@
 
 CXXFLAGS += -O3
 
-MOTHUR_FILES = "\"Enter_your_default_path_here\""
+MOTHUR_FILES = "\"../Release\""
 
 RELEASE_DATE = "\"9/9/2010\""
 VERSION = "\"1.13.0\""
index c982d7563c8f26066924eeab070f5fd16a5b30e5..51e3c319f377b893224e1e2f2d7093b3665c0d8a 100644 (file)
@@ -512,15 +512,15 @@ void PCACommand::output(string fnameRoot, vector<string> name_list, vector<vecto
                        }
                }
                
-               ofstream pcaData((fnameRoot+"pca").c_str(), ios::trunc);
+               ofstream pcaData((fnameRoot+"pcoa").c_str(), ios::trunc);
                pcaData.setf(ios::fixed, ios::floatfield);
                pcaData.setf(ios::showpoint);   
-               outputNames.push_back(fnameRoot+"pca");
+               outputNames.push_back(fnameRoot+"pcoa");
                
-               ofstream pcaLoadings((fnameRoot+"pca.loadings").c_str(), ios::trunc);
+               ofstream pcaLoadings((fnameRoot+"pcoa.loadings").c_str(), ios::trunc);
                pcaLoadings.setf(ios::fixed, ios::floatfield);
                pcaLoadings.setf(ios::showpoint);
-               outputNames.push_back(fnameRoot+"pca.loadings");        
+               outputNames.push_back(fnameRoot+"pcoa.loadings");       
                
                pcaLoadings << "axis\tloading\n";
                for(int i=0;i<rank;i++){
index 208544adf1bd7ec2f0f59603c4eb86840275cf80..638119543480ffe0a8364379edfef0b184c526bf 100644 (file)
@@ -100,7 +100,7 @@ SummaryCommand::SummaryCommand(string option)  {
                        temp = validParameter.validFile(parameters, "size", false);                     if (temp == "not found") { temp = "0"; }
                        convert(temp, size); 
                        
-                       temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "F"; }
+                       temp = validParameter.validFile(parameters, "groupmode", false);                if (temp == "not found") { temp = "T"; }
                        groupMode = m->isTrue(temp);
                        
        
@@ -123,7 +123,7 @@ void SummaryCommand::help(){
                m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n");
                validCalculator->printCalc("summary", cout);
                m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n");
-               m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=False).\n");
+               m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n");
                m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n");
        }
index f44a2cd540345424e946c48ef733768ff4691ae8..e43c99708cd81a231286c10dc1c12d9035059fce 100644 (file)
@@ -48,7 +48,7 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"label","calc","groups","all","outputdir","inputdir"};
+                       string Array[] =  {"label","calc","groups","all","outputdir","inputdir", "processors"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -104,6 +104,9 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
                        all = m->isTrue(temp);
                        
+                       temp = validParameter.validFile(parameters, "processors", false);       if(temp == "not found"){        temp = "1"; }
+                       convert(temp, processors); 
+                       
                        if (abort == false) {
                        
                                validCalculator = new ValidCalculators();
@@ -157,10 +160,6 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                        }
                                }
                                
-                               outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
-                               m->openOutputFile(outputFileName, outputFileHandle);
-                               outputNames.push_back(outputFileName);
-                               
                                mult = false;
                        }
                }
@@ -211,6 +210,9 @@ int SummarySharedCommand::execute(){
        
                if (abort == true) { return 0; }
                
+               ofstream outputFileHandle, outAll;
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
+               
                //if the users entered no valid calculators don't execute command
                if (sumCalculators.size() == 0) { return 0; }
                //check if any calcs can do multiples
@@ -230,17 +232,22 @@ int SummarySharedCommand::execute(){
                lookup = input->getSharedRAbundVectors();
                string lastLabel = lookup[0]->getLabel();
                
+               /******************************************************/
+               //output headings for files
+               /******************************************************/
                //output estimator names as column headers
+               m->openOutputFile(outputFileName, outputFileHandle);
                outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
                for(int i=0;i<sumCalculators.size();i++){
                        outputFileHandle << '\t' << sumCalculators[i]->getName();
                        if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
                }
                outputFileHandle << endl;
+               outputFileHandle.close();
                
                //create file and put column headers for multiple groups file
+               string outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
                if (mult == true) {
-                       outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
                        m->openOutputFile(outAllFileName, outAll);
                        outputNames.push_back(outAllFileName);
                        
@@ -251,6 +258,7 @@ int SummarySharedCommand::execute(){
                                }
                        }
                        outAll << endl;
+                       outAll.close();
                }
                
                if (lookup.size() < 2) { 
@@ -258,28 +266,38 @@ int SummarySharedCommand::execute(){
                        for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                        
                        //close files and clean up
-                       outputFileHandle.close();  remove(outputFileName.c_str());
-                       if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
+                       remove(outputFileName.c_str());
+                       if (mult == true) { remove(outAllFileName.c_str());  }
                        return 0;
                //if you only have 2 groups you don't need a .sharedmultiple file
                }else if ((lookup.size() == 2) && (mult == true)) { 
                        mult = false;
-                       outAll.close();  
                        remove(outAllFileName.c_str());
                        outputNames.pop_back();
                }
                
                if (m->control_pressed) {
-                       if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
-                       outputFileHandle.close(); remove(outputFileName.c_str()); 
+                       if (mult) {  remove(outAllFileName.c_str());  }
+                       remove(outputFileName.c_str()); 
                        delete input; globaldata->ginput = NULL;
                        for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                        for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
                        globaldata->Groups.clear(); 
                        return 0;
                }
-                                                               
-                                                                                                               
+               /******************************************************/
+               
+               
+               /******************************************************/
+               //comparison breakup to be used by different processes later
+               numGroups = globaldata->Groups.size();
+               lines.resize(processors);
+               for (int i = 0; i < processors; i++) {
+                       lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
+                       lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
+               }               
+               /******************************************************/
+               
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
@@ -287,8 +305,8 @@ int SummarySharedCommand::execute(){
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        if (m->control_pressed) {
-                               if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
-                               outputFileHandle.close(); remove(outputFileName.c_str()); 
+                               if (mult) {  remove(outAllFileName.c_str());  }
+                               remove(outputFileName.c_str()); 
                                delete input; globaldata->ginput = NULL;
                                for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                                for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
@@ -299,7 +317,7 @@ int SummarySharedCommand::execute(){
                
                        if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               process(lookup);
+                               process(lookup, outputFileName, outAllFileName);
                                
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
@@ -312,7 +330,7 @@ int SummarySharedCommand::execute(){
                                        lookup = input->getSharedRAbundVectors(lastLabel);
 
                                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       process(lookup);
+                                       process(lookup, outputFileName, outAllFileName);
                                        
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
@@ -330,8 +348,8 @@ int SummarySharedCommand::execute(){
                }
                
                if (m->control_pressed) {
-                       if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
-                       outputFileHandle.close(); remove(outputFileName.c_str()); 
+                       if (mult) { remove(outAllFileName.c_str());  }
+                       remove(outputFileName.c_str()); 
                        delete input; globaldata->ginput = NULL;
                        for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
                        globaldata->Groups.clear(); 
@@ -357,7 +375,7 @@ int SummarySharedCommand::execute(){
                                lookup = input->getSharedRAbundVectors(lastLabel);
 
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               process(lookup);
+                               process(lookup, outputFileName, outAllFileName);
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                }
                
@@ -365,10 +383,6 @@ int SummarySharedCommand::execute(){
                //reset groups parameter
                globaldata->Groups.clear();  
                
-               //close files
-               outputFileHandle.close();
-               if (mult == true) {  outAll.close();  }
-               
                for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
                delete input;  globaldata->ginput = NULL;
                
@@ -380,7 +394,8 @@ int SummarySharedCommand::execute(){
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
-               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        }
                m->mothurOutEndLine();
 
                return 0;
@@ -392,72 +407,143 @@ int SummarySharedCommand::execute(){
 }
 
 /***********************************************************/
-int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
+//computes every summary row for thisLookup's label and appends them to sumFileName (plus the multi-group row to
+//sumAllFileName when mult is set). driver() writes ".temp" files that are appended here and then removed. returns 0.
+int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
        try {
-                               //loop through calculators and add to file all for all calcs that can do mutiple groups
-                               if (mult == true) {
-                                       //output label
-                                       outAll << thisLookup[0]->getLabel() << '\t';
-                                       
-                                       //output groups names
-                                       string outNames = "";
-                                       for (int j = 0; j < thisLookup.size(); j++) {
-                                               outNames += thisLookup[j]->getGroup() +  "-";
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
+                                       //write to the .temp files that are appended below; the summary files themselves already hold the column headers
+                                       driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"));
+                                       m->appendFiles((sumFileName + ".temp"), sumFileName);
+                                       remove((sumFileName + ".temp").c_str());
+                                       if (mult) {
+                                               m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+                                               remove((sumAllFileName + ".temp").c_str());
+                                       }
+                               }else{
+                                       int process = 0;
+                                       vector<int> processIDS;
+                                       //create one child per entry in lines; each child writes temp files tagged with its own pid
+                                       while (process != processors) {
+                                               int pid = fork();
+                                               if (pid > 0) {
+                                                       processIDS.push_back(pid); 
+                                                       process++;
+                                               }else if (pid == 0){
+                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp");   
+                                                       exit(0);
+                                               }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+                                       }
+                               
+                                       //force parent to wait until all the processes are done
+                                       for (int i = 0; i < processIDS.size(); i++) {
+                                               waitpid(processIDS[i], NULL, 0); //wait for this specific child; its exit status is not used
                                        }
-                                       outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
-                                       outAll << outNames << '\t';
                                        
-                                       for(int i=0;i<sumCalculators.size();i++){
-                                               if (sumCalculators[i]->getMultiple() == true) { 
-                                                       sumCalculators[i]->getValues(thisLookup);
-                                                       
-                                                       if (m->control_pressed) { return 1; }
-                                                       
-                                                       outAll << '\t';
-                                                       sumCalculators[i]->print(outAll);
+                                       for (int i = 0; i < processIDS.size(); i++) {
+                                               m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                                               remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               if (mult) {
+                                                       if (i == 0) {  m->appendFiles((sumAllFileName + toString(processIDS[i]) + ".temp"), sumAllFileName);  } //every child writes the same multi-group row, keep one copy
+                                                       remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());
                                                }
                                        }
-                                       outAll << endl;
-                               }
-       
-                               int n = 1; 
-                               vector<SharedRAbundVector*> subset;
-                               for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to compare
-       
-                                       for (int l = n; l < thisLookup.size(); l++) {
-                                               
-                                               outputFileHandle << thisLookup[0]->getLabel() << '\t';
-                                               
-                                               subset.clear(); //clear out old pair of sharedrabunds
-                                               //add new pair of sharedrabunds
-                                               subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
-                                               
-                                               //sort groups to be alphanumeric
-                                               if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
-                                                       outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
-                                               }else{
-                                                       outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
-                                               }
-                                               
-                                               for(int i=0;i<sumCalculators.size();i++) {
 
-                                                       sumCalculators[i]->getValues(subset); //saves the calculator outputs
-                                                       
-                                                       if (m->control_pressed) { return 1; }
-                                                       
-                                                       outputFileHandle << '\t';
-                                                       sumCalculators[i]->print(outputFileHandle);
-                                               }
-                                               outputFileHandle << endl;
-                                       }
-                                       n++;
                                }
-                       return 0;
+                       #else
+                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"));
+                               m->appendFiles((sumFileName + ".temp"), sumFileName);
+                               remove((sumFileName + ".temp").c_str());
+                               if (mult) {
+                                       m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+                                       remove((sumAllFileName + ".temp").c_str());
+                               }
+                       #endif
+                       return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "SummarySharedCommand", "process");
                exit(1);
        }
 }
+/**************************************************************************************************/
+//Worker that writes the summary rows for a single label (thisLookup[0]->getLabel()).
+//  thisLookup - the per-group vectors being compared; getGroup() supplies the names that are printed
+//  start, end - a row is written to sumFile for every pair (k, l) with start <= k < end and 0 <= l < k
+//  sumFile    - file that receives the pairwise rows
+//  sumAllFile - file that receives the single multi-group row; only opened when mult is set
+//Returns 1 as soon as m->control_pressed is seen after a calculation, 0 otherwise.
+int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile) { 
+       try {
+               
+               //calcs that can do multiple groups get one row covering every group in thisLookup
+               if (mult) {
+                       ofstream multiOut;
+                       m->openOutputFile(sumAllFile, multiOut);
+                       
+                       //join the group names with '-'
+                       string groupNames = "";
+                       for (int j = 0; j < thisLookup.size(); j++) {
+                               if (j != 0) { groupNames += "-"; }
+                               groupNames += thisLookup[j]->getGroup();
+                       }
+                       
+                       //the row starts with the label and the joined names
+                       multiOut << thisLookup[0]->getLabel() << '\t' << groupNames << '\t';
+                       
+                       for(int i=0;i<sumCalculators.size();i++){
+                               if (sumCalculators[i]->getMultiple() != true) { continue; }
+                               
+                               sumCalculators[i]->getValues(thisLookup);
+                               if (m->control_pressed) { multiOut.close(); return 1; }
+                               
+                               multiOut << '\t';
+                               sumCalculators[i]->print(multiOut);
+                       }
+                       multiOut << endl;
+                       multiOut.close();
+               }
+               
+               ofstream pairOut;
+               m->openOutputFile(sumFile, pairOut);
+               
+               for (int first = start; first < end; first++) {
+
+                       for (int second = 0; second < first; second++) {
+                               
+                               //the two groups handed to each calculator, in the order (first, second)
+                               vector<SharedRAbundVector*> groupPair;
+                               groupPair.push_back(thisLookup[first]);
+                               groupPair.push_back(thisLookup[second]);
+                               
+                               //label, then the two group names sorted to be alphanumeric
+                               string nameFirst = thisLookup[first]->getGroup();
+                               string nameSecond = thisLookup[second]->getGroup();
+                               bool swapNames = (nameFirst > nameSecond);
+                               pairOut << thisLookup[0]->getLabel() << '\t';
+                               pairOut << ((swapNames ? nameSecond : nameFirst) + '\t' + (swapNames ? nameFirst : nameSecond)) << '\t';
+                               
+                               for(int i=0;i<sumCalculators.size();i++) {
+
+                                       sumCalculators[i]->getValues(groupPair); //saves the calculator outputs
+                                       if (m->control_pressed) { pairOut.close(); return 1; }
+                                       
+                                       pairOut << '\t';
+                                       sumCalculators[i]->print(pairOut);
+                               }
+                               pairOut << endl;
+                       }
+               }
+               
+               pairOut.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SummarySharedCommand", "driver");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
 
-/***********************************************************/
index bcd8ab671f2727331179dbe3511d4acd72f72417..b99e0f037206496f788635fa7d8a6ce21958d56b 100644 (file)
@@ -28,6 +28,11 @@ public:
        void help();
        
 private:
+       struct linePair {       //the [start, end) slice of group indices that one process hands to driver()
+               int start;      //first index included
+               int end;        //one past the last index included
+       };
+       vector<linePair> lines;
        GlobalData* globaldata;
        ReadOTUFile* read;
        vector<Calculator*> sumCalculators;     
@@ -39,9 +44,10 @@ private:
        string label, calc, groups;
        vector<string>  Estimators, Groups, outputNames;
        vector<SharedRAbundVector*> lookup;
-       string outputFileName, format, outAllFileName, outputDir;
-       ofstream outputFileHandle, outAll;
-       int process(vector<SharedRAbundVector*>);
+       string format, outputDir;
+       int numGroups, processors;
+       int process(vector<SharedRAbundVector*>, string, string);
+       int driver(vector<SharedRAbundVector*>, int, int, string, string);
 
 };