From 7588ff51d365aad66f455694afb90b6fd3e6639a Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 15 Sep 2010 09:51:33 +0000 Subject: [PATCH] paralellized summary.shared --- heatmapcommand.cpp | 2 +- makefile | 2 +- pcacommand.cpp | 8 +- summarycommand.cpp | 4 +- summarysharedcommand.cpp | 246 ++++++++++++++++++++++++++------------- summarysharedcommand.h | 12 +- 6 files changed, 183 insertions(+), 91 deletions(-) diff --git a/heatmapcommand.cpp b/heatmapcommand.cpp index c634532..fb8e021 100644 --- a/heatmapcommand.cpp +++ b/heatmapcommand.cpp @@ -109,7 +109,7 @@ void HeatMapCommand::help(){ m->mothurOut("The group names are separated by dashes. The label parameter allows you to select what distance levels you would like a heatmap created for, and are also separated by dashes.\n"); m->mothurOut("The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n"); m->mothurOut("The heatmap.bin command should be in the following format: heatmap.bin(groups=yourGroups, sorted=yourSorted, label=yourLabels).\n"); - m->mothurOut("Example heatmap.bin(groups=A-B-C, sorted=F, scale=log10).\n"); + m->mothurOut("Example heatmap.bin(groups=A-B-C, sorted=none, scale=log10).\n"); m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n"); m->mothurOut("The default value for scale is log10; your other options are log2 and linear.\n"); m->mothurOut("The heatmap.bin command outputs a .svg file for each label you specify.\n"); diff --git a/makefile b/makefile index 278a3ad..6f194bb 100644 --- a/makefile +++ b/makefile @@ -13,7 +13,7 @@ CXXFLAGS += -O3 -MOTHUR_FILES = "\"Enter_your_default_path_here\"" +MOTHUR_FILES = "\"../Release\"" RELEASE_DATE = "\"9/9/2010\"" VERSION = "\"1.13.0\"" diff --git a/pcacommand.cpp b/pcacommand.cpp index c982d75..51e3c31 100644 --- a/pcacommand.cpp +++ b/pcacommand.cpp @@ -512,15 +512,15 @@ void PCACommand::output(string fnameRoot, vector name_list, vectorisTrue(temp); @@ -123,7 +123,7 @@ void SummaryCommand::help(){ m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n"); validCalculator->printCalc("summary", cout); m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"); - m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=False).\n"); + m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=true).\n"); m->mothurOut("The label parameter is used to analyze specific labels in your input.\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n"); } diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index f44a2cd..e43c997 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -48,7 +48,7 @@ SummarySharedCommand::SummarySharedCommand(string option) { else { //valid paramters for this command - string Array[] = {"label","calc","groups","all","outputdir","inputdir"}; + string Array[] = {"label","calc","groups","all","outputdir","inputdir", "processors"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -104,6 +104,9 @@ SummarySharedCommand::SummarySharedCommand(string option) { string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; } all = m->isTrue(temp); + temp = validParameter.validFile(parameters, "processors", false); if(temp == "not found"){ temp = "1"; } + convert(temp, processors); + if (abort == false) { validCalculator = new ValidCalculators(); @@ -157,10 +160,6 @@ SummarySharedCommand::SummarySharedCommand(string option) { } } - outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary"; - m->openOutputFile(outputFileName, outputFileHandle); - outputNames.push_back(outputFileName); - mult = false; } } @@ -211,6 +210,9 @@ int SummarySharedCommand::execute(){ if (abort == true) { return 0; } + ofstream outputFileHandle, outAll; + string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary"; + //if the users entered no valid calculators don't execute command if (sumCalculators.size() == 0) { return 0; } //check if any calcs can do multiples @@ -230,17 +232,22 @@ int SummarySharedCommand::execute(){ lookup = input->getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); + /******************************************************/ + //output headings for files + /******************************************************/ //output estimator names as column headers + m->openOutputFile(outputFileName, outputFileHandle); outputFileHandle << "label" <<'\t' << "comparison" << '\t'; for(int i=0;igetName(); if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; } } outputFileHandle << endl; + outputFileHandle.close(); //create file and put column headers for multiple groups file + string outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary"); if (mult == true) { - outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary"); m->openOutputFile(outAllFileName, outAll); outputNames.push_back(outAllFileName); @@ -251,6 +258,7 @@ int SummarySharedCommand::execute(){ } } outAll << endl; + outAll.close(); } if (lookup.size() < 2) { @@ -258,28 +266,38 @@ int SummarySharedCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } //close files and clean up - outputFileHandle.close(); remove(outputFileName.c_str()); - if (mult == true) { outAll.close(); remove(outAllFileName.c_str()); } + remove(outputFileName.c_str()); + if (mult == true) { remove(outAllFileName.c_str()); } return 0; //if you only have 2 groups you don't need a .sharedmultiple file }else if ((lookup.size() == 2) && (mult == true)) { mult = false; - outAll.close(); remove(outAllFileName.c_str()); outputNames.pop_back(); } if (m->control_pressed) { - if (mult) { outAll.close(); remove(outAllFileName.c_str()); } - outputFileHandle.close(); remove(outputFileName.c_str()); + if (mult) { remove(outAllFileName.c_str()); } + remove(outputFileName.c_str()); delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for(int i=0;iGroups.clear(); return 0; } - - + /******************************************************/ + + + /******************************************************/ + //comparison breakup to be used by different processes later + numGroups = globaldata->Groups.size(); + lines.resize(processors); + for (int i = 0; i < processors; i++) { + lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups); + lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups); + } + /******************************************************/ + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set processedLabels; set userLabels = labels; @@ -287,8 +305,8 @@ int SummarySharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { - if (mult) { outAll.close(); remove(outAllFileName.c_str()); } - outputFileHandle.close(); remove(outputFileName.c_str()); + if (mult) { remove(outAllFileName.c_str()); } + remove(outputFileName.c_str()); delete input; globaldata->ginput = NULL; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for(int i=0;igetLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - process(lookup); + process(lookup, outputFileName, outAllFileName); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -312,7 +330,7 @@ int SummarySharedCommand::execute(){ lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - process(lookup); + process(lookup, outputFileName, outAllFileName); processedLabels.insert(lookup[0]->getLabel()); userLabels.erase(lookup[0]->getLabel()); @@ -330,8 +348,8 @@ int SummarySharedCommand::execute(){ } if (m->control_pressed) { - if (mult) { outAll.close(); remove(outAllFileName.c_str()); } - outputFileHandle.close(); remove(outputFileName.c_str()); + if (mult) { remove(outAllFileName.c_str()); } + remove(outputFileName.c_str()); delete input; globaldata->ginput = NULL; for(int i=0;iGroups.clear(); @@ -357,7 +375,7 @@ int SummarySharedCommand::execute(){ lookup = input->getSharedRAbundVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); - process(lookup); + process(lookup, outputFileName, outAllFileName); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } @@ -365,10 +383,6 @@ int SummarySharedCommand::execute(){ //reset groups parameter globaldata->Groups.clear(); - //close files - outputFileHandle.close(); - if (mult == true) { outAll.close(); } - for(int i=0;iginput = NULL; @@ -380,7 +394,8 @@ int SummarySharedCommand::execute(){ m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); - for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } + m->mothurOut(outputFileName); m->mothurOutEndLine(); + if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine(); } m->mothurOutEndLine(); return 0; @@ -392,72 +407,143 @@ int SummarySharedCommand::execute(){ } /***********************************************************/ -int SummarySharedCommand::process(vector thisLookup) { +int SummarySharedCommand::process(vector thisLookup, string sumFileName, string sumAllFileName) { try { - //loop through calculators and add to file all for all calcs that can do mutiple groups - if (mult == true) { - //output label - outAll << thisLookup[0]->getLabel() << '\t'; - - //output groups names - string outNames = ""; - for (int j = 0; j < thisLookup.size(); j++) { - outNames += thisLookup[j]->getGroup() + "-"; + + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if(processors == 1){ + driver(thisLookup, 0, numGroups, sumFileName, sumAllFileName); + m->appendFiles((sumFileName + ".temp"), sumFileName); + remove((sumFileName + ".temp").c_str()); + if (mult) { + m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); + remove((sumAllFileName + ".temp").c_str()); + } + }else{ + int process = 0; + vector processIDS; + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); + process++; + }else if (pid == 0){ + driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp"); + exit(0); + }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } + } + + //force parent to wait until all the processes are done + for (int i = 0; i < processIDS.size(); i++) { + int temp = processIDS[i]; + wait(&temp); } - outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-'; - outAll << outNames << '\t'; - for(int i=0;igetMultiple() == true) { - sumCalculators[i]->getValues(thisLookup); - - if (m->control_pressed) { return 1; } - - outAll << '\t'; - sumCalculators[i]->print(outAll); + for (int i = 0; i < processIDS.size(); i++) { + m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName); + remove((sumFileName + toString(processIDS[i]) + ".temp").c_str()); + if (mult) { + if (i == 0) { m->appendFiles((sumAllFileName + toString(processIDS[i]) + ".temp"), sumAllFileName); } + remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str()); } } - outAll << endl; - } - - int n = 1; - vector subset; - for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to compare - - for (int l = n; l < thisLookup.size(); l++) { - - outputFileHandle << thisLookup[0]->getLabel() << '\t'; - - subset.clear(); //clear out old pair of sharedrabunds - //add new pair of sharedrabunds - subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); - - //sort groups to be alphanumeric - if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) { - outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups - }else{ - outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups - } - - for(int i=0;igetValues(subset); //saves the calculator outputs - - if (m->control_pressed) { return 1; } - - outputFileHandle << '\t'; - sumCalculators[i]->print(outputFileHandle); - } - outputFileHandle << endl; - } - n++; } - return 0; + #else + driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp")); + m->appendFiles((sumFileName + ".temp"), sumFileName); + remove((sumFileName + ".temp").c_str()); + if (mult) { + m->appendFiles((sumAllFileName + ".temp"), sumAllFileName); + remove((sumAllFileName + ".temp").c_str()); + } + #endif } catch(exception& e) { m->errorOut(e, "SummarySharedCommand", "process"); exit(1); } } +/**************************************************************************************************/ +int SummarySharedCommand::driver(vector thisLookup, int start, int end, string sumFile, string sumAllFile) { + try { + + //loop through calculators and add to file all for all calcs that can do mutiple groups + if (mult == true) { + ofstream outAll; + m->openOutputFile(sumAllFile, outAll); + + //output label + outAll << thisLookup[0]->getLabel() << '\t'; + + //output groups names + string outNames = ""; + for (int j = 0; j < thisLookup.size(); j++) { + outNames += thisLookup[j]->getGroup() + "-"; + } + outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-'; + outAll << outNames << '\t'; + + for(int i=0;igetMultiple() == true) { + sumCalculators[i]->getValues(thisLookup); + + if (m->control_pressed) { outAll.close(); return 1; } + + outAll << '\t'; + sumCalculators[i]->print(outAll); + } + } + outAll << endl; + outAll.close(); + } + + ofstream outputFileHandle; + m->openOutputFile(sumFile, outputFileHandle); + + vector subset; + for (int k = start; k < end; k++) { // pass cdd each set of groups to compare + + for (int l = 0; l < k; l++) { + + outputFileHandle << thisLookup[0]->getLabel() << '\t'; + + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); + + //sort groups to be alphanumeric + if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) { + outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups + }else{ + outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups + } + + for(int i=0;igetValues(subset); //saves the calculator outputs + + if (m->control_pressed) { outputFileHandle.close(); return 1; } + + outputFileHandle << '\t'; + sumCalculators[i]->print(outputFileHandle); + } + outputFileHandle << endl; + } + } + + outputFileHandle.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SummarySharedCommand", "driver"); + exit(1); + } +} +/**************************************************************************************************/ + -/***********************************************************/ diff --git a/summarysharedcommand.h b/summarysharedcommand.h index bcd8ab6..b99e0f0 100644 --- a/summarysharedcommand.h +++ b/summarysharedcommand.h @@ -28,6 +28,11 @@ public: void help(); private: + struct linePair { + int start; + int end; + }; + vector lines; GlobalData* globaldata; ReadOTUFile* read; vector sumCalculators; @@ -39,9 +44,10 @@ private: string label, calc, groups; vector Estimators, Groups, outputNames; vector lookup; - string outputFileName, format, outAllFileName, outputDir; - ofstream outputFileHandle, outAll; - int process(vector); + string format, outputDir; + int numGroups, processors; + int process(vector, string, string); + int driver(vector, int, int, string, string); }; -- 2.39.2