paralellized rarefaction.single

author westcott <westcott>

Mon, 4 Oct 2010 18:03:32 +0000 (18:03 +0000)

committer westcott <westcott>

Mon, 4 Oct 2010 18:03:32 +0000 (18:03 +0000)
author westcott <westcott>
Mon, 4 Oct 2010 18:03:32 +0000 (18:03 +0000)
committer westcott <westcott>
Mon, 4 Oct 2010 18:03:32 +0000 (18:03 +0000)
diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp

index b724a68fb2ec34b63e740ae5bcaa3d9981af8ed8..d27a88141534c7bf9b8115eaccb55d7966096753 100644 (file)
--- a/classifyotucommand.cpp
+++ b/classifyotucommand.cpp
@@ -399,7 +399,7 @@ string ClassifyOtuCommand::findConsensusTaxonomy(int bin, ListVector* thisList,
                 }
                 
                                 
-               if (conTax == "") {  conTax = "unclassified;";  }
+               if (conTax == "") {  conTax = "no_consensus;";  }
                 
                 delete phylo;   
                 
diff --git a/display.h b/display.h

index 8c73b576936d5de9f9f092cfc788addaeef1529d..806773733d4704aa6432a71f21e2188602853b71 100644 (file)
--- a/display.h
+++ b/display.h
@@ -17,6 +17,8 @@ public:
         virtual void init(string) = 0;
         virtual void reset() = 0;
         virtual void close() = 0;
+       virtual void outputTempFiles(string) {}
+       virtual void inputTempFiles(string) {}
         virtual bool isCalcMultiple() = 0;
         virtual void setAll(bool){}
         virtual bool hasLciHci(){ return false; }
diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp

index 798ae64bfa88620fb6403f68842c50cee381ca25..ffffe4e4224c64f0b6b78d4019f8486b47d8511c 100644 (file)
--- a/filterseqscommand.cpp
+++ b/filterseqscommand.cpp
@@ -147,13 +147,14 @@ void FilterSeqsCommand::help(){
         try {
                                 
                 m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
-               m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
+               m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard, processors and vertical. \n");
                 m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
                 m->mothurOut("For example: fasta=abrecovery.fasta-amazon.fasta \n");
                 m->mothurOut("The trump parameter .... The default is ...\n");
                 m->mothurOut("The soft parameter .... The default is ....\n");
                 m->mothurOut("The hard parameter allows you to enter a file containing the filter you want to use.\n");
                 m->mothurOut("The vertical parameter removes columns where all sequences contain a gap character. The default is T.\n");
+               m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
                 m->mothurOut("The filter.seqs command should be in the following format: \n");
                 m->mothurOut("filter.seqs(fasta=yourFastaFile, trump=yourTrump) \n");
                 m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=.).\n");
diff --git a/mothur b/mothur

index 977c74f32988828c64f01fc6e0f1b1dcff4fbf46..648dccdfd360c3a212309afd720842a90afeddde 100755 (executable)

Binary files a/mothur and b/mothur differ
diff --git a/raredisplay.cpp b/raredisplay.cpp

index 5922822d57a5e0245129e63f30844cf746a88b41..b82127c51e6a64b50480a0f9b359cda3cd8e75f0 100644 (file)
--- a/raredisplay.cpp
+++ b/raredisplay.cpp
@@ -135,6 +135,55 @@ void RareDisplay::close(){
                 exit(1);
         }
  }
+/***********************************************************************/
+
+void RareDisplay::inputTempFiles(string filename){
+       try {
+               ifstream in;
+               m->openInputFile(filename, in);
+               
+               int thisIters;
+               in >> thisIters; m->gobble(in);
+               
+               for (int i = 0; i < seqs.size(); i++) {
+                       double tempresult, tempvar;
+                       in >> tempresult >> tempvar; m->gobble(in);
+                       
+                       //find weighted result
+                       results[i] = ((nIters * results[i]) + (thisIters * tempresult)) / (float)(nIters + thisIters);
+                       
+                       var[i] = ((nIters * var[i]) + (thisIters * tempvar)) / (float)(nIters + thisIters);
+               }
+               
+               in.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RareDisplay", "inputTempFiles");
+               exit(1);
+       }
+}
+
+/***********************************************************************/
+
+void RareDisplay::outputTempFiles(string filename){
+       try {
+               ofstream out;
+               m->openOutputFile(filename, out);
+               
+               out << nIters << endl;
+               
+               for (int i = 0; i < seqs.size(); i++) {
+                       out << results[i] << '\t' << var[i] << endl;
+               }
+               
+               out.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RareDisplay", "outputTempFiles");
+               exit(1);
+       }
+}
+
  
  /***********************************************************************/
  
diff --git a/raredisplay.h b/raredisplay.h

index 72147ade3a9a37e3b865cb5166473b65f8f7777d..a87596225b818c525fdd97156f5baf64b1dceb71 100644 (file)
--- a/raredisplay.h
+++ b/raredisplay.h
@@ -20,6 +20,9 @@ public:
         void close();
         bool isCalcMultiple() { return estimate->getMultiple(); }
         
+       void outputTempFiles(string);
+       void inputTempFiles(string);
+       
  private:
         Calculator* estimate;
         FileOutput* output;
diff --git a/rarefact.cpp b/rarefact.cpp

index 691d01708bab5b5bef4118ab38662819280b56de..d04885f4333e3f224bddbfa99aa9614e9f171b4c 100644 (file)
--- a/rarefact.cpp
+++ b/rarefact.cpp
@@ -23,6 +23,45 @@ int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){
                 int increment = 1;
                 if (percentFreq < 1.0) {  increment = numSeqs * percentFreq;  }
                 else { increment = percentFreq;  }      
+               
+               #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
+                                       driver(rcd, increment, nIters); 
+                               }else{
+                                       vector<int> procIters;
+                                       
+                                       int numItersPerProcessor = nIters / processors;
+                                       
+                                       //divide iters between processes
+                                       for (int i = 0; i < processors; i++) {
+                                               if(i == processors - 1){
+                                                       numItersPerProcessor = nIters - i * numItersPerProcessor;
+                                               }
+                                               procIters.push_back(numItersPerProcessor);
+                                       }
+                                       
+                                       createProcesses(procIters, rcd, increment); 
+                               }
+
+               #else
+                       driver(rcd, increment, nIters); 
+               #endif
+
+               for(int i=0;i<displays.size();i++){
+                       displays[i]->close();
+               }
+               
+               delete rcd;
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Rarefact", "getCurve");
+               exit(1);
+       }
+}
+/***********************************************************************/
+int Rarefact::driver(RarefactionCurveData* rcd, int increment, int nIters = 1000){
+       try {
                         
                 for(int iter=0;iter<nIters;iter++){
                 
@@ -64,18 +103,68 @@ int Rarefact::getCurve(float percentFreq = 0.01, int nIters = 1000){
                         delete rank;
                 }
  
-               for(int i=0;i<displays.size();i++){
-                       displays[i]->close();
-               }
-               delete rcd;
                 return 0;
         }
         catch(exception& e) {
-               m->errorOut(e, "Rarefact", "getCurve");
+               m->errorOut(e, "Rarefact", "driver");
                 exit(1);
         }
  }
+/**************************************************************************************************/
  
+int Rarefact::createProcesses(vector<int>& procIters, RarefactionCurveData* rcd, int increment) {
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 1;
+               int num = 0;
+               vector<int> processIDS;
+               
+               EstOutput results;
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               driver(rcd, increment, procIters[process]);
+                       
+                               //pass numSeqs to parent
+                               for(int i=0;i<displays.size();i++){
+                                       string tempFile = toString(getpid()) + toString(i) + ".rarefact.temp";
+                                       displays[i]->outputTempFiles(tempFile);
+                               }
+                               exit(0);
+                       }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
+               }
+               
+               driver(rcd, increment, procIters[0]);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<(processors-1);i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               //get data created by processes
+               for (int i=0;i<(processors-1);i++) { 
+                       for(int j=0;j<displays.size();j++){
+                               string s = toString(processIDS[i]) + toString(j) + ".rarefact.temp";
+                               displays[j]->inputTempFiles(s);
+                               remove(s.c_str());
+                       }
+               }
+               
+               return 0;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Rarefact", "createProcesses");
+               exit(1);
+       }
+}
  /***********************************************************************/
  
  int Rarefact::getSharedCurve(float percentFreq = 0.01, int nIters = 1000){
diff --git a/rarefact.h b/rarefact.h

index 20c19251b132dd8ba512ab1530206546eb0bbbec..7d1fab4b16b412b56f07225acac50936b0348e77 100644 (file)
--- a/rarefact.h
+++ b/rarefact.h
@@ -11,8 +11,8 @@
  class Rarefact {
         
  public:
-       Rarefact(OrderVector* o, vector<Display*> disp) :
-                       numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel())  { m = MothurOut::getInstance(); }
+       Rarefact(OrderVector* o, vector<Display*> disp, int p) :
+                       numSeqs(o->getNumSeqs()), order(o), displays(disp), label(o->getLabel()), processors(p)  { m = MothurOut::getInstance(); }
         Rarefact(vector<SharedRAbundVector*> shared, vector<Display*> disp) :
                                          lookup(shared), displays(disp) {  globaldata = GlobalData::getInstance(); m = MothurOut::getInstance(); }
  
@@ -24,11 +24,14 @@ private:
         GlobalData* globaldata;
         OrderVector* order;
         vector<Display*> displays;
-       int numSeqs, numGroupComb;
+       int numSeqs, numGroupComb, processors;
         string label;
         void mergeVectors(SharedRAbundVector*, SharedRAbundVector*);
         vector<SharedRAbundVector*> lookup; 
         MothurOut* m;
+       
+       int createProcesses(vector<int>&, RarefactionCurveData*, int);
+       int driver(RarefactionCurveData*, int, int);
  
  };
  
diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp

index 73d09b1206647cf832a06ec583e3a1d8d3956e2d..ae28d230b419db28c36272e57fc38ac7e2feff65 100644 (file)
--- a/rarefactcommand.cpp
+++ b/rarefactcommand.cpp
@@ -40,7 +40,7 @@ RareFactCommand::RareFactCommand(string option)  {
                 
                 else {
                         //valid paramters for this command
-                       string Array[] =  {"iters","freq","label","calc","abund","outputdir","inputdir"};
+                       string Array[] =  {"iters","freq","label","calc","abund","processors","outputdir","inputdir"};
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -93,6 +93,9 @@ RareFactCommand::RareFactCommand(string option)  {
                         
                         temp = validParameter.validFile(parameters, "iters", false);                    if (temp == "not found") { temp = "1000"; }
                         convert(temp, nIters); 
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
+                       convert(temp, processors);
                 }
                 
         }
@@ -107,8 +110,9 @@ void RareFactCommand::help(){
         try {
                 m->mothurOut("The rarefaction.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n");
                 m->mothurOut("The rarefaction.single command can be executed after a successful cluster command.  It will use the .list file from the output of the cluster.\n");
-               m->mothurOut("The rarefaction.single command parameters are label, iters, freq, calc and abund.  No parameters are required. \n");
+               m->mothurOut("The rarefaction.single command parameters are label, iters, freq, calc, processors and abund.  No parameters are required. \n");
                 m->mothurOut("The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n");
+               m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
                 m->mothurOut("The rarefaction.single command should be in the following format: \n");
                 m->mothurOut("rarefaction.single(label=yourLabel, iters=yourIters, freq=yourFreq, calc=yourEstimators).\n");
                 m->mothurOut("Example rarefaction.single(label=unique-.01-.03, iters=10000, freq=10, calc=sobs-rchao-race-rjack-rbootstrap-rshannon-rnpshannon-rsimpson).\n");
@@ -235,7 +239,7 @@ int RareFactCommand::execute(){
                                 if(allLines == 1 || labels.count(order->getLabel()) == 1){
                                         
                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
-                                       rCurve = new Rarefact(order, rDisplays);
+                                       rCurve = new Rarefact(order, rDisplays, processors);
                                         rCurve->getCurve(freq, nIters);
                                         delete rCurve;
                                         
@@ -250,7 +254,7 @@ int RareFactCommand::execute(){
                                         order = (input->getOrderVector(lastLabel));
                                         
                                         m->mothurOut(order->getLabel()); m->mothurOutEndLine();
-                                       rCurve = new Rarefact(order, rDisplays);
+                                       rCurve = new Rarefact(order, rDisplays, processors);
                                         rCurve->getCurve(freq, nIters);
                                         delete rCurve;
                                         
@@ -290,7 +294,7 @@ int RareFactCommand::execute(){
                                 order = (input->getOrderVector(lastLabel));
                                 
                                 m->mothurOut(order->getLabel()); m->mothurOutEndLine();
-                               rCurve = new Rarefact(order, rDisplays);
+                               rCurve = new Rarefact(order, rDisplays, processors);
                                 rCurve->getCurve(freq, nIters);
                                 delete rCurve;
                                 
diff --git a/rarefactcommand.h b/rarefactcommand.h

index f4492a2ec15e97a9cf52295202306689e2820961..3cf0ef7f4cf68a5976d67dfe9cc4c9a9f0c7d57c 100644 (file)
--- a/rarefactcommand.h
+++ b/rarefactcommand.h
@@ -36,7 +36,7 @@ private:
         InputData* input;
         ValidCalculators* validCalculator;
         Rarefact* rCurve;
-       int nIters, abund;
+       int nIters, abund, processors;
         float freq;
         
         bool abort, allLines;
diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp

index a50d147de23d7cbc882be909e4a78870a867d4c3..88df73d44c97b4311e1c1ce8b20a0f815b072f12 100644 (file)
--- a/unifracunweightedcommand.cpp
+++ b/unifracunweightedcommand.cpp
@@ -104,11 +104,12 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
  void UnifracUnweightedCommand::help(){
         try {
                 m->mothurOut("The unifrac.unweighted command can only be executed after a successful read.tree command.\n");
-               m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance and random.  No parameters are required.\n");
+               m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance, processors and random.  No parameters are required.\n");
                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
                 m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n");
                 m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n");
+               m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
                 m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n");
                 m->mothurOut("Example unifrac.unweighted(groups=A-B-C, iters=500).\n");
                 m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp

index e4a0a9d66e3128753371dca2d90554c43c4b59a9..e8add90a259bac0da04bf2b8d52be247e9fc1fd1 100644 (file)
--- a/unifracweightedcommand.cpp
+++ b/unifracweightedcommand.cpp
@@ -96,11 +96,12 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
  void UnifracWeightedCommand::help(){
         try {
                 m->mothurOut("The unifrac.weighted command can only be executed after a successful read.tree command.\n");
-               m->mothurOut("The unifrac.weighted command parameters are groups, iters, distance and random.  No parameters are required.\n");
+               m->mothurOut("The unifrac.weighted command parameters are groups, iters, distance, processors and random.  No parameters are required.\n");
                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n");
                 m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
                 m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n");
                 m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning don't compare your trees with randomly generated trees.\n");
+               m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
                 m->mothurOut("The unifrac.weighted command should be in the following format: unifrac.weighted(groups=yourGroups, iters=yourIters).\n");
                 m->mothurOut("Example unifrac.weighted(groups=A-B-C, iters=500).\n");
                 m->mothurOut("The default value for groups is all the groups in your groupfile, and iters is 1000.\n");
author	westcott <westcott>
	Mon, 4 Oct 2010 18:03:32 +0000 (18:03 +0000)
committer	westcott <westcott>
	Mon, 4 Oct 2010 18:03:32 +0000 (18:03 +0000)
classifyotucommand.cpp		patch \| blob \| history
display.h		patch \| blob \| history
filterseqscommand.cpp		patch \| blob \| history
mothur		patch \| blob \| history
raredisplay.cpp		patch \| blob \| history
raredisplay.h		patch \| blob \| history
rarefact.cpp		patch \| blob \| history
rarefact.h		patch \| blob \| history
rarefactcommand.cpp		patch \| blob \| history
rarefactcommand.h		patch \| blob \| history
unifracunweightedcommand.cpp		patch \| blob \| history
unifracweightedcommand.cpp		patch \| blob \| history