git.donarmstrong.com Git - mothur.git/blobdiff - summarysharedcommand.cpp
added distance option to summary.shared
[mothur.git] / summarysharedcommand.cpp
index 1046c1ba942f4f0dad58226397707bf59309a953..3069a6ffbd8a43f1a0d3c956d3d3190769404a07 100644 (file)
@@ -48,7 +48,7 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"label","calc","groups","all","outputdir","inputdir"};
+                       string Array[] =  {"label","calc","groups","all","outputdir","distance","inputdir", "processors"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -104,6 +104,12 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
                        all = m->isTrue(temp);
                        
+                       temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
+                       createPhylip = m->isTrue(temp);
+                       
+                       temp = validParameter.validFile(parameters, "processors", false);       if(temp == "not found"){        temp = "1"; }
+                       convert(temp, processors); 
+                       
                        if (abort == false) {
                        
                                validCalculator = new ValidCalculators();
@@ -157,10 +163,6 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                        }
                                }
                                
-                               outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
-                               m->openOutputFile(outputFileName, outputFileHandle);
-                               outputNames.push_back(outputFileName);
-                               
                                mult = false;
                        }
                }
@@ -211,6 +213,9 @@ int SummarySharedCommand::execute(){
        
                if (abort == true) { return 0; }
                
+               ofstream outputFileHandle, outAll;
+               string outputFileName = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + "shared.summary";
+               
                //if the users entered no valid calculators don't execute command
                if (sumCalculators.size() == 0) { return 0; }
                //check if any calcs can do multiples
@@ -229,18 +234,23 @@ int SummarySharedCommand::execute(){
                input = globaldata->ginput;
                lookup = input->getSharedRAbundVectors();
                string lastLabel = lookup[0]->getLabel();
-               
+       
+               /******************************************************/
+               //output headings for files
+               /******************************************************/
                //output estimator names as column headers
+               m->openOutputFile(outputFileName, outputFileHandle);
                outputFileHandle << "label" <<'\t' << "comparison" << '\t'; 
                for(int i=0;i<sumCalculators.size();i++){
                        outputFileHandle << '\t' << sumCalculators[i]->getName();
-                       if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\tlci\thci";  }
+                       if (sumCalculators[i]->getCols() == 3) {   outputFileHandle << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci";  }
                }
                outputFileHandle << endl;
+               outputFileHandle.close();
                
                //create file and put column headers for multiple groups file
+               string outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
                if (mult == true) {
-                       outAllFileName = ((m->getRootName(globaldata->inputFileName)) + "sharedmultiple.summary");
                        m->openOutputFile(outAllFileName, outAll);
                        outputNames.push_back(outAllFileName);
                        
@@ -251,6 +261,7 @@ int SummarySharedCommand::execute(){
                                }
                        }
                        outAll << endl;
+                       outAll.close();
                }
                
                if (lookup.size() < 2) { 
@@ -258,28 +269,38 @@ int SummarySharedCommand::execute(){
                        for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                        
                        //close files and clean up
-                       outputFileHandle.close();  remove(outputFileName.c_str());
-                       if (mult == true) {  outAll.close();  remove(outAllFileName.c_str());  }
+                       remove(outputFileName.c_str());
+                       if (mult == true) { remove(outAllFileName.c_str());  }
                        return 0;
                //if you only have 2 groups you don't need a .sharedmultiple file
                }else if ((lookup.size() == 2) && (mult == true)) { 
                        mult = false;
-                       outAll.close();  
                        remove(outAllFileName.c_str());
                        outputNames.pop_back();
                }
                
                if (m->control_pressed) {
-                       if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
-                       outputFileHandle.close(); remove(outputFileName.c_str()); 
+                       if (mult) {  remove(outAllFileName.c_str());  }
+                       remove(outputFileName.c_str()); 
                        delete input; globaldata->ginput = NULL;
                        for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                        for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
                        globaldata->Groups.clear(); 
                        return 0;
                }
-                                                               
-                                                                                                               
+               /******************************************************/
+               
+               
+               /******************************************************/
+               //comparison breakup to be used by different processes later
+               numGroups = globaldata->Groups.size();
+               lines.resize(processors);
+               for (int i = 0; i < processors; i++) {
+                       lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
+                       lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups);
+               }               
+               /******************************************************/
+               
                //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                set<string> processedLabels;
                set<string> userLabels = labels;
@@ -287,8 +308,8 @@ int SummarySharedCommand::execute(){
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        if (m->control_pressed) {
-                               if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
-                               outputFileHandle.close(); remove(outputFileName.c_str()); 
+                               if (mult) {  remove(outAllFileName.c_str());  }
+                               remove(outputFileName.c_str()); 
                                delete input; globaldata->ginput = NULL;
                                for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                                for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
@@ -299,7 +320,7 @@ int SummarySharedCommand::execute(){
                
                        if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               process(lookup);
+                               process(lookup, outputFileName, outAllFileName);
                                
                                processedLabels.insert(lookup[0]->getLabel());
                                userLabels.erase(lookup[0]->getLabel());
@@ -312,7 +333,7 @@ int SummarySharedCommand::execute(){
                                        lookup = input->getSharedRAbundVectors(lastLabel);
 
                                        m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                                       process(lookup);
+                                       process(lookup, outputFileName, outAllFileName);
                                        
                                        processedLabels.insert(lookup[0]->getLabel());
                                        userLabels.erase(lookup[0]->getLabel());
@@ -330,8 +351,8 @@ int SummarySharedCommand::execute(){
                }
                
                if (m->control_pressed) {
-                       if (mult) { outAll.close();  remove(outAllFileName.c_str());  }
-                       outputFileHandle.close(); remove(outputFileName.c_str()); 
+                       if (mult) { remove(outAllFileName.c_str());  }
+                       remove(outputFileName.c_str()); 
                        delete input; globaldata->ginput = NULL;
                        for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
                        globaldata->Groups.clear(); 
@@ -357,7 +378,7 @@ int SummarySharedCommand::execute(){
                                lookup = input->getSharedRAbundVectors(lastLabel);
 
                                m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
-                               process(lookup);
+                               process(lookup, outputFileName, outAllFileName);
                                for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
                }
                
@@ -365,10 +386,6 @@ int SummarySharedCommand::execute(){
                //reset groups parameter
                globaldata->Groups.clear();  
                
-               //close files
-               outputFileHandle.close();
-               if (mult == true) {  outAll.close();  }
-               
                for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
                delete input;  globaldata->ginput = NULL;
                
@@ -380,6 +397,8 @@ int SummarySharedCommand::execute(){
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        }
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
                m->mothurOutEndLine();
 
@@ -392,71 +411,236 @@ int SummarySharedCommand::execute(){
 }
 
 /***********************************************************/
-int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup) {
+int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) { // handles one label: runs driver() over the group comparisons, appends the temp output to sumFileName / sumAllFileName, and writes one .dist file per calculator when createPhylip is set
        try {
-                               //loop through calculators and add to file all for all calcs that can do mutiple groups
-                               if (mult == true) {
-                                       //output label
-                                       outAll << thisLookup[0]->getLabel() << '\t';
-                                       
-                                       //output groups names
-                                       string outNames = "";
-                                       for (int j = 0; j < thisLookup.size(); j++) {
-                                               outNames += thisLookup[j]->getGroup() +  "-";
+                       vector< vector<seqDist> > calcDists;  //one inner vector per calculator, holding a seqDist result for each pair of groups compared
+                       calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
+                               
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               if(processors == 1){
+                                       driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+                                       m->appendFiles((sumFileName + ".temp"), sumFileName);
+                                       remove((sumFileName + ".temp").c_str());
+                                       if (mult) {
+                                               m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+                                               remove((sumAllFileName + ".temp").c_str());
                                        }
-                                       outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
-                                       outAll << outNames << '\t';
-                                       
-                                       for(int i=0;i<sumCalculators.size();i++){
-                                               if (sumCalculators[i]->getMultiple() == true) { 
-                                                       sumCalculators[i]->getValues(thisLookup);
+                               }else{
+                                       int process = 1;
+                                       vector<int> processIDS;
+               
+                                       //loop through and create all the processes you want
+                                       while (process != processors) {
+                                               int pid = fork();
+                                               
+                                               if (pid > 0) {
+                                                       processIDS.push_back(pid); 
+                                                       process++;
+                                               }else if (pid == 0){
+                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   //child: handle its own slice of rows, writing to temp files named with its pid
                                                        
-                                                       if (m->control_pressed) { return 1; }
+                                                       //only do this if you want a distance file
+                                                       if (createPhylip) {
+                                                               string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+                                                               ofstream outtemp;
+                                                               m->openOutputFile(tempdistFileName, outtemp);
+                                                               
+                                                               for (int i = 0; i < calcDists.size(); i++) {
+                                                                       outtemp << calcDists[i].size() << endl;
+                                                                       
+                                                                       for (int j = 0; j < calcDists[i].size(); j++) {
+                                                                               outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                                                                       }
+                                                               }
+                                                               outtemp.close();
+                                                       }
                                                        
-                                                       outAll << '\t';
-                                                       sumCalculators[i]->print(outAll);
-                                               }
+                                                       exit(0); //child is done; the parent reads its temp files after waiting below
+                                               }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); } // NOTE(review): fork failure exits with status 0 although this is an error path - confirm intended
                                        }
-                                       outAll << endl;
-                               }
-       
-                               int n = 1; 
-                               vector<SharedRAbundVector*> subset;
-                               for (int k = 0; k < (thisLookup.size() - 1); k++) { // pass cdd each set of groups to commpare
-                                       for (int l = n; l < thisLookup.size(); l++) {
-                                               
-                                               outputFileHandle << thisLookup[0]->getLabel() << '\t';
+                                       
+                                       //parent do your part
+                                       driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                                       m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+                                       remove((sumFileName + toString(getpid()) + ".temp").c_str());
+                                       if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); } // NOTE(review): unlike the processors == 1 branch, the parent's sumAll temp file is not removed after appending
                                                
-                                               subset.clear(); //clear out old pair of sharedrabunds
-                                               //add new pair of sharedrabunds
-                                               subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
+                                       //force parent to wait until all the processes are done
+                                       for (int i = 0; i < processIDS.size(); i++) {
+                                               int temp = processIDS[i];
+                                               wait(&temp); //temp is passed as wait()'s status out-parameter; one wait() call is made per child created
+                                       }
+                                       
+                                       for (int i = 0; i < processIDS.size(); i++) {
+                                               m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                                               remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               if (mult) {     remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());   } //children's sumAll temp files are deleted without being appended; only the parent's copy was appended above
                                                
-                                               //sort groups to be alphanumeric
-                                               if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
-                                                       outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
-                                               }else{
-                                                       outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
+                                               if (createPhylip) {
+                                                       string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
+                                                       ifstream intemp;
+                                                       m->openInputFile(tempdistFileName, intemp);
+                                                       
+                                                       for (int i = 0; i < calcDists.size(); i++) { //note: this i (calculator index) shadows the outer loop's i (child index)
+                                                               int size = 0;
+                                                               intemp >> size; m->gobble(intemp);
+                                                                       
+                                                               for (int j = 0; j < size; j++) {
+                                                                       int seq1 = 0;
+                                                                       int seq2 = 0;
+                                                                       float dist = 1.0;
+                                                                       
+                                                                       intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
+                                                                       
+                                                                       seqDist tempDist(seq1, seq2, dist);
+                                                                       calcDists[i].push_back(tempDist);
+                                                               }
+                                                       }
+                                                       intemp.close();
+                                                       remove(tempdistFileName.c_str());
                                                }
-                                               
-                                               for(int i=0;i<sumCalculators.size();i++) {
+                                       }
 
-                                                       sumCalculators[i]->getValues(subset); //saves the calculator outputs
-                                                       
-                                                       if (m->control_pressed) { return 1; }
-                                                       
-                                                       outputFileHandle << '\t';
-                                                       sumCalculators[i]->print(outputFileHandle);
+                               }
+                       #else
+                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists);
+                               m->appendFiles((sumFileName + ".temp"), sumFileName);
+                               remove((sumFileName + ".temp").c_str());
+                               if (mult) {
+                                       m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+                                       remove((sumAllFileName + ".temp").c_str());
+                               }
+                       #endif
+                       
+                       if (createPhylip) {
+                               for (int i = 0; i < calcDists.size(); i++) {
+                                       if (m->control_pressed) { break; }
+                               
+                                       string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+                                       outputNames.push_back(distFileName);
+                                       ofstream outDist;
+                                       m->openOutputFile(distFileName, outDist);
+                                       outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+                                       
+                                       //initialize matrix
+                                       vector< vector<float> > matrix; //square matrix to represent the distance
+                                       matrix.resize(thisLookup.size());
+                                       for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                                       
+                                       
+                                       for (int j = 0; j < calcDists[i].size(); j++) {
+                                               int row = calcDists[i][j].seq1;
+                                               int column = calcDists[i][j].seq2;
+                                               float dist = calcDists[i][j].dist;
+                                               
+                                               matrix[row][column] = dist;
+                                               matrix[column][row] = dist;
+                                       }
+                                       
+                                       //output to file
+                                       outDist << thisLookup.size() << endl;
+                                       for (int r=0; r<thisLookup.size(); r++) { 
+                                               //output name
+                                               string name = thisLookup[r]->getGroup();
+                                               if (name.length() < 10) { //pad with spaces to make compatible
+                                                       while (name.length() < 10) {  name += " ";  }
                                                }
-                                               outputFileHandle << endl;
+                                               outDist << name << '\t';
+                                       
+                                               //output distances
+                                               for (int l = 0; l < r; l++) {   outDist  << matrix[r][l] << '\t';  }
+                                               outDist << endl;
                                        }
-                                       n++;
+                                       
+                                       outDist.close();
                                }
-                       return 0;
+                       } // NOTE(review): the old "return 0;" is removed just above and nothing replaces it, so this int function reaches the end of the try block without returning a value
        }
        catch(exception& e) {
                m->errorOut(e, "SummarySharedCommand", "process");
                exit(1);
        }
 }
+/**************************************************************************************************/
+int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { // runs the calculators for rows start..end-1 of the pairwise group comparisons, writing to sumFile (and to sumAllFile when mult is set) and appending each pair's result to calcDists; returns 1 if control_pressed is seen, otherwise 0
+       try {
+               
+               //when mult is set, write one line to sumAllFile with the values of every calc that can do multiple groups; this part does not use start/end
+               if (mult == true) {
+                       ofstream outAll;
+                       m->openOutputFile(sumAllFile, outAll);
+                       
+                       //output label
+                       outAll << thisLookup[0]->getLabel() << '\t';
+                       
+                       //output groups names
+                       string outNames = "";
+                       for (int j = 0; j < thisLookup.size(); j++) {
+                               outNames += thisLookup[j]->getGroup() +  "-";
+                       }
+                       outNames = outNames.substr(0, outNames.length()-1); //rip off extra '-';
+                       outAll << outNames << '\t';
+                       
+                       for(int i=0;i<sumCalculators.size();i++){
+                               if (sumCalculators[i]->getMultiple() == true) { 
+                                       sumCalculators[i]->getValues(thisLookup);
+                                       
+                                       if (m->control_pressed) { outAll.close(); return 1; }
+                                       
+                                       outAll << '\t';
+                                       sumCalculators[i]->print(outAll);
+                               }
+                       }
+                       outAll << endl;
+                       outAll.close();
+               }
+               
+               ofstream outputFileHandle;
+               m->openOutputFile(sumFile, outputFileHandle);
+               
+               vector<SharedRAbundVector*> subset;
+               for (int k = start; k < end; k++) { // compare group k with every lower-indexed group l
+
+                       for (int l = 0; l < k; l++) {
+                               
+                               outputFileHandle << thisLookup[0]->getLabel() << '\t';
+                               
+                               subset.clear(); //clear out old pair of sharedrabunds
+                               //add new pair of sharedrabunds
+                               subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); 
+                               
+                               //sort groups to be alphanumeric
+                               if (thisLookup[k]->getGroup() > thisLookup[l]->getGroup()) {
+                                       outputFileHandle << (thisLookup[l]->getGroup() +'\t' + thisLookup[k]->getGroup()) << '\t'; //print out groups
+                               }else{
+                                       outputFileHandle << (thisLookup[k]->getGroup() +'\t' + thisLookup[l]->getGroup()) << '\t'; //print out groups
+                               }
+                               
+                               for(int i=0;i<sumCalculators.size();i++) {
+
+                                       vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
+                                       
+                                       if (m->control_pressed) { outputFileHandle.close(); return 1; }
+                                       
+                                       outputFileHandle << '\t';
+                                       sumCalculators[i]->print(outputFileHandle);
+                                       
+                                       seqDist temp(l, k, tempdata[0]); //record only the first returned value for this pair of group indices
+                                       calcDists[i].push_back(temp);
+                               }
+                               outputFileHandle << endl;
+                       }
+               }
+               
+               outputFileHandle.close();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SummarySharedCommand", "driver");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
 
-/***********************************************************/