git.donarmstrong.com Git - mothur.git/blobdiff - summarysharedcommand.cpp
fixing helps
[mothur.git] / summarysharedcommand.cpp
index e43c99708cd81a231286c10dc1c12d9035059fce..32a9f7c2ebee2019ed3421eba21d42c4197087d0 100644 (file)
 #include "sharedjackknife.h"
 #include "whittaker.h"
 
-
+//**********************************************************************************************************************
+vector<string> SummarySharedCommand::getValidParameters(){     // Returns the names of the parameters this command accepts.
+       try {
+               string Array[] =  {"label","calc","groups","all","outputdir","distance","inputdir", "processors"}; // same list the SummarySharedCommand(string option) constructor builds as its valid parameters
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // copy the C array into a vector; the sizeof ratio is the element count
+               return myArray;
+       }
+       catch(exception& e) { // report the exception via m->errorOut with this class/method name, then terminate the process
+               m->errorOut(e, "SummarySharedCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+SummarySharedCommand::SummarySharedCommand(){  // Default constructor: sets abort and registers an empty "summary" output type; parses no options.
+       try {
+               abort = true; // NOTE(review): how abort is consumed is not visible in this diff; presumably execute() checks it - confirm
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["summary"] = tempOutNames; // the "summary" entry starts out with an empty list of file names
+       }
+       catch(exception& e) { // report the exception via m->errorOut with this class/method name, then terminate the process
+               m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> SummarySharedCommand::getRequiredParameters(){  // Returns the required parameter names; the list is empty, matching the help text "No parameters are required".
+       try {
+               vector<string> myArray; // intentionally left empty
+               return myArray;
+       }
+       catch(exception& e) { // report the exception via m->errorOut with this class/method name, then terminate the process
+               m->errorOut(e, "SummarySharedCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> SummarySharedCommand::getRequiredFiles(){       // Returns the names of the file types this command requires; the only entry is "shared".
+       try {
+               string Array[] =  {"shared"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string))); // copy the C array into a vector; the sizeof ratio is the element count (1 here)
+               return myArray;
+       }
+       catch(exception& e) { // report the exception via m->errorOut with this class/method name, then terminate the process
+               m->errorOut(e, "SummarySharedCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
 //**********************************************************************************************************************
 
 SummarySharedCommand::SummarySharedCommand(string option)  {
@@ -48,7 +95,7 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"label","calc","groups","all","outputdir","inputdir", "processors"};
+                       string Array[] =  {"label","calc","groups","all","outputdir","distance","inputdir", "processors"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -66,6 +113,10 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                 m->mothurOut("You must read a list and a group, or a shared before you can use the summary.shared command."); m->mothurOutEndLine(); abort = true; 
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["summary"] = tempOutNames;
+                       
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
                                outputDir = ""; 
@@ -104,6 +155,9 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                        string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
                        all = m->isTrue(temp);
                        
+                       temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
+                       createPhylip = m->isTrue(temp);
+                       
                        temp = validParameter.validFile(parameters, "processors", false);       if(temp == "not found"){        temp = "1"; }
                        convert(temp, processors); 
                        
@@ -175,13 +229,14 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
 void SummarySharedCommand::help(){
        try {
                m->mothurOut("The summary.shared command can only be executed after a successful read.otu command.\n");
-               m->mothurOut("The summary.shared command parameters are label, calc and all.  No parameters are required.\n");
+               m->mothurOut("The summary.shared command parameters are label, calc, distance and all.  No parameters are required.\n");
                m->mothurOut("The summary.shared command should be in the following format: \n");
                m->mothurOut("summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n");
                m->mothurOut("Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n");
                validCalculator->printCalc("sharedsummary", cout);
                m->mothurOut("The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n");
                m->mothurOut("The default value for groups is all the groups in your groupfile.\n");
+               m->mothurOut("The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n");
                m->mothurOut("The label parameter is used to analyze specific labels in your input.\n");
                m->mothurOut("The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n");
                m->mothurOut("If you use sharedchao and run into memory issues, set all to false. \n");
@@ -231,7 +286,7 @@ int SummarySharedCommand::execute(){
                input = globaldata->ginput;
                lookup = input->getSharedRAbundVectors();
                string lastLabel = lookup[0]->getLabel();
-               
+       
                /******************************************************/
                //output headings for files
                /******************************************************/
@@ -395,7 +450,8 @@ int SummarySharedCommand::execute(){
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                m->mothurOut(outputFileName); m->mothurOutEndLine();    
-               if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        }
+               if (mult) { m->mothurOut(outAllFileName); m->mothurOutEndLine();        outputTypes["summary"].push_back(outAllFileName); }
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    } outputTypes["summary"].push_back(outputFileName);
                m->mothurOutEndLine();
 
                return 0;
@@ -409,10 +465,12 @@ int SummarySharedCommand::execute(){
 /***********************************************************/
 int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
        try {
+                       vector< vector<seqDist> > calcDists;  //vector containing vectors that contains the summary results for each group compare
+                       calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
                                
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       driver(thisLookup, 0, numGroups, sumFileName, sumAllFileName);
+                                       driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
                                        m->appendFiles((sumFileName + ".temp"), sumFileName);
                                        remove((sumFileName + ".temp").c_str());
                                        if (mult) {
@@ -420,7 +478,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                                remove((sumAllFileName + ".temp").c_str());
                                        }
                                }else{
-                                       int process = 0;
+                                       int process = 1;
                                        vector<int> processIDS;
                
                                        //loop through and create all the processes you want
@@ -431,11 +489,34 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                                        processIDS.push_back(pid); 
                                                        process++;
                                                }else if (pid == 0){
-                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp");   
+                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                                                       
+                                                       //only do this if you want a distance file
+                                                       if (createPhylip) {
+                                                               string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+                                                               ofstream outtemp;
+                                                               m->openOutputFile(tempdistFileName, outtemp);
+                                                               
+                                                               for (int i = 0; i < calcDists.size(); i++) {
+                                                                       outtemp << calcDists[i].size() << endl;
+                                                                       
+                                                                       for (int j = 0; j < calcDists[i].size(); j++) {
+                                                                               outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                                                                       }
+                                                               }
+                                                               outtemp.close();
+                                                       }
+                                                       
                                                        exit(0);
                                                }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                                        }
-                               
+                                       
+                                       //parent do your part
+                                       driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                                       m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+                                       remove((sumFileName + toString(getpid()) + ".temp").c_str());
+                                       if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
+                                               
                                        //force parent to wait until all the processes are done
                                        for (int i = 0; i < processIDS.size(); i++) {
                                                int temp = processIDS[i];
@@ -445,15 +526,36 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                        for (int i = 0; i < processIDS.size(); i++) {
                                                m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
                                                remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
-                                               if (mult) {
-                                                       if (i == 0) {  m->appendFiles((sumAllFileName + toString(processIDS[i]) + ".temp"), sumAllFileName);  }
-                                                       remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());
+                                               if (mult) {     remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());   }
+                                               
+                                               if (createPhylip) {
+                                                       string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
+                                                       ifstream intemp;
+                                                       m->openInputFile(tempdistFileName, intemp);
+                                                       
+                                                       for (int i = 0; i < calcDists.size(); i++) {
+                                                               int size = 0;
+                                                               intemp >> size; m->gobble(intemp);
+                                                                       
+                                                               for (int j = 0; j < size; j++) {
+                                                                       int seq1 = 0;
+                                                                       int seq2 = 0;
+                                                                       float dist = 1.0;
+                                                                       
+                                                                       intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
+                                                                       
+                                                                       seqDist tempDist(seq1, seq2, dist);
+                                                                       calcDists[i].push_back(tempDist);
+                                                               }
+                                                       }
+                                                       intemp.close();
+                                                       remove(tempdistFileName.c_str());
                                                }
                                        }
 
                                }
                        #else
-                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"));
+                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists);
                                m->appendFiles((sumFileName + ".temp"), sumFileName);
                                remove((sumFileName + ".temp").c_str());
                                if (mult) {
@@ -461,6 +563,50 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
                                        remove((sumAllFileName + ".temp").c_str());
                                }
                        #endif
+                       
+                       if (createPhylip) {
+                               for (int i = 0; i < calcDists.size(); i++) {
+                                       if (m->control_pressed) { break; }
+                               
+                                       string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
+                                       outputNames.push_back(distFileName);
+                                       ofstream outDist;
+                                       m->openOutputFile(distFileName, outDist);
+                                       outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+                                       
+                                       //initialize matrix
+                                       vector< vector<float> > matrix; //square matrix to represent the distance
+                                       matrix.resize(thisLookup.size());
+                                       for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                                       
+                                       
+                                       for (int j = 0; j < calcDists[i].size(); j++) {
+                                               int row = calcDists[i][j].seq1;
+                                               int column = calcDists[i][j].seq2;
+                                               float dist = calcDists[i][j].dist;
+                                               
+                                               matrix[row][column] = dist;
+                                               matrix[column][row] = dist;
+                                       }
+                                       
+                                       //output to file
+                                       outDist << thisLookup.size() << endl;
+                                       for (int r=0; r<thisLookup.size(); r++) { 
+                                               //output name
+                                               string name = thisLookup[r]->getGroup();
+                                               if (name.length() < 10) { //pad with spaces to make compatible
+                                                       while (name.length() < 10) {  name += " ";  }
+                                               }
+                                               outDist << name << '\t';
+                                       
+                                               //output distances
+                                               for (int l = 0; l < r; l++) {   outDist  << matrix[r][l] << '\t';  }
+                                               outDist << endl;
+                                       }
+                                       
+                                       outDist.close();
+                               }
+                       }
        }
        catch(exception& e) {
                m->errorOut(e, "SummarySharedCommand", "process");
@@ -468,7 +614,7 @@ int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string
        }
 }
 /**************************************************************************************************/
-int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile) { 
+int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int start, int end, string sumFile, string sumAllFile, vector< vector<seqDist> >& calcDists) { 
        try {
                
                //loop through calculators and add to file all for all calcs that can do mutiple groups
@@ -524,12 +670,15 @@ int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int sta
                                
                                for(int i=0;i<sumCalculators.size();i++) {
 
-                                       sumCalculators[i]->getValues(subset); //saves the calculator outputs
+                                       vector<double> tempdata = sumCalculators[i]->getValues(subset); //saves the calculator outputs
                                        
                                        if (m->control_pressed) { outputFileHandle.close(); return 1; }
                                        
                                        outputFileHandle << '\t';
                                        sumCalculators[i]->print(outputFileHandle);
+                                       
+                                       seqDist temp(l, k, tempdata[0]);
+                                       calcDists[i].push_back(temp);
                                }
                                outputFileHandle << endl;
                        }