git.donarmstrong.com Git - mothur.git/blobdiff - summarysharedcommand.cpp
fixes while testing 1.33.0
[mothur.git] / summarysharedcommand.cpp
index 6db7dab85b9b51ae798aaed4df4bc581adc5db6d..59c12a03dae958f4bb199a6aad94056264c0426a 100644 (file)
@@ -8,60 +8,23 @@
  */
 
 #include "summarysharedcommand.h"
-#include "sharedsobscollectsummary.h"
-#include "sharedchao1.h"
-#include "sharedace.h"
-#include "sharednseqs.h"
-#include "sharedjabund.h"
-#include "sharedsorabund.h"
-#include "sharedjclass.h"
-#include "sharedsorclass.h"
-#include "sharedjest.h"
-#include "sharedsorest.h"
-#include "sharedthetayc.h"
-#include "sharedthetan.h"
-#include "sharedkstest.h"
-#include "whittaker.h"
-#include "sharedochiai.h"
-#include "sharedanderbergs.h"
-#include "sharedkulczynski.h"
-#include "sharedkulczynskicody.h"
-#include "sharedlennon.h"
-#include "sharedmorisitahorn.h"
-#include "sharedbraycurtis.h"
-#include "sharedjackknife.h"
-#include "whittaker.h"
-#include "odum.h"
-#include "canberra.h"
-#include "structeuclidean.h"
-#include "structchord.h"
-#include "hellinger.h"
-#include "manhattan.h"
-#include "structpearson.h"
-#include "soergel.h"
-#include "spearman.h"
-#include "structkulczynski.h"
-#include "structchi2.h"
-#include "speciesprofile.h"
-#include "hamming.h"
-#include "gower.h"
-#include "memchi2.h"
-#include "memchord.h"
-#include "memeuclidean.h"
-#include "mempearson.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> SummarySharedCommand::setParameters(){  
        try {
-               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pdistance);
-               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-skulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "",true,false); parameters.push_back(pcalc);
-               CommandParameter pall("all", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pall);
-               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
-               CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "","phylip",false,false); parameters.push_back(psubsample);
+               CommandParameter pdistance("distance", "Boolean", "", "F", "", "", "","phylip",false,false); parameters.push_back(pdistance);
+               CommandParameter pcalc("calc", "Multiple", "sharedchao-sharedsobs-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-whittaker-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-structchi2-hamming-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan", "", "", "","",true,false,true); parameters.push_back(pcalc);
+        CommandParameter poutput("output", "Multiple", "lt-square", "lt", "", "", "","",false,false); parameters.push_back(poutput);
+               CommandParameter pall("all", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pall);
+        CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+               CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -77,11 +40,14 @@ string SummarySharedCommand::getHelpString(){
        try {
                string helpString = "";
                ValidCalculators validCalculator;
-               helpString += "The summary.shared command parameters are shared, label, calc, distance, processors and all.  shared is required if there is no current sharedfile.\n";
+               helpString += "The summary.shared command parameters are shared, label, calc, distance, processors, subsample, iters and all.  shared is required if there is no current sharedfile.\n";
                helpString += "The summary.shared command should be in the following format: \n";
                helpString += "summary.shared(label=yourLabel, calc=yourEstimators, groups=yourGroups).\n";
                helpString += "Example summary.shared(label=unique-.01-.03, groups=B-C, calc=sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan).\n";
                helpString +=  validCalculator.printCalc("sharedsummary");
+        helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample.\n";
+        helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group.\n";
+        helpString += "The output parameter allows you to specify format of your distance matrix. Options are lt, and square. The default is lt.\n";
                helpString += "The default value for calc is sharedsobs-sharedchao-sharedace-jabund-sorensonabund-jclass-sorclass-jest-sorest-thetayc-thetan\n";
                helpString += "The default value for groups is all the groups in your groupfile.\n";
                helpString += "The distance parameter allows you to indicate you would like a distance file created for each calculator for each label, default=f.\n";
@@ -89,7 +55,7 @@ string SummarySharedCommand::getHelpString(){
                helpString += "The all parameter is used to specify if you want the estimate of all your groups together.  This estimate can only be made for sharedsobs and sharedchao calculators. The default is false.\n";
                helpString += "If you use sharedchao and run into memory issues, set all to false. \n";
                helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 2 valid groups.\n";
-               helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n";
+               helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
                return helpString;
        }
        catch(exception& e) {
@@ -98,12 +64,29 @@ string SummarySharedCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string SummarySharedCommand::getOutputPattern(string type) { // Returns the output-filename pattern string for the given output type ("summary" or "phylip").
+    try {
+        string pattern = "";
+        // Any other type logs an error, sets m->control_pressed, and leaves pattern empty.
+        if (type == "summary") {  pattern = "[filename],summary-[filename],[tag],summary"; } 
+        else if (type == "phylip") {  pattern = "[filename],[calc],[distance],[outputtag],[tag2],dist"; } 
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        // NOTE(review): the bracketed tokens look like placeholders resolved by getOutputFileName (execute() fills "[filename]" and "[tag]") - confirm.
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "SummarySharedCommand", "getOutputPattern");
+        exit(1); // an exception here is reported through m->errorOut and then ends the process
+    }
+}
+//**********************************************************************************************************************
 SummarySharedCommand::SummarySharedCommand(){  
        try {
                abort = true; calledHelp = true; 
                setParameters();
                vector<string> tempOutNames;
                outputTypes["summary"] = tempOutNames;
+        outputTypes["phylip"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "SummarySharedCommand", "SummarySharedCommand");
@@ -119,6 +102,7 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                
                //allow user to run help
                if(option == "help") {  help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
                        vector<string> myArray = setParameters();
@@ -137,6 +121,7 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["summary"] = tempOutNames;
+            outputTypes["phylip"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -160,7 +145,7 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                sharedfile = m->getSharedFile(); 
                                if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
                                else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
-                       }
+                       }else { m->setSharedFile(sharedfile); }
                        
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -183,23 +168,46 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                 if (calc == "default")  {  calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan";  }
                        }
                        m->splitAtDash(calc, Estimators);
+                       if (m->inUsersGroups("citation", Estimators)) { 
+                               ValidCalculators validCalc; validCalc.printCitations(Estimators); 
+                               //remove citation from list of calcs
+                               for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
+                       }
                        
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { groups = ""; }
                        else { 
                                m->splitAtDash(groups, Groups);
-                               m->Groups = Groups;
+                               m->setGroups(Groups);
                        }
                        
                        string temp = validParameter.validFile(parameters, "all", false);                               if (temp == "not found") { temp = "false"; }
                        all = m->isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "distance", false);                                 if (temp == "not found") { temp = "false"; }
+            temp = validParameter.validFile(parameters, "iters", false);                       if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, iters); 
+            
+            output = validParameter.validFile(parameters, "output", false);            
+            if(output == "not found"){ output = "lt"; }
+            else { createPhylip = true; }
+                       if ((output != "lt") && (output != "square")) { m->mothurOut(output + " is not a valid output form. Options are lt and square. I will use lt."); m->mothurOutEndLine(); output = "lt"; }
+            
+            temp = validParameter.validFile(parameters, "subsample", false);           if (temp == "not found") { temp = "F"; }
+                       if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+            else {  
+                if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later 
+                else { subsample = false; }
+            }
+            
+            if (subsample == false) { iters = 0; }
+            
+            temp = validParameter.validFile(parameters, "distance", false);                                    if (temp == "not found") { temp = "false"; }
                        createPhylip = m->isTrue(temp);
-                       
+            if (subsample) { createPhylip = true; }
+            
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors); 
+                       m->mothurConvert(temp, processors); 
                        
                        if (abort == false) {
                        
@@ -286,6 +294,10 @@ SummarySharedCommand::SummarySharedCommand(string option)  {
                                                        sumCalculators.push_back(new MemEuclidean());
                                                }else if (Estimators[i] == "mempearson") { 
                                                        sumCalculators.push_back(new MemPearson());
+                                               }else if (Estimators[i] == "jsd") {
+                                                       sumCalculators.push_back(new JSD());
+                                               }else if (Estimators[i] == "rjsd") {
+                                                       sumCalculators.push_back(new RJSD());
                                                }
                                        }
                                }
@@ -307,7 +319,9 @@ int SummarySharedCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                ofstream outputFileHandle, outAll;
-               string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "shared.summary";
+        map<string, string> variables; 
+               variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+               string outputFileName = getOutputFileName("summary",variables);
                
                //if the users entered no valid calculators don't execute command
                if (sumCalculators.size() == 0) { return 0; }
@@ -338,7 +352,8 @@ int SummarySharedCommand::execute(){
                outputFileHandle.close();
                
                //create file and put column headers for multiple groups file
-               string outAllFileName = ((m->getRootName(sharedfile)) + "sharedmultiple.summary");
+        variables["[tag]"]= "multiple";
+               string outAllFileName = getOutputFileName("summary",variables);
                if (mult == true) {
                        m->openOutputFile(outAllFileName, outAll);
                        outputNames.push_back(outAllFileName);
@@ -358,31 +373,58 @@ int SummarySharedCommand::execute(){
                        for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                        
                        //close files and clean up
-                       remove(outputFileName.c_str());
-                       if (mult == true) { remove(outAllFileName.c_str());  }
+                       m->mothurRemove(outputFileName);
+                       if (mult == true) { m->mothurRemove(outAllFileName);  }
                        return 0;
                //if you only have 2 groups you don't need a .sharedmultiple file
                }else if ((lookup.size() == 2) && (mult == true)) { 
                        mult = false;
-                       remove(outAllFileName.c_str());
+                       m->mothurRemove(outAllFileName);
                        outputNames.pop_back();
                }
                
                if (m->control_pressed) {
-                       if (mult) {  remove(outAllFileName.c_str());  }
-                       remove(outputFileName.c_str()); 
+                       if (mult) {  m->mothurRemove(outAllFileName);  }
+                       m->mothurRemove(outputFileName); 
                        delete input;
                        for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                        for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
-                       m->Groups.clear(); 
+                       m->clearGroups(); 
                        return 0;
                }
                /******************************************************/
-               
+        if (subsample) { 
+            if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+                subsampleSize = lookup[0]->getNumSeqs();
+                for (int i = 1; i < lookup.size(); i++) {
+                    int thisSize = lookup[i]->getNumSeqs();
+                    
+                    if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
+                }
+            }else {
+                m->clearGroups();
+                Groups.clear();
+                vector<SharedRAbundVector*> temp;
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (lookup[i]->getNumSeqs() < subsampleSize) { 
+                        m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+                        delete lookup[i];
+                    }else { 
+                        Groups.push_back(lookup[i]->getGroup()); 
+                        temp.push_back(lookup[i]);
+                    }
+                } 
+                lookup = temp;
+                m->setGroups(Groups);
+            }
+            
+            if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; delete input; return 0; }
+        }
+
                
                /******************************************************/
                //comparison breakup to be used by different processes later
-               numGroups = m->Groups.size();
+               numGroups = lookup.size();
                lines.resize(processors);
                for (int i = 0; i < processors; i++) {
                        lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
@@ -397,12 +439,12 @@ int SummarySharedCommand::execute(){
                //as long as you are not at the end of the file or done wih the lines you want
                while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                        if (m->control_pressed) {
-                               if (mult) {  remove(outAllFileName.c_str());  }
-                               remove(outputFileName.c_str()); 
+                               if (mult) {  m->mothurRemove(outAllFileName);  }
+                               m->mothurRemove(outputFileName); 
                                delete input; 
                                for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
                                for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
-                               m->Groups.clear(); 
+                               m->clearGroups(); 
                                return 0;
                        }
 
@@ -440,11 +482,11 @@ int SummarySharedCommand::execute(){
                }
                
                if (m->control_pressed) {
-                       if (mult) { remove(outAllFileName.c_str());  }
-                       remove(outputFileName.c_str()); 
+                       if (mult) { m->mothurRemove(outAllFileName);  }
+                       m->mothurRemove(outputFileName); 
                        delete input; 
                        for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
-                       m->Groups.clear(); 
+                       m->clearGroups(); 
                        return 0;
                }
 
@@ -473,14 +515,14 @@ int SummarySharedCommand::execute(){
                
                                
                //reset groups parameter
-               m->Groups.clear();  
+               m->clearGroups();  
                
                for(int i=0;i<sumCalculators.size();i++){  delete sumCalculators[i]; }
                delete input;  
                
                if (m->control_pressed) {
-                       remove(outAllFileName.c_str());  
-                       remove(outputFileName.c_str()); 
+                       m->mothurRemove(outAllFileName);  
+                       m->mothurRemove(outputFileName); 
                        return 0;
                }
                
@@ -498,157 +540,343 @@ int SummarySharedCommand::execute(){
                exit(1);
        }
 }
-
 /***********************************************************/
-int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
+int SummarySharedCommand::printSims(ostream& out, vector< vector<double> >& simMatrix) {
        try {
-                       vector< vector<seqDist> > calcDists;  //vector containing vectors that contains the summary results for each group compare
-                       calcDists.resize(sumCalculators.size()); //one for each calc, this will be used to make .dist files
-                               
-                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               if(processors == 1){
-                                       driver(thisLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
-                                       m->appendFiles((sumFileName + ".temp"), sumFileName);
-                                       remove((sumFileName + ".temp").c_str());
-                                       if (mult) {
-                                               m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
-                                               remove((sumAllFileName + ".temp").c_str());
-                                       }
-                               }else{
-                                       int process = 1;
-                                       vector<int> processIDS;
                
-                                       //loop through and create all the processes you want
-                                       while (process != processors) {
-                                               int pid = fork();
-                                               
-                                               if (pid > 0) {
-                                                       processIDS.push_back(pid); 
-                                                       process++;
-                                               }else if (pid == 0){
-                                                       driver(thisLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
-                                                       
-                                                       //only do this if you want a distance file
-                                                       if (createPhylip) {
-                                                               string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
-                                                               ofstream outtemp;
-                                                               m->openOutputFile(tempdistFileName, outtemp);
-                                                               
-                                                               for (int i = 0; i < calcDists.size(); i++) {
-                                                                       outtemp << calcDists[i].size() << endl;
-                                                                       
-                                                                       for (int j = 0; j < calcDists[i].size(); j++) {
-                                                                               outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
-                                                                       }
-                                                               }
-                                                               outtemp.close();
-                                                       }
-                                                       
-                                                       exit(0);
-                                               }else { 
-                                                       m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
-                                                       for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
-                                                       exit(0);
-                                               }
-                                       }
-                                       
-                                       //parent do your part
-                                       driver(thisLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
-                                       m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
-                                       remove((sumFileName + toString(getpid()) + ".temp").c_str());
-                                       if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); }
-                                               
-                                       //force parent to wait until all the processes are done
-                                       for (int i = 0; i < processIDS.size(); i++) {
-                                               int temp = processIDS[i];
-                                               wait(&temp);
-                                       }
-                                       
-                                       for (int i = 0; i < processIDS.size(); i++) {
-                                               m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
-                                               remove((sumFileName + toString(processIDS[i]) + ".temp").c_str());
-                                               if (mult) {     remove((sumAllFileName + toString(processIDS[i]) + ".temp").c_str());   }
-                                               
-                                               if (createPhylip) {
-                                                       string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
-                                                       ifstream intemp;
-                                                       m->openInputFile(tempdistFileName, intemp);
-                                                       
-                                                       for (int i = 0; i < calcDists.size(); i++) {
-                                                               int size = 0;
-                                                               intemp >> size; m->gobble(intemp);
-                                                                       
-                                                               for (int j = 0; j < size; j++) {
-                                                                       int seq1 = 0;
-                                                                       int seq2 = 0;
-                                                                       float dist = 1.0;
-                                                                       
-                                                                       intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
-                                                                       
-                                                                       seqDist tempDist(seq1, seq2, dist);
-                                                                       calcDists[i].push_back(tempDist);
-                                                               }
-                                                       }
-                                                       intemp.close();
-                                                       remove(tempdistFileName.c_str());
-                                               }
-                                       }
-
-                               }
-                       #else
-                               driver(thisLookup, 0, numGroups, (sumFileName + ".temp"), (sumAllFileName + ".temp"), calcDists);
-                               m->appendFiles((sumFileName + ".temp"), sumFileName);
-                               remove((sumFileName + ".temp").c_str());
-                               if (mult) {
-                                       m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
-                                       remove((sumAllFileName + ".temp").c_str());
+               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+               
+               //output num seqs
+               out << simMatrix.size() << endl;
+               
+               if (output == "lt") {
+                       for (int b = 0; b < simMatrix.size(); b++)      {
+                               out << lookup[b]->getGroup() << '\t';
+                               for (int n = 0; n < b; n++)     {
+                    if (m->control_pressed) { return 0; }
+                                       out << simMatrix[b][n] << '\t'; 
                                }
-                       #endif
-                       
-                       if (createPhylip) {
-                               for (int i = 0; i < calcDists.size(); i++) {
-                                       if (m->control_pressed) { break; }
-                               
-                                       string distFileName = outputDir + m->getRootName(m->getSimpleName(sumFileName)) + sumCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".dist";
-                                       outputNames.push_back(distFileName);
-                                       ofstream outDist;
-                                       m->openOutputFile(distFileName, outDist);
-                                       outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
-                                       
-                                       //initialize matrix
-                                       vector< vector<float> > matrix; //square matrix to represent the distance
-                                       matrix.resize(thisLookup.size());
-                                       for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
-                                       
-                                       
-                                       for (int j = 0; j < calcDists[i].size(); j++) {
-                                               int row = calcDists[i][j].seq1;
-                                               int column = calcDists[i][j].seq2;
-                                               float dist = calcDists[i][j].dist;
-                                               
-                                               matrix[row][column] = dist;
-                                               matrix[column][row] = dist;
-                                       }
-                                       
-                                       //output to file
-                                       outDist << thisLookup.size() << endl;
-                                       for (int r=0; r<thisLookup.size(); r++) { 
-                                               //output name
-                                               string name = thisLookup[r]->getGroup();
-                                               if (name.length() < 10) { //pad with spaces to make compatible
-                                                       while (name.length() < 10) {  name += " ";  }
-                                               }
-                                               outDist << name << '\t';
-                                       
-                                               //output distances
-                                               for (int l = 0; l < r; l++) {   outDist  << matrix[r][l] << '\t';  }
-                                               outDist << endl;
-                                       }
-                                       
-                                       outDist.close();
+                               out << endl;
+                       }
+               }else{
+                       for (int b = 0; b < simMatrix.size(); m++)      {
+                               out << lookup[b]->getGroup() << '\t';
+                               for (int n = 0; n < simMatrix[b].size(); n++)   {
+                    if (m->control_pressed) { return 0; }
+                                       out << simMatrix[b][n] << '\t'; 
                                }
+                               out << endl;
                        }
-               return 0;
+               }
+        
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SummarySharedCommand", "printSims");
+               exit(1);
+       }
+}
+/***********************************************************/
+// Runs all summary calculators over the group comparisons of one label.
+// Iteration 0 always works on the complete thisLookup. When subsample is set, iterations
+// 1..iters each work on a freshly subsampled copy (subsampleSize); their distances are kept
+// in calcDistsTotals so the average and standard deviation matrices can be written at the end.
+//   thisLookup     - one SharedRAbundVector per group for the current label (not freed here).
+//   sumFileName    - summary file the per-iteration temp output is appended to.
+//   sumAllFileName - summary file for the multi-group results, appended to only when mult is set.
+// Returns 0, including when m->control_pressed interrupts the copy loops.
+// NOTE(review): the temp summary output is appended to sumFileName on every iteration, so
+// subsampled iterations add rows there as well - confirm that is intended.
+int SummarySharedCommand::process(vector<SharedRAbundVector*> thisLookup, string sumFileName, string sumAllFileName) {
+       try {
+        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files
+        vector< vector<seqDist>  > calcDists; calcDists.resize(sumCalculators.size());                 
+        
+        for (int thisIter = 0; thisIter < iters+1; thisIter++) {
+            
+            //data set used by this iteration: the original vectors, or a subsampled deep copy
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+            
+            if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
+                SubSample sample;
+                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+                
+                //make copy of lookup so we don't get access violations
+                vector<SharedRAbundVector*> newLookup;
+                for (int k = 0; k < thisItersLookup.size(); k++) {
+                    SharedRAbundVector* temp = new SharedRAbundVector();
+                    temp->setLabel(thisItersLookup[k]->getLabel());
+                    temp->setGroup(thisItersLookup[k]->getGroup());
+                    newLookup.push_back(temp);
+                }
+                
+                //for each bin
+                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                }
+                
+                tempLabels = sample.getSample(newLookup, subsampleSize);
+                thisItersLookup = newLookup;
+            }
+        
+            
+            //every driver call below must see thisItersLookup, otherwise the subsample is ignored
+            if(processors == 1){
+                driver(thisItersLookup, 0, numGroups, sumFileName+".temp", sumAllFileName+".temp", calcDists);
+                m->appendFiles((sumFileName + ".temp"), sumFileName);
+                m->mothurRemove((sumFileName + ".temp"));
+                if (mult) {
+                    m->appendFiles((sumAllFileName + ".temp"), sumAllFileName);
+                    m->mothurRemove((sumAllFileName + ".temp"));
+                }
+            }else{
+                
+                int process = 1;
+                vector<int> processIDS;
+                
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                //loop through and create all the processes you want
+                while (process != processors) {
+                    int pid = fork();
+                    
+                    if (pid > 0) {
+                        processIDS.push_back(pid); 
+                        process++;
+                    }else if (pid == 0){
+                        driver(thisItersLookup, lines[process].start, lines[process].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                        
+                        //only do this if you want a distance file
+                        //the child has its own memory, so its distances go back to the parent through a temp file
+                        if (createPhylip) {
+                            string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(getpid()) + ".dist";
+                            ofstream outtemp;
+                            m->openOutputFile(tempdistFileName, outtemp);
+                            
+                            for (int i = 0; i < calcDists.size(); i++) {
+                                outtemp << calcDists[i].size() << endl;
+                                
+                                for (int j = 0; j < calcDists[i].size(); j++) {
+                                    outtemp << calcDists[i][j].seq1 << '\t' << calcDists[i][j].seq2 << '\t' << calcDists[i][j].dist << endl;
+                                }
+                            }
+                            outtemp.close();
+                        }
+                        
+                        exit(0);
+                    }else { 
+                        m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                        for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                        exit(0);
+                    }
+                }
+                
+                //parent do your part
+                driver(thisItersLookup, lines[0].start, lines[0].end, sumFileName + toString(getpid()) + ".temp", sumAllFileName + toString(getpid()) + ".temp", calcDists);   
+                m->appendFiles((sumFileName + toString(getpid()) + ".temp"), sumFileName);
+                m->mothurRemove((sumFileName + toString(getpid()) + ".temp"));
+                //remove the temp file after appending, as the single processor branch does
+                if (mult) { m->appendFiles((sumAllFileName + toString(getpid()) + ".temp"), sumAllFileName); m->mothurRemove((sumAllFileName + toString(getpid()) + ".temp")); }
+                
+                //force parent to wait until all the processes are done
+                for (int i = 0; i < processIDS.size(); i++) {
+                    int temp = processIDS[i];
+                    wait(&temp);
+                }
+                
+                //collect the summary rows and, if requested, the distances written by each child
+                for (int i = 0; i < processIDS.size(); i++) {
+                    m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                    m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+                    if (mult) {        m->mothurRemove((sumAllFileName + toString(processIDS[i]) + ".temp"));  }
+                    
+                    if (createPhylip) {
+                        string tempdistFileName = m->getRootName(m->getSimpleName(sumFileName)) + toString(processIDS[i]) +  ".dist";
+                        ifstream intemp;
+                        m->openInputFile(tempdistFileName, intemp);
+                        
+                        for (int k = 0; k < calcDists.size(); k++) {
+                            int size = 0;
+                            intemp >> size; m->gobble(intemp);
+                            
+                            for (int j = 0; j < size; j++) {
+                                int seq1 = 0;
+                                int seq2 = 0;
+                                float dist = 1.0;
+                                
+                                intemp >> seq1 >> seq2 >> dist;   m->gobble(intemp);
+                                
+                                seqDist tempDist(seq1, seq2, dist);
+                                calcDists[k].push_back(tempDist);
+                            }
+                        }
+                        intemp.close();
+                        m->mothurRemove(tempdistFileName);
+                    }
+                }
+#else
+                //////////////////////////////////////////////////////////////////////////////////////////////////////
+                //Windows version shared memory, so be careful when passing variables through the summarySharedData struct. 
+                //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+                //Taking advantage of shared memory to pass results vectors.
+                //////////////////////////////////////////////////////////////////////////////////////////////////////
+                
+                vector<summarySharedData*> pDataArray; 
+                DWORD   dwThreadIdArray[processors-1];
+                HANDLE  hThreadArray[processors-1];
+                
+                //Create processor worker threads.
+                for( int i=1; i<processors; i++ ){
+                    
+                    //make copy of this iteration's lookup so we don't get access violations
+                    vector<SharedRAbundVector*> newLookup;
+                    for (int k = 0; k < thisItersLookup.size(); k++) {
+                        SharedRAbundVector* temp = new SharedRAbundVector();
+                        temp->setLabel(thisItersLookup[k]->getLabel());
+                        temp->setGroup(thisItersLookup[k]->getGroup());
+                        newLookup.push_back(temp);
+                    }
+                
+                    
+                    //for each bin
+                    for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                        if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
+                        for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                    }
+                    
+                    // Allocate memory for thread data.
+                    summarySharedData* tempSum = new summarySharedData((sumFileName+toString(i)+".temp"), m, lines[i].start, lines[i].end, Estimators, newLookup);
+                    pDataArray.push_back(tempSum);
+                    processIDS.push_back(i);
+                    
+                    hThreadArray[i-1] = CreateThread(NULL, 0, MySummarySharedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);   
+                }
+                
+                //parent do your part
+                driver(thisItersLookup, lines[0].start, lines[0].end, sumFileName +"0.temp", sumAllFileName + "0.temp", calcDists);   
+                m->appendFiles((sumFileName + "0.temp"), sumFileName);
+                m->mothurRemove((sumFileName + "0.temp"));
+                //remove the temp file after appending, as the single processor branch does
+                if (mult) { m->appendFiles((sumAllFileName + "0.temp"), sumAllFileName); m->mothurRemove((sumAllFileName + "0.temp")); }
+                
+                //Wait until all threads have terminated.
+                WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+                
+                //Close all thread handles and free memory allocations.
+                for(int i=0; i < pDataArray.size(); i++){
+                    if (pDataArray[i]->count != (pDataArray[i]->end-pDataArray[i]->start)) {
+                        m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end-pDataArray[i]->start) + " groups assigned to it, quitting. \n"); m->control_pressed = true; 
+                    }
+                    m->appendFiles((sumFileName + toString(processIDS[i]) + ".temp"), sumFileName);
+                    m->mothurRemove((sumFileName + toString(processIDS[i]) + ".temp"));
+                    
+                    for (int j = 0; j < pDataArray[i]->thisLookup.size(); j++) {  delete pDataArray[i]->thisLookup[j];  } 
+                    
+                    if (createPhylip) {
+                        for (int k = 0; k < calcDists.size(); k++) {
+                            int size = pDataArray[i]->calcDists[k].size();
+                            for (int j = 0; j < size; j++) {    calcDists[k].push_back(pDataArray[i]->calcDists[k][j]);    }
+                        }
+                    }
+                    
+                    CloseHandle(hThreadArray[i]);
+                    delete pDataArray[i];
+                }
+                
+#endif
+            }
+            
+            if (subsample && (thisIter != 0)) { //we want the summary results for the whole dataset, then the subsampling
+                
+                calcDistsTotals.push_back(calcDists); 
+                //clean up memory - thisItersLookup holds the subsampled copies made above, not the caller's vectors
+                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+                thisItersLookup.clear();
+            }else {
+                //whole dataset: write one distance matrix per calculator
+                if (createPhylip) {
+                    for (int i = 0; i < calcDists.size(); i++) {
+                        if (m->control_pressed) { break; }
+                        
+                        //initialize matrix
+                        vector< vector<double> > matrix; //square matrix to represent the distance
+                        matrix.resize(thisLookup.size());
+                        for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                        
+                        for (int j = 0; j < calcDists[i].size(); j++) {
+                            int row = calcDists[i][j].seq1;
+                            int column = calcDists[i][j].seq2;
+                            double dist = calcDists[i][j].dist;
+                            
+                            matrix[row][column] = dist;
+                            matrix[column][row] = dist;
+                        }
+                        
+                        map<string, string> variables; 
+                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+                        variables["[calc]"] = sumCalculators[i]->getName();
+                        variables["[distance]"] = thisLookup[0]->getLabel();
+                        variables["[outputtag]"] = output;
+                        variables["[tag2]"] = "";
+                        string distFileName = getOutputFileName("phylip",variables);
+                        outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                        ofstream outDist;
+                        m->openOutputFile(distFileName, outDist);
+                        outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint);
+                        
+                        printSims(outDist, matrix);
+                        
+                        outDist.close();
+                    }
+                }
+            }
+            //start the next iteration with empty per-calculator distance lists
+            for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
+               }
+
+        if (iters != 0) {
+            //we need to find the average distance and standard deviation for each groups distance
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals);
+            
+            //find standard deviation
+            vector< vector<seqDist>  > stdDev = m->getStandardDeviation(calcDistsTotals, calcAverages); 
+            
+            //print results
+            for (int i = 0; i < calcDists.size(); i++) {
+                vector< vector<double> > matrix; //square matrix to represent the distance
+                matrix.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  matrix[k].resize(thisLookup.size(), 0.0); }
+                
+                vector< vector<double> > stdmatrix; //square matrix to represent the stdDev
+                stdmatrix.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  stdmatrix[k].resize(thisLookup.size(), 0.0); }
+                
+                
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    int row = calcAverages[i][j].seq1;
+                    int column = calcAverages[i][j].seq2;
+                    //kept as double so the values are not narrowed before landing in the double matrices
+                    double dist = calcAverages[i][j].dist;
+                    double stdDist = stdDev[i][j].dist;
+                    
+                    matrix[row][column] = dist;
+                    matrix[column][row] = dist;
+                    stdmatrix[row][column] = stdDist;
+                    stdmatrix[column][row] = stdDist;
+                }
+                
+                map<string, string> variables; 
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+                variables["[calc]"] = sumCalculators[i]->getName();
+                variables["[distance]"] = thisLookup[0]->getLabel();
+                variables["[outputtag]"] = output;
+                variables["[tag2]"] = "ave";
+                string distFileName = getOutputFileName("phylip",variables);
+                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                ofstream outAve;
+                m->openOutputFile(distFileName, outAve);
+                outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint);
+                
+                printSims(outAve, matrix);
+                
+                outAve.close();
+                
+                variables["[tag2]"] = "std";
+                distFileName = getOutputFileName("phylip",variables);
+                outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName);
+                ofstream outSTD;
+                m->openOutputFile(distFileName, outSTD);
+                outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint);
+                
+                printSims(outSTD, stdmatrix);
+                
+                outSTD.close();
+                
+            }
+        }
+        
+        return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "SummarySharedCommand", "process");
@@ -727,7 +955,7 @@ int SummarySharedCommand::driver(vector<SharedRAbundVector*> thisLookup, int sta
                                        outputFileHandle << '\t';
                                        sumCalculators[i]->print(outputFileHandle);
                                        
-                                       seqDist temp(l, k, (1.0 - tempdata[0]));
+                                       seqDist temp(l, k, tempdata[0]);
                                        calcDists[i].push_back(temp);
                                }
                                outputFileHandle << endl;