]> git.donarmstrong.com Git - mothur.git/blobdiff - getmetacommunitycommand.cpp
added mothurgetpid function. fixed bug with align.seqs related to g++ 4.8 change...
[mothur.git] / getmetacommunitycommand.cpp
index 8f78ca2a45fce56f6ef8996c880b48677ae32357..288e5ca2e0f84daf6f92eeba2a2b4a93943b596c 100644 (file)
@@ -7,7 +7,10 @@
 //
 
 #include "getmetacommunitycommand.h"
-
+#include "communitytype.h"
+#include "kmeans.h"
+#include "validcalculator.h"
+#include "subsample.h"
 
 //**********************************************************************************************************************
 vector<string> GetMetaCommunityCommand::setParameters(){
@@ -15,13 +18,17 @@ vector<string> GetMetaCommunityCommand::setParameters(){
         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
                CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+        CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "rjsd", "", "", "","",false,false,true); parameters.push_back(pcalc);
+        CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+        CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
         CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
         CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
         CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
         CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter pmethod("method", "Multiple", "dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod);
+        
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -35,9 +42,13 @@ vector<string> GetMetaCommunityCommand::setParameters(){
 string GetMetaCommunityCommand::getHelpString(){
        try {
                string helpString = "";
-               helpString += "The get.communitytype command parameters are shared, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
+               helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
         helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
                helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed.  Group names are separated by dashes.\n";
+        helpString += "The method parameter allows to select the method you would like to use.  Options are dmm, kmeans and pam. Default=dmm.\n";
+        helpString += "The calc parameter allows to select the calculator you would like to use to calculate the distance matrix used by the pam and kmeans method. By default the rjsd calculator is used.\n";
+        helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matrix for the pam and kmeans method.\n";
+        helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam and kmeans methods.\n";
                helpString += "The minpartitions parameter is used to .... Default=5.\n";
         helpString += "The maxpartitions parameter is used to .... Default=10.\n";
         helpString += "The optimizegap parameter is used to .... Default=3.\n";
@@ -55,12 +66,12 @@ string GetMetaCommunityCommand::getOutputPattern(string type) {
     try {
         string pattern = "";
         
-        if (type == "fit")              {  pattern = "[filename],[distance],mix.fit"; }
-        else if (type == "relabund")    {  pattern = "[filename],[distance],[tag],mix.relabund"; }
-        else if (type == "design")      {  pattern = "[filename],[distance],mix.design"; }
-        else if (type == "matrix")      {  pattern = "[filename],[distance],[tag],mix.posterior"; }
-        else if (type == "parameters")  {  pattern = "[filename],[distance],mix.parameters"; }
-        else if (type == "summary")  {  pattern = "[filename],[distance],mix.summary"; }
+        if (type == "fit")              {  pattern = "[filename],[distance],[method],mix.fit"; }
+        else if (type == "relabund")    {  pattern = "[filename],[distance],[method],[tag],mix.relabund"; }
+        else if (type == "design")      {  pattern = "[filename],[distance],[method],mix.design"; }
+        else if (type == "matrix")      {  pattern = "[filename],[distance],[method],[tag],mix.posterior"; }
+        else if (type == "parameters")  {  pattern = "[filename],[distance],[method],mix.parameters"; }
+        else if (type == "summary")  {  pattern = "[filename],[distance],[method],mix.summary"; }
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -171,6 +182,37 @@ GetMetaCommunityCommand::GetMetaCommunityCommand(string option)  {
                                if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
                                else { allLines = 1;  }
                        }
+            
+            method = validParameter.validFile(parameters, "method", false);
+                       if (method == "not found") { method = "dmm"; }
+                       
+                       if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { }
+                       else { m->mothurOut("[ERROR]: " + method + " is not a valid method.  Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; }
+            
+            calc = validParameter.validFile(parameters, "calc", false);
+                       if (calc == "not found") { calc = "rjsd";  }
+                       else {
+                if (calc == "default")  {  calc = "rjsd";  }
+                       }
+                       m->splitAtDash(calc, Estimators);
+                       if (m->inUsersGroups("citation", Estimators)) {
+                               ValidCalculators validCalc; validCalc.printCitations(Estimators);
+                               //remove citation from list of calcs
+                               for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") {  Estimators.erase(Estimators.begin()+i); break; } }
+                       }
+            if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); }
+            
+            temp = validParameter.validFile(parameters, "iters", false);                       if (temp == "not found") { temp = "1000"; }
+                       m->mothurConvert(temp, iters);
+            
+            temp = validParameter.validFile(parameters, "subsample", false);           if (temp == "not found") { temp = "F"; }
+                       if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+            else {
+                if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; }  //we will set it to smallest group later
+                else { subsample = false; }
+            }
+            
+            if (subsample == false) { iters = 0; }
                }
                
        }
@@ -194,6 +236,35 @@ int GetMetaCommunityCommand::execute(){
         set<string> processedLabels;
         set<string> userLabels = labels;
         
+        if (subsample) {
+            if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+                subsampleSize = lookup[0]->getNumSeqs();
+                for (int i = 1; i < lookup.size(); i++) {
+                    int thisSize = lookup[i]->getNumSeqs();
+                    
+                    if (thisSize < subsampleSize) {    subsampleSize = thisSize;       }
+                }
+            }else {
+                m->clearGroups();
+                Groups.clear();
+                vector<SharedRAbundVector*> temp;
+                for (int i = 0; i < lookup.size(); i++) {
+                    if (lookup[i]->getNumSeqs() < subsampleSize) {
+                        m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+                        delete lookup[i];
+                    }else {
+                        Groups.push_back(lookup[i]->getGroup());
+                        temp.push_back(lookup[i]);
+                    }
+                }
+                lookup = temp;
+                m->setGroups(Groups);
+            }
+            
+            if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups.  I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true;  return 0; }
+        }
+
+        
         //as long as you are not at the end of the file or done wih the lines you want
         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
             
@@ -295,6 +366,7 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
         map<string, string> variables;
         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
         variables["[distance]"] = thislookup[0]->getLabel();
+        variables["[method]"] = method;
                string outputFileName = getOutputFileName("fit", variables);
         outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
                 
@@ -343,11 +415,11 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
                                process++;
                        }else if (pid == 0){
                 outputNames.clear();
-                               num = processDriver(thislookup, dividedPartitions[process], (outputFileName + toString(getpid())), rels[process], matrix[process], doneFlags, process);
+                               num = processDriver(thislookup, dividedPartitions[process], (outputFileName + m->mothurGetpid(process)), rels[process], matrix[process], doneFlags, process);
                 
                 //pass numSeqs to parent
                                ofstream out;
-                               string tempFile = toString(getpid()) + ".outputNames.temp";
+                               string tempFile = m->mothurGetpid(process) + ".outputNames.temp";
                                m->openOutputFile(tempFile, out);
                 out << num << endl;
                 out << outputNames.size() << endl;
@@ -363,7 +435,12 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
                }
                
                //do my part
-        m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+        if (method == "dmm") {  m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");  }
+        else {
+            m->mothurOut("K\tCH\t");
+            for (int i = 0; i < thislookup.size(); i++) {  m->mothurOut(thislookup[i]->getGroup() + '\t'); }
+            m->mothurOut("\n");
+        }
                minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
                
                //force parent to wait until all the processes are done
@@ -437,56 +514,6 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
         }
         
 #else
-        /*
-        vector<metaCommunityData*> pDataArray;
-               DWORD   dwThreadIdArray[processors-1];
-               HANDLE  hThreadArray[processors-1];
-        
-               //Create processor worker threads.
-               for( int i=1; i<processors; i++ ){
-            //copy lookup
-            //make copy of lookup so we don't get access violations
-            vector<SharedRAbundVector*> newLookup;
-            for (int k = 0; k < thislookup.size(); k++) {
-                SharedRAbundVector* temp = new SharedRAbundVector();
-                temp->setLabel(thislookup[k]->getLabel());
-                temp->setGroup(thislookup[k]->getGroup());
-                newLookup.push_back(temp);
-            }
-            
-            //for each bin
-            for (int k = 0; k < thislookup[0]->getNumBins(); k++) {
-                if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
-                for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(k), thislookup[j]->getGroup()); }
-            }
-
-            processIDS.push_back(i);
-            
-                       // Allocate memory for thread data.
-                       metaCommunityData* tempMeta = new metaCommunityData(newLookup, m, dividedPartitions[i], outputFileName + toString(i), rels[i], matrix[i], minpartitions, maxpartitions, optimizegap);
-                       pDataArray.push_back(tempMeta);
-                       
-                       hThreadArray[i] = CreateThread(NULL, 0, MyMetaCommunityThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
-               }
-               
-        //do my part
-               minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0]);
-        
-               //Wait until all threads have terminated.
-               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-               
-               //Close all thread handles and free memory allocations.
-               for(int i=0; i < pDataArray.size(); i++){
-            if (pDataArray[i]->minPartition < minPartition) { minPartition = pDataArray[i]->minPartition; }
-            for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
-                outputNames.push_back(pDataArray[i]->outputNames[j]);
-            }
-            m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName);
-            m->mothurRemove(outputFileName + toString(processIDS[i]));
-                       CloseHandle(hThreadArray[i]);
-                       delete pDataArray[i];
-               } */
-        //do my part
         m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
                minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
 #endif
@@ -501,7 +528,8 @@ int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislo
         
         //run generate Summary function for smallest minPartition
         variables["[tag]"] = toString(minPartition);
-        generateSummaryFile(minPartition, variables);
+        vector<double> piValues = generateDesignFile(minPartition, variables);
+        if (method == "dmm") {  generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file
         
         return 0;
 
@@ -516,12 +544,25 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
        try {
         
         double minLaplace = 1e10;
-        int minPartition = 0;
+        int minPartition = 1;
+        vector<double> minSilhouettes; minSilhouettes.resize(thislookup.size(), 0);
+        
+               ofstream fitData, silData;
+        if (method == "dmm") {
+            m->openOutputFile(outputFileName, fitData);
+            fitData.setf(ios::fixed, ios::floatfield);
+            fitData.setf(ios::showpoint);
+            fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value
+            minLaplace = 0;
+            m->openOutputFile(outputFileName, silData);
+            silData.setf(ios::fixed, ios::floatfield);
+            silData.setf(ios::showpoint);
+            silData << "K\tCH\t";
+            for (int i = 0; i < thislookup.size(); i++) { silData << thislookup[i]->getGroup() << '\t';  }
+            silData << endl;
+        } 
         
-               ofstream fitData;
-               m->openOutputFile(outputFileName, fitData);
-        fitData.setf(ios::fixed, ios::floatfield);
-        fitData.setf(ios::showpoint);
         cout.setf(ios::fixed, ios::floatfield);
         cout.setf(ios::showpoint);
 
@@ -529,7 +570,18 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
         vector<string> thisGroups;
         for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); }
         
-        fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+        vector< vector<double> > dists; //do we want to output this matrix??
+        if ((method == "pam") || (method == "kmeans")) {  dists = generateDistanceMatrix(thislookup);  }
+        
+        if (m->debug) {
+            m->mothurOut("[DEBUG]: dists = \n");
+            for (int i = 0; i < dists.size(); i++) {
+                if (m->control_pressed) { break; }
+                m->mothurOut("[DEBUG]: i = " + toString(i) + '\t');
+                for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); }
+                m->mothurOut("\n");
+            }
+        }
         
         for(int i=0;i<parts.size();i++){
             
@@ -551,33 +603,52 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
                 }
             }
             
-            qFinderDMM findQ(sharedMatrix, numPartitions);
-            
-            double laplace = findQ.getLaplace();
-            m->mothurOut(toString(numPartitions) + '\t');
-            cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
-            m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
-            cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
-            m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
-            
-            fitData << numPartitions << '\t';
-            fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
-            fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
-            
-            if(laplace < minLaplace){
-                minPartition = numPartitions;
-                minLaplace = laplace;
-                m->mothurOut("***");
+            CommunityTypeFinder* finder = NULL;
+            if (method == "dmm")            {   finder = new qFinderDMM(sharedMatrix, numPartitions);   }
+            else if (method == "kmeans")    {   finder = new KMeans(sharedMatrix, numPartitions);       }
+            else if (method == "pam")       {   finder = new Pam(sharedMatrix, dists, numPartitions);                 }
+            else {
+                if (i == 0) {  m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); }
+                finder = new qFinderDMM(sharedMatrix, numPartitions);
             }
-            m->mothurOutEndLine();
             
             string relabund = relabunds[i];
-            outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
             string matrixName = matrix[i];
             outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
             
-            findQ.printZMatrix(matrixName, thisGroups);
-            findQ.printRelAbund(relabund, m->currentBinLabels);
+            finder->printZMatrix(matrixName, thisGroups);
+            
+            double chi; vector<double> silhouettes;
+            if (method == "dmm") {
+                double laplace = finder->getLaplace();
+                if(laplace < minLaplace){
+                    minPartition = numPartitions;
+                    minLaplace = laplace;
+                }
+            }else {
+                chi = finder->calcCHIndex(dists);
+                silhouettes = finder->calcSilhouettes(dists);
+                if (chi > minLaplace) { //save partition with maximum ch index score
+                    minPartition = numPartitions;
+                    minLaplace = chi;
+                    minSilhouettes = silhouettes;
+                }
+            }
+            
+            if (method == "dmm") {
+                finder->printFitData(cout, minLaplace);
+                finder->printFitData(fitData);
+                finder->printRelAbund(relabund, m->currentSharedBinLabels);
+                outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+            }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values
+                finder->printSilData(cout, chi, silhouettes);
+                finder->printSilData(silData, chi, silhouettes);
+                if (method == "kmeans") {
+                    finder->printRelAbund(relabund, m->currentSharedBinLabels);
+                    outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+                }
+            }
+            delete finder;
             
             if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
                 string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
@@ -588,9 +659,7 @@ int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislook
                 break;
             }
         }
-        fitData.close();
-        
-        //minPartition = 4;
+        if (method == "dmm") { fitData.close(); }
         
         if (m->control_pressed) { return 0; }
 
@@ -672,7 +741,7 @@ vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, ma
 inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference;   }
 
 /**************************************************************************************************/
-int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v){
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v, vector<double> piValues){
     try {
         vector<summaryData> summary;
         
@@ -683,13 +752,11 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,s
         string name, header;
         double mean, lci, uci;
         
-        
-        vector<double> piValues = generateDesignFile(numPartitions, v);
-        
         ifstream referenceFile;
         map<string, string> variables;
         variables["[filename]"] = v["[filename]"];
         variables["[distance]"] = v["[distance]"];
+        variables["[method]"] = method;
         variables["[tag]"] = "1";
         string reference = getOutputFileName("relabund", variables);
         m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
@@ -808,4 +875,238 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,s
     
 }
 //**********************************************************************************************************************
+vector<vector<double> > GetMetaCommunityCommand::generateDistanceMatrix(vector<SharedRAbundVector*>& thisLookup){
+    try {
+        vector<vector<double> > results;
+        
+        Calculator* matrixCalculator;
+        ValidCalculators validCalculator;
+        int i = 0;
+        
+        if (validCalculator.isValidCalculator("matrix", Estimators[i]) == true) {
+            if (Estimators[i] == "sharedsobs") {
+                matrixCalculator = new SharedSobsCS();
+            }else if (Estimators[i] == "sharedchao") {
+                matrixCalculator = new SharedChao1();
+            }else if (Estimators[i] == "sharedace") {
+                matrixCalculator = new SharedAce();
+            }else if (Estimators[i] == "jabund") {
+                matrixCalculator = new JAbund();
+            }else if (Estimators[i] == "sorabund") {
+                matrixCalculator = new SorAbund();
+            }else if (Estimators[i] == "jclass") {
+                matrixCalculator = new Jclass();
+            }else if (Estimators[i] == "sorclass") {
+                matrixCalculator = new SorClass();
+            }else if (Estimators[i] == "jest") {
+                matrixCalculator = new Jest();
+            }else if (Estimators[i] == "sorest") {
+                matrixCalculator = new SorEst();
+            }else if (Estimators[i] == "thetayc") {
+                matrixCalculator = new ThetaYC();
+            }else if (Estimators[i] == "thetan") {
+                matrixCalculator = new ThetaN();
+            }else if (Estimators[i] == "kstest") {
+                matrixCalculator = new KSTest();
+            }else if (Estimators[i] == "sharednseqs") {
+                matrixCalculator = new SharedNSeqs();
+            }else if (Estimators[i] == "ochiai") {
+                matrixCalculator = new Ochiai();
+            }else if (Estimators[i] == "anderberg") {
+                matrixCalculator = new Anderberg();
+            }else if (Estimators[i] == "kulczynski") {
+                matrixCalculator = new Kulczynski();
+            }else if (Estimators[i] == "kulczynskicody") {
+                matrixCalculator = new KulczynskiCody();
+            }else if (Estimators[i] == "lennon") {
+                matrixCalculator = new Lennon();
+            }else if (Estimators[i] == "morisitahorn") {
+                matrixCalculator = new MorHorn();
+            }else if (Estimators[i] == "braycurtis") {
+                matrixCalculator = new BrayCurtis();
+            }else if (Estimators[i] == "whittaker") {
+                matrixCalculator = new Whittaker();
+            }else if (Estimators[i] == "odum") {
+                matrixCalculator = new Odum();
+            }else if (Estimators[i] == "canberra") {
+                matrixCalculator = new Canberra();
+            }else if (Estimators[i] == "structeuclidean") {
+                matrixCalculator = new StructEuclidean();
+            }else if (Estimators[i] == "structchord") {
+                matrixCalculator = new StructChord();
+            }else if (Estimators[i] == "hellinger") {
+                matrixCalculator = new Hellinger();
+            }else if (Estimators[i] == "manhattan") {
+                matrixCalculator = new Manhattan();
+            }else if (Estimators[i] == "structpearson") {
+                matrixCalculator = new StructPearson();
+            }else if (Estimators[i] == "soergel") {
+                matrixCalculator = new Soergel();
+            }else if (Estimators[i] == "spearman") {
+                matrixCalculator = new Spearman();
+            }else if (Estimators[i] == "structkulczynski") {
+                matrixCalculator = new StructKulczynski();
+            }else if (Estimators[i] == "speciesprofile") {
+                matrixCalculator = new SpeciesProfile();
+            }else if (Estimators[i] == "hamming") {
+                matrixCalculator = new Hamming();
+            }else if (Estimators[i] == "structchi2") {
+                matrixCalculator = new StructChi2();
+            }else if (Estimators[i] == "gower") {
+                matrixCalculator = new Gower();
+            }else if (Estimators[i] == "memchi2") {
+                matrixCalculator = new MemChi2();
+            }else if (Estimators[i] == "memchord") {
+                matrixCalculator = new MemChord();
+            }else if (Estimators[i] == "memeuclidean") {
+                matrixCalculator = new MemEuclidean();
+            }else if (Estimators[i] == "mempearson") {
+                matrixCalculator = new MemPearson();
+            }else if (Estimators[i] == "jsd") {
+                matrixCalculator = new JSD();
+            }else if (Estimators[i] == "rjsd") {
+                matrixCalculator = new RJSD();
+            }else {
+                m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results;
+            }
+        }
+        
+        //calc distances
+        vector< vector< vector<seqDist> > > calcDistsTotals;  //each iter, then each groupCombos dists. this will be used to make .dist files
+        vector< vector<seqDist> > calcDists; calcDists.resize(1);
+        
+        for (int thisIter = 0; thisIter < iters+1; thisIter++) {
+            vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+            
+            if (subsample && (thisIter != 0)) {
+                SubSample sample;
+                vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+                
+                //make copy of lookup so we don't get access violations
+                vector<SharedRAbundVector*> newLookup;
+                for (int k = 0; k < thisItersLookup.size(); k++) {
+                    SharedRAbundVector* temp = new SharedRAbundVector();
+                    temp->setLabel(thisItersLookup[k]->getLabel());
+                    temp->setGroup(thisItersLookup[k]->getGroup());
+                    newLookup.push_back(temp);
+                }
+                
+                //for each bin
+                for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+                    if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return results; }
+                    for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+                }
+                
+                tempLabels = sample.getSample(newLookup, subsampleSize);
+                thisItersLookup = newLookup;
+            }
+            
+           
+            driver(thisItersLookup, calcDists, matrixCalculator);
+                     
+            if (subsample && (thisIter != 0)) {
+                if((thisIter) % 100 == 0){     m->mothurOutJustToScreen(toString(thisIter)+"\n");              }
+                calcDistsTotals.push_back(calcDists);
+                for (int i = 0; i < calcDists.size(); i++) {
+                    for (int j = 0; j < calcDists[i].size(); j++) {
+                        if (m->debug) {  m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n");  }
+                    }
+                }
+                //clean up memory
+                for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+                thisItersLookup.clear();
+            }else { //print results for whole dataset
+                for (int i = 0; i < calcDists.size(); i++) {
+                    if (m->control_pressed) { break; }
+                    
+                    //initialize matrix
+                    results.resize(thisLookup.size());
+                    for (int k = 0; k < thisLookup.size(); k++) {  results[k].resize(thisLookup.size(), 0.0); }
+                    
+                    for (int j = 0; j < calcDists[i].size(); j++) {
+                        int row = calcDists[i][j].seq1;
+                        int column = calcDists[i][j].seq2;
+                        double dist = calcDists[i][j].dist;
+                        
+                        results[row][column] = dist;
+                        results[column][row] = dist;
+                    }
+                }
+            }
+            for (int i = 0; i < calcDists.size(); i++) {  calcDists[i].clear(); }
+               }
+               
+        if (iters != 0) {
+            //we need to find the average distance and standard deviation for each groups distance
+            vector< vector<seqDist>  > calcAverages = m->getAverages(calcDistsTotals, "average");
+            
+            //print results
+            for (int i = 0; i < calcDists.size(); i++) {
+                results.resize(thisLookup.size());
+                for (int k = 0; k < thisLookup.size(); k++) {  results[k].resize(thisLookup.size(), 0.0); }
+                
+                for (int j = 0; j < calcAverages[i].size(); j++) {
+                    int row = calcAverages[i][j].seq1;
+                    int column = calcAverages[i][j].seq2;
+                    float dist = calcAverages[i][j].dist;
+                    
+                    results[row][column] = dist;
+                    results[column][row] = dist;
+                }
+            }
+        }
+
+        
+        return results;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix");
+        exit(1);
+    }
+}
+/**************************************************************************************************/
+int GetMetaCommunityCommand::driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator* matrixCalculator) {
+       try {
+               vector<SharedRAbundVector*> subset;
+        
+               for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare
+                       
+                       for (int l = 0; l < k; l++) {
+                               
+                               if (k != l) { //we dont need to similiarity of a groups to itself
+                                       subset.clear(); //clear out old pair of sharedrabunds
+                                       //add new pair of sharedrabunds
+                                       subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
+                                       
+                                       
+                    
+                    //if this calc needs all groups to calculate the pair load all groups
+                    if (matrixCalculator->getNeedsAll()) {
+                        //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+                        for (int w = 0; w < thisLookup.size(); w++) {
+                            if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
+                        }
+                    }
+                    
+                    vector<double> tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs
+                    
+                    if (m->control_pressed) { return 1; }
+                    
+                    seqDist temp(l, k, tempdata[0]);
+                    //cout << l << '\t' << k << '\t' <<  tempdata[0] << endl;
+                    calcDists[0].push_back(temp);
+                }
+                               
+                       }
+               }
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MatrixOutputCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************