//
#include "getmetacommunitycommand.h"
-
+#include "communitytype.h"
+#include "kmeans.h"
+#include "validcalculator.h"
+#include "subsample.h"
//**********************************************************************************************************************
vector<string> GetMetaCommunityCommand::setParameters(){
CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "rjsd", "", "", "","",false,false,true); parameters.push_back(pcalc);
+ CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
+ CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
-
+ CommandParameter pmethod("method", "Multiple", "dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod);
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
string GetMetaCommunityCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The get.communitytype command parameters are shared, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
+ helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n";
+ helpString += "The method parameter allows to select the method you would like to use. Options are dmm, kmeans and pam. Default=dmm.\n";
+ helpString += "The calc parameter allows to select the calculator you would like to use to calculate the distance matrix used by the pam and kmeans method. By default the rjsd calculator is used.\n";
+ helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matrix for the pam and kmeans method.\n";
+ helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam and kmeans methods.\n";
helpString += "The minpartitions parameter is used to .... Default=5.\n";
helpString += "The maxpartitions parameter is used to .... Default=10.\n";
helpString += "The optimizegap parameter is used to .... Default=3.\n";
try {
string pattern = "";
- if (type == "fit") { pattern = "[filename],[distance],mix.fit"; }
- else if (type == "relabund") { pattern = "[filename],[distance],[tag],mix.relabund"; }
- else if (type == "design") { pattern = "[filename],[distance],mix.design"; }
- else if (type == "matrix") { pattern = "[filename],[distance],[tag],mix.posterior"; }
- else if (type == "parameters") { pattern = "[filename],[distance],mix.parameters"; }
- else if (type == "summary") { pattern = "[filename],[distance],mix.summary"; }
+ if (type == "fit") { pattern = "[filename],[distance],[method],mix.fit"; }
+ else if (type == "relabund") { pattern = "[filename],[distance],[method],[tag],mix.relabund"; }
+ else if (type == "design") { pattern = "[filename],[distance],[method],mix.design"; }
+ else if (type == "matrix") { pattern = "[filename],[distance],[method],[tag],mix.posterior"; }
+ else if (type == "parameters") { pattern = "[filename],[distance],[method],mix.parameters"; }
+ else if (type == "summary") { pattern = "[filename],[distance],[method],mix.summary"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
return pattern;
if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
else { allLines = 1; }
}
+
+ method = validParameter.validFile(parameters, "method", false);
+ if (method == "not found") { method = "dmm"; }
+
+ if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { }
+ else { m->mothurOut("[ERROR]: " + method + " is not a valid method. Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; }
+
+ calc = validParameter.validFile(parameters, "calc", false);
+ if (calc == "not found") { calc = "rjsd"; }
+ else {
+ if (calc == "default") { calc = "rjsd"; }
+ }
+ m->splitAtDash(calc, Estimators);
+ if (m->inUsersGroups("citation", Estimators)) {
+ ValidCalculators validCalc; validCalc.printCitations(Estimators);
+ //remove citation from list of calcs
+ for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
+ }
+ if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); }
+
+ temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
+ m->mothurConvert(temp, iters);
+
+ temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
+ if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
+ else {
+ if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
+ else { subsample = false; }
+ }
+
+ if (subsample == false) { iters = 0; }
}
}
set<string> processedLabels;
set<string> userLabels = labels;
+ if (subsample) {
+ if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
+ subsampleSize = lookup[0]->getNumSeqs();
+ for (int i = 1; i < lookup.size(); i++) {
+ int thisSize = lookup[i]->getNumSeqs();
+
+ if (thisSize < subsampleSize) { subsampleSize = thisSize; }
+ }
+ }else {
+ m->clearGroups();
+ Groups.clear();
+ vector<SharedRAbundVector*> temp;
+ for (int i = 0; i < lookup.size(); i++) {
+ if (lookup[i]->getNumSeqs() < subsampleSize) {
+ m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
+ delete lookup[i];
+ }else {
+ Groups.push_back(lookup[i]->getGroup());
+ temp.push_back(lookup[i]);
+ }
+ }
+ lookup = temp;
+ m->setGroups(Groups);
+ }
+
+ if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
+ }
+
+
//as long as you are not at the end of the file or done wih the lines you want
while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
map<string, string> variables;
variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
variables["[distance]"] = thislookup[0]->getLabel();
+ variables["[method]"] = method;
string outputFileName = getOutputFileName("fit", variables);
outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
//loop through and create all the processes you want
while (process != processors) {
- int pid = fork();
+ pid_t pid = fork();
if (pid > 0) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
outputNames.clear();
- num = processDriver(thislookup, dividedPartitions[process], (outputFileName + toString(getpid())), rels[process], matrix[process], doneFlags, process);
+ num = processDriver(thislookup, dividedPartitions[process], (outputFileName + m->mothurGetpid(process)), rels[process], matrix[process], doneFlags, process);
//pass numSeqs to parent
ofstream out;
- string tempFile = toString(getpid()) + ".outputNames.temp";
+ string tempFile = m->mothurGetpid(process) + ".outputNames.temp";
m->openOutputFile(tempFile, out);
out << num << endl;
out << outputNames.size() << endl;
}
//do my part
- m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
+ if (method == "dmm") { m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); }
+ else {
+ m->mothurOut("K\tCH\t");
+ for (int i = 0; i < thislookup.size(); i++) { m->mothurOut(thislookup[i]->getGroup() + '\t'); }
+ m->mothurOut("\n");
+ }
minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
//force parent to wait until all the processes are done
}
#else
- /*
- vector<metaCommunityData*> pDataArray;
- DWORD dwThreadIdArray[processors-1];
- HANDLE hThreadArray[processors-1];
-
- //Create processor worker threads.
- for( int i=1; i<processors; i++ ){
- //copy lookup
- //make copy of lookup so we don't get access violations
- vector<SharedRAbundVector*> newLookup;
- for (int k = 0; k < thislookup.size(); k++) {
- SharedRAbundVector* temp = new SharedRAbundVector();
- temp->setLabel(thislookup[k]->getLabel());
- temp->setGroup(thislookup[k]->getGroup());
- newLookup.push_back(temp);
- }
-
- //for each bin
- for (int k = 0; k < thislookup[0]->getNumBins(); k++) {
- if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; }
- for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(k), thislookup[j]->getGroup()); }
- }
-
- processIDS.push_back(i);
-
- // Allocate memory for thread data.
- metaCommunityData* tempMeta = new metaCommunityData(newLookup, m, dividedPartitions[i], outputFileName + toString(i), rels[i], matrix[i], minpartitions, maxpartitions, optimizegap);
- pDataArray.push_back(tempMeta);
-
- hThreadArray[i] = CreateThread(NULL, 0, MyMetaCommunityThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
- }
-
- //do my part
- minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0]);
-
- //Wait until all threads have terminated.
- WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
-
- //Close all thread handles and free memory allocations.
- for(int i=0; i < pDataArray.size(); i++){
- if (pDataArray[i]->minPartition < minPartition) { minPartition = pDataArray[i]->minPartition; }
- for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) {
- outputNames.push_back(pDataArray[i]->outputNames[j]);
- }
- m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName);
- m->mothurRemove(outputFileName + toString(processIDS[i]));
- CloseHandle(hThreadArray[i]);
- delete pDataArray[i];
- } */
- //do my part
m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
#endif
//run generate Summary function for smallest minPartition
variables["[tag]"] = toString(minPartition);
- generateSummaryFile(minPartition, variables);
+ vector<double> piValues = generateDesignFile(minPartition, variables);
+ if (method == "dmm") { generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file
return 0;
try {
double minLaplace = 1e10;
- int minPartition = 0;
+ int minPartition = 1;
+ vector<double> minSilhouettes; minSilhouettes.resize(thislookup.size(), 0);
+
+ ofstream fitData, silData;
+ if (method == "dmm") {
+ m->openOutputFile(outputFileName, fitData);
+ fitData.setf(ios::fixed, ios::floatfield);
+ fitData.setf(ios::showpoint);
+ fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+ }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value
+ minLaplace = 0;
+ m->openOutputFile(outputFileName, silData);
+ silData.setf(ios::fixed, ios::floatfield);
+ silData.setf(ios::showpoint);
+ silData << "K\tCH\t";
+ for (int i = 0; i < thislookup.size(); i++) { silData << thislookup[i]->getGroup() << '\t'; }
+ silData << endl;
+ }
- ofstream fitData;
- m->openOutputFile(outputFileName, fitData);
- fitData.setf(ios::fixed, ios::floatfield);
- fitData.setf(ios::showpoint);
cout.setf(ios::fixed, ios::floatfield);
cout.setf(ios::showpoint);
vector<string> thisGroups;
for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); }
- fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
+ vector< vector<double> > dists; //do we want to output this matrix??
+ if ((method == "pam") || (method == "kmeans")) { dists = generateDistanceMatrix(thislookup); }
+
+ if (m->debug) {
+ m->mothurOut("[DEBUG]: dists = \n");
+ for (int i = 0; i < dists.size(); i++) {
+ if (m->control_pressed) { break; }
+ m->mothurOut("[DEBUG]: i = " + toString(i) + '\t');
+ for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); }
+ m->mothurOut("\n");
+ }
+ }
for(int i=0;i<parts.size();i++){
}
}
- qFinderDMM findQ(sharedMatrix, numPartitions);
-
- double laplace = findQ.getLaplace();
- m->mothurOut(toString(numPartitions) + '\t');
- cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
- m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
- cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
- m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
-
- fitData << numPartitions << '\t';
- fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
- fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
-
- if(laplace < minLaplace){
- minPartition = numPartitions;
- minLaplace = laplace;
- m->mothurOut("***");
+ CommunityTypeFinder* finder = NULL;
+ if (method == "dmm") { finder = new qFinderDMM(sharedMatrix, numPartitions); }
+ else if (method == "kmeans") { finder = new KMeans(sharedMatrix, numPartitions); }
+ else if (method == "pam") { finder = new Pam(sharedMatrix, dists, numPartitions); }
+ else {
+ if (i == 0) { m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); }
+ finder = new qFinderDMM(sharedMatrix, numPartitions);
}
- m->mothurOutEndLine();
string relabund = relabunds[i];
- outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
string matrixName = matrix[i];
outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
- findQ.printZMatrix(matrixName, thisGroups);
- findQ.printRelAbund(relabund, m->currentSharedBinLabels);
+ finder->printZMatrix(matrixName, thisGroups);
+
+ double chi; vector<double> silhouettes;
+ if (method == "dmm") {
+ double laplace = finder->getLaplace();
+ if(laplace < minLaplace){
+ minPartition = numPartitions;
+ minLaplace = laplace;
+ }
+ }else {
+ chi = finder->calcCHIndex(dists);
+ silhouettes = finder->calcSilhouettes(dists);
+ if (chi > minLaplace) { //save partition with maximum ch index score
+ minPartition = numPartitions;
+ minLaplace = chi;
+ minSilhouettes = silhouettes;
+ }
+ }
+
+ if (method == "dmm") {
+ finder->printFitData(cout, minLaplace);
+ finder->printFitData(fitData);
+ finder->printRelAbund(relabund, m->currentSharedBinLabels);
+ outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+ }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values
+ finder->printSilData(cout, chi, silhouettes);
+ finder->printSilData(silData, chi, silhouettes);
+ if (method == "kmeans") {
+ finder->printRelAbund(relabund, m->currentSharedBinLabels);
+ outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
+ }
+ }
+ delete finder;
if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
break;
}
}
- fitData.close();
-
- //minPartition = 4;
+ if (method == "dmm") { fitData.close(); }
if (m->control_pressed) { return 0; }
inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; }
/**************************************************************************************************/
-int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v){
+int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v, vector<double> piValues){
try {
vector<summaryData> summary;
string name, header;
double mean, lci, uci;
-
- vector<double> piValues = generateDesignFile(numPartitions, v);
-
ifstream referenceFile;
map<string, string> variables;
variables["[filename]"] = v["[filename]"];
variables["[distance]"] = v["[distance]"];
+ variables["[method]"] = method;
variables["[tag]"] = "1";
string reference = getOutputFileName("relabund", variables);
m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
}
//**********************************************************************************************************************
+vector<vector<double> > GetMetaCommunityCommand::generateDistanceMatrix(vector<SharedRAbundVector*>& thisLookup){
+ try {
+ vector<vector<double> > results;
+
+ Calculator* matrixCalculator;
+ ValidCalculators validCalculator;
+ int i = 0;
+
+ if (validCalculator.isValidCalculator("matrix", Estimators[i]) == true) {
+ if (Estimators[i] == "sharedsobs") {
+ matrixCalculator = new SharedSobsCS();
+ }else if (Estimators[i] == "sharedchao") {
+ matrixCalculator = new SharedChao1();
+ }else if (Estimators[i] == "sharedace") {
+ matrixCalculator = new SharedAce();
+ }else if (Estimators[i] == "jabund") {
+ matrixCalculator = new JAbund();
+ }else if (Estimators[i] == "sorabund") {
+ matrixCalculator = new SorAbund();
+ }else if (Estimators[i] == "jclass") {
+ matrixCalculator = new Jclass();
+ }else if (Estimators[i] == "sorclass") {
+ matrixCalculator = new SorClass();
+ }else if (Estimators[i] == "jest") {
+ matrixCalculator = new Jest();
+ }else if (Estimators[i] == "sorest") {
+ matrixCalculator = new SorEst();
+ }else if (Estimators[i] == "thetayc") {
+ matrixCalculator = new ThetaYC();
+ }else if (Estimators[i] == "thetan") {
+ matrixCalculator = new ThetaN();
+ }else if (Estimators[i] == "kstest") {
+ matrixCalculator = new KSTest();
+ }else if (Estimators[i] == "sharednseqs") {
+ matrixCalculator = new SharedNSeqs();
+ }else if (Estimators[i] == "ochiai") {
+ matrixCalculator = new Ochiai();
+ }else if (Estimators[i] == "anderberg") {
+ matrixCalculator = new Anderberg();
+ }else if (Estimators[i] == "kulczynski") {
+ matrixCalculator = new Kulczynski();
+ }else if (Estimators[i] == "kulczynskicody") {
+ matrixCalculator = new KulczynskiCody();
+ }else if (Estimators[i] == "lennon") {
+ matrixCalculator = new Lennon();
+ }else if (Estimators[i] == "morisitahorn") {
+ matrixCalculator = new MorHorn();
+ }else if (Estimators[i] == "braycurtis") {
+ matrixCalculator = new BrayCurtis();
+ }else if (Estimators[i] == "whittaker") {
+ matrixCalculator = new Whittaker();
+ }else if (Estimators[i] == "odum") {
+ matrixCalculator = new Odum();
+ }else if (Estimators[i] == "canberra") {
+ matrixCalculator = new Canberra();
+ }else if (Estimators[i] == "structeuclidean") {
+ matrixCalculator = new StructEuclidean();
+ }else if (Estimators[i] == "structchord") {
+ matrixCalculator = new StructChord();
+ }else if (Estimators[i] == "hellinger") {
+ matrixCalculator = new Hellinger();
+ }else if (Estimators[i] == "manhattan") {
+ matrixCalculator = new Manhattan();
+ }else if (Estimators[i] == "structpearson") {
+ matrixCalculator = new StructPearson();
+ }else if (Estimators[i] == "soergel") {
+ matrixCalculator = new Soergel();
+ }else if (Estimators[i] == "spearman") {
+ matrixCalculator = new Spearman();
+ }else if (Estimators[i] == "structkulczynski") {
+ matrixCalculator = new StructKulczynski();
+ }else if (Estimators[i] == "speciesprofile") {
+ matrixCalculator = new SpeciesProfile();
+ }else if (Estimators[i] == "hamming") {
+ matrixCalculator = new Hamming();
+ }else if (Estimators[i] == "structchi2") {
+ matrixCalculator = new StructChi2();
+ }else if (Estimators[i] == "gower") {
+ matrixCalculator = new Gower();
+ }else if (Estimators[i] == "memchi2") {
+ matrixCalculator = new MemChi2();
+ }else if (Estimators[i] == "memchord") {
+ matrixCalculator = new MemChord();
+ }else if (Estimators[i] == "memeuclidean") {
+ matrixCalculator = new MemEuclidean();
+ }else if (Estimators[i] == "mempearson") {
+ matrixCalculator = new MemPearson();
+ }else if (Estimators[i] == "jsd") {
+ matrixCalculator = new JSD();
+ }else if (Estimators[i] == "rjsd") {
+ matrixCalculator = new RJSD();
+ }else {
+ m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results;
+ }
+ }
+
+ //calc distances
+ vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, then each groupCombos dists. this will be used to make .dist files
+ vector< vector<seqDist> > calcDists; calcDists.resize(1);
+
+ for (int thisIter = 0; thisIter < iters+1; thisIter++) {
+
+ vector<SharedRAbundVector*> thisItersLookup = thisLookup;
+
+ if (subsample && (thisIter != 0)) {
+ SubSample sample;
+ vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
+
+ //make copy of lookup so we don't get access violations
+ vector<SharedRAbundVector*> newLookup;
+ for (int k = 0; k < thisItersLookup.size(); k++) {
+ SharedRAbundVector* temp = new SharedRAbundVector();
+ temp->setLabel(thisItersLookup[k]->getLabel());
+ temp->setGroup(thisItersLookup[k]->getGroup());
+ newLookup.push_back(temp);
+ }
+
+ //for each bin
+ for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return results; }
+ for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
+ }
+
+ tempLabels = sample.getSample(newLookup, subsampleSize);
+ thisItersLookup = newLookup;
+ }
+
+
+ driver(thisItersLookup, calcDists, matrixCalculator);
+
+ if (subsample && (thisIter != 0)) {
+ if((thisIter) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter)+"\n"); }
+ calcDistsTotals.push_back(calcDists);
+ for (int i = 0; i < calcDists.size(); i++) {
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ if (m->debug) { m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n"); }
+ }
+ }
+ //clean up memory
+ for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
+ thisItersLookup.clear();
+ }else { //print results for whole dataset
+ for (int i = 0; i < calcDists.size(); i++) {
+ if (m->control_pressed) { break; }
+
+ //initialize matrix
+ results.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcDists[i].size(); j++) {
+ int row = calcDists[i][j].seq1;
+ int column = calcDists[i][j].seq2;
+ double dist = calcDists[i][j].dist;
+
+ results[row][column] = dist;
+ results[column][row] = dist;
+ }
+ }
+ }
+ for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
+ }
+
+ if (iters != 0) {
+ //we need to find the average distance and standard deviation for each groups distance
+ vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals, "average");
+
+ //print results
+ for (int i = 0; i < calcDists.size(); i++) {
+ results.resize(thisLookup.size());
+ for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); }
+
+ for (int j = 0; j < calcAverages[i].size(); j++) {
+ int row = calcAverages[i][j].seq1;
+ int column = calcAverages[i][j].seq2;
+ float dist = calcAverages[i][j].dist;
+
+ results[row][column] = dist;
+ results[column][row] = dist;
+ }
+ }
+ }
+
+
+ return results;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int GetMetaCommunityCommand::driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator* matrixCalculator) {
+ try {
+ vector<SharedRAbundVector*> subset;
+
+ for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare
+
+ for (int l = 0; l < k; l++) {
+
+ if (k != l) { //we dont need to similiarity of a groups to itself
+ subset.clear(); //clear out old pair of sharedrabunds
+ //add new pair of sharedrabunds
+ subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
+
+
+
+ //if this calc needs all groups to calculate the pair load all groups
+ if (matrixCalculator->getNeedsAll()) {
+ //load subset with rest of lookup for those calcs that need everyone to calc for a pair
+ for (int w = 0; w < thisLookup.size(); w++) {
+ if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
+ }
+ }
+
+ vector<double> tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs
+
+ if (m->control_pressed) { return 1; }
+
+ seqDist temp(l, k, tempdata[0]);
+ //cout << l << '\t' << k << '\t' << tempdata[0] << endl;
+ calcDists[0].push_back(temp);
+ }
+
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MatrixOutputCommand", "driver");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************