X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=getmetacommunitycommand.cpp;h=047f176fa6be9b1c11108f0e007391dae33040c7;hb=1ef03d78c36b105e87165ad1ebc9af29cd6144b7;hp=8f78ca2a45fce56f6ef8996c880b48677ae32357;hpb=875ab4b2eec77b920e9fa0042f9a2aae2faff2b0;p=mothur.git diff --git a/getmetacommunitycommand.cpp b/getmetacommunitycommand.cpp index 8f78ca2..047f176 100644 --- a/getmetacommunitycommand.cpp +++ b/getmetacommunitycommand.cpp @@ -7,7 +7,10 @@ // #include "getmetacommunitycommand.h" - +#include "communitytype.h" +#include "kmeans.h" +#include "validcalculator.h" +#include "subsample.h" //********************************************************************************************************************** vector GetMetaCommunityCommand::setParameters(){ @@ -15,13 +18,17 @@ vector GetMetaCommunityCommand::setParameters(){ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared); CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "rjsd", "", "", "","",false,false,true); parameters.push_back(pcalc); + CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions); CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions); CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap); CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); - + CommandParameter pmethod("method", "Multiple", "dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod); + vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } return myArray; @@ -35,9 +42,13 @@ vector GetMetaCommunityCommand::setParameters(){ string GetMetaCommunityCommand::getHelpString(){ try { string helpString = ""; - helpString += "The get.communitytype command parameters are shared, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n"; + helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n"; helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n"; helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n"; + helpString += "The method parameter allows to select the method you would like to use. Options are dmm, kmeans and pam. Default=dmm.\n"; + helpString += "The calc parameter allows to select the calculator you would like to use to calculate the distance matrix used by the pam and kmeans method. By default the rjsd calculator is used.\n"; + helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matrix for the pam and kmeans method.\n"; + helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam and kmeans methods.\n"; helpString += "The minpartitions parameter is used to .... Default=5.\n"; helpString += "The maxpartitions parameter is used to .... Default=10.\n"; helpString += "The optimizegap parameter is used to .... Default=3.\n"; @@ -55,12 +66,12 @@ string GetMetaCommunityCommand::getOutputPattern(string type) { try { string pattern = ""; - if (type == "fit") { pattern = "[filename],[distance],mix.fit"; } - else if (type == "relabund") { pattern = "[filename],[distance],[tag],mix.relabund"; } - else if (type == "design") { pattern = "[filename],[distance],mix.design"; } - else if (type == "matrix") { pattern = "[filename],[distance],[tag],mix.posterior"; } - else if (type == "parameters") { pattern = "[filename],[distance],mix.parameters"; } - else if (type == "summary") { pattern = "[filename],[distance],mix.summary"; } + if (type == "fit") { pattern = "[filename],[distance],[method],mix.fit"; } + else if (type == "relabund") { pattern = "[filename],[distance],[method],[tag],mix.relabund"; } + else if (type == "design") { pattern = "[filename],[distance],[method],mix.design"; } + else if (type == "matrix") { pattern = "[filename],[distance],[method],[tag],mix.posterior"; } + else if (type == "parameters") { pattern = "[filename],[distance],[method],mix.parameters"; } + else if (type == "summary") { pattern = "[filename],[distance],[method],mix.summary"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -171,6 +182,37 @@ GetMetaCommunityCommand::GetMetaCommunityCommand(string option) { if(label != "all") { m->splitAtDash(label, labels); allLines = 0; } else { allLines = 1; } } + + method = validParameter.validFile(parameters, "method", false); + if (method == "not found") { method = "dmm"; } + + if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { } + else { m->mothurOut("[ERROR]: " + method + " is not a valid method. Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; } + + calc = validParameter.validFile(parameters, "calc", false); + if (calc == "not found") { calc = "rjsd"; } + else { + if (calc == "default") { calc = "rjsd"; } + } + m->splitAtDash(calc, Estimators); + if (m->inUsersGroups("citation", Estimators)) { + ValidCalculators validCalc; validCalc.printCitations(Estimators); + //remove citation from list of calcs + for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } } + } + if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); } + + temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + m->mothurConvert(temp, iters); + + temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; } + if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; } + else { + if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later + else { subsample = false; } + } + + if (subsample == false) { iters = 0; } } } @@ -194,6 +236,35 @@ int GetMetaCommunityCommand::execute(){ set processedLabels; set userLabels = labels; + if (subsample) { + if (subsampleSize == -1) { //user has not set size, set size = smallest samples size + subsampleSize = lookup[0]->getNumSeqs(); + for (int i = 1; i < lookup.size(); i++) { + int thisSize = lookup[i]->getNumSeqs(); + + if (thisSize < subsampleSize) { subsampleSize = thisSize; } + } + }else { + m->clearGroups(); + Groups.clear(); + vector temp; + for (int i = 0; i < lookup.size(); i++) { + if (lookup[i]->getNumSeqs() < subsampleSize) { + m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); + delete lookup[i]; + }else { + Groups.push_back(lookup[i]->getGroup()); + temp.push_back(lookup[i]); + } + } + lookup = temp; + m->setGroups(Groups); + } + + if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; } + } + + //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { @@ -295,6 +366,7 @@ int GetMetaCommunityCommand::createProcesses(vector& thislo map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); variables["[distance]"] = thislookup[0]->getLabel(); + variables["[method]"] = method; string outputFileName = getOutputFileName("fit", variables); outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName); @@ -363,7 +435,12 @@ int GetMetaCommunityCommand::createProcesses(vector& thislo } //do my part - m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); + if (method == "dmm") { m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); } + else { + m->mothurOut("K\tCH\t"); + for (int i = 0; i < thislookup.size(); i++) { m->mothurOut(thislookup[i]->getGroup() + '\t'); } + m->mothurOut("\n"); + } minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0); //force parent to wait until all the processes are done @@ -437,56 +514,6 @@ int GetMetaCommunityCommand::createProcesses(vector& thislo } #else - /* - vector pDataArray; - DWORD dwThreadIdArray[processors-1]; - HANDLE hThreadArray[processors-1]; - - //Create processor worker threads. - for( int i=1; i newLookup; - for (int k = 0; k < thislookup.size(); k++) { - SharedRAbundVector* temp = new SharedRAbundVector(); - temp->setLabel(thislookup[k]->getLabel()); - temp->setGroup(thislookup[k]->getGroup()); - newLookup.push_back(temp); - } - - //for each bin - for (int k = 0; k < thislookup[0]->getNumBins(); k++) { - if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } - for (int j = 0; j < thislookup.size(); j++) { newLookup[j]->push_back(thislookup[j]->getAbundance(k), thislookup[j]->getGroup()); } - } - - processIDS.push_back(i); - - // Allocate memory for thread data. - metaCommunityData* tempMeta = new metaCommunityData(newLookup, m, dividedPartitions[i], outputFileName + toString(i), rels[i], matrix[i], minpartitions, maxpartitions, optimizegap); - pDataArray.push_back(tempMeta); - - hThreadArray[i] = CreateThread(NULL, 0, MyMetaCommunityThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); - } - - //do my part - minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0]); - - //Wait until all threads have terminated. - WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); - - //Close all thread handles and free memory allocations. - for(int i=0; i < pDataArray.size(); i++){ - if (pDataArray[i]->minPartition < minPartition) { minPartition = pDataArray[i]->minPartition; } - for (int j = 0; j < pDataArray[i]->outputNames.size(); j++) { - outputNames.push_back(pDataArray[i]->outputNames[j]); - } - m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName); - m->mothurRemove(outputFileName + toString(processIDS[i])); - CloseHandle(hThreadArray[i]); - delete pDataArray[i]; - } */ - //do my part m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0); #endif @@ -501,7 +528,8 @@ int GetMetaCommunityCommand::createProcesses(vector& thislo //run generate Summary function for smallest minPartition variables["[tag]"] = toString(minPartition); - generateSummaryFile(minPartition, variables); + vector piValues = generateDesignFile(minPartition, variables); + if (method == "dmm") { generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file return 0; @@ -516,12 +544,25 @@ int GetMetaCommunityCommand::processDriver(vector& thislook try { double minLaplace = 1e10; - int minPartition = 0; + int minPartition = 1; + vector minSilhouettes; minSilhouettes.resize(thislookup.size(), 0); + + ofstream fitData, silData; + if (method == "dmm") { + m->openOutputFile(outputFileName, fitData); + fitData.setf(ios::fixed, ios::floatfield); + fitData.setf(ios::showpoint); + fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl; + }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value + minLaplace = 0; + m->openOutputFile(outputFileName, silData); + silData.setf(ios::fixed, ios::floatfield); + silData.setf(ios::showpoint); + silData << "K\tCH\t"; + for (int i = 0; i < thislookup.size(); i++) { silData << thislookup[i]->getGroup() << '\t'; } + silData << endl; + } - ofstream fitData; - m->openOutputFile(outputFileName, fitData); - fitData.setf(ios::fixed, ios::floatfield); - fitData.setf(ios::showpoint); cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); @@ -529,7 +570,18 @@ int GetMetaCommunityCommand::processDriver(vector& thislook vector thisGroups; for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); } - fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl; + vector< vector > dists; //do we want to output this matrix?? + if ((method == "pam") || (method == "kmeans")) { dists = generateDistanceMatrix(thislookup); } + + if (m->debug) { + m->mothurOut("[DEBUG]: dists = \n"); + for (int i = 0; i < dists.size(); i++) { + if (m->control_pressed) { break; } + m->mothurOut("[DEBUG]: i = " + toString(i) + '\t'); + for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); } + m->mothurOut("\n"); + } + } for(int i=0;i& thislook } } - qFinderDMM findQ(sharedMatrix, numPartitions); - - double laplace = findQ.getLaplace(); - m->mothurOut(toString(numPartitions) + '\t'); - cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t'; - m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t'); - cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace; - m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace)); - - fitData << numPartitions << '\t'; - fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t'; - fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl; - - if(laplace < minLaplace){ - minPartition = numPartitions; - minLaplace = laplace; - m->mothurOut("***"); + CommunityTypeFinder* finder = NULL; + if (method == "dmm") { finder = new qFinderDMM(sharedMatrix, numPartitions); } + else if (method == "kmeans") { finder = new KMeans(sharedMatrix, numPartitions); } + else if (method == "pam") { finder = new Pam(sharedMatrix, dists, numPartitions); } + else { + if (i == 0) { m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); } + finder = new qFinderDMM(sharedMatrix, numPartitions); } - m->mothurOutEndLine(); string relabund = relabunds[i]; - outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); string matrixName = matrix[i]; outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName); - findQ.printZMatrix(matrixName, thisGroups); - findQ.printRelAbund(relabund, m->currentBinLabels); + finder->printZMatrix(matrixName, thisGroups); + + double chi; vector silhouettes; + if (method == "dmm") { + double laplace = finder->getLaplace(); + if(laplace < minLaplace){ + minPartition = numPartitions; + minLaplace = laplace; + } + }else { + chi = finder->calcCHIndex(dists); + silhouettes = finder->calcSilhouettes(dists); + if (chi > minLaplace) { //save partition with maximum ch index score + minPartition = numPartitions; + minLaplace = chi; + minSilhouettes = silhouettes; + } + } + + if (method == "dmm") { + finder->printFitData(cout, minLaplace); + finder->printFitData(fitData); + finder->printRelAbund(relabund, m->currentSharedBinLabels); + outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); + }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values + finder->printSilData(cout, chi, silhouettes); + finder->printSilData(silData, chi, silhouettes); + if (method == "kmeans") { + finder->printRelAbund(relabund, m->currentSharedBinLabels); + outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund); + } + } + delete finder; if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp"; @@ -588,9 +659,7 @@ int GetMetaCommunityCommand::processDriver(vector& thislook break; } } - fitData.close(); - - //minPartition = 4; + if (method == "dmm") { fitData.close(); } if (m->control_pressed) { return 0; } @@ -672,7 +741,7 @@ vector GetMetaCommunityCommand::generateDesignFile(int numPartitions, ma inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; } /**************************************************************************************************/ -int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map v){ +int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map v, vector piValues){ try { vector summary; @@ -683,13 +752,11 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map piValues = generateDesignFile(numPartitions, v); - ifstream referenceFile; map variables; variables["[filename]"] = v["[filename]"]; variables["[distance]"] = v["[distance]"]; + variables["[method]"] = method; variables["[tag]"] = "1"; string reference = getOutputFileName("relabund", variables); m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str()); @@ -808,4 +875,238 @@ int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map > GetMetaCommunityCommand::generateDistanceMatrix(vector& thisLookup){ + try { + vector > results; + + Calculator* matrixCalculator; + ValidCalculators validCalculator; + int i = 0; + + if (validCalculator.isValidCalculator("matrix", Estimators[i]) == true) { + if (Estimators[i] == "sharedsobs") { + matrixCalculator = new SharedSobsCS(); + }else if (Estimators[i] == "sharedchao") { + matrixCalculator = new SharedChao1(); + }else if (Estimators[i] == "sharedace") { + matrixCalculator = new SharedAce(); + }else if (Estimators[i] == "jabund") { + matrixCalculator = new JAbund(); + }else if (Estimators[i] == "sorabund") { + matrixCalculator = new SorAbund(); + }else if (Estimators[i] == "jclass") { + matrixCalculator = new Jclass(); + }else if (Estimators[i] == "sorclass") { + matrixCalculator = new SorClass(); + }else if (Estimators[i] == "jest") { + matrixCalculator = new Jest(); + }else if (Estimators[i] == "sorest") { + matrixCalculator = new SorEst(); + }else if (Estimators[i] == "thetayc") { + matrixCalculator = new ThetaYC(); + }else if (Estimators[i] == "thetan") { + matrixCalculator = new ThetaN(); + }else if (Estimators[i] == "kstest") { + matrixCalculator = new KSTest(); + }else if (Estimators[i] == "sharednseqs") { + matrixCalculator = new SharedNSeqs(); + }else if (Estimators[i] == "ochiai") { + matrixCalculator = new Ochiai(); + }else if (Estimators[i] == "anderberg") { + matrixCalculator = new Anderberg(); + }else if (Estimators[i] == "kulczynski") { + matrixCalculator = new Kulczynski(); + }else if (Estimators[i] == "kulczynskicody") { + matrixCalculator = new KulczynskiCody(); + }else if (Estimators[i] == "lennon") { + matrixCalculator = new Lennon(); + }else if (Estimators[i] == "morisitahorn") { + matrixCalculator = new MorHorn(); + }else if (Estimators[i] == "braycurtis") { + matrixCalculator = new BrayCurtis(); + }else if (Estimators[i] == "whittaker") { + matrixCalculator = new Whittaker(); + }else if (Estimators[i] == "odum") { + matrixCalculator = new Odum(); + }else if (Estimators[i] == "canberra") { + matrixCalculator = new Canberra(); + }else if (Estimators[i] == "structeuclidean") { + matrixCalculator = new StructEuclidean(); + }else if (Estimators[i] == "structchord") { + matrixCalculator = new StructChord(); + }else if (Estimators[i] == "hellinger") { + matrixCalculator = new Hellinger(); + }else if (Estimators[i] == "manhattan") { + matrixCalculator = new Manhattan(); + }else if (Estimators[i] == "structpearson") { + matrixCalculator = new StructPearson(); + }else if (Estimators[i] == "soergel") { + matrixCalculator = new Soergel(); + }else if (Estimators[i] == "spearman") { + matrixCalculator = new Spearman(); + }else if (Estimators[i] == "structkulczynski") { + matrixCalculator = new StructKulczynski(); + }else if (Estimators[i] == "speciesprofile") { + matrixCalculator = new SpeciesProfile(); + }else if (Estimators[i] == "hamming") { + matrixCalculator = new Hamming(); + }else if (Estimators[i] == "structchi2") { + matrixCalculator = new StructChi2(); + }else if (Estimators[i] == "gower") { + matrixCalculator = new Gower(); + }else if (Estimators[i] == "memchi2") { + matrixCalculator = new MemChi2(); + }else if (Estimators[i] == "memchord") { + matrixCalculator = new MemChord(); + }else if (Estimators[i] == "memeuclidean") { + matrixCalculator = new MemEuclidean(); + }else if (Estimators[i] == "mempearson") { + matrixCalculator = new MemPearson(); + }else if (Estimators[i] == "jsd") { + matrixCalculator = new JSD(); + }else if (Estimators[i] == "rjsd") { + matrixCalculator = new RJSD(); + }else { + m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results; + } + } + + //calc distances + vector< vector< vector > > calcDistsTotals; //each iter, then each groupCombos dists. this will be used to make .dist files + vector< vector > calcDists; calcDists.resize(1); + + for (int thisIter = 0; thisIter < iters+1; thisIter++) { + + vector thisItersLookup = thisLookup; + + if (subsample && (thisIter != 0)) { + SubSample sample; + vector tempLabels; //dont need since we arent printing the sampled sharedRabunds + + //make copy of lookup so we don't get access violations + vector newLookup; + for (int k = 0; k < thisItersLookup.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisItersLookup[k]->getLabel()); + temp->setGroup(thisItersLookup[k]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return results; } + for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } + } + + tempLabels = sample.getSample(newLookup, subsampleSize); + thisItersLookup = newLookup; + } + + + driver(thisItersLookup, calcDists, matrixCalculator); + + if (subsample && (thisIter != 0)) { + if((thisIter) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter)+"\n"); } + calcDistsTotals.push_back(calcDists); + for (int i = 0; i < calcDists.size(); i++) { + for (int j = 0; j < calcDists[i].size(); j++) { + if (m->debug) { m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n"); } + } + } + //clean up memory + for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; } + thisItersLookup.clear(); + }else { //print results for whole dataset + for (int i = 0; i < calcDists.size(); i++) { + if (m->control_pressed) { break; } + + //initialize matrix + results.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); } + + for (int j = 0; j < calcDists[i].size(); j++) { + int row = calcDists[i][j].seq1; + int column = calcDists[i][j].seq2; + double dist = calcDists[i][j].dist; + + results[row][column] = dist; + results[column][row] = dist; + } + } + } + for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } + } + + if (iters != 0) { + //we need to find the average distance and standard deviation for each groups distance + vector< vector > calcAverages = m->getAverages(calcDistsTotals, "average"); + + //print results + for (int i = 0; i < calcDists.size(); i++) { + results.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); } + + for (int j = 0; j < calcAverages[i].size(); j++) { + int row = calcAverages[i][j].seq1; + int column = calcAverages[i][j].seq2; + float dist = calcAverages[i][j].dist; + + results[row][column] = dist; + results[column][row] = dist; + } + } + } + + + return results; + } + catch(exception& e) { + m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix"); + exit(1); + } +} +/**************************************************************************************************/ +int GetMetaCommunityCommand::driver(vector thisLookup, vector< vector >& calcDists, Calculator* matrixCalculator) { + try { + vector subset; + + for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare + + for (int l = 0; l < k; l++) { + + if (k != l) { //we dont need to similiarity of a groups to itself + subset.clear(); //clear out old pair of sharedrabunds + //add new pair of sharedrabunds + subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]); + + + + //if this calc needs all groups to calculate the pair load all groups + if (matrixCalculator->getNeedsAll()) { + //load subset with rest of lookup for those calcs that need everyone to calc for a pair + for (int w = 0; w < thisLookup.size(); w++) { + if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); } + } + } + + vector tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs + + if (m->control_pressed) { return 1; } + + seqDist temp(l, k, tempdata[0]); + //cout << l << '\t' << k << '\t' << tempdata[0] << endl; + calcDists[0].push_back(temp); + } + + } + } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "MatrixOutputCommand", "driver"); + exit(1); + } +} +//**********************************************************************************************************************