2 // getmetacommunitycommand.cpp
5 // Created by SarahsWork on 4/9/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "getmetacommunitycommand.h"
10 #include "communitytype.h"
12 #include "validcalculator.h"
13 #include "subsample.h"
15 //**********************************************************************************************************************
16 vector<string> GetMetaCommunityCommand::setParameters(){
18 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
20 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
21 CommandParameter pcalc("calc", "Multiple", "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan-kstest-sharednseqs-ochiai-anderberg-kulczynski-kulczynskicody-lennon-morisitahorn-braycurtis-whittaker-odum-canberra-structeuclidean-structchord-hellinger-manhattan-structpearson-soergel-spearman-structkulczynski-speciesprofile-hamming-structchi2-gower-memchi2-memchord-memeuclidean-mempearson-jsd-rjsd", "rjsd", "", "", "","",false,false,true); parameters.push_back(pcalc);
22 CommandParameter psubsample("subsample", "String", "", "", "", "", "","",false,false); parameters.push_back(psubsample);
23 CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters);
24 CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
25 CommandParameter pmaxpartitions("maxpartitions", "Number", "", "100", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
26 CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
27 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
28 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
29 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
30 CommandParameter pmethod("method", "Multiple", "dmm-kmeans-pam", "dmm", "", "", "","",false,false,true); parameters.push_back(pmethod);
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
37 m->errorOut(e, "NewCommand", "setParameters");
41 //**********************************************************************************************************************
42 string GetMetaCommunityCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The get.communitytype command parameters are shared, method, label, groups, minpartitions, maxpartitions, optimizegap and processors. The shared file is required. \n";
46 helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n";
48 helpString += "The method parameter allows to select the method you would like to use. Options are dmm, kmeans and pam. Default=dmm.\n";
49 helpString += "The calc parameter allows to select the calculator you would like to use to calculate the distance matrix used by the pam and kmeans method. By default the rjsd calculator is used.\n";
50 helpString += "The iters parameter allows you to choose the number of times you would like to run the subsample while calculating the distance matrix for the pam and kmeans method.\n";
51 helpString += "The subsample parameter allows you to enter the size pergroup of the sample or you can set subsample=T and mothur will use the size of your smallest group while calculating the distance matrix for the pam and kmeans methods.\n";
52 helpString += "The minpartitions parameter is used to .... Default=5.\n";
53 helpString += "The maxpartitions parameter is used to .... Default=10.\n";
54 helpString += "The optimizegap parameter is used to .... Default=3.\n";
55 helpString += "The processors parameter allows you to specify number of processors to use. The default is 1.\n";
56 helpString += "The get.communitytype command should be in the following format: get.communitytype(shared=yourSharedFile).\n";
60 m->errorOut(e, "GetMetaCommunityCommand", "getHelpString");
64 //**********************************************************************************************************************
65 string GetMetaCommunityCommand::getOutputPattern(string type) {
69 if (type == "fit") { pattern = "[filename],[distance],[method],mix.fit"; }
70 else if (type == "relabund") { pattern = "[filename],[distance],[method],[tag],mix.relabund"; }
71 else if (type == "design") { pattern = "[filename],[distance],[method],mix.design"; }
72 else if (type == "matrix") { pattern = "[filename],[distance],[method],[tag],mix.posterior"; }
73 else if (type == "parameters") { pattern = "[filename],[distance],[method],mix.parameters"; }
74 else if (type == "summary") { pattern = "[filename],[distance],[method],mix.summary"; }
75 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
80 m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern");
84 //**********************************************************************************************************************
85 GetMetaCommunityCommand::GetMetaCommunityCommand(){
87 abort = true; calledHelp = true;
89 vector<string> tempOutNames;
90 outputTypes["fit"] = tempOutNames;
91 outputTypes["relabund"] = tempOutNames;
92 outputTypes["matrix"] = tempOutNames;
93 outputTypes["design"] = tempOutNames;
94 outputTypes["parameters"] = tempOutNames;
95 outputTypes["summary"] = tempOutNames;
98 m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
102 //**********************************************************************************************************************
103 GetMetaCommunityCommand::GetMetaCommunityCommand(string option) {
105 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 //valid paramters for this command
114 vector<string> myArray = setParameters();
116 OptionParser parser(option);
117 map<string,string> parameters = parser.getParameters();
119 ValidParameters validParameter;
120 map<string,string>::iterator it;
121 //check to make sure all parameters are valid for command
122 for (it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 vector<string> tempOutNames;
127 outputTypes["fit"] = tempOutNames;
128 outputTypes["relabund"] = tempOutNames;
129 outputTypes["matrix"] = tempOutNames;
130 outputTypes["design"] = tempOutNames;
131 outputTypes["parameters"] = tempOutNames;
132 outputTypes["summary"] = tempOutNames;
134 //if the user changes the input directory command factory will send this info to us in the output parameter
135 string inputDir = validParameter.validFile(parameters, "inputdir", false);
136 if (inputDir == "not found"){ inputDir = ""; }
139 it = parameters.find("shared");
140 if(it != parameters.end()){
141 path = m->hasPath(it->second);
142 if (path == "") { parameters["shared"] = inputDir + it->second; }
146 //get shared file, it is required
147 sharedfile = validParameter.validFile(parameters, "shared", true);
148 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
149 else if (sharedfile == "not found") {
150 //if there is a current shared file, use it
151 sharedfile = m->getSharedFile();
152 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
153 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
154 }else { m->setSharedFile(sharedfile); }
156 //if the user changes the output directory command factory will send this info to us in the output parameter
157 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
158 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
161 string temp = validParameter.validFile(parameters, "minpartitions", false); if (temp == "not found"){ temp = "5"; }
162 m->mothurConvert(temp, minpartitions);
164 temp = validParameter.validFile(parameters, "maxpartitions", false); if (temp == "not found"){ temp = "10"; }
165 m->mothurConvert(temp, maxpartitions);
167 temp = validParameter.validFile(parameters, "optimizegap", false); if (temp == "not found"){ temp = "3"; }
168 m->mothurConvert(temp, optimizegap);
170 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
171 m->setProcessors(temp);
172 m->mothurConvert(temp, processors);
174 string groups = validParameter.validFile(parameters, "groups", false);
175 if (groups == "not found") { groups = ""; }
176 else { m->splitAtDash(groups, Groups); }
177 m->setGroups(Groups);
179 string label = validParameter.validFile(parameters, "label", false);
180 if (label == "not found") { label = ""; }
182 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
183 else { allLines = 1; }
186 method = validParameter.validFile(parameters, "method", false);
187 if (method == "not found") { method = "dmm"; }
189 if ((method == "dmm") || (method == "kmeans") || (method == "pam")) { }
190 else { m->mothurOut("[ERROR]: " + method + " is not a valid method. Valid algorithms are dmm, kmeans and pam."); m->mothurOutEndLine(); abort = true; }
192 calc = validParameter.validFile(parameters, "calc", false);
193 if (calc == "not found") { calc = "rjsd"; }
195 if (calc == "default") { calc = "rjsd"; }
197 m->splitAtDash(calc, Estimators);
198 if (m->inUsersGroups("citation", Estimators)) {
199 ValidCalculators validCalc; validCalc.printCitations(Estimators);
200 //remove citation from list of calcs
201 for (int i = 0; i < Estimators.size(); i++) { if (Estimators[i] == "citation") { Estimators.erase(Estimators.begin()+i); break; } }
203 if (Estimators.size() != 1) { abort = true; m->mothurOut("[ERROR]: only one calculator is allowed.\n"); }
205 temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
206 m->mothurConvert(temp, iters);
208 temp = validParameter.validFile(parameters, "subsample", false); if (temp == "not found") { temp = "F"; }
209 if (m->isNumeric1(temp)) { m->mothurConvert(temp, subsampleSize); subsample = true; }
211 if (m->isTrue(temp)) { subsample = true; subsampleSize = -1; } //we will set it to smallest group later
212 else { subsample = false; }
215 if (subsample == false) { iters = 0; }
219 catch(exception& e) {
220 m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
224 //**********************************************************************************************************************
226 int GetMetaCommunityCommand::execute(){
229 if (abort == true) { if (calledHelp) { return 0; } return 2; }
231 InputData input(sharedfile, "sharedfile");
232 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
233 string lastLabel = lookup[0]->getLabel();
235 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
236 set<string> processedLabels;
237 set<string> userLabels = labels;
240 if (subsampleSize == -1) { //user has not set size, set size = smallest samples size
241 subsampleSize = lookup[0]->getNumSeqs();
242 for (int i = 1; i < lookup.size(); i++) {
243 int thisSize = lookup[i]->getNumSeqs();
245 if (thisSize < subsampleSize) { subsampleSize = thisSize; }
250 vector<SharedRAbundVector*> temp;
251 for (int i = 0; i < lookup.size(); i++) {
252 if (lookup[i]->getNumSeqs() < subsampleSize) {
253 m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine();
256 Groups.push_back(lookup[i]->getGroup());
257 temp.push_back(lookup[i]);
261 m->setGroups(Groups);
264 if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
268 //as long as you are not at the end of the file or done wih the lines you want
269 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
271 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
273 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
275 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
277 createProcesses(lookup);
279 processedLabels.insert(lookup[0]->getLabel());
280 userLabels.erase(lookup[0]->getLabel());
283 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
284 string saveLabel = lookup[0]->getLabel();
286 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
287 lookup = input.getSharedRAbundVectors(lastLabel);
288 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
290 createProcesses(lookup);
292 processedLabels.insert(lookup[0]->getLabel());
293 userLabels.erase(lookup[0]->getLabel());
295 //restore real lastlabel to save below
296 lookup[0]->setLabel(saveLabel);
299 lastLabel = lookup[0]->getLabel();
300 //prevent memory leak
301 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
303 if (m->control_pressed) { return 0; }
305 //get next line to process
306 lookup = input.getSharedRAbundVectors();
309 if (m->control_pressed) { return 0; }
311 //output error messages about any remaining user labels
312 set<string>::iterator it;
313 bool needToRun = false;
314 for (it = userLabels.begin(); it != userLabels.end(); it++) {
315 m->mothurOut("Your file does not include the label " + *it);
316 if (processedLabels.count(lastLabel) != 1) {
317 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
320 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
324 //run last label if you need to
325 if (needToRun == true) {
326 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
327 lookup = input.getSharedRAbundVectors(lastLabel);
329 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
331 createProcesses(lookup);
333 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
336 //output files created by command
337 m->mothurOutEndLine();
338 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
339 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
340 m->mothurOutEndLine();
344 catch(exception& e) {
345 m->errorOut(e, "GetMetaCommunityCommand", "execute");
349 //**********************************************************************************************************************
350 int GetMetaCommunityCommand::createProcesses(vector<SharedRAbundVector*>& thislookup){
353 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
355 processors=1; //qFinderDMM not thread safe
358 vector<int> processIDS;
361 int minPartition = 0;
364 if (maxpartitions < processors) { processors = maxpartitions; }
366 map<string, string> variables;
367 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
368 variables["[distance]"] = thislookup[0]->getLabel();
369 variables["[method]"] = method;
370 string outputFileName = getOutputFileName("fit", variables);
371 outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
373 //divide the partitions between the processors
374 vector< vector<int> > dividedPartitions;
375 vector< vector<string> > rels, matrix;
376 vector<string> doneFlags;
377 dividedPartitions.resize(processors);
378 rels.resize(processors);
379 matrix.resize(processors);
381 //for each file group figure out which process will complete it
382 //want to divide the load intelligently so the big files are spread between processes
383 for (int i=1; i<=maxpartitions; i++) {
385 int processToAssign = (i+1) % processors;
386 if (processToAssign == 0) { processToAssign = processors; }
388 if (m->debug) { m->mothurOut("[DEBUG]: assigning " + toString(i) + " to process " + toString(processToAssign-1) + "\n"); }
389 dividedPartitions[(processToAssign-1)].push_back(i);
391 variables["[tag]"] = toString(i);
392 string relName = getOutputFileName("relabund", variables);
393 string mName = getOutputFileName("matrix", variables);
394 rels[(processToAssign-1)].push_back(relName);
395 matrix[(processToAssign-1)].push_back(mName);
398 for (int i = 0; i < processors; i++) { //read from everyone elses, just write to yours
399 string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(i) + ".done.temp";
400 doneFlags.push_back(tempDoneFile);
402 m->openOutputFile(tempDoneFile, out); //clear out
407 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
409 //loop through and create all the processes you want
410 while (process != processors) {
414 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
418 num = processDriver(thislookup, dividedPartitions[process], (outputFileName + toString(getpid())), rels[process], matrix[process], doneFlags, process);
420 //pass numSeqs to parent
422 string tempFile = toString(getpid()) + ".outputNames.temp";
423 m->openOutputFile(tempFile, out);
425 out << outputNames.size() << endl;
426 for (int i = 0; i < outputNames.size(); i++) { out << outputNames[i] << endl; }
431 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
432 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
438 if (method == "dmm") { m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n"); }
440 m->mothurOut("K\tCH\t");
441 for (int i = 0; i < thislookup.size(); i++) { m->mothurOut(thislookup[i]->getGroup() + '\t'); }
444 minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
446 //force parent to wait until all the processes are done
447 for (int i=0;i<processIDS.size();i++) {
448 int temp = processIDS[i];
452 vector<string> tempOutputNames = outputNames;
453 for (int i=0;i<processIDS.size();i++) {
455 string tempFile = toString(processIDS[i]) + ".outputNames.temp";
456 m->openInputFile(tempFile, in);
459 in >> tempNum; m->gobble(in);
460 if (tempNum < minPartition) { minPartition = tempNum; }
461 in >> tempNum; m->gobble(in);
462 for (int i = 0; i < tempNum; i++) {
463 string tempName = "";
464 in >> tempName; m->gobble(in);
465 tempOutputNames.push_back(tempName);
468 in.close(); m->mothurRemove(tempFile);
470 m->appendFilesWithoutHeaders(outputFileName + toString(processIDS[i]), outputFileName);
471 m->mothurRemove(outputFileName + toString(processIDS[i]));
474 if (processors > 1) {
476 for (int i = 0; i < tempOutputNames.size(); i++) { //remove files if needed
477 string name = tempOutputNames[i];
478 vector<string> parts;
479 m->splitAtChar(name, parts, '.');
481 if (((parts[parts.size()-1] == "relabund") || (parts[parts.size()-1] == "posterior")) && (parts[parts.size()-2] == "mix")) {
482 string tempNum = parts[parts.size()-3];
483 int num; m->mothurConvert(tempNum, num);
484 //if (num > minPartition) {
485 // m->mothurRemove(tempOutputNames[i]);
486 // keep = false; if (m->debug) { m->mothurOut("[DEBUG]: removing " + tempOutputNames[i] + ".\n"); }
489 if (keep) { outputNames.push_back(tempOutputNames[i]); }
494 m->openInputFile(outputFileName, in);
495 string headers = m->getline(in); m->gobble(in);
497 map<int, string> file;
499 string numString, line;
501 in >> numString; line = m->getline(in); m->gobble(in);
502 m->mothurConvert(numString, num);
507 m->openOutputFile(outputFileName, out);
508 out << headers << endl;
509 for (map<int, string>::iterator it = file.begin(); it != file.end(); it++) {
510 out << it->first << '\t' << it->second << endl;
511 if (m->debug) { m->mothurOut("[DEBUG]: printing: " + toString(it->first) + '\t' + it->second + ".\n"); }
517 m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
518 minPartition = processDriver(thislookup, dividedPartitions[0], outputFileName, rels[0], matrix[0], doneFlags, 0);
520 for (int i = 0; i < processors; i++) { //read from everyone elses, just write to yours
521 string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(i) + ".done.temp";
522 m->mothurRemove(tempDoneFile);
525 if (m->control_pressed) { return 0; }
527 if (m->debug) { m->mothurOut("[DEBUG]: minPartition = " + toString(minPartition) + "\n"); }
529 //run generate Summary function for smallest minPartition
530 variables["[tag]"] = toString(minPartition);
531 vector<double> piValues = generateDesignFile(minPartition, variables);
532 if (method == "dmm") { generateSummaryFile(minPartition, variables, piValues); } //pam doesn't make a relabund file
537 catch(exception& e) {
538 m->errorOut(e, "GetMetaCommunityCommand", "createProcesses");
542 //**********************************************************************************************************************
543 int GetMetaCommunityCommand::processDriver(vector<SharedRAbundVector*>& thislookup, vector<int>& parts, string outputFileName, vector<string> relabunds, vector<string> matrix, vector<string> doneFlags, int processID){
546 double minLaplace = 1e10;
547 int minPartition = 1;
548 vector<double> minSilhouettes; minSilhouettes.resize(thislookup.size(), 0);
550 ofstream fitData, silData;
551 if (method == "dmm") {
552 m->openOutputFile(outputFileName, fitData);
553 fitData.setf(ios::fixed, ios::floatfield);
554 fitData.setf(ios::showpoint);
555 fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
556 }else if((method == "pam") || (method == "kmeans")) { //because ch is looking of maximal value
558 m->openOutputFile(outputFileName, silData);
559 silData.setf(ios::fixed, ios::floatfield);
560 silData.setf(ios::showpoint);
561 silData << "K\tCH\t";
562 for (int i = 0; i < thislookup.size(); i++) { silData << thislookup[i]->getGroup() << '\t'; }
566 cout.setf(ios::fixed, ios::floatfield);
567 cout.setf(ios::showpoint);
569 vector< vector<int> > sharedMatrix;
570 vector<string> thisGroups;
571 for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); thisGroups.push_back(thislookup[i]->getGroup()); }
573 vector< vector<double> > dists; //do we want to output this matrix??
574 if ((method == "pam") || (method == "kmeans")) { dists = generateDistanceMatrix(thislookup); }
577 m->mothurOut("[DEBUG]: dists = \n");
578 for (int i = 0; i < dists.size(); i++) {
579 if (m->control_pressed) { break; }
580 m->mothurOut("[DEBUG]: i = " + toString(i) + '\t');
581 for (int j = 0; j < i; j++) { m->mothurOut(toString(dists[i][j]) +"\t"); }
586 for(int i=0;i<parts.size();i++){
588 int numPartitions = parts[i];
590 if (m->debug) { m->mothurOut("[DEBUG]: running partition " + toString(numPartitions) + "\n"); }
592 if (m->control_pressed) { break; }
594 //check to see if anyone else is done
595 for (int j = 0; j < doneFlags.size(); j++) {
596 if (!m->isBlank(doneFlags[j])) { //another process has finished
597 //are they done at a lower partition?
599 m->openInputFile(doneFlags[j], in);
601 in >> tempNum; in.close();
602 if (tempNum < numPartitions) { break; } //quit, because someone else has finished
606 CommunityTypeFinder* finder = NULL;
607 if (method == "dmm") { finder = new qFinderDMM(sharedMatrix, numPartitions); }
608 else if (method == "kmeans") { finder = new KMeans(sharedMatrix, numPartitions); }
609 else if (method == "pam") { finder = new Pam(sharedMatrix, dists, numPartitions); }
611 if (i == 0) { m->mothurOut(method + " is not a valid method option. I will run the command using dmm.\n"); }
612 finder = new qFinderDMM(sharedMatrix, numPartitions);
615 string relabund = relabunds[i];
616 string matrixName = matrix[i];
617 outputNames.push_back(matrixName); outputTypes["matrix"].push_back(matrixName);
619 finder->printZMatrix(matrixName, thisGroups);
621 double chi; vector<double> silhouettes;
622 if (method == "dmm") {
623 double laplace = finder->getLaplace();
624 if(laplace < minLaplace){
625 minPartition = numPartitions;
626 minLaplace = laplace;
629 chi = finder->calcCHIndex(dists);
630 silhouettes = finder->calcSilhouettes(dists);
631 if (chi > minLaplace) { //save partition with maximum ch index score
632 minPartition = numPartitions;
634 minSilhouettes = silhouettes;
638 if (method == "dmm") {
639 finder->printFitData(cout, minLaplace);
640 finder->printFitData(fitData);
641 finder->printRelAbund(relabund, m->currentSharedBinLabels);
642 outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
643 }else if ((method == "pam") || (method == "kmeans")) { //print silouettes and ch values
644 finder->printSilData(cout, chi, silhouettes);
645 finder->printSilData(silData, chi, silhouettes);
646 if (method == "kmeans") {
647 finder->printRelAbund(relabund, m->currentSharedBinLabels);
648 outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
653 if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){
654 string tempDoneFile = m->getRootName(m->getSimpleName(sharedfile)) + toString(processID) + ".done.temp";
656 m->openOutputFile(tempDoneFile, outDone);
657 outDone << minPartition << endl;
662 if (method == "dmm") { fitData.close(); }
664 if (m->control_pressed) { return 0; }
668 catch(exception& e) {
669 m->errorOut(e, "GetMetaCommunityCommand", "processDriver");
673 /**************************************************************************************************/
675 vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, map<string,string> variables){
677 vector<double> piValues(numPartitions, 0);
680 variables["[tag]"] = toString(numPartitions);
681 string input = getOutputFileName("matrix", variables);
682 m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file
683 variables.erase("[tag]");
684 string outputFileName = getOutputFileName("design", variables);
686 m->openOutputFile(outputFileName, designFile);
687 outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName);
690 vector<string> titles(numPartitions);
692 for(int i=0;i<numPartitions;i++){ postFile >> titles[i]; }
700 if (m->control_pressed) { break; }
702 double maxPosterior = 0.0000;
703 int maxPartition = -1;
705 postFile >> sampleName;
707 for(int i=0;i<numPartitions;i++){
709 postFile >> posterior;
710 if(posterior > maxPosterior){
711 maxPosterior = posterior;
714 piValues[i] += posterior;
718 designFile << sampleName << '\t' << titles[maxPartition] << endl;
723 for(int i=0;i<numPartitions;i++){
724 piValues[i] /= (double)numSamples;
733 catch(exception& e) {
734 m->errorOut(e, "GetMetaCommunityCommand", "generateDesignFile");
739 /**************************************************************************************************/
741 inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; }
743 /**************************************************************************************************/
744 int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, map<string,string> v, vector<double> piValues){
746 vector<summaryData> summary;
748 vector<double> pMean(numPartitions, 0);
749 vector<double> pLCI(numPartitions, 0);
750 vector<double> pUCI(numPartitions, 0);
753 double mean, lci, uci;
755 ifstream referenceFile;
756 map<string, string> variables;
757 variables["[filename]"] = v["[filename]"];
758 variables["[distance]"] = v["[distance]"];
759 variables["[method]"] = method;
760 variables["[tag]"] = "1";
761 string reference = getOutputFileName("relabund", variables);
762 m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
763 variables["[tag]"] = toString(numPartitions);
764 string partFile = getOutputFileName("relabund", variables);
765 ifstream partitionFile;
766 m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str());
768 header = m->getline(referenceFile);
769 header = m->getline(partitionFile);
770 stringstream head(header);
773 vector<string> thetaValues(numPartitions, "");
774 for(int i=0;i<numPartitions;i++){
775 head >> label >> dummy >> dummy;
776 thetaValues[i] = label.substr(label.find_last_of('_')+1);
780 vector<double> partitionDiff(numPartitions, 0.0000);
782 while(referenceFile){
784 if (m->control_pressed) { break; }
786 referenceFile >> name >> mean >> lci >> uci;
788 summaryData tempData;
789 tempData.name = name;
790 tempData.refMean = mean;
792 double difference = 0.0000;
794 partitionFile >> name;
795 for(int j=0;j<numPartitions;j++){
796 partitionFile >> pMean[j] >> pLCI[j] >> pUCI[j];
797 difference += abs(mean - pMean[j]);
798 partitionDiff[j] += abs(mean - pMean[j]);;
801 tempData.partMean = pMean;
802 tempData.partLCI = pLCI;
803 tempData.partUCI = pUCI;
804 tempData.difference = difference;
805 summary.push_back(tempData);
807 m->gobble(referenceFile);
808 m->gobble(partitionFile);
810 referenceFile.close();
811 partitionFile.close();
813 if (m->control_pressed) { return 0; }
815 int numOTUs = (int)summary.size();
817 sort(summary.begin(), summary.end(), summaryFunction);
819 variables.erase("[tag]");
820 string outputFileName = getOutputFileName("parameters", variables);
821 outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName);
823 ofstream parameterFile;
824 m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str());
825 parameterFile.setf(ios::fixed, ios::floatfield);
826 parameterFile.setf(ios::showpoint);
828 double totalDifference = 0.0000;
829 parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n";
830 for(int i=0;i<numPartitions;i++){
831 if (m->control_pressed) { break; }
832 parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl;
833 totalDifference += partitionDiff[i];
835 parameterFile.close();
837 if (m->control_pressed) { return 0; }
839 string summaryFileName = getOutputFileName("summary", variables);
840 outputNames.push_back(summaryFileName); outputTypes["summary"].push_back(summaryFileName);
842 ofstream summaryFile;
843 m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str());
844 summaryFile.setf(ios::fixed, ios::floatfield);
845 summaryFile.setf(ios::showpoint);
848 summaryFile << "OTU\tP0.mean";
849 for(int i=0;i<numPartitions;i++){
850 summaryFile << "\tP" << i+1 << ".mean\tP" << i+1 << ".lci\tP" << i+1 << ".uci";
852 summaryFile << "\tDifference\tCumFraction" << endl;
854 double cumDiff = 0.0000;
856 for(int i=0;i<numOTUs;i++){
857 if (m->control_pressed) { break; }
858 summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean;
859 for(int j=0;j<numPartitions;j++){
860 summaryFile << '\t' << summary[i].partMean[j] << '\t' << summary[i].partLCI[j] << '\t' << summary[i].partUCI[j];
863 cumDiff += summary[i].difference/totalDifference;
864 summaryFile << '\t' << summary[i].difference << '\t' << cumDiff << endl;
871 catch(exception& e) {
872 m->errorOut(e, "GetMetaCommunityCommand", "generateSummaryFile");
877 //**********************************************************************************************************************
878 vector<vector<double> > GetMetaCommunityCommand::generateDistanceMatrix(vector<SharedRAbundVector*>& thisLookup){
880 vector<vector<double> > results;
882 Calculator* matrixCalculator;
883 ValidCalculators validCalculator;
886 if (validCalculator.isValidCalculator("matrix", Estimators[i]) == true) {
887 if (Estimators[i] == "sharedsobs") {
888 matrixCalculator = new SharedSobsCS();
889 }else if (Estimators[i] == "sharedchao") {
890 matrixCalculator = new SharedChao1();
891 }else if (Estimators[i] == "sharedace") {
892 matrixCalculator = new SharedAce();
893 }else if (Estimators[i] == "jabund") {
894 matrixCalculator = new JAbund();
895 }else if (Estimators[i] == "sorabund") {
896 matrixCalculator = new SorAbund();
897 }else if (Estimators[i] == "jclass") {
898 matrixCalculator = new Jclass();
899 }else if (Estimators[i] == "sorclass") {
900 matrixCalculator = new SorClass();
901 }else if (Estimators[i] == "jest") {
902 matrixCalculator = new Jest();
903 }else if (Estimators[i] == "sorest") {
904 matrixCalculator = new SorEst();
905 }else if (Estimators[i] == "thetayc") {
906 matrixCalculator = new ThetaYC();
907 }else if (Estimators[i] == "thetan") {
908 matrixCalculator = new ThetaN();
909 }else if (Estimators[i] == "kstest") {
910 matrixCalculator = new KSTest();
911 }else if (Estimators[i] == "sharednseqs") {
912 matrixCalculator = new SharedNSeqs();
913 }else if (Estimators[i] == "ochiai") {
914 matrixCalculator = new Ochiai();
915 }else if (Estimators[i] == "anderberg") {
916 matrixCalculator = new Anderberg();
917 }else if (Estimators[i] == "kulczynski") {
918 matrixCalculator = new Kulczynski();
919 }else if (Estimators[i] == "kulczynskicody") {
920 matrixCalculator = new KulczynskiCody();
921 }else if (Estimators[i] == "lennon") {
922 matrixCalculator = new Lennon();
923 }else if (Estimators[i] == "morisitahorn") {
924 matrixCalculator = new MorHorn();
925 }else if (Estimators[i] == "braycurtis") {
926 matrixCalculator = new BrayCurtis();
927 }else if (Estimators[i] == "whittaker") {
928 matrixCalculator = new Whittaker();
929 }else if (Estimators[i] == "odum") {
930 matrixCalculator = new Odum();
931 }else if (Estimators[i] == "canberra") {
932 matrixCalculator = new Canberra();
933 }else if (Estimators[i] == "structeuclidean") {
934 matrixCalculator = new StructEuclidean();
935 }else if (Estimators[i] == "structchord") {
936 matrixCalculator = new StructChord();
937 }else if (Estimators[i] == "hellinger") {
938 matrixCalculator = new Hellinger();
939 }else if (Estimators[i] == "manhattan") {
940 matrixCalculator = new Manhattan();
941 }else if (Estimators[i] == "structpearson") {
942 matrixCalculator = new StructPearson();
943 }else if (Estimators[i] == "soergel") {
944 matrixCalculator = new Soergel();
945 }else if (Estimators[i] == "spearman") {
946 matrixCalculator = new Spearman();
947 }else if (Estimators[i] == "structkulczynski") {
948 matrixCalculator = new StructKulczynski();
949 }else if (Estimators[i] == "speciesprofile") {
950 matrixCalculator = new SpeciesProfile();
951 }else if (Estimators[i] == "hamming") {
952 matrixCalculator = new Hamming();
953 }else if (Estimators[i] == "structchi2") {
954 matrixCalculator = new StructChi2();
955 }else if (Estimators[i] == "gower") {
956 matrixCalculator = new Gower();
957 }else if (Estimators[i] == "memchi2") {
958 matrixCalculator = new MemChi2();
959 }else if (Estimators[i] == "memchord") {
960 matrixCalculator = new MemChord();
961 }else if (Estimators[i] == "memeuclidean") {
962 matrixCalculator = new MemEuclidean();
963 }else if (Estimators[i] == "mempearson") {
964 matrixCalculator = new MemPearson();
965 }else if (Estimators[i] == "jsd") {
966 matrixCalculator = new JSD();
967 }else if (Estimators[i] == "rjsd") {
968 matrixCalculator = new RJSD();
970 m->mothurOut("[ERROR]: " + Estimators[i] + " is not a valid calculator, please correct.\n"); m->control_pressed = true; return results;
975 vector< vector< vector<seqDist> > > calcDistsTotals; //each iter, then each groupCombos dists. this will be used to make .dist files
976 vector< vector<seqDist> > calcDists; calcDists.resize(1);
978 for (int thisIter = 0; thisIter < iters+1; thisIter++) {
980 vector<SharedRAbundVector*> thisItersLookup = thisLookup;
982 if (subsample && (thisIter != 0)) {
984 vector<string> tempLabels; //dont need since we arent printing the sampled sharedRabunds
986 //make copy of lookup so we don't get access violations
987 vector<SharedRAbundVector*> newLookup;
988 for (int k = 0; k < thisItersLookup.size(); k++) {
989 SharedRAbundVector* temp = new SharedRAbundVector();
990 temp->setLabel(thisItersLookup[k]->getLabel());
991 temp->setGroup(thisItersLookup[k]->getGroup());
992 newLookup.push_back(temp);
996 for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) {
997 if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return results; }
998 for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); }
1001 tempLabels = sample.getSample(newLookup, subsampleSize);
1002 thisItersLookup = newLookup;
1006 driver(thisItersLookup, calcDists, matrixCalculator);
1008 if (subsample && (thisIter != 0)) {
1009 if((thisIter) % 100 == 0){ m->mothurOutJustToScreen(toString(thisIter)+"\n"); }
1010 calcDistsTotals.push_back(calcDists);
1011 for (int i = 0; i < calcDists.size(); i++) {
1012 for (int j = 0; j < calcDists[i].size(); j++) {
1013 if (m->debug) { m->mothurOut("[DEBUG]: Results: iter = " + toString(thisIter) + ", " + thisLookup[calcDists[i][j].seq1]->getGroup() + " - " + thisLookup[calcDists[i][j].seq2]->getGroup() + " distance = " + toString(calcDists[i][j].dist) + ".\n"); }
1017 for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; }
1018 thisItersLookup.clear();
1019 }else { //print results for whole dataset
1020 for (int i = 0; i < calcDists.size(); i++) {
1021 if (m->control_pressed) { break; }
1024 results.resize(thisLookup.size());
1025 for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); }
1027 for (int j = 0; j < calcDists[i].size(); j++) {
1028 int row = calcDists[i][j].seq1;
1029 int column = calcDists[i][j].seq2;
1030 double dist = calcDists[i][j].dist;
1032 results[row][column] = dist;
1033 results[column][row] = dist;
1037 for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
1041 //we need to find the average distance and standard deviation for each groups distance
1042 vector< vector<seqDist> > calcAverages = m->getAverages(calcDistsTotals, "average");
1045 for (int i = 0; i < calcDists.size(); i++) {
1046 results.resize(thisLookup.size());
1047 for (int k = 0; k < thisLookup.size(); k++) { results[k].resize(thisLookup.size(), 0.0); }
1049 for (int j = 0; j < calcAverages[i].size(); j++) {
1050 int row = calcAverages[i][j].seq1;
1051 int column = calcAverages[i][j].seq2;
1052 float dist = calcAverages[i][j].dist;
1054 results[row][column] = dist;
1055 results[column][row] = dist;
1063 catch(exception& e) {
1064 m->errorOut(e, "GetMetaCommunityCommand", "generateDistanceMatrix");
1068 /**************************************************************************************************/
1069 int GetMetaCommunityCommand::driver(vector<SharedRAbundVector*> thisLookup, vector< vector<seqDist> >& calcDists, Calculator* matrixCalculator) {
1071 vector<SharedRAbundVector*> subset;
1073 for (int k = 0; k < thisLookup.size(); k++) { // pass cdd each set of groups to compare
1075 for (int l = 0; l < k; l++) {
1077 if (k != l) { //we dont need to similiarity of a groups to itself
1078 subset.clear(); //clear out old pair of sharedrabunds
1079 //add new pair of sharedrabunds
1080 subset.push_back(thisLookup[k]); subset.push_back(thisLookup[l]);
1084 //if this calc needs all groups to calculate the pair load all groups
1085 if (matrixCalculator->getNeedsAll()) {
1086 //load subset with rest of lookup for those calcs that need everyone to calc for a pair
1087 for (int w = 0; w < thisLookup.size(); w++) {
1088 if ((w != k) && (w != l)) { subset.push_back(thisLookup[w]); }
1092 vector<double> tempdata = matrixCalculator->getValues(subset); //saves the calculator outputs
1094 if (m->control_pressed) { return 1; }
1096 seqDist temp(l, k, tempdata[0]);
1097 //cout << l << '\t' << k << '\t' << tempdata[0] << endl;
1098 calcDists[0].push_back(temp);
1106 catch(exception& e) {
1107 m->errorOut(e, "MatrixOutputCommand", "driver");
1111 //**********************************************************************************************************************