2 // getmetacommunitycommand.cpp
5 // Created by SarahsWork on 4/9/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "getmetacommunitycommand.h"
10 #include "qFinderDMM.h"
12 //**********************************************************************************************************************
13 vector<string> GetMetaCommunityCommand::setParameters(){
15 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
16 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18 CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
19 CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
20 CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
29 m->errorOut(e, "NewCommand", "setParameters");
33 //**********************************************************************************************************************
34 string GetMetaCommunityCommand::getHelpString(){
36 string helpString = "";
37 helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n";
38 helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
39 helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed. Group names are separated by dashes.\n";
40 helpString += "The minpartitions parameter is used to .... Default=5.\n";
41 helpString += "The maxpartitions parameter is used to .... Default=10.\n";
42 helpString += "The optimizegap parameter is used to .... Default=3.\n";
43 helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n";
47 m->errorOut(e, "GetMetaCommunityCommand", "getHelpString");
51 //**********************************************************************************************************************
52 string GetMetaCommunityCommand::getOutputPattern(string type) {
56 if (type == "fit") { pattern = "[filename],[distance],mix.fit"; }
57 else if (type == "relabund") { pattern = "[filename],[distance],[tag],mix.relabund"; }
58 else if (type == "design") { pattern = "[filename],mix.design"; }
59 else if (type == "matrix") { pattern = "[filename],[distance],[tag],mix.posterior"; }
60 else if (type == "parameters") { pattern = "[filename],[distance],mix.parameters"; }
61 else if (type == "summary") { pattern = "[filename],[distance],mix.summary"; }
62 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
67 m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern");
71 //**********************************************************************************************************************
72 GetMetaCommunityCommand::GetMetaCommunityCommand(){
74 abort = true; calledHelp = true;
76 vector<string> tempOutNames;
77 outputTypes["fit"] = tempOutNames;
78 outputTypes["relabund"] = tempOutNames;
79 outputTypes["matrix"] = tempOutNames;
80 outputTypes["design"] = tempOutNames;
81 outputTypes["parameters"] = tempOutNames;
82 outputTypes["summary"] = tempOutNames;
85 m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
89 //**********************************************************************************************************************
90 GetMetaCommunityCommand::GetMetaCommunityCommand(string option) {
92 abort = false; calledHelp = false;
95 //allow user to run help
96 if(option == "help") { help(); abort = true; calledHelp = true; }
97 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
100 //valid paramters for this command
101 vector<string> myArray = setParameters();
103 OptionParser parser(option);
104 map<string,string> parameters = parser.getParameters();
106 ValidParameters validParameter;
107 map<string,string>::iterator it;
108 //check to make sure all parameters are valid for command
109 for (it = parameters.begin(); it != parameters.end(); it++) {
110 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
113 vector<string> tempOutNames;
114 outputTypes["fit"] = tempOutNames;
115 outputTypes["relabund"] = tempOutNames;
116 outputTypes["matrix"] = tempOutNames;
117 outputTypes["design"] = tempOutNames;
118 outputTypes["parameters"] = tempOutNames;
119 outputTypes["summary"] = tempOutNames;
121 //if the user changes the input directory command factory will send this info to us in the output parameter
122 string inputDir = validParameter.validFile(parameters, "inputdir", false);
123 if (inputDir == "not found"){ inputDir = ""; }
126 it = parameters.find("shared");
127 if(it != parameters.end()){
128 path = m->hasPath(it->second);
129 if (path == "") { parameters["shared"] = inputDir + it->second; }
133 //get shared file, it is required
134 sharedfile = validParameter.validFile(parameters, "shared", true);
135 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
136 else if (sharedfile == "not found") {
137 //if there is a current shared file, use it
138 sharedfile = m->getSharedFile();
139 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
140 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
141 }else { m->setSharedFile(sharedfile); }
143 //if the user changes the output directory command factory will send this info to us in the output parameter
144 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
145 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
148 string temp = validParameter.validFile(parameters, "minpartitions", false); if (temp == "not found"){ temp = "5"; }
149 m->mothurConvert(temp, minpartitions);
151 temp = validParameter.validFile(parameters, "maxpartitions", false); if (temp == "not found"){ temp = "10"; }
152 m->mothurConvert(temp, maxpartitions);
154 temp = validParameter.validFile(parameters, "optimizegap", false); if (temp == "not found"){ temp = "3"; }
155 m->mothurConvert(temp, optimizegap);
157 string groups = validParameter.validFile(parameters, "groups", false);
158 if (groups == "not found") { groups = ""; }
159 else { m->splitAtDash(groups, Groups); }
160 m->setGroups(Groups);
162 string label = validParameter.validFile(parameters, "label", false);
163 if (label == "not found") { label = ""; }
165 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
166 else { allLines = 1; }
171 catch(exception& e) {
172 m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
176 //**********************************************************************************************************************
178 int GetMetaCommunityCommand::execute(){
181 if (abort == true) { if (calledHelp) { return 0; } return 2; }
183 InputData input(sharedfile, "sharedfile");
184 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
185 string lastLabel = lookup[0]->getLabel();
187 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
188 set<string> processedLabels;
189 set<string> userLabels = labels;
191 //as long as you are not at the end of the file or done wih the lines you want
192 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
194 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
196 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
198 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
202 processedLabels.insert(lookup[0]->getLabel());
203 userLabels.erase(lookup[0]->getLabel());
206 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
207 string saveLabel = lookup[0]->getLabel();
209 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
210 lookup = input.getSharedRAbundVectors(lastLabel);
211 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
215 processedLabels.insert(lookup[0]->getLabel());
216 userLabels.erase(lookup[0]->getLabel());
218 //restore real lastlabel to save below
219 lookup[0]->setLabel(saveLabel);
222 lastLabel = lookup[0]->getLabel();
223 //prevent memory leak
224 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
226 if (m->control_pressed) { return 0; }
228 //get next line to process
229 lookup = input.getSharedRAbundVectors();
232 if (m->control_pressed) { return 0; }
234 //output error messages about any remaining user labels
235 set<string>::iterator it;
236 bool needToRun = false;
237 for (it = userLabels.begin(); it != userLabels.end(); it++) {
238 m->mothurOut("Your file does not include the label " + *it);
239 if (processedLabels.count(lastLabel) != 1) {
240 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
243 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
247 //run last label if you need to
248 if (needToRun == true) {
249 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
250 lookup = input.getSharedRAbundVectors(lastLabel);
252 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
256 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
259 //output files created by command
260 m->mothurOutEndLine();
261 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
262 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
263 m->mothurOutEndLine();
267 catch(exception& e) {
268 m->errorOut(e, "GetMetaCommunityCommand", "execute");
272 //**********************************************************************************************************************
273 int GetMetaCommunityCommand::process(vector<SharedRAbundVector*>& thislookup){
276 double minLaplace = 1e10;
277 int minPartition = 0;
279 map<string, string> variables;
280 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
281 variables["[distance]"] = thislookup[0]->getLabel();
282 string outputFileName = getOutputFileName("fit", variables);
283 outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
286 m->openOutputFile(outputFileName, fitData);
287 fitData.setf(ios::fixed, ios::floatfield);
288 fitData.setf(ios::showpoint);
289 cout.setf(ios::fixed, ios::floatfield);
290 cout.setf(ios::showpoint);
292 vector< vector<int> > sharedMatrix;
293 for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); }
295 m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
296 fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
298 for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){
300 if (m->control_pressed) { break; }
302 qFinderDMM findQ(sharedMatrix, numPartitions);
304 double laplace = findQ.getLaplace();
305 m->mothurOut(toString(numPartitions) + '\t');
306 cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
307 m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
308 cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
309 m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
311 fitData << numPartitions << '\t';
312 fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
313 fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
315 if(laplace < minLaplace){
316 minPartition = numPartitions;
317 minLaplace = laplace;
320 m->mothurOutEndLine();
322 variables["[tag]"] = toString(numPartitions);
323 string relabund = getOutputFileName("relabund", variables);
324 outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
325 string matrix = getOutputFileName("matrix", variables);
326 outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix);
328 findQ.printZMatrix(matrix, m->getGroups());
329 findQ.printRelAbund(relabund, m->currentBinLabels);
331 if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break; }
337 if (m->control_pressed) { return 0; }
339 generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel());
344 catch(exception& e) {
345 m->errorOut(e, "GetMetaCommunityCommand", "process");
349 /**************************************************************************************************/
351 vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){
353 vector<double> piValues(numPartitions, 0);
356 m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file
357 map<string, string> variables;
358 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input));
359 string outputFileName = getOutputFileName("design", variables);
361 m->openOutputFile(outputFileName, designFile);
362 outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName);
365 vector<string> titles(numPartitions);
367 for(int i=0;i<numPartitions;i++){ postFile >> titles[i]; }
375 if (m->control_pressed) { break; }
377 double maxPosterior = 0.0000;
378 int maxPartition = -1;
380 postFile >> sampleName;
382 for(int i=0;i<numPartitions;i++){
384 postFile >> posterior;
385 if(posterior > maxPosterior){
386 maxPosterior = posterior;
389 piValues[i] += posterior;
393 designFile << sampleName << '\t' << titles[maxPartition] << endl;
398 for(int i=0;i<numPartitions;i++){
399 piValues[i] /= (double)numSamples;
408 catch(exception& e) {
409 m->errorOut(e, "GetMetaCommunityCommand", "generateDesignFile");
414 /**************************************************************************************************/
416 inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference; }
418 /**************************************************************************************************/
419 int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){
421 vector<summaryData> summary;
423 vector<double> pMean(numPartitions, 0);
424 vector<double> pLCI(numPartitions, 0);
425 vector<double> pUCI(numPartitions, 0);
428 double mean, lci, uci;
431 vector<double> piValues = generateDesignFile(numPartitions, designInput);
433 ifstream referenceFile;
434 m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
435 ifstream partitionFile;
436 m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str());
438 header = m->getline(referenceFile);
439 header = m->getline(partitionFile);
440 stringstream head(header);
443 vector<string> thetaValues(numPartitions, "");
444 for(int i=0;i<numPartitions;i++){
445 head >> label >> dummy >> dummy;
446 thetaValues[i] = label.substr(label.find_last_of('_')+1);
450 vector<double> partitionDiff(numPartitions, 0.0000);
452 while(referenceFile){
454 if (m->control_pressed) { break; }
456 referenceFile >> name >> mean >> lci >> uci;
458 summaryData tempData;
459 tempData.name = name;
460 tempData.refMean = mean;
462 double difference = 0.0000;
464 partitionFile >> name;
465 for(int j=0;j<numPartitions;j++){
466 partitionFile >> pMean[j] >> pLCI[j] >> pUCI[j];
467 difference += abs(mean - pMean[j]);
468 partitionDiff[j] += abs(mean - pMean[j]);;
471 tempData.partMean = pMean;
472 tempData.partLCI = pLCI;
473 tempData.partUCI = pUCI;
474 tempData.difference = difference;
475 summary.push_back(tempData);
477 m->gobble(referenceFile);
478 m->gobble(partitionFile);
480 referenceFile.close();
481 partitionFile.close();
483 if (m->control_pressed) { return 0; }
485 int numOTUs = (int)summary.size();
487 sort(summary.begin(), summary.end(), summaryFunction);
489 map<string, string> variables;
490 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
491 variables["[distance]"] = label;
492 string outputFileName = getOutputFileName("parameters", variables);
493 outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName);
495 ofstream parameterFile;
496 m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str());
497 parameterFile.setf(ios::fixed, ios::floatfield);
498 parameterFile.setf(ios::showpoint);
500 double totalDifference = 0.0000;
501 parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n";
502 for(int i=0;i<numPartitions;i++){
503 if (m->control_pressed) { break; }
504 parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl;
505 totalDifference += partitionDiff[i];
507 parameterFile.close();
509 if (m->control_pressed) { return 0; }
511 string summaryFileName = getOutputFileName("summary", variables);
512 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
514 ofstream summaryFile;
515 m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str());
516 summaryFile.setf(ios::fixed, ios::floatfield);
517 summaryFile.setf(ios::showpoint);
520 summaryFile << "OTU\tP0.mean";
521 for(int i=0;i<numPartitions;i++){
522 summaryFile << "\tP" << i+1 << ".mean\tP" << i+1 << ".lci\tP" << i+1 << ".uci";
524 summaryFile << "\tDifference\tCumFraction" << endl;
526 double cumDiff = 0.0000;
528 for(int i=0;i<numOTUs;i++){
529 if (m->control_pressed) { break; }
530 summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean;
531 for(int j=0;j<numPartitions;j++){
532 summaryFile << '\t' << summary[i].partMean[j] << '\t' << summary[i].partLCI[j] << '\t' << summary[i].partUCI[j];
535 cumDiff += summary[i].difference/totalDifference;
536 summaryFile << '\t' << summary[i].difference << '\t' << cumDiff << endl;
543 catch(exception& e) {
544 m->errorOut(e, "GetMetaCommunityCommand", "generateSummaryFile");
549 //**********************************************************************************************************************