]> git.donarmstrong.com Git - mothur.git/blob - getmetacommunitycommand.cpp
59efe608e624ebc4ab9e71ce77aba23554bd40cb
[mothur.git] / getmetacommunitycommand.cpp
1 //
2 //  getmetacommunitycommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 4/9/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "getmetacommunitycommand.h"
10 #include "qFinderDMM.h"
11
12 //**********************************************************************************************************************
13 vector<string> GetMetaCommunityCommand::setParameters(){
14         try {
15         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
16         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
17                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
18         CommandParameter pminpartitions("minpartitions", "Number", "", "5", "", "", "","",false,false,true); parameters.push_back(pminpartitions);
19         CommandParameter pmaxpartitions("maxpartitions", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pmaxpartitions);
20         CommandParameter poptimizegap("optimizegap", "Number", "", "3", "", "", "","",false,false,true); parameters.push_back(poptimizegap);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "NewCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string GetMetaCommunityCommand::getHelpString(){
35         try {
36                 string helpString = "";
37                 helpString += "The get.metacommunity command parameters are shared, label, groups, minpartitions, maxpartitions and optimizegap. The shared file is required. \n";
38         helpString += "The label parameter is used to analyze specific labels in your input. labels are separated by dashes.\n";
39                 helpString += "The groups parameter allows you to specify which of the groups in your shared file you would like analyzed.  Group names are separated by dashes.\n";
40                 helpString += "The minpartitions parameter is used to .... Default=5.\n";
41         helpString += "The maxpartitions parameter is used to .... Default=10.\n";
42         helpString += "The optimizegap parameter is used to .... Default=3.\n";
43                 helpString += "The get.metacommunity command should be in the following format: get.metacommunity(shared=yourSharedFile).\n";
44                 return helpString;
45         }
46         catch(exception& e) {
47                 m->errorOut(e, "GetMetaCommunityCommand", "getHelpString");
48                 exit(1);
49         }
50 }
51 //**********************************************************************************************************************
52 string GetMetaCommunityCommand::getOutputPattern(string type) {
53     try {
54         string pattern = "";
55         
56         if (type == "fit")              {  pattern = "[filename],[distance],mix.fit"; }
57         else if (type == "relabund")    {  pattern = "[filename],[distance],[tag],mix.relabund"; }
58         else if (type == "design")      {  pattern = "[filename],mix.design"; }
59         else if (type == "matrix")      {  pattern = "[filename],[distance],[tag],mix.posterior"; }
60         else if (type == "parameters")  {  pattern = "[filename],[distance],mix.parameters"; }
61         else if (type == "summary")  {  pattern = "[filename],[distance],mix.summary"; }
62         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
63         
64         return pattern;
65     }
66     catch(exception& e) {
67         m->errorOut(e, "GetMetaCommunityCommand", "getOutputPattern");
68         exit(1);
69     }
70 }
71 //**********************************************************************************************************************
72 GetMetaCommunityCommand::GetMetaCommunityCommand(){
73         try {
74                 abort = true; calledHelp = true;
75                 setParameters();
76         vector<string> tempOutNames;
77                 outputTypes["fit"] = tempOutNames;
78         outputTypes["relabund"] = tempOutNames;
79         outputTypes["matrix"] = tempOutNames;
80         outputTypes["design"] = tempOutNames;
81         outputTypes["parameters"] = tempOutNames;
82         outputTypes["summary"] = tempOutNames;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
86                 exit(1);
87         }
88 }
89 //**********************************************************************************************************************
90 GetMetaCommunityCommand::GetMetaCommunityCommand(string option)  {
91         try {
92                 abort = false; calledHelp = false;
93         allLines=true;
94                 
95                 //allow user to run help
96                 if(option == "help") { help(); abort = true; calledHelp = true; }
97                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
98                 
99                 else {
100                         //valid paramters for this command
101                         vector<string> myArray = setParameters();
102                         
103                         OptionParser parser(option);
104                         map<string,string> parameters = parser.getParameters();
105                         
106                         ValidParameters validParameter;
107                         map<string,string>::iterator it;
108                         //check to make sure all parameters are valid for command
109                         for (it = parameters.begin(); it != parameters.end(); it++) {
110                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
111                         }
112                         
113             vector<string> tempOutNames;
114             outputTypes["fit"] = tempOutNames;
115             outputTypes["relabund"] = tempOutNames;
116             outputTypes["matrix"] = tempOutNames;
117             outputTypes["design"] = tempOutNames;
118             outputTypes["parameters"] = tempOutNames;
119                         outputTypes["summary"] = tempOutNames;
120             
121                         //if the user changes the input directory command factory will send this info to us in the output parameter
122                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
123                         if (inputDir == "not found"){   inputDir = "";          }
124                         else {
125                 string path;
126                 it = parameters.find("shared");
127                                 if(it != parameters.end()){
128                                         path = m->hasPath(it->second);
129                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
130                                 }
131             }
132                        
133             //get shared file, it is required
134                         sharedfile = validParameter.validFile(parameters, "shared", true);
135                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
136                         else if (sharedfile == "not found") {
137                                 //if there is a current shared file, use it
138                                 sharedfile = m->getSharedFile();
139                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
140                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
141                         }else { m->setSharedFile(sharedfile); }
142             
143             //if the user changes the output directory command factory will send this info to us in the output parameter
144                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
145                                 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
146                         }
147             
148             string temp = validParameter.validFile(parameters, "minpartitions", false); if (temp == "not found"){       temp = "5";      }
149                         m->mothurConvert(temp, minpartitions);
150             
151             temp = validParameter.validFile(parameters, "maxpartitions", false);        if (temp == "not found"){       temp = "10";     }
152                         m->mothurConvert(temp, maxpartitions);
153             
154             temp = validParameter.validFile(parameters, "optimizegap", false);          if (temp == "not found"){       temp = "3";      }
155                         m->mothurConvert(temp, optimizegap);
156             
157             string groups = validParameter.validFile(parameters, "groups", false);
158                         if (groups == "not found") { groups = ""; }
159                         else { m->splitAtDash(groups, Groups); }
160                         m->setGroups(Groups);
161             
162             string label = validParameter.validFile(parameters, "label", false);
163                         if (label == "not found") { label = ""; }
164                         else {
165                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
166                                 else { allLines = 1;  }
167                         }
168                 }
169                 
170         }
171         catch(exception& e) {
172                 m->errorOut(e, "GetMetaCommunityCommand", "GetMetaCommunityCommand");
173                 exit(1);
174         }
175 }
176 //**********************************************************************************************************************
177
178 int GetMetaCommunityCommand::execute(){
179         try {
180                 
181                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
182         
183         InputData input(sharedfile, "sharedfile");
184         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
185         string lastLabel = lookup[0]->getLabel();
186         
187         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
188         set<string> processedLabels;
189         set<string> userLabels = labels;
190         
191         //as long as you are not at the end of the file or done wih the lines you want
192         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
193             
194             if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
195             
196             if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
197                 
198                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
199                 
200                 process(lookup);
201                 
202                 processedLabels.insert(lookup[0]->getLabel());
203                 userLabels.erase(lookup[0]->getLabel());
204             }
205             
206             if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
207                 string saveLabel = lookup[0]->getLabel();
208                 
209                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
210                 lookup = input.getSharedRAbundVectors(lastLabel);
211                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
212                 
213                 process(lookup);
214                 
215                 processedLabels.insert(lookup[0]->getLabel());
216                 userLabels.erase(lookup[0]->getLabel());
217                 
218                 //restore real lastlabel to save below
219                 lookup[0]->setLabel(saveLabel);
220             }
221             
222             lastLabel = lookup[0]->getLabel();
223             //prevent memory leak
224             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
225             
226             if (m->control_pressed) { return 0; }
227             
228             //get next line to process
229             lookup = input.getSharedRAbundVectors();
230         }
231         
232         if (m->control_pressed) {  return 0; }
233         
234         //output error messages about any remaining user labels
235         set<string>::iterator it;
236         bool needToRun = false;
237         for (it = userLabels.begin(); it != userLabels.end(); it++) {
238             m->mothurOut("Your file does not include the label " + *it);
239             if (processedLabels.count(lastLabel) != 1) {
240                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
241                 needToRun = true;
242             }else {
243                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
244             }
245         }
246         
247         //run last label if you need to
248         if (needToRun == true)  {
249             for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
250             lookup = input.getSharedRAbundVectors(lastLabel);
251             
252             m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
253             
254             process(lookup);
255             
256             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
257         }
258                 
259         //output files created by command
260                 m->mothurOutEndLine();
261                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
262                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
263                 m->mothurOutEndLine();
264         return 0;
265                 
266     }
267         catch(exception& e) {
268                 m->errorOut(e, "GetMetaCommunityCommand", "execute");
269                 exit(1);
270         }
271 }
272 //**********************************************************************************************************************
273 int GetMetaCommunityCommand::process(vector<SharedRAbundVector*>& thislookup){
274         try {
275         
276         double minLaplace = 1e10;
277         int minPartition = 0;
278         
279         map<string, string> variables;
280         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
281         variables["[distance]"] = thislookup[0]->getLabel();
282                 string outputFileName = getOutputFileName("fit", variables);
283         outputNames.push_back(outputFileName); outputTypes["fit"].push_back(outputFileName);
284         
285                 ofstream fitData;
286                 m->openOutputFile(outputFileName, fitData);
287         fitData.setf(ios::fixed, ios::floatfield);
288         fitData.setf(ios::showpoint);
289         cout.setf(ios::fixed, ios::floatfield);
290         cout.setf(ios::showpoint);
291
292         vector< vector<int> > sharedMatrix;
293         for (int i = 0; i < thislookup.size(); i++) { sharedMatrix.push_back(thislookup[i]->getAbundances()); }
294         
295         m->mothurOut("K\tNLE\t\tlogDet\tBIC\t\tAIC\t\tLaplace\n");
296         fitData << "K\tNLE\tlogDet\tBIC\tAIC\tLaplace" << endl;
297         
298         for(int numPartitions=1;numPartitions<=maxpartitions;numPartitions++){
299             
300             if (m->control_pressed) { break; }
301             
302             qFinderDMM findQ(sharedMatrix, numPartitions);
303             
304             double laplace = findQ.getLaplace();
305             m->mothurOut(toString(numPartitions) + '\t');
306             cout << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
307             m->mothurOutJustToLog(toString(findQ.getNLL()) + '\t' + toString(findQ.getLogDet()) + '\t');
308             cout << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace;
309             m->mothurOutJustToLog(toString(findQ.getBIC()) + '\t' + toString(findQ.getAIC()) + '\t' + toString(laplace));
310             
311             fitData << numPartitions << '\t';
312             fitData << setprecision (2) << findQ.getNLL() << '\t' << findQ.getLogDet() << '\t';
313             fitData << findQ.getBIC() << '\t' << findQ.getAIC() << '\t' << laplace << endl;
314             
315             if(laplace < minLaplace){
316                 minPartition = numPartitions;
317                 minLaplace = laplace;
318                 m->mothurOut("***");
319             }
320             m->mothurOutEndLine();
321             
322             variables["[tag]"] = toString(numPartitions);
323             string relabund = getOutputFileName("relabund", variables);
324             outputNames.push_back(relabund); outputTypes["relabund"].push_back(relabund);
325             string matrix = getOutputFileName("matrix", variables);
326             outputNames.push_back(matrix); outputTypes["matrix"].push_back(matrix);
327             
328             findQ.printZMatrix(matrix, m->getGroups());
329             findQ.printRelAbund(relabund, m->currentBinLabels);
330             
331             if(optimizegap != -1 && (numPartitions - minPartition) >= optimizegap && numPartitions >= minpartitions){ break;  }
332         }
333         fitData.close();
334         
335         //minPartition = 4;
336         
337         if (m->control_pressed) { return 0; }
338         
339         generateSummaryFile(minpartitions, outputTypes["relabund"][0], outputTypes["relabund"][outputTypes["relabund"].size()-1], outputTypes["matrix"][outputTypes["matrix"].size()-1], thislookup[0]->getLabel());
340
341         
342         return 0;
343     }
344         catch(exception& e) {
345                 m->errorOut(e, "GetMetaCommunityCommand", "process");
346                 exit(1);
347         }
348 }
349 /**************************************************************************************************/
350
351 vector<double> GetMetaCommunityCommand::generateDesignFile(int numPartitions, string input){
352     try {
353         vector<double> piValues(numPartitions, 0);
354         
355         ifstream postFile;
356         m->openInputFile(input, postFile);//((fileRoot + toString(numPartitions) + "mix.posterior").c_str()); //matrix file
357         map<string, string> variables;
358         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(input));
359                 string outputFileName = getOutputFileName("design", variables);
360         ofstream designFile;
361         m->openOutputFile(outputFileName, designFile);
362         outputNames.push_back(outputFileName); outputTypes["design"].push_back(outputFileName);
363         
364         
365         vector<string> titles(numPartitions);
366         
367         for(int i=0;i<numPartitions;i++){   postFile >> titles[i];  }
368         
369         double posterior;
370         string sampleName;
371         int numSamples = 0;
372         
373         while(postFile){
374             
375             if (m->control_pressed) { break; }
376             
377             double maxPosterior = 0.0000;
378             int maxPartition = -1;
379             
380             postFile >> sampleName;
381             
382             for(int i=0;i<numPartitions;i++){
383                 
384                 postFile >> posterior;
385                 if(posterior > maxPosterior){
386                     maxPosterior = posterior;
387                     maxPartition = i;
388                 }
389                 piValues[i] += posterior;
390                 
391             }
392             
393             designFile << sampleName << '\t' << titles[maxPartition] << endl;
394             
395             numSamples++;
396             m->gobble(postFile);
397         }
398         for(int i=0;i<numPartitions;i++){
399             piValues[i] /= (double)numSamples;
400         }
401         
402         
403         postFile.close();
404         designFile.close();
405         
406         return piValues;
407     }
408         catch(exception& e) {
409                 m->errorOut(e, "GetMetaCommunityCommand", "generateDesignFile");
410                 exit(1);
411         }
412 }
413
414 /**************************************************************************************************/
415
416 inline bool summaryFunction(summaryData i, summaryData j){ return i.difference > j.difference;   }
417
418 /**************************************************************************************************/
419 int GetMetaCommunityCommand::generateSummaryFile(int numPartitions, string reference, string partFile, string designInput, string label){
420     try {
421         vector<summaryData> summary;
422         
423         vector<double> pMean(numPartitions, 0);
424         vector<double> pLCI(numPartitions, 0);
425         vector<double> pUCI(numPartitions, 0);
426         
427         string name, header;
428         double mean, lci, uci;
429         
430         
431         vector<double> piValues = generateDesignFile(numPartitions, designInput);
432         
433         ifstream referenceFile;
434         m->openInputFile(reference, referenceFile); //((fileRoot + label + ".1mix.relabund").c_str());
435         ifstream partitionFile;
436         m->openInputFile(partFile, partitionFile); //((fileRoot + toString(numPartitions) + "mix.relabund").c_str());
437         
438         header = m->getline(referenceFile);
439         header = m->getline(partitionFile);
440         stringstream head(header);
441         string dummy, label;
442         head >> dummy;
443         vector<string> thetaValues(numPartitions, "");
444         for(int i=0;i<numPartitions;i++){
445             head >> label >> dummy >> dummy;
446             thetaValues[i] = label.substr(label.find_last_of('_')+1);
447         }
448         
449         
450         vector<double> partitionDiff(numPartitions, 0.0000);
451         
452         while(referenceFile){
453             
454             if (m->control_pressed) { break; }
455             
456             referenceFile >> name >> mean >> lci >> uci;
457             
458             summaryData tempData;
459             tempData.name = name;
460             tempData.refMean = mean;
461             
462             double difference = 0.0000;
463             
464             partitionFile >> name;
465             for(int j=0;j<numPartitions;j++){
466                 partitionFile >> pMean[j] >> pLCI[j] >> pUCI[j];
467                 difference += abs(mean - pMean[j]);
468                 partitionDiff[j] += abs(mean - pMean[j]);;
469             }
470             
471             tempData.partMean = pMean;
472             tempData.partLCI = pLCI;
473             tempData.partUCI = pUCI;
474             tempData.difference = difference;
475             summary.push_back(tempData);
476             
477             m->gobble(referenceFile);
478             m->gobble(partitionFile);
479         }
480         referenceFile.close();
481         partitionFile.close();
482         
483         if (m->control_pressed) { return 0; }
484         
485         int numOTUs = (int)summary.size();
486         
487         sort(summary.begin(), summary.end(), summaryFunction);
488         
489         map<string, string> variables;
490         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
491         variables["[distance]"] = label;
492                 string outputFileName = getOutputFileName("parameters", variables);
493         outputNames.push_back(outputFileName); outputTypes["parameters"].push_back(outputFileName);
494         
495         ofstream parameterFile;
496         m->openOutputFile(outputFileName, parameterFile); //((fileRoot + "mix.parameters").c_str());
497         parameterFile.setf(ios::fixed, ios::floatfield);
498         parameterFile.setf(ios::showpoint);
499         
500         double totalDifference =  0.0000;
501         parameterFile << "Part\tDif2Ref_i\ttheta_i\tpi_i\n";
502         for(int i=0;i<numPartitions;i++){
503             if (m->control_pressed) { break; }
504             parameterFile << i+1 << '\t' << setprecision(2) << partitionDiff[i] << '\t' << thetaValues[i] << '\t' << piValues[i] << endl;
505             totalDifference += partitionDiff[i];
506         }
507         parameterFile.close();
508         
509         if (m->control_pressed) { return 0; }
510         
511         string summaryFileName = getOutputFileName("summary", variables);
512         outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
513         
514         ofstream summaryFile;
515         m->openOutputFile(summaryFileName, summaryFile); //((fileRoot + "mix.summary").c_str());
516         summaryFile.setf(ios::fixed, ios::floatfield);
517         summaryFile.setf(ios::showpoint);
518         
519         
520         summaryFile << "OTU\tP0.mean";
521         for(int i=0;i<numPartitions;i++){
522             summaryFile << "\tP" << i+1 << ".mean\tP" << i+1 << ".lci\tP" << i+1 << ".uci";
523         }
524         summaryFile << "\tDifference\tCumFraction" << endl;
525         
526         double cumDiff = 0.0000;
527         
528         for(int i=0;i<numOTUs;i++){
529             if (m->control_pressed) { break; }
530             summaryFile << summary[i].name << setprecision(2) << '\t' << summary[i].refMean;
531             for(int j=0;j<numPartitions;j++){
532                 summaryFile  << '\t' << summary[i].partMean[j] << '\t' << summary[i].partLCI[j] << '\t' << summary[i].partUCI[j];
533             }
534             
535             cumDiff += summary[i].difference/totalDifference;
536             summaryFile << '\t' << summary[i].difference << '\t' << cumDiff << endl;
537         }
538         summaryFile.close();
539         
540         return 0;
541         
542     }
543         catch(exception& e) {
544                 m->errorOut(e, "GetMetaCommunityCommand", "generateSummaryFile");
545                 exit(1);
546         }
547     
548 }
549 //**********************************************************************************************************************
550