]> git.donarmstrong.com Git - mothur.git/blob - lefsecommand.cpp
added kruskal.wallis command. added worked on make.lefse. working of lefse command...
[mothur.git] / lefsecommand.cpp
1 //
2 //  lefsecommand.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 6/12/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "lefsecommand.h"
10 #include "linearalgebra.h"
11
12 //**********************************************************************************************************************
13 vector<string> LefseCommand::setParameters(){
14         try {
15         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
16         CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17         CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
18         CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
19                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
20                 CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
21         CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
22         CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
23         //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "LefseCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string LefseCommand::getHelpString(){
38         try {
39                 string helpString = "";
40                 helpString += "The lefse command allows you to ....\n";
41                 helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n";
42                 helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
43         helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n";
44         helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old.  class=treatment<Early|Late>-age<young|old>.\n";
45         helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
46                 helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
47         return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "LefseCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string LefseCommand::getOutputPattern(string type) {
56     try {
57         string pattern = "";
58         
59         if (type == "summary") {  pattern = "[filename],[distance],lefse_summary"; }
60         else if (type == "kruskall-wallis") {  pattern = "[filename],[distance],kruskall_wallis"; }
61         else if (type == "wilcoxon") {  pattern = "[filename],[distance],wilcoxon"; }
62         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
63         
64         return pattern;
65     }
66     catch(exception& e) {
67         m->errorOut(e, "LefseCommand", "getOutputPattern");
68         exit(1);
69     }
70 }
71 //**********************************************************************************************************************
72 LefseCommand::LefseCommand(){
73         try {
74                 abort = true; calledHelp = true;
75                 setParameters();
76         vector<string> tempOutNames;
77                 outputTypes["summary"] = tempOutNames;
78         outputTypes["kruskall-wallis"] = tempOutNames;
79         outputTypes["wilcoxon"] = tempOutNames;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "LefseCommand", "LefseCommand");
83                 exit(1);
84         }
85 }
86 //**********************************************************************************************************************
87 LefseCommand::LefseCommand(string option)  {
88         try {
89                 abort = false; calledHelp = false;
90         allLines = 1;
91                 
92                 //allow user to run help
93                 if(option == "help") { help(); abort = true; calledHelp = true; }
94                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
95                 
96                 else {
97                         //valid paramters for this command
98                         vector<string> myArray = setParameters();
99                         
100                         OptionParser parser(option);
101                         map<string,string> parameters = parser.getParameters();
102                         
103                         ValidParameters validParameter;
104                         map<string,string>::iterator it;
105                         //check to make sure all parameters are valid for command
106                         for (it = parameters.begin(); it != parameters.end(); it++) {
107                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
108                         }
109                         
110                         vector<string> tempOutNames;
111             outputTypes["summary"] = tempOutNames;
112             outputTypes["kruskall-wallis"] = tempOutNames;
113             outputTypes["wilcoxon"] = tempOutNames;
114             
115                         //if the user changes the input directory command factory will send this info to us in the output parameter
116                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
117                         if (inputDir == "not found"){   inputDir = "";          }
118                         else {
119                 
120                 string path;
121                                 it = parameters.find("design");
122                                 //user has given a template file
123                                 if(it != parameters.end()){
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["desing"] = inputDir + it->second;           }
127                                 }
128                                 
129                 it = parameters.find("shared");
130                                 //user has given a template file
131                                 if(it != parameters.end()){
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
135                                 }
136             }
137                     
138             //get shared file, it is required
139                         sharedfile = validParameter.validFile(parameters, "shared", true);
140                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
141                         else if (sharedfile == "not found") {
142                                 //if there is a current shared file, use it
143                                 sharedfile = m->getSharedFile();
144                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
145                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
146                         }else { m->setSharedFile(sharedfile); }
147             
148             //get shared file, it is required
149                         designfile = validParameter.validFile(parameters, "design", true);
150                         if (designfile == "not open") { designfile = ""; abort = true; }
151                         else if (designfile == "not found") {
152                                 //if there is a current shared file, use it
153                                 designfile = m->getDesignFile();
154                                 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
155                                 else {  m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
156                         }else { m->setDesignFile(designfile); }
157             
158             //if the user changes the output directory command factory will send this info to us in the output parameter
159                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
160                                 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
161                         }
162             
163             string label = validParameter.validFile(parameters, "label", false);
164                         if (label == "not found") { label = ""; }
165                         else {
166                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
167                                 else { allLines = 1;  }
168                         }
169             
170             mclass = validParameter.validFile(parameters, "class", false);
171                         if (mclass == "not found") { mclass = ""; }
172                         
173             subclass = validParameter.validFile(parameters, "subclass", false);
174                         if (subclass == "not found") { subclass = ""; }
175             
176             classes = validParameter.validFile(parameters, "classes", false);
177                         if (classes == "not found") { classes = ""; }
178             
179             string temp = validParameter.validFile(parameters, "anova_alpha", false);
180                         if (temp == "not found") { temp = "0.05"; }
181                         m->mothurConvert(temp, anovaAlpha);
182             
183             temp = validParameter.validFile(parameters, "wilcoxon_alpha", false);
184                         if (temp == "not found") { temp = "0.05"; }
185                         m->mothurConvert(temp, wilcoxonAlpha);
186
187                 }
188                 
189         }
190         catch(exception& e) {
191                 m->errorOut(e, "LefseCommand", "LefseCommand");
192                 exit(1);
193         }
194 }
195 //**********************************************************************************************************************
196
197 int LefseCommand::execute(){
198         try {
199                 
200                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
201         
202         DesignMap designMap(designfile);
203         //if user set classes set groups=those classes
204         if (classes != "") {
205             map<string, vector<string> > thisClasses = m->parseClasses(classes);
206             vector<string> groups = designMap.getNamesUnique(thisClasses);
207             if (groups.size() != 0) { m->setGroups(groups); }
208             else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
209         }
210         
211         //if user did not select class use first column
212         if (mclass == "") {  mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
213         
214         InputData input(sharedfile, "sharedfile");
215         vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
216         string lastLabel = lookup[0]->getLabel();
217         
218         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
219         set<string> processedLabels;
220         set<string> userLabels = labels;
221         
222         
223         //as long as you are not at the end of the file or done wih the lines you want
224         while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
225             
226             if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
227             
228             if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
229                 
230                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
231                 
232                 process(lookup, designMap);
233                 
234                 processedLabels.insert(lookup[0]->getLabel());
235                 userLabels.erase(lookup[0]->getLabel());
236             }
237             
238             if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
239                 string saveLabel = lookup[0]->getLabel();
240                 
241                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
242                 lookup = input.getSharedRAbundVectors(lastLabel);
243                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
244                 
245                 process(lookup, designMap);
246                 
247                 processedLabels.insert(lookup[0]->getLabel());
248                 userLabels.erase(lookup[0]->getLabel());
249                 
250                 //restore real lastlabel to save below
251                 lookup[0]->setLabel(saveLabel);
252             }
253             
254             lastLabel = lookup[0]->getLabel();
255             //prevent memory leak
256             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
257             
258             if (m->control_pressed) { return 0; }
259             
260             //get next line to process
261             lookup = input.getSharedRAbundVectors();
262         }
263         
264         if (m->control_pressed) {  return 0; }
265         
266         //output error messages about any remaining user labels
267         set<string>::iterator it;
268         bool needToRun = false;
269         for (it = userLabels.begin(); it != userLabels.end(); it++) {
270             m->mothurOut("Your file does not include the label " + *it);
271             if (processedLabels.count(lastLabel) != 1) {
272                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
273                 needToRun = true;
274             }else {
275                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
276             }
277         }
278         
279         //run last label if you need to
280         if (needToRun == true)  {
281             for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
282             lookup = input.getSharedRAbundVectors(lastLabel);
283             
284             m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
285             process(lookup, designMap);
286             
287             for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
288         }
289                 
290                 
291         //output files created by command
292                 m->mothurOutEndLine();
293                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
294                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
295                 m->mothurOutEndLine();
296         return 0;
297                 
298     }
299         catch(exception& e) {
300                 m->errorOut(e, "LefseCommand", "execute");
301                 exit(1);
302         }
303 }
304 //**********************************************************************************************************************
305
306 int LefseCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
307         try {
308         //run kruskal wallis on each otu
309         vector<int> significantOtuLabels = runKruskalWallis(lookup, designMap);
310         
311         //check for subclass
312         if (subclass != "") {  significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels);  }
313         
314         return 0;
315     }
316         catch(exception& e) {
317                 m->errorOut(e, "LefseCommand", "process");
318                 exit(1);
319         }
320 }
321 //**********************************************************************************************************************
322
323 vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
324         try {
325         map<string, string> variables;
326         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
327         variables["[distance]"] = lookup[0]->getLabel();
328                 string outputFileName = getOutputFileName("kruskall-wallis",variables);
329         
330                 ofstream out;
331                 m->openOutputFile(outputFileName, out);
332                 outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
333         out << "OTULabel\tKW\tPvalue\n";
334         
335         vector<int> significantOtuLabels;
336         int numBins = lookup[0]->getNumBins();
337         //sanity check to make sure each treatment has a group in the shared file
338         set<string> treatments;
339         for (int j = 0; j < lookup.size(); j++) {
340             string group = lookup[j]->getGroup();
341             string treatment = designMap.get(group, mclass); //get value for this group in this category
342             treatments.insert(treatment);
343         }
344         if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
345         
346         LinearAlgebra linear;
347         for (int i = 0; i < numBins; i++) {
348             if (m->control_pressed) { break; }
349             
350             vector<spearmanRank> values;
351             for (int j = 0; j < lookup.size(); j++) {
352                 string group = lookup[j]->getGroup();
353                 string treatment = designMap.get(group, mclass); //get value for this group in this category
354                 spearmanRank temp(treatment, lookup[j]->getAbundance(i));
355                 values.push_back(temp);
356             }
357             
358             double pValue = 0.0;
359             double H = linear.calcKruskalWallis(values, pValue);
360             
361             //output H and signifigance
362             out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
363             
364             if (pValue < anovaAlpha) {  significantOtuLabels.push_back(i);  }
365         }
366         out.close();
367         
368         return significantOtuLabels;
369     }
370         catch(exception& e) {
371                 m->errorOut(e, "LefseCommand", "runKruskalWallis");
372                 exit(1);
373         }
374 }
375 //**********************************************************************************************************************
376
377 vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
378     try {
379         vector<int> significantOtuLabels;
380         //if it exists and meets the following requirements run Wilcoxon
381         /*
382          1. Subclass members all belong to same main class
383          2. Number of groups in each subclass is the same
384          3. anything else??
385          
386          */
387         vector<string> subclasses;
388         map<string, string> subclass2Class;
389         map<string, int> subclassCounts;
390         map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old.  Saves time below.
391         bool error = false;
392         for (int j = 0; j < lookup.size(); j++) {
393             string group = lookup[j]->getGroup();
394             string treatment = designMap.get(group, mclass); //get value for this group in this category
395             string thisSub = designMap.get(group, subclass);
396             map<string, string>::iterator it = subclass2Class.find(thisSub);
397             if (it == subclass2Class.end()) {
398                 subclass2Class[thisSub] = treatment;
399                 subclassCounts[thisSub] = 1;
400                 vector<int> temp; temp.push_back(j);
401                 subClass2GroupIndex[thisSub] = temp;
402             }
403             else {
404                 subclassCounts[thisSub]++;
405                 subClass2GroupIndex[thisSub].push_back(j);
406                 if (it->second != treatment) {
407                     error = true;
408                     m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
409                 }
410             }
411         }
412         
413         if (error) { return significantOtuLabels; }
414         else { //check counts to make sure subclasses are the same size
415             set<int> counts;
416             for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); }
417             if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n");
418                 return significantOtuLabels;  }
419         }
420         
421         int numBins = lookup[0]->getNumBins();
422         vector<compGroup> comp;
423         //find comparisons and fill comp
424         map<string, int>::iterator itB;
425         for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){
426             itB = it;itB++;
427             for(itB;itB!=subclassCounts.end();itB++){
428                 compGroup temp(it->first,itB->first);
429                 comp.push_back(temp);
430             }                   
431         }
432
433         int numComp = comp.size();
434         if (numComp < 2) {  m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
435             return significantOtuLabels;  }
436         
437         map<string, string> variables;
438         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
439         variables["[distance]"] = lookup[0]->getLabel();
440         string outputFileName = getOutputFileName("wilcoxon",variables);
441         
442         ofstream out;
443         m->openOutputFile(outputFileName, out);
444         outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
445         out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";
446         
447         LinearAlgebra linear;
448         for (int i = 0; i < numBins; i++) {
449             if (m->control_pressed) { break; }
450             
451             if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis
452                 
453                 bool sig = false;
454                 //for each subclass comparision
455                 for (int j = 0; j < numComp; j++) {
456                     //fill x and y with this comparisons data
457                     vector<double> x; vector<double> y;
458                     
459                     //fill x and y
460                     vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
461                     for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); }
462                     
463                     vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
464                     for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); }
465                     
466                     double pValue = 0.0;
467                     double H = linear.calcWilcoxon(x, y, pValue);
468             
469                     //output H and signifigance
470                     out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl;
471                     
472                     //set sig - not sure how yet
473                 }
474                 if (sig) {  significantOtuLabels.push_back(i);  }
475             }
476         }
477         out.close();
478         
479         return significantOtuLabels;
480     }
481     catch(exception& e) {
482         m->errorOut(e, "LefseCommand", "runWilcoxon");
483         exit(1);
484     }
485 }
486
487 //**********************************************************************************************************************
488
489