5 // Created by SarahsWork on 6/12/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "lefsecommand.h"
10 #include "linearalgebra.h"
12 //**********************************************************************************************************************
13 vector<string> LefseCommand::setParameters(){
15 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
18 CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
19 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
20 CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
21 CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
22 CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
23 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "LefseCommand", "setParameters");
36 //**********************************************************************************************************************
37 string LefseCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The lefse command allows you to ....\n";
41 helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n";
42 helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
43 helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n";
44 helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
45 helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
46 helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
50 m->errorOut(e, "LefseCommand", "getHelpString");
54 //**********************************************************************************************************************
55 string LefseCommand::getOutputPattern(string type) {
59 if (type == "summary") { pattern = "[filename],[distance],lefse_summary"; }
60 else if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; }
61 else if (type == "wilcoxon") { pattern = "[filename],[distance],wilcoxon"; }
62 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
67 m->errorOut(e, "LefseCommand", "getOutputPattern");
71 //**********************************************************************************************************************
72 LefseCommand::LefseCommand(){
74 abort = true; calledHelp = true;
76 vector<string> tempOutNames;
77 outputTypes["summary"] = tempOutNames;
78 outputTypes["kruskall-wallis"] = tempOutNames;
79 outputTypes["wilcoxon"] = tempOutNames;
82 m->errorOut(e, "LefseCommand", "LefseCommand");
86 //**********************************************************************************************************************
87 LefseCommand::LefseCommand(string option) {
89 abort = false; calledHelp = false;
92 //allow user to run help
93 if(option == "help") { help(); abort = true; calledHelp = true; }
94 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
97 //valid paramters for this command
98 vector<string> myArray = setParameters();
100 OptionParser parser(option);
101 map<string,string> parameters = parser.getParameters();
103 ValidParameters validParameter;
104 map<string,string>::iterator it;
105 //check to make sure all parameters are valid for command
106 for (it = parameters.begin(); it != parameters.end(); it++) {
107 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
110 vector<string> tempOutNames;
111 outputTypes["summary"] = tempOutNames;
112 outputTypes["kruskall-wallis"] = tempOutNames;
113 outputTypes["wilcoxon"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
121 it = parameters.find("design");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["desing"] = inputDir + it->second; }
129 it = parameters.find("shared");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["shared"] = inputDir + it->second; }
138 //get shared file, it is required
139 sharedfile = validParameter.validFile(parameters, "shared", true);
140 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
141 else if (sharedfile == "not found") {
142 //if there is a current shared file, use it
143 sharedfile = m->getSharedFile();
144 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
145 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
146 }else { m->setSharedFile(sharedfile); }
148 //get shared file, it is required
149 designfile = validParameter.validFile(parameters, "design", true);
150 if (designfile == "not open") { designfile = ""; abort = true; }
151 else if (designfile == "not found") {
152 //if there is a current shared file, use it
153 designfile = m->getDesignFile();
154 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
155 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
156 }else { m->setDesignFile(designfile); }
158 //if the user changes the output directory command factory will send this info to us in the output parameter
159 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
160 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
163 string label = validParameter.validFile(parameters, "label", false);
164 if (label == "not found") { label = ""; }
166 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
167 else { allLines = 1; }
170 mclass = validParameter.validFile(parameters, "class", false);
171 if (mclass == "not found") { mclass = ""; }
173 subclass = validParameter.validFile(parameters, "subclass", false);
174 if (subclass == "not found") { subclass = ""; }
176 classes = validParameter.validFile(parameters, "classes", false);
177 if (classes == "not found") { classes = ""; }
179 string temp = validParameter.validFile(parameters, "anova_alpha", false);
180 if (temp == "not found") { temp = "0.05"; }
181 m->mothurConvert(temp, anovaAlpha);
183 temp = validParameter.validFile(parameters, "wilcoxon_alpha", false);
184 if (temp == "not found") { temp = "0.05"; }
185 m->mothurConvert(temp, wilcoxonAlpha);
190 catch(exception& e) {
191 m->errorOut(e, "LefseCommand", "LefseCommand");
195 //**********************************************************************************************************************
197 int LefseCommand::execute(){
200 if (abort == true) { if (calledHelp) { return 0; } return 2; }
202 DesignMap designMap(designfile);
203 //if user set classes set groups=those classes
205 map<string, vector<string> > thisClasses = m->parseClasses(classes);
206 vector<string> groups = designMap.getNamesUnique(thisClasses);
207 if (groups.size() != 0) { m->setGroups(groups); }
208 else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
211 //if user did not select class use first column
212 if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
214 InputData input(sharedfile, "sharedfile");
215 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
216 string lastLabel = lookup[0]->getLabel();
218 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
219 set<string> processedLabels;
220 set<string> userLabels = labels;
223 //as long as you are not at the end of the file or done wih the lines you want
224 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
226 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
228 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
230 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
232 process(lookup, designMap);
234 processedLabels.insert(lookup[0]->getLabel());
235 userLabels.erase(lookup[0]->getLabel());
238 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
239 string saveLabel = lookup[0]->getLabel();
241 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
242 lookup = input.getSharedRAbundVectors(lastLabel);
243 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
245 process(lookup, designMap);
247 processedLabels.insert(lookup[0]->getLabel());
248 userLabels.erase(lookup[0]->getLabel());
250 //restore real lastlabel to save below
251 lookup[0]->setLabel(saveLabel);
254 lastLabel = lookup[0]->getLabel();
255 //prevent memory leak
256 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
258 if (m->control_pressed) { return 0; }
260 //get next line to process
261 lookup = input.getSharedRAbundVectors();
264 if (m->control_pressed) { return 0; }
266 //output error messages about any remaining user labels
267 set<string>::iterator it;
268 bool needToRun = false;
269 for (it = userLabels.begin(); it != userLabels.end(); it++) {
270 m->mothurOut("Your file does not include the label " + *it);
271 if (processedLabels.count(lastLabel) != 1) {
272 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
275 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
279 //run last label if you need to
280 if (needToRun == true) {
281 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
282 lookup = input.getSharedRAbundVectors(lastLabel);
284 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
285 process(lookup, designMap);
287 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
291 //output files created by command
292 m->mothurOutEndLine();
293 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
294 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
295 m->mothurOutEndLine();
299 catch(exception& e) {
300 m->errorOut(e, "LefseCommand", "execute");
304 //**********************************************************************************************************************
306 int LefseCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
308 //run kruskal wallis on each otu
309 vector<int> significantOtuLabels = runKruskalWallis(lookup, designMap);
312 if (subclass != "") { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels); }
316 catch(exception& e) {
317 m->errorOut(e, "LefseCommand", "process");
321 //**********************************************************************************************************************
323 vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
325 map<string, string> variables;
326 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
327 variables["[distance]"] = lookup[0]->getLabel();
328 string outputFileName = getOutputFileName("kruskall-wallis",variables);
331 m->openOutputFile(outputFileName, out);
332 outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
333 out << "OTULabel\tKW\tPvalue\n";
335 vector<int> significantOtuLabels;
336 int numBins = lookup[0]->getNumBins();
337 //sanity check to make sure each treatment has a group in the shared file
338 set<string> treatments;
339 for (int j = 0; j < lookup.size(); j++) {
340 string group = lookup[j]->getGroup();
341 string treatment = designMap.get(group, mclass); //get value for this group in this category
342 treatments.insert(treatment);
344 if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
346 LinearAlgebra linear;
347 for (int i = 0; i < numBins; i++) {
348 if (m->control_pressed) { break; }
350 vector<spearmanRank> values;
351 for (int j = 0; j < lookup.size(); j++) {
352 string group = lookup[j]->getGroup();
353 string treatment = designMap.get(group, mclass); //get value for this group in this category
354 spearmanRank temp(treatment, lookup[j]->getAbundance(i));
355 values.push_back(temp);
359 double H = linear.calcKruskalWallis(values, pValue);
361 //output H and signifigance
362 out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
364 if (pValue < anovaAlpha) { significantOtuLabels.push_back(i); }
368 return significantOtuLabels;
370 catch(exception& e) {
371 m->errorOut(e, "LefseCommand", "runKruskalWallis");
375 //**********************************************************************************************************************
377 vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
379 vector<int> significantOtuLabels;
380 //if it exists and meets the following requirements run Wilcoxon
382 1. Subclass members all belong to same main class
383 2. Number of groups in each subclass is the same
387 vector<string> subclasses;
388 map<string, string> subclass2Class;
389 map<string, int> subclassCounts;
390 map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below.
392 for (int j = 0; j < lookup.size(); j++) {
393 string group = lookup[j]->getGroup();
394 string treatment = designMap.get(group, mclass); //get value for this group in this category
395 string thisSub = designMap.get(group, subclass);
396 map<string, string>::iterator it = subclass2Class.find(thisSub);
397 if (it == subclass2Class.end()) {
398 subclass2Class[thisSub] = treatment;
399 subclassCounts[thisSub] = 1;
400 vector<int> temp; temp.push_back(j);
401 subClass2GroupIndex[thisSub] = temp;
404 subclassCounts[thisSub]++;
405 subClass2GroupIndex[thisSub].push_back(j);
406 if (it->second != treatment) {
408 m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
413 if (error) { return significantOtuLabels; }
414 else { //check counts to make sure subclasses are the same size
416 for (map<string, int>::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); }
417 if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n");
418 return significantOtuLabels; }
421 int numBins = lookup[0]->getNumBins();
422 vector<compGroup> comp;
423 //find comparisons and fill comp
424 map<string, int>::iterator itB;
425 for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){
427 for(itB;itB!=subclassCounts.end();itB++){
428 compGroup temp(it->first,itB->first);
429 comp.push_back(temp);
433 int numComp = comp.size();
434 if (numComp < 2) { m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
435 return significantOtuLabels; }
437 map<string, string> variables;
438 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
439 variables["[distance]"] = lookup[0]->getLabel();
440 string outputFileName = getOutputFileName("wilcoxon",variables);
443 m->openOutputFile(outputFileName, out);
444 outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
445 out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";
447 LinearAlgebra linear;
448 for (int i = 0; i < numBins; i++) {
449 if (m->control_pressed) { break; }
451 if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis
454 //for each subclass comparision
455 for (int j = 0; j < numComp; j++) {
456 //fill x and y with this comparisons data
457 vector<double> x; vector<double> y;
460 vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
461 for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); }
463 vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
464 for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); }
467 double H = linear.calcWilcoxon(x, y, pValue);
469 //output H and signifigance
470 out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl;
472 //set sig - not sure how yet
474 if (sig) { significantOtuLabels.push_back(i); }
479 return significantOtuLabels;
481 catch(exception& e) {
482 m->errorOut(e, "LefseCommand", "runWilcoxon");
487 //**********************************************************************************************************************