5 // Created by SarahsWork on 6/12/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "lefsecommand.h"
10 #include "linearalgebra.h"
12 //**********************************************************************************************************************
13 vector<string> LefseCommand::setParameters(){
15 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
18 CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
19 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
20 CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
21 CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
22 CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
23 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
24 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27 vector<string> myArray;
28 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
32 m->errorOut(e, "LefseCommand", "setParameters");
36 //**********************************************************************************************************************
37 string LefseCommand::getHelpString(){
39 string helpString = "";
40 helpString += "The lefse command allows you to ....\n";
41 helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n";
42 helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
43 helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n";
44 helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
45 helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
46 helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
50 m->errorOut(e, "LefseCommand", "getHelpString");
54 //**********************************************************************************************************************
55 string LefseCommand::getOutputPattern(string type) {
59 if (type == "summary") { pattern = "[filename],[distance],lefse_summary"; }
60 else if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; }
61 else if (type == "wilcoxon") { pattern = "[filename],[distance],wilcoxon"; }
62 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
67 m->errorOut(e, "LefseCommand", "getOutputPattern");
71 //**********************************************************************************************************************
72 LefseCommand::LefseCommand(){
74 abort = true; calledHelp = true;
76 vector<string> tempOutNames;
77 outputTypes["summary"] = tempOutNames;
78 outputTypes["kruskall-wallis"] = tempOutNames;
79 outputTypes["wilcoxon"] = tempOutNames;
82 m->errorOut(e, "LefseCommand", "LefseCommand");
86 //**********************************************************************************************************************
87 LefseCommand::LefseCommand(string option) {
89 abort = false; calledHelp = false;
92 //allow user to run help
93 if(option == "help") { help(); abort = true; calledHelp = true; }
94 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
97 //valid paramters for this command
98 vector<string> myArray = setParameters();
100 OptionParser parser(option);
101 map<string,string> parameters = parser.getParameters();
103 ValidParameters validParameter;
104 map<string,string>::iterator it;
105 //check to make sure all parameters are valid for command
106 for (it = parameters.begin(); it != parameters.end(); it++) {
107 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
110 vector<string> tempOutNames;
111 outputTypes["summary"] = tempOutNames;
112 outputTypes["kruskall-wallis"] = tempOutNames;
113 outputTypes["wilcoxon"] = tempOutNames;
115 //if the user changes the input directory command factory will send this info to us in the output parameter
116 string inputDir = validParameter.validFile(parameters, "inputdir", false);
117 if (inputDir == "not found"){ inputDir = ""; }
121 it = parameters.find("design");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["desing"] = inputDir + it->second; }
129 it = parameters.find("shared");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["shared"] = inputDir + it->second; }
138 //get shared file, it is required
139 sharedfile = validParameter.validFile(parameters, "shared", true);
140 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
141 else if (sharedfile == "not found") {
142 //if there is a current shared file, use it
143 sharedfile = m->getSharedFile();
144 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
145 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
146 }else { m->setSharedFile(sharedfile); }
148 //get shared file, it is required
149 designfile = validParameter.validFile(parameters, "design", true);
150 if (designfile == "not open") { designfile = ""; abort = true; }
151 else if (designfile == "not found") {
152 //if there is a current shared file, use it
153 designfile = m->getDesignFile();
154 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
155 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
156 }else { m->setDesignFile(designfile); }
158 //if the user changes the output directory command factory will send this info to us in the output parameter
159 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
160 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
163 string label = validParameter.validFile(parameters, "label", false);
164 if (label == "not found") { label = ""; }
166 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
167 else { allLines = 1; }
170 mclass = validParameter.validFile(parameters, "class", false);
171 if (mclass == "not found") { mclass = ""; }
173 subclass = validParameter.validFile(parameters, "subclass", false);
174 if (subclass == "not found") { subclass = ""; }
176 classes = validParameter.validFile(parameters, "classes", false);
177 if (classes == "not found") { classes = ""; }
179 string temp = validParameter.validFile(parameters, "anova_alpha", false);
180 if (temp == "not found") { temp = "0.05"; }
181 m->mothurConvert(temp, anovaAlpha);
183 temp = validParameter.validFile(parameters, "wilcoxon_alpha", false);
184 if (temp == "not found") { temp = "0.05"; }
185 m->mothurConvert(temp, wilcoxonAlpha);
190 catch(exception& e) {
191 m->errorOut(e, "LefseCommand", "LefseCommand");
195 //**********************************************************************************************************************
197 int LefseCommand::execute(){
200 if (abort == true) { if (calledHelp) { return 0; } return 2; }
202 DesignMap designMap(designfile);
203 //if user set classes set groups=those classes
205 map<string, vector<string> > thisClasses = m->parseClasses(classes);
206 vector<string> groups = designMap.getNamesUnique(thisClasses);
207 if (groups.size() != 0) { m->setGroups(groups); }
208 else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
211 //if user did not select class use first column
212 if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
214 InputData input(sharedfile, "sharedfile");
215 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
216 string lastLabel = lookup[0]->getLabel();
218 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
219 set<string> processedLabels;
220 set<string> userLabels = labels;
223 //as long as you are not at the end of the file or done wih the lines you want
224 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
226 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
228 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
230 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
232 process(lookup, designMap);
234 processedLabels.insert(lookup[0]->getLabel());
235 userLabels.erase(lookup[0]->getLabel());
238 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
239 string saveLabel = lookup[0]->getLabel();
241 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
242 lookup = input.getSharedRAbundVectors(lastLabel);
243 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
245 process(lookup, designMap);
247 processedLabels.insert(lookup[0]->getLabel());
248 userLabels.erase(lookup[0]->getLabel());
250 //restore real lastlabel to save below
251 lookup[0]->setLabel(saveLabel);
254 lastLabel = lookup[0]->getLabel();
255 //prevent memory leak
256 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
258 if (m->control_pressed) { return 0; }
260 //get next line to process
261 lookup = input.getSharedRAbundVectors();
264 if (m->control_pressed) { return 0; }
266 //output error messages about any remaining user labels
267 set<string>::iterator it;
268 bool needToRun = false;
269 for (it = userLabels.begin(); it != userLabels.end(); it++) {
270 m->mothurOut("Your file does not include the label " + *it);
271 if (processedLabels.count(lastLabel) != 1) {
272 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
275 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
279 //run last label if you need to
280 if (needToRun == true) {
281 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
282 lookup = input.getSharedRAbundVectors(lastLabel);
284 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
285 process(lookup, designMap);
287 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
291 //output files created by command
292 m->mothurOutEndLine();
293 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
294 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
295 m->mothurOutEndLine();
299 catch(exception& e) {
300 m->errorOut(e, "LefseCommand", "execute");
304 //**********************************************************************************************************************
306 int LefseCommand::process(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
308 //run kruskal wallis on each otu
309 vector<int> significantOtuLabels = runKruskalWallis(lookup, designMap);
312 if (subclass != "") { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels); }
316 catch(exception& e) {
317 m->errorOut(e, "LefseCommand", "process");
321 //**********************************************************************************************************************
323 vector<int> LefseCommand::runKruskalWallis(vector<SharedRAbundVector*>& lookup, DesignMap& designMap) {
325 map<string, string> variables;
326 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
327 variables["[distance]"] = lookup[0]->getLabel();
328 string outputFileName = getOutputFileName("kruskall-wallis",variables);
331 m->openOutputFile(outputFileName, out);
332 outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName);
333 out << "OTULabel\tKW\tPvalue\n";
335 vector<int> significantOtuLabels;
336 int numBins = lookup[0]->getNumBins();
337 //sanity check to make sure each treatment has a group in the shared file
338 set<string> treatments;
339 for (int j = 0; j < lookup.size(); j++) {
340 string group = lookup[j]->getGroup();
341 string treatment = designMap.get(group, mclass); //get value for this group in this category
342 treatments.insert(treatment);
344 if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
346 LinearAlgebra linear;
347 for (int i = 0; i < numBins; i++) {
348 if (m->control_pressed) { break; }
350 vector<spearmanRank> values;
351 for (int j = 0; j < lookup.size(); j++) {
352 string group = lookup[j]->getGroup();
353 string treatment = designMap.get(group, mclass); //get value for this group in this category
354 spearmanRank temp(treatment, lookup[j]->getAbundance(i));
355 values.push_back(temp);
359 double H = linear.calcKruskalWallis(values, pValue);
361 //output H and signifigance
362 out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl;
364 if (pValue < anovaAlpha) { significantOtuLabels.push_back(i); }
368 return significantOtuLabels;
370 catch(exception& e) {
371 m->errorOut(e, "LefseCommand", "runKruskalWallis");
375 //**********************************************************************************************************************
376 //assumes not neccessarily paired
377 vector<int> LefseCommand::runWilcoxon(vector<SharedRAbundVector*>& lookup, DesignMap& designMap, vector<int> bins) {
379 LinearAlgebra linear;
380 vector<int> significantOtuLabels;
381 //if it exists and meets the following requirements run Wilcoxon
383 1. Subclass members all belong to same main class
386 vector<string> subclasses;
387 map<string, string> subclass2Class;
388 map<string, int> subclassCounts;
389 map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below.
391 for (int j = 0; j < lookup.size(); j++) {
392 string group = lookup[j]->getGroup();
393 string treatment = designMap.get(group, mclass); //get value for this group in this category
394 string thisSub = designMap.get(group, subclass);
395 map<string, string>::iterator it = subclass2Class.find(thisSub);
396 if (it == subclass2Class.end()) {
397 subclass2Class[thisSub] = treatment;
398 subclassCounts[thisSub] = 1;
399 vector<int> temp; temp.push_back(j);
400 subClass2GroupIndex[thisSub] = temp;
403 subclassCounts[thisSub]++;
404 subClass2GroupIndex[thisSub].push_back(j);
405 if (it->second != treatment) {
407 m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n");
412 if (error) { return significantOtuLabels; }
414 int numBins = lookup[0]->getNumBins();
415 vector<compGroup> comp;
416 //find comparisons and fill comp
417 map<string, int>::iterator itB;
418 for(map<string, int>::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){
420 for(itB;itB!=subclassCounts.end();itB++){
421 compGroup temp(it->first,itB->first);
422 comp.push_back(temp);
426 int numComp = comp.size();
427 if (numComp < 2) { m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n");
428 return significantOtuLabels; }
430 map<string, string> variables;
431 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
432 variables["[distance]"] = lookup[0]->getLabel();
433 string outputFileName = getOutputFileName("wilcoxon",variables);
436 m->openOutputFile(outputFileName, out);
437 outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName);
438 out << "OTULabel\tComparision\tWilcoxon\tPvalue\n";
440 for (int i = 0; i < numBins; i++) {
441 if (m->control_pressed) { break; }
443 if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis
446 //for each subclass comparision
447 for (int j = 0; j < numComp; j++) {
448 //fill x and y with this comparisons data
449 vector<double> x; vector<double> y;
451 cout << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << " x <- (";
453 vector<int> xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass
454 for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); cout << lookup[xIndexes[k]]->getAbundance(i) << ", "; }
457 cout << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << " y <- (";
459 vector<int> yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass
460 for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); cout << lookup[yIndexes[k]]->getAbundance(i) << ", ";}
464 double H = linear.calcWilcoxon(x, y, pValue);
466 //output H and signifigance
467 if (!isnan(pValue)) { out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl; }
468 else { out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << "NA" << endl; }
470 //set sig - not sure how yet
472 if (sig) { significantOtuLabels.push_back(i); }
477 return significantOtuLabels;
479 catch(exception& e) {
480 m->errorOut(e, "LefseCommand", "runWilcoxon");
485 //**********************************************************************************************************************