5 // Created by SarahsWork on 6/3/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "makelefsecommand.h"
10 #include "designmap.h"
12 //**********************************************************************************************************************
13 vector<string> MakeLefseCommand::setParameters(){
15 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared);
16 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund);
17 CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy);
18 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign);
19 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
20 CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale);
21 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
25 vector<string> myArray;
26 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
30 m->errorOut(e, "MakeLefseCommand", "setParameters");
34 //**********************************************************************************************************************
35 string MakeLefseCommand::getHelpString(){
37 string helpString = "";
38 helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n";
39 helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label. The shared or relabund are required.\n";
40 helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n";
41 helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n";
42 helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n";
43 helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
44 helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
45 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
46 helpString += "The make.lefse command should be in the following format: make.lefse(shared=yourSharedFile)\n";
47 helpString += "make.lefse(shared=final.an.shared)\n";
51 m->errorOut(e, "MakeLefseCommand", "getHelpString");
55 //**********************************************************************************************************************
56 string MakeLefseCommand::getOutputPattern(string type) {
60 if (type == "lefse") { pattern = "[filename],[distance],lefse"; }
61 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
66 m->errorOut(e, "MakeLefseCommand", "getOutputPattern");
70 //**********************************************************************************************************************
71 MakeLefseCommand::MakeLefseCommand(){
73 abort = true; calledHelp = true;
75 vector<string> tempOutNames;
76 outputTypes["lefse"] = tempOutNames;
79 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
83 //**********************************************************************************************************************
84 MakeLefseCommand::MakeLefseCommand(string option) {
86 abort = false; calledHelp = false;
88 //allow user to run help
89 if(option == "help") { help(); abort = true; calledHelp = true; }
90 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
93 //valid paramters for this command
94 vector<string> myArray = setParameters();
96 OptionParser parser(option);
97 map<string,string> parameters = parser.getParameters();
99 ValidParameters validParameter;
100 map<string,string>::iterator it;
101 //check to make sure all parameters are valid for command
102 for (it = parameters.begin(); it != parameters.end(); it++) {
103 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
106 vector<string> tempOutNames;
107 outputTypes["lefse"] = tempOutNames;
109 //if the user changes the input directory command factory will send this info to us in the output parameter
110 string inputDir = validParameter.validFile(parameters, "inputdir", false);
111 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("shared");
115 //user has given a template file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["shared"] = inputDir + it->second; }
122 it = parameters.find("design");
123 //user has given a template file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["design"] = inputDir + it->second; }
130 it = parameters.find("constaxonomy");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
138 it = parameters.find("relabund");
139 //user has given a template file
140 if(it != parameters.end()){
141 path = m->hasPath(it->second);
142 //if the user has not given a path then, add inputdir. else leave path alone.
143 if (path == "") { parameters["relabund"] = inputDir + it->second; }
147 //check for parameters
148 designfile = validParameter.validFile(parameters, "design", true);
149 if (designfile == "not open") { abort = true; }
150 else if (designfile == "not found") { designfile = ""; }
151 else { m->setDesignFile(designfile); }
153 sharedfile = validParameter.validFile(parameters, "shared", true);
154 if (sharedfile == "not open") { abort = true; }
155 else if (sharedfile == "not found") { sharedfile = ""; }
156 else { m->setSharedFile(sharedfile); }
158 relabundfile = validParameter.validFile(parameters, "relabund", true);
159 if (relabundfile == "not open") { abort = true; }
160 else if (relabundfile == "not found") { relabundfile = ""; }
161 else { m->setRelAbundFile(relabundfile); }
163 constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
164 if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
165 else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
167 label = validParameter.validFile(parameters, "label", false);
168 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
170 string groups = validParameter.validFile(parameters, "groups", false);
171 if (groups == "not found") { groups = ""; }
173 m->splitAtDash(groups, Groups);
174 m->setGroups(Groups);
178 if ((relabundfile == "") && (sharedfile == "")) {
179 //is there are current file available for either of these?
180 //give priority to shared, then relabund
181 sharedfile = m->getSharedFile();
182 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
184 relabundfile = m->getRelAbundFile();
185 if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
187 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
193 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; }
195 //if the user changes the output directory command factory will send this info to us in the output parameter
196 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
199 scale = validParameter.validFile(parameters, "scale", false); if (scale == "not found") { scale = "totalgroup"; }
201 if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) {
202 m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu."); m->mothurOutEndLine(); abort = true;
207 catch(exception& e) {
208 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
212 //**********************************************************************************************************************
214 int MakeLefseCommand::execute(){
217 if (abort == true) { if (calledHelp) { return 0; } return 2; }
219 map<string, consTax2> consTax;
220 if (constaxonomyfile != "") { m->readConsTax(constaxonomyfile, consTax); }
222 if (m->control_pressed) { return 0; }
224 if (sharedfile != "") {
225 inputFile = sharedfile;
226 vector<SharedRAbundFloatVector*> lookup = getSharedRelabund();
227 runRelabund(consTax, lookup);
229 inputFile = relabundfile;
230 vector<SharedRAbundFloatVector*> lookup = getRelabund();
231 runRelabund(consTax, lookup);
234 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
236 //output files created by command
237 m->mothurOutEndLine();
238 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
239 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
240 m->mothurOutEndLine();
244 catch(exception& e) {
245 m->errorOut(e, "MakeLefseCommand", "execute");
249 //**********************************************************************************************************************
250 int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedRAbundFloatVector*>& lookup){
252 if (outputDir == "") { outputDir = m->hasPath(inputFile); }
253 map<string, string> variables;
254 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
255 variables["[distance]"] = lookup[0]->getLabel();
256 string outputFile = getOutputFileName("lefse",variables);
257 outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile);
260 m->openOutputFile(outputFile, out);
262 DesignMap* designMap = NULL;
263 if (designfile != "") {
264 designMap = new DesignMap(designfile);
265 vector<string> categories = designMap->getNamesOfCategories();
267 if (categories.size() > 3) { m->mothurOut("\n[NOTE]: LEfSe input files allow for a class, subclass and subject. More than 3 categories can cause formatting errors.\n\n"); }
269 for (int j = 0; j < categories.size(); j++) {
270 out << categories[j] << "\t";
271 for (int i = 0; i < lookup.size()-1; i++) {
272 if (m->control_pressed) { out.close(); delete designMap; return 0; }
273 string value = designMap->get(lookup[i]->getGroup(), categories[j]);
274 if (value == "not found") {
275 m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
276 }else { out << value << '\t'; }
278 string value = designMap->get(lookup[lookup.size()-1]->getGroup(), categories[j]);
279 if (value == "not found") {
280 m->mothurOut("[ERROR]: " + lookup[lookup.size()-1]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
281 }else { out << value; }
287 for (int i = 0; i < lookup.size()-1; i++) { out << lookup[i]->getGroup() << '\t'; }
288 out << lookup[lookup.size()-1]->getGroup() << endl;
290 for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
291 if (m->control_pressed) { break; }
292 string nameOfOtu = m->currentSharedBinLabels[i];
293 if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
294 map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
295 if (it != consTax.end()) {
296 nameOfOtu = it->second.taxonomy;
297 //add sanity check abundances here??
298 string fixedName = "";
299 //remove confidences and change ; to |
300 m->removeConfidences(nameOfOtu);
301 for (int j = 0; j < nameOfOtu.length()-1; j++) {
302 if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentSharedBinLabels[i] + '|'; }
303 else { fixedName += nameOfOtu[j]; }
305 nameOfOtu = fixedName;
307 m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->control_pressed = true;
312 out << nameOfOtu << '\t';
314 //print out relabunds for each otu
315 for (int j = 0; j < lookup.size()-1; j++) { out << lookup[j]->getAbundance(i) << '\t'; }
317 out << lookup[lookup.size()-1]->getAbundance(i)<< endl;
323 catch(exception& e) {
324 m->errorOut(e, "MakeLefseCommand", "execute");
328 //**********************************************************************************************************************
329 vector<SharedRAbundFloatVector*> MakeLefseCommand::getSharedRelabund(){
331 InputData input(sharedfile, "sharedfile");
332 vector<SharedRAbundVector*> templookup = input.getSharedRAbundVectors();
333 string lastLabel = templookup[0]->getLabel();
334 vector<SharedRAbundFloatVector*> lookup;
336 if (label == "") { label = lastLabel; }
338 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
339 set<string> labels; labels.insert(label);
340 set<string> processedLabels;
341 set<string> userLabels = labels;
343 //as long as you are not at the end of the file or done wih the lines you want
344 while((templookup[0] != NULL) && (userLabels.size() != 0)) {
345 if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
347 if(labels.count(templookup[0]->getLabel()) == 1){
348 processedLabels.insert(templookup[0]->getLabel());
349 userLabels.erase(templookup[0]->getLabel());
353 if ((m->anyLabelsToProcess(templookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
354 string saveLabel = templookup[0]->getLabel();
356 for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
357 templookup = input.getSharedRAbundVectors(lastLabel);
359 processedLabels.insert(templookup[0]->getLabel());
360 userLabels.erase(templookup[0]->getLabel());
362 //restore real lastlabel to save below
363 templookup[0]->setLabel(saveLabel);
367 lastLabel = templookup[0]->getLabel();
369 //get next line to process
370 //prevent memory leak
371 for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
372 templookup = input.getSharedRAbundVectors();
376 if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
378 //output error messages about any remaining user labels
379 set<string>::iterator it;
380 bool needToRun = false;
381 for (it = userLabels.begin(); it != userLabels.end(); it++) {
382 m->mothurOut("Your file does not include the label " + *it);
383 if (processedLabels.count(lastLabel) != 1) {
384 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
387 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
391 //run last label if you need to
392 if (needToRun == true) {
393 for (int i = 0; i < templookup.size(); i++) { if (templookup[i] != NULL) { delete templookup[i]; } }
394 templookup = input.getSharedRAbundVectors(lastLabel);
398 for (int i = 0; i < templookup.size(); i++) {
399 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
400 temp->setLabel(templookup[i]->getLabel());
401 temp->setGroup(templookup[i]->getGroup());
402 lookup.push_back(temp);
405 //convert to relabund
406 for (int i = 0; i < templookup.size(); i++) {
407 for (int j = 0; j < templookup[i]->getNumBins(); j++) {
409 if (m->control_pressed) { for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; } return lookup; }
411 int abund = templookup[i]->getAbundance(j);
412 float relabund = 0.0;
414 if (scale == "totalgroup") {
415 relabund = abund / (float) templookup[i]->getNumSeqs();
416 }else if (scale == "totalotu") {
417 //calc the total in this otu
419 for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
420 relabund = abund / (float) totalOtu;
421 }else if (scale == "averagegroup") {
422 relabund = abund / (float) (templookup[i]->getNumSeqs() / (float) templookup[i]->getNumBins());
423 }else if (scale == "averageotu") {
424 //calc the total in this otu
426 for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
427 float averageOtu = totalOtu / (float) templookup.size();
429 relabund = abund / (float) averageOtu;
430 }else{ m->mothurOut(scale + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; }
432 lookup[i]->push_back(relabund, lookup[i]->getGroup());
436 for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; }
440 catch(exception& e) {
441 m->errorOut(e, "MakeLefseCommand", "getSharedRelabund");
445 //**********************************************************************************************************************
446 vector<SharedRAbundFloatVector*> MakeLefseCommand::getRelabund(){
448 InputData input(relabundfile, "relabund");
449 vector<SharedRAbundFloatVector*> lookupFloat = input.getSharedRAbundFloatVectors();
450 string lastLabel = lookupFloat[0]->getLabel();
452 if (label == "") { label = lastLabel; return lookupFloat; }
454 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
455 set<string> labels; labels.insert(label);
456 set<string> processedLabels;
457 set<string> userLabels = labels;
459 //as long as you are not at the end of the file or done wih the lines you want
460 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
462 if (m->control_pressed) { return lookupFloat; }
464 if(labels.count(lookupFloat[0]->getLabel()) == 1){
465 processedLabels.insert(lookupFloat[0]->getLabel());
466 userLabels.erase(lookupFloat[0]->getLabel());
470 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
471 string saveLabel = lookupFloat[0]->getLabel();
473 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
474 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
476 processedLabels.insert(lookupFloat[0]->getLabel());
477 userLabels.erase(lookupFloat[0]->getLabel());
479 //restore real lastlabel to save below
480 lookupFloat[0]->setLabel(saveLabel);
484 lastLabel = lookupFloat[0]->getLabel();
486 //get next line to process
487 //prevent memory leak
488 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
489 lookupFloat = input.getSharedRAbundFloatVectors();
493 if (m->control_pressed) { return lookupFloat; }
495 //output error messages about any remaining user labels
496 set<string>::iterator it;
497 bool needToRun = false;
498 for (it = userLabels.begin(); it != userLabels.end(); it++) {
499 m->mothurOut("Your file does not include the label " + *it);
500 if (processedLabels.count(lastLabel) != 1) {
501 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
504 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
508 //run last label if you need to
509 if (needToRun == true) {
510 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }
511 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
516 catch(exception& e) {
517 m->errorOut(e, "MakeLefseCommand", "getRelabund");
522 //**********************************************************************************************************************