5 // Created by SarahsWork on 6/3/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "makelefsecommand.h"
11 //**********************************************************************************************************************
12 vector<string> MakeLefseCommand::setParameters(){
14 CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared);
15 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund);
16 CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy);
17 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign);
18 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
19 CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale);
20 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
21 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24 vector<string> myArray;
25 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
29 m->errorOut(e, "MakeLefseCommand", "setParameters");
33 //**********************************************************************************************************************
34 string MakeLefseCommand::getHelpString(){
36 string helpString = "";
37 helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n";
38 helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label. The shared or relabund are required.\n";
39 helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n";
40 helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n";
41 helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n";
42 helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance. ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
43 helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
44 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
45 helpString += "The make.lefse command should be in the following format: make.lefse(list=yourListFile, taxonomy=outputFromClassify.seqsCommand, name=yourNameFile)\n";
46 helpString += "make.lefse(shared=final.an.list, taxonomy=final.an.taxonomy, name=final.names)\n";
50 m->errorOut(e, "MakeLefseCommand", "getHelpString");
54 //**********************************************************************************************************************
55 string MakeLefseCommand::getOutputPattern(string type) {
59 if (type == "lefse") { pattern = "[filename],[distance],lefse"; }
60 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
65 m->errorOut(e, "MakeLefseCommand", "getOutputPattern");
69 //**********************************************************************************************************************
70 MakeLefseCommand::MakeLefseCommand(){
72 abort = true; calledHelp = true;
74 vector<string> tempOutNames;
75 outputTypes["lefse"] = tempOutNames;
78 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
82 //**********************************************************************************************************************
83 MakeLefseCommand::MakeLefseCommand(string option) {
85 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92 //valid paramters for this command
93 vector<string> myArray = setParameters();
95 OptionParser parser(option);
96 map<string,string> parameters = parser.getParameters();
98 ValidParameters validParameter;
99 map<string,string>::iterator it;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 vector<string> tempOutNames;
106 outputTypes["lefse"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("shared");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["shared"] = inputDir + it->second; }
121 it = parameters.find("design");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["design"] = inputDir + it->second; }
129 it = parameters.find("constaxonomy");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["constaxonomy"] = inputDir + it->second; }
137 it = parameters.find("relabund");
138 //user has given a template file
139 if(it != parameters.end()){
140 path = m->hasPath(it->second);
141 //if the user has not given a path then, add inputdir. else leave path alone.
142 if (path == "") { parameters["relabund"] = inputDir + it->second; }
146 //check for parameters
147 designfile = validParameter.validFile(parameters, "design", true);
148 if (designfile == "not open") { abort = true; }
149 else if (designfile == "not found") { designfile = ""; }
150 else { m->setDesignFile(designfile); }
152 sharedfile = validParameter.validFile(parameters, "shared", true);
153 if (sharedfile == "not open") { abort = true; }
154 else if (sharedfile == "not found") { sharedfile = ""; }
155 else { m->setSharedFile(sharedfile); }
157 relabundfile = validParameter.validFile(parameters, "relabund", true);
158 if (relabundfile == "not open") { abort = true; }
159 else if (relabundfile == "not found") { relabundfile = ""; }
160 else { m->setRelAbundFile(relabundfile); }
162 constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
163 if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
164 else if (constaxonomyfile == "not found") { constaxonomyfile = ""; }
166 label = validParameter.validFile(parameters, "label", false);
167 if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
169 string groups = validParameter.validFile(parameters, "groups", false);
170 if (groups == "not found") { groups = ""; }
172 m->splitAtDash(groups, Groups);
173 m->setGroups(Groups);
177 if ((relabundfile == "") && (sharedfile == "")) {
178 //is there are current file available for either of these?
179 //give priority to shared, then relabund
180 sharedfile = m->getSharedFile();
181 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
183 relabundfile = m->getRelAbundFile();
184 if (relabundfile != "") { m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
186 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
192 if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; }
194 //if the user changes the output directory command factory will send this info to us in the output parameter
195 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
198 scale = validParameter.validFile(parameters, "scale", false); if (scale == "not found") { scale = "totalgroup"; }
200 if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) {
201 m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu."); m->mothurOutEndLine(); abort = true;
206 catch(exception& e) {
207 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
211 //**********************************************************************************************************************
213 int MakeLefseCommand::execute(){
216 if (abort == true) { if (calledHelp) { return 0; } return 2; }
218 map<string, consTax2> consTax;
219 if (constaxonomyfile != "") { m->readConsTax(constaxonomyfile, consTax); }
221 if (m->control_pressed) { return 0; }
223 if (sharedfile != "") {
224 inputFile = sharedfile;
225 vector<SharedRAbundFloatVector*> lookup = getSharedRelabund();
226 runRelabund(consTax, lookup);
228 inputFile = relabundfile;
229 vector<SharedRAbundFloatVector*> lookup = getRelabund();
230 runRelabund(consTax, lookup);
233 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
235 //output files created by command
236 m->mothurOutEndLine();
237 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
238 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
239 m->mothurOutEndLine();
243 catch(exception& e) {
244 m->errorOut(e, "MakeLefseCommand", "execute");
248 //**********************************************************************************************************************
249 int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedRAbundFloatVector*>& lookup){
251 if (outputDir == "") { outputDir = m->hasPath(inputFile); }
252 map<string, string> variables;
253 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
254 variables["[distance]"] = lookup[0]->getLabel();
255 string outputFile = getOutputFileName("lefse",variables);
256 outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile);
259 m->openOutputFile(outputFile, out);
261 GroupMap* designMap = NULL;
262 if (designfile != "") {
263 designMap = new GroupMap(designfile);
264 designMap->readDesignMap();
266 out << "treatment\t";
267 for (int i = 0; i < lookup.size(); i++) {
268 string treatment = designMap->getGroup(lookup[i]->getGroup());
269 if (treatment == "not found") {
270 m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n");
271 }else { out << treatment << '\t'; }
277 for (int i = 0; i < lookup.size(); i++) { out << lookup[i]->getGroup() << '\t'; }
280 for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
281 if (m->control_pressed) { break; }
282 string nameOfOtu = m->currentBinLabels[i];
283 if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
284 map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
285 if (it != consTax.end()) {
286 nameOfOtu = it->second.taxonomy;
287 //add sanity check abundances here??
288 string fixedName = "";
289 //remove confidences and change ; to |
290 m->removeConfidences(nameOfOtu);
291 for (int j = 0; j < nameOfOtu.length()-1; j++) {
292 if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; }
293 else { fixedName += nameOfOtu[j]; }
295 nameOfOtu = fixedName;
297 m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->control_pressed = true;
302 out << nameOfOtu << '\t';
304 //print out relabunds for each otu
305 for (int j = 0; j < lookup.size(); j++) { out << lookup[j]->getAbundance(i) << '\t'; }
312 catch(exception& e) {
313 m->errorOut(e, "MakeLefseCommand", "execute");
317 //**********************************************************************************************************************
318 vector<SharedRAbundFloatVector*> MakeLefseCommand::getSharedRelabund(){
320 InputData input(sharedfile, "sharedfile");
321 vector<SharedRAbundVector*> templookup = input.getSharedRAbundVectors();
322 string lastLabel = templookup[0]->getLabel();
323 vector<SharedRAbundFloatVector*> lookup;
325 if (label == "") { label = lastLabel; }
327 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
328 set<string> labels; labels.insert(label);
329 set<string> processedLabels;
330 set<string> userLabels = labels;
332 //as long as you are not at the end of the file or done wih the lines you want
333 while((templookup[0] != NULL) && (userLabels.size() != 0)) {
334 if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
336 if(labels.count(templookup[0]->getLabel()) == 1){
337 processedLabels.insert(templookup[0]->getLabel());
338 userLabels.erase(templookup[0]->getLabel());
342 if ((m->anyLabelsToProcess(templookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
343 string saveLabel = templookup[0]->getLabel();
345 for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
346 templookup = input.getSharedRAbundVectors(lastLabel);
348 processedLabels.insert(templookup[0]->getLabel());
349 userLabels.erase(templookup[0]->getLabel());
351 //restore real lastlabel to save below
352 templookup[0]->setLabel(saveLabel);
356 lastLabel = templookup[0]->getLabel();
358 //get next line to process
359 //prevent memory leak
360 for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; }
361 templookup = input.getSharedRAbundVectors();
365 if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) { delete templookup[i]; } return lookup; }
367 //output error messages about any remaining user labels
368 set<string>::iterator it;
369 bool needToRun = false;
370 for (it = userLabels.begin(); it != userLabels.end(); it++) {
371 m->mothurOut("Your file does not include the label " + *it);
372 if (processedLabels.count(lastLabel) != 1) {
373 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
376 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
380 //run last label if you need to
381 if (needToRun == true) {
382 for (int i = 0; i < templookup.size(); i++) { if (templookup[i] != NULL) { delete templookup[i]; } }
383 templookup = input.getSharedRAbundVectors(lastLabel);
387 for (int i = 0; i < templookup.size(); i++) {
388 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
389 temp->setLabel(templookup[i]->getLabel());
390 temp->setGroup(templookup[i]->getGroup());
391 lookup.push_back(temp);
394 //convert to relabund
395 for (int i = 0; i < templookup.size(); i++) {
396 for (int j = 0; j < templookup[i]->getNumBins(); j++) {
398 if (m->control_pressed) { for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; } return lookup; }
400 int abund = templookup[i]->getAbundance(j);
401 float relabund = 0.0;
403 if (scale == "totalgroup") {
404 relabund = abund / (float) templookup[i]->getNumSeqs();
405 }else if (scale == "totalotu") {
406 //calc the total in this otu
408 for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
409 relabund = abund / (float) totalOtu;
410 }else if (scale == "averagegroup") {
411 relabund = abund / (float) (templookup[i]->getNumSeqs() / (float) templookup[i]->getNumBins());
412 }else if (scale == "averageotu") {
413 //calc the total in this otu
415 for (int l = 0; l < templookup.size(); l++) { totalOtu += templookup[l]->getAbundance(j); }
416 float averageOtu = totalOtu / (float) templookup.size();
418 relabund = abund / (float) averageOtu;
419 }else{ m->mothurOut(scale + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true; }
421 lookup[i]->push_back(relabund, lookup[i]->getGroup());
425 for (int k = 0; k < templookup.size(); k++) { delete templookup[k]; }
429 catch(exception& e) {
430 m->errorOut(e, "MakeLefseCommand", "getSharedRelabund");
434 //**********************************************************************************************************************
435 vector<SharedRAbundFloatVector*> MakeLefseCommand::getRelabund(){
437 InputData input(relabundfile, "relabund");
438 vector<SharedRAbundFloatVector*> lookupFloat = input.getSharedRAbundFloatVectors();
439 string lastLabel = lookupFloat[0]->getLabel();
441 if (label == "") { label = lastLabel; return lookupFloat; }
443 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
444 set<string> labels; labels.insert(label);
445 set<string> processedLabels;
446 set<string> userLabels = labels;
448 //as long as you are not at the end of the file or done wih the lines you want
449 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
451 if (m->control_pressed) { return lookupFloat; }
453 if(labels.count(lookupFloat[0]->getLabel()) == 1){
454 processedLabels.insert(lookupFloat[0]->getLabel());
455 userLabels.erase(lookupFloat[0]->getLabel());
459 if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
460 string saveLabel = lookupFloat[0]->getLabel();
462 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
463 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
465 processedLabels.insert(lookupFloat[0]->getLabel());
466 userLabels.erase(lookupFloat[0]->getLabel());
468 //restore real lastlabel to save below
469 lookupFloat[0]->setLabel(saveLabel);
473 lastLabel = lookupFloat[0]->getLabel();
475 //get next line to process
476 //prevent memory leak
477 for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; }
478 lookupFloat = input.getSharedRAbundFloatVectors();
482 if (m->control_pressed) { return lookupFloat; }
484 //output error messages about any remaining user labels
485 set<string>::iterator it;
486 bool needToRun = false;
487 for (it = userLabels.begin(); it != userLabels.end(); it++) {
488 m->mothurOut("Your file does not include the label " + *it);
489 if (processedLabels.count(lastLabel) != 1) {
490 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
493 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
497 //run last label if you need to
498 if (needToRun == true) {
499 for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i] != NULL) { delete lookupFloat[i]; } }
500 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
505 catch(exception& e) {
506 m->errorOut(e, "MakeLefseCommand", "getRelabund");
511 //**********************************************************************************************************************