]> git.donarmstrong.com Git - mothur.git/blob - makelefsecommand.cpp
adding labels to list file.
[mothur.git] / makelefsecommand.cpp
1 //
2 //  makelefse.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 6/3/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "makelefsecommand.h"
10 #include "designmap.h"
11
12 //**********************************************************************************************************************
13 vector<string> MakeLefseCommand::setParameters(){
14         try {
15         CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared);
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund);
17         CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy);
18         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign);
19         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
20         CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "MakeLefseCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string MakeLefseCommand::getHelpString(){
36         try {
37                 string helpString = "";
38                 helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n";
39                 helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label.  The shared or relabund are required.\n";
40                 helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n";
41         helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n";
42         helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n";
43         helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
44         helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
45                 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
46                 helpString += "The make.lefse command should be in the following format: make.lefse(shared=yourSharedFile)\n";
47                 helpString += "make.lefse(shared=final.an.shared)\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "MakeLefseCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string MakeLefseCommand::getOutputPattern(string type) {
57     try {
58         string pattern = "";
59         
60         if (type == "lefse") {  pattern = "[filename],[distance],lefse"; }
61         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
62         
63         return pattern;
64     }
65     catch(exception& e) {
66         m->errorOut(e, "MakeLefseCommand", "getOutputPattern");
67         exit(1);
68     }
69 }
70 //**********************************************************************************************************************
71 MakeLefseCommand::MakeLefseCommand(){
72         try {
73                 abort = true; calledHelp = true;
74                 setParameters();
75         vector<string> tempOutNames;
76                 outputTypes["lefse"] = tempOutNames; 
77                         }
78         catch(exception& e) {
79                 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 MakeLefseCommand::MakeLefseCommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;
87                 
88                 //allow user to run help
89                 if(option == "help") { help(); abort = true; calledHelp = true; }
90                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91                 
92                 else {
93                         //valid paramters for this command
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         map<string,string>::iterator it;
101                         //check to make sure all parameters are valid for command
102                         for (it = parameters.begin(); it != parameters.end(); it++) {
103                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
104                         }
105                         
106                         vector<string> tempOutNames;
107             outputTypes["lefse"] = tempOutNames;
108             
109                         //if the user changes the input directory command factory will send this info to us in the output parameter
110                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
111                         if (inputDir == "not found"){   inputDir = "";          }
112                         else {
113                 string path;
114                                 it = parameters.find("shared");
115                                 //user has given a template file
116                                 if(it != parameters.end()){
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
120                                 }
121                                 
122                                 it = parameters.find("design");
123                                 //user has given a template file
124                                 if(it != parameters.end()){
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
128                                 }
129                                 
130                                 it = parameters.find("constaxonomy");
131                                 //user has given a template file
132                                 if(it != parameters.end()){
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
136                                 }
137                 
138                 it = parameters.find("relabund");
139                                 //user has given a template file
140                                 if(it != parameters.end()){
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
144                                 }
145             }
146                     
147                         //check for parameters
148                         designfile = validParameter.validFile(parameters, "design", true);
149                         if (designfile == "not open") { abort = true; }
150                         else if (designfile == "not found") { designfile = ""; }
151                         else { m->setDesignFile(designfile); }
152             
153             sharedfile = validParameter.validFile(parameters, "shared", true);
154                         if (sharedfile == "not open") { abort = true; }
155                         else if (sharedfile == "not found") { sharedfile = ""; }
156                         else {  m->setSharedFile(sharedfile); }
157                         
158                         relabundfile = validParameter.validFile(parameters, "relabund", true);
159                         if (relabundfile == "not open") { abort = true; }
160                         else if (relabundfile == "not found") { relabundfile = ""; }
161                         else {  m->setRelAbundFile(relabundfile); }
162             
163             constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
164                         if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
165                         else if (constaxonomyfile == "not found") {  constaxonomyfile = "";  }
166                         
167                         label = validParameter.validFile(parameters, "label", false);
168                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
169             
170             string groups = validParameter.validFile(parameters, "groups", false);
171                         if (groups == "not found") { groups = "";  }
172                         else {
173                                 m->splitAtDash(groups, Groups);
174                                 m->setGroups(Groups);
175                         }
176
177
178             if ((relabundfile == "") && (sharedfile == "")) {
179                                 //is there are current file available for either of these?
180                                 //give priority to shared, then relabund
181                                 sharedfile = m->getSharedFile();
182                                 if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
183                                 else {
184                                         relabundfile = m->getRelAbundFile();
185                                         if (relabundfile != "") {   m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
186                                         else {
187                                                 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
188                                                 abort = true;
189                                         }
190                                 }
191                         }
192                         
193                         if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true;  }
194                         
195             //if the user changes the output directory command factory will send this info to us in the output parameter
196                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
197                                 outputDir = "";                         }
198             
199             scale = validParameter.validFile(parameters, "scale", false);                               if (scale == "not found") { scale = "totalgroup"; }
200                         
201                         if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) {
202                                 m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu."); m->mothurOutEndLine(); abort = true;
203                         }
204         }
205                 
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
209                 exit(1);
210         }
211 }
212 //**********************************************************************************************************************
213
214 int MakeLefseCommand::execute(){
215         try {
216                 
217                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
218       
219         map<string, consTax2> consTax;
220         if (constaxonomyfile != "") {  m->readConsTax(constaxonomyfile, consTax);  }
221         
222         if (m->control_pressed) { return 0; }
223         
224         if (sharedfile != "") {
225             inputFile = sharedfile;
226             vector<SharedRAbundFloatVector*> lookup = getSharedRelabund();
227             runRelabund(consTax, lookup);
228         }else {
229             inputFile = relabundfile;
230             vector<SharedRAbundFloatVector*> lookup = getRelabund();
231             runRelabund(consTax, lookup);
232         }
233         
234         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
235                 
236         //output files created by command
237                 m->mothurOutEndLine();
238                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
239                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
240                 m->mothurOutEndLine();
241         return 0;
242                 
243     }
244         catch(exception& e) {
245                 m->errorOut(e, "MakeLefseCommand", "execute");
246                 exit(1);
247         }
248 }
249 //**********************************************************************************************************************
250 int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedRAbundFloatVector*>& lookup){
251         try {
252         if (outputDir == "") { outputDir = m->hasPath(inputFile); }
253         map<string, string> variables;
254         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
255         variables["[distance]"] = lookup[0]->getLabel();
256                 string outputFile = getOutputFileName("lefse",variables);
257                 outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile);
258         
259         ofstream out;
260         m->openOutputFile(outputFile, out);
261
262         DesignMap* designMap = NULL;
263         if (designfile != "") {
264             designMap = new DesignMap(designfile);
265             vector<string> categories = designMap->getNamesOfCategories();
266             
267             if (categories.size() > 3) {  m->mothurOut("\n[NOTE]: LEfSe input files allow for a class, subclass and subject.  More than 3 categories can cause formatting errors.\n\n"); }
268             
269             for (int j = 0; j < categories.size(); j++) {
270                 out << categories[j] << "\t";
271                 for (int i = 0; i < lookup.size()-1; i++) {
272                     if (m->control_pressed) { out.close(); delete designMap; return 0; }
273                     string value = designMap->get(lookup[i]->getGroup(), categories[j]);
274                     if (value == "not found") {
275                         m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
276                     }else { out << value << '\t'; }
277                 }
278                 string value = designMap->get(lookup[lookup.size()-1]->getGroup(), categories[j]);
279                 if (value == "not found") {
280                     m->mothurOut("[ERROR]: " + lookup[lookup.size()-1]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
281                 }else { out << value; }
282                 out << endl;
283             }
284         }
285         
286         out << "group\t";
287         for (int i = 0; i < lookup.size()-1; i++) {  out << lookup[i]->getGroup() << '\t'; }
288         out << lookup[lookup.size()-1]->getGroup() << endl;
289         
290         for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
291             if (m->control_pressed) { break; }
292             string nameOfOtu = m->currentSharedBinLabels[i];
293             if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
294                 map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
295                 if (it != consTax.end()) {
296                     nameOfOtu = it->second.taxonomy;
297                     //add sanity check abundances here??
298                     string fixedName = "";
299                     //remove confidences and change ; to |
300                     m->removeConfidences(nameOfOtu);
301                     for (int j = 0; j < nameOfOtu.length()-1; j++) {
302                         if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentSharedBinLabels[i] + '|'; }
303                         else { fixedName += nameOfOtu[j]; }
304                     }
305                     nameOfOtu = fixedName;
306                 }else {
307                     m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->control_pressed = true;
308                 }
309                 
310             }
311             //print name
312             out << nameOfOtu << '\t';
313             
314             //print out relabunds for each otu
315             for (int j = 0; j < lookup.size()-1; j++) { out << lookup[j]->getAbundance(i) << '\t'; }
316             
317             out << lookup[lookup.size()-1]->getAbundance(i)<< endl;
318         }
319         out.close();
320
321         return 0;
322     }
323         catch(exception& e) {
324                 m->errorOut(e, "MakeLefseCommand", "execute");
325                 exit(1);
326         }
327 }
328 //**********************************************************************************************************************
329 vector<SharedRAbundFloatVector*> MakeLefseCommand::getSharedRelabund(){
330         try {
331                 InputData input(sharedfile, "sharedfile");
332                 vector<SharedRAbundVector*> templookup = input.getSharedRAbundVectors();
333                 string lastLabel = templookup[0]->getLabel();
334         vector<SharedRAbundFloatVector*> lookup;
335                 
336                 if (label == "") { label = lastLabel;  }
337                 else {
338             //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
339             set<string> labels; labels.insert(label);
340             set<string> processedLabels;
341             set<string> userLabels = labels;
342             
343             //as long as you are not at the end of the file or done wih the lines you want
344             while((templookup[0] != NULL) && (userLabels.size() != 0)) {
345                 if (m->control_pressed) {  for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  } return lookup;  }
346                 
347                 if(labels.count(templookup[0]->getLabel()) == 1){
348                     processedLabels.insert(templookup[0]->getLabel());
349                     userLabels.erase(templookup[0]->getLabel());
350                     break;
351                 }
352                 
353                 if ((m->anyLabelsToProcess(templookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
354                     string saveLabel = templookup[0]->getLabel();
355                     
356                     for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  }
357                     templookup = input.getSharedRAbundVectors(lastLabel);
358                     
359                     processedLabels.insert(templookup[0]->getLabel());
360                     userLabels.erase(templookup[0]->getLabel());
361                     
362                     //restore real lastlabel to save below
363                     templookup[0]->setLabel(saveLabel);
364                     break;
365                 }
366                 
367                 lastLabel = templookup[0]->getLabel();
368                 
369                 //get next line to process
370                 //prevent memory leak
371                 for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  }
372                 templookup = input.getSharedRAbundVectors();
373             }
374             
375             
376             if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  } return lookup;  }
377             
378             //output error messages about any remaining user labels
379             set<string>::iterator it;
380             bool needToRun = false;
381             for (it = userLabels.begin(); it != userLabels.end(); it++) {
382                 m->mothurOut("Your file does not include the label " + *it);
383                 if (processedLabels.count(lastLabel) != 1) {
384                     m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
385                     needToRun = true;
386                 }else {
387                     m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
388                 }
389             }
390             
391             //run last label if you need to
392             if (needToRun == true)  {
393                 for (int i = 0; i < templookup.size(); i++) {  if (templookup[i] != NULL) {     delete templookup[i];   } }
394                 templookup = input.getSharedRAbundVectors(lastLabel);
395             }
396                 }
397         
398         for (int i = 0; i < templookup.size(); i++) {
399             SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
400             temp->setLabel(templookup[i]->getLabel());
401             temp->setGroup(templookup[i]->getGroup());
402             lookup.push_back(temp);
403         }
404         
405         //convert to relabund
406         for (int i = 0; i < templookup.size(); i++) {
407                         for (int j = 0; j < templookup[i]->getNumBins(); j++) {
408                 
409                                 if (m->control_pressed) { for (int k = 0; k < templookup.size(); k++) {  delete templookup[k];  } return lookup; }
410                 
411                                 int abund = templookup[i]->getAbundance(j);
412                                 float relabund = 0.0;
413                                 
414                                 if (scale == "totalgroup") {
415                                         relabund = abund / (float) templookup[i]->getNumSeqs();
416                                 }else if (scale == "totalotu") {
417                                         //calc the total in this otu
418                                         int totalOtu = 0;
419                                         for (int l = 0; l < templookup.size(); l++) {  totalOtu += templookup[l]->getAbundance(j); }
420                                         relabund = abund / (float) totalOtu;
421                                 }else if (scale == "averagegroup") {
422                                         relabund = abund / (float) (templookup[i]->getNumSeqs() / (float) templookup[i]->getNumBins());
423                                 }else if (scale == "averageotu") {
424                                         //calc the total in this otu
425                                         int totalOtu = 0;
426                                         for (int l = 0; l < templookup.size(); l++) {  totalOtu += templookup[l]->getAbundance(j); }
427                                         float averageOtu = totalOtu / (float) templookup.size();
428                                         
429                                         relabund = abund / (float) averageOtu;
430                                 }else{ m->mothurOut(scale + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true;  }
431                                 
432                                 lookup[i]->push_back(relabund, lookup[i]->getGroup());
433                         }
434         }
435
436         for (int k = 0; k < templookup.size(); k++) {  delete templookup[k];  }
437         
438                 return lookup;
439         }
440         catch(exception& e) {
441                 m->errorOut(e, "MakeLefseCommand", "getSharedRelabund");
442                 exit(1);
443         }
444 }
445 //**********************************************************************************************************************
446 vector<SharedRAbundFloatVector*> MakeLefseCommand::getRelabund(){
447         try {
448                 InputData input(relabundfile, "relabund");
449                 vector<SharedRAbundFloatVector*> lookupFloat = input.getSharedRAbundFloatVectors();
450                 string lastLabel = lookupFloat[0]->getLabel();
451                 
452                 if (label == "") { label = lastLabel; return lookupFloat; }
453                 
454                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
455                 set<string> labels; labels.insert(label);
456                 set<string> processedLabels;
457                 set<string> userLabels = labels;
458                 
459                 //as long as you are not at the end of the file or done wih the lines you want
460                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
461                         
462                         if (m->control_pressed) {  return lookupFloat;  }
463                         
464                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
465                                 processedLabels.insert(lookupFloat[0]->getLabel());
466                                 userLabels.erase(lookupFloat[0]->getLabel());
467                                 break;
468                         }
469                         
470                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
471                                 string saveLabel = lookupFloat[0]->getLabel();
472                                 
473                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
474                                 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
475                                 
476                                 processedLabels.insert(lookupFloat[0]->getLabel());
477                                 userLabels.erase(lookupFloat[0]->getLabel());
478                                 
479                                 //restore real lastlabel to save below
480                                 lookupFloat[0]->setLabel(saveLabel);
481                                 break;
482                         }
483                         
484                         lastLabel = lookupFloat[0]->getLabel();
485                         
486                         //get next line to process
487                         //prevent memory leak
488                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
489                         lookupFloat = input.getSharedRAbundFloatVectors();
490                 }
491                 
492                 
493                 if (m->control_pressed) { return lookupFloat;  }
494                 
495                 //output error messages about any remaining user labels
496                 set<string>::iterator it;
497                 bool needToRun = false;
498                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
499                         m->mothurOut("Your file does not include the label " + *it);
500                         if (processedLabels.count(lastLabel) != 1) {
501                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
502                                 needToRun = true;
503                         }else {
504                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
505                         }
506                 }
507                 
508                 //run last label if you need to
509                 if (needToRun == true)  {
510                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } }
511                         lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
512                 }
513                 
514                 return lookupFloat;
515         }
516         catch(exception& e) {
517                 m->errorOut(e, "MakeLefseCommand", "getRelabund");
518         exit(1);
519         }
520 }
521
522 //**********************************************************************************************************************
523