]> git.donarmstrong.com Git - mothur.git/blob - makelefsecommand.cpp
added make.lefse command. fixed bug in make.contigs with trimming reverse barcodes...
[mothur.git] / makelefsecommand.cpp
1 //
2 //  makelefse.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 6/3/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "makelefsecommand.h"
10
11 //**********************************************************************************************************************
12 vector<string> MakeLefseCommand::setParameters(){
13         try {
14         CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared);
15                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund);
16         CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy);
17         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign);
18         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
19         CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale);
20                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
21         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "MakeLefseCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string MakeLefseCommand::getHelpString(){
35         try {
36                 string helpString = "";
37                 helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n";
38                 helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label.  The shared or relabund are required.\n";
39                 helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n";
40         helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n";
41         helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n";
42         helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
43         helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
44                 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
45                 helpString += "The make.lefse command should be in the following format: make.lefse(list=yourListFile, taxonomy=outputFromClassify.seqsCommand, name=yourNameFile)\n";
46                 helpString += "make.lefse(shared=final.an.list, taxonomy=final.an.taxonomy, name=final.names)\n";
47                 return helpString;
48         }
49         catch(exception& e) {
50                 m->errorOut(e, "MakeLefseCommand", "getHelpString");
51                 exit(1);
52         }
53 }
54 //**********************************************************************************************************************
55 string MakeLefseCommand::getOutputPattern(string type) {
56     try {
57         string pattern = "";
58         
59         if (type == "lefse") {  pattern = "[filename],[distance],lefse"; }
60         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
61         
62         return pattern;
63     }
64     catch(exception& e) {
65         m->errorOut(e, "MakeLefseCommand", "getOutputPattern");
66         exit(1);
67     }
68 }
69 //**********************************************************************************************************************
70 MakeLefseCommand::MakeLefseCommand(){
71         try {
72                 abort = true; calledHelp = true;
73                 setParameters();
74         vector<string> tempOutNames;
75                 outputTypes["lefse"] = tempOutNames; 
76                         }
77         catch(exception& e) {
78                 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
79                 exit(1);
80         }
81 }
82 //**********************************************************************************************************************
83 MakeLefseCommand::MakeLefseCommand(string option)  {
84         try {
85                 abort = false; calledHelp = false;
86                 
87                 //allow user to run help
88                 if(option == "help") { help(); abort = true; calledHelp = true; }
89                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
90                 
91                 else {
92                         //valid paramters for this command
93                         vector<string> myArray = setParameters();
94                         
95                         OptionParser parser(option);
96                         map<string,string> parameters = parser.getParameters();
97                         
98                         ValidParameters validParameter;
99                         map<string,string>::iterator it;
100                         //check to make sure all parameters are valid for command
101                         for (it = parameters.begin(); it != parameters.end(); it++) {
102                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
103                         }
104                         
105                         vector<string> tempOutNames;
106             outputTypes["lefse"] = tempOutNames;
107             
108                         //if the user changes the input directory command factory will send this info to us in the output parameter
109                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
110                         if (inputDir == "not found"){   inputDir = "";          }
111                         else {
112                 string path;
113                                 it = parameters.find("shared");
114                                 //user has given a template file
115                                 if(it != parameters.end()){
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
119                                 }
120                                 
121                                 it = parameters.find("design");
122                                 //user has given a template file
123                                 if(it != parameters.end()){
124                                         path = m->hasPath(it->second);
125                                         //if the user has not given a path then, add inputdir. else leave path alone.
126                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
127                                 }
128                                 
129                                 it = parameters.find("constaxonomy");
130                                 //user has given a template file
131                                 if(it != parameters.end()){
132                                         path = m->hasPath(it->second);
133                                         //if the user has not given a path then, add inputdir. else leave path alone.
134                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
135                                 }
136                 
137                 it = parameters.find("relabund");
138                                 //user has given a template file
139                                 if(it != parameters.end()){
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
143                                 }
144             }
145                     
146                         //check for parameters
147                         designfile = validParameter.validFile(parameters, "design", true);
148                         if (designfile == "not open") { abort = true; }
149                         else if (designfile == "not found") { designfile = ""; }
150                         else { m->setDesignFile(designfile); }
151             
152             sharedfile = validParameter.validFile(parameters, "shared", true);
153                         if (sharedfile == "not open") { abort = true; }
154                         else if (sharedfile == "not found") { sharedfile = ""; }
155                         else {  m->setSharedFile(sharedfile); }
156                         
157                         relabundfile = validParameter.validFile(parameters, "relabund", true);
158                         if (relabundfile == "not open") { abort = true; }
159                         else if (relabundfile == "not found") { relabundfile = ""; }
160                         else {  m->setRelAbundFile(relabundfile); }
161             
162             constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
163                         if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
164                         else if (constaxonomyfile == "not found") {  constaxonomyfile = "";  }
165                         
166                         label = validParameter.validFile(parameters, "label", false);
167                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
168             
169             string groups = validParameter.validFile(parameters, "groups", false);
170                         if (groups == "not found") { groups = "";  }
171                         else {
172                                 m->splitAtDash(groups, Groups);
173                                 m->setGroups(Groups);
174                         }
175
176
177             if ((relabundfile == "") && (sharedfile == "")) {
178                                 //is there are current file available for either of these?
179                                 //give priority to shared, then relabund
180                                 sharedfile = m->getSharedFile();
181                                 if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
182                                 else {
183                                         relabundfile = m->getRelAbundFile();
184                                         if (relabundfile != "") {   m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
185                                         else {
186                                                 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
187                                                 abort = true;
188                                         }
189                                 }
190                         }
191                         
192                         if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true;  }
193                         
194             //if the user changes the output directory command factory will send this info to us in the output parameter
195                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
196                                 outputDir = "";                         }
197             
198             scale = validParameter.validFile(parameters, "scale", false);                               if (scale == "not found") { scale = "totalgroup"; }
199                         
200                         if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) {
201                                 m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu."); m->mothurOutEndLine(); abort = true;
202                         }
203         }
204                 
205         }
206         catch(exception& e) {
207                 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
208                 exit(1);
209         }
210 }
211 //**********************************************************************************************************************
212
213 int MakeLefseCommand::execute(){
214         try {
215                 
216                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
217       
218         map<string, consTax2> consTax;
219         if (constaxonomyfile != "") {  m->readConsTax(constaxonomyfile, consTax);  }
220         
221         if (m->control_pressed) { return 0; }
222         
223         if (sharedfile != "") {
224             inputFile = sharedfile;
225             vector<SharedRAbundFloatVector*> lookup = getSharedRelabund();
226             runRelabund(consTax, lookup);
227         }else {
228             inputFile = relabundfile;
229             vector<SharedRAbundFloatVector*> lookup = getRelabund();
230             runRelabund(consTax, lookup);
231         }
232         
233         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
234                 
235         //output files created by command
236                 m->mothurOutEndLine();
237                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
238                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
239                 m->mothurOutEndLine();
240         return 0;
241                 
242     }
243         catch(exception& e) {
244                 m->errorOut(e, "MakeLefseCommand", "execute");
245                 exit(1);
246         }
247 }
248 //**********************************************************************************************************************
249 int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedRAbundFloatVector*>& lookup){
250         try {
251         if (outputDir == "") { outputDir = m->hasPath(inputFile); }
252         map<string, string> variables;
253         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
254         variables["[distance]"] = lookup[0]->getLabel();
255                 string outputFile = getOutputFileName("lefse",variables);
256                 outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile);
257         
258         ofstream out;
259         m->openOutputFile(outputFile, out);
260
261         GroupMap* designMap = NULL;
262         if (designfile != "") {
263             designMap = new GroupMap(designfile);
264             designMap->readDesignMap();
265             
266             out << "treatment\t";
267             for (int i = 0; i < lookup.size(); i++) {
268                 string treatment = designMap->getGroup(lookup[i]->getGroup());
269                 if (treatment == "not found") {
270                     m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); 
271                 }else { out << treatment << '\t'; }
272             }
273             out << endl;
274         }
275         
276         out << "group\t";
277         for (int i = 0; i < lookup.size(); i++) {  out << lookup[i]->getGroup() << '\t'; }
278         out << endl;
279         
280         for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
281             if (m->control_pressed) { break; }
282             string nameOfOtu = m->currentBinLabels[i];
283             if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
284                 map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
285                 if (it != consTax.end()) {
286                     nameOfOtu = it->second.taxonomy;
287                     //add sanity check abundances here??
288                     string fixedName = "";
289                     //remove confidences and change ; to |
290                     m->removeConfidences(nameOfOtu);
291                     for (int j = 0; j < nameOfOtu.length()-1; j++) {
292                         if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; }
293                         else { fixedName += nameOfOtu[j]; }
294                     }
295                     nameOfOtu = fixedName;
296                 }else {
297                     m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->control_pressed = true;
298                 }
299                 
300             }
301             //print name
302             out << nameOfOtu << '\t';
303             
304             //print out relabunds for each otu
305             for (int j = 0; j < lookup.size(); j++) { out << lookup[j]->getAbundance(i) << '\t'; }
306             out << endl;
307         }
308         out.close();
309
310         return 0;
311     }
312         catch(exception& e) {
313                 m->errorOut(e, "MakeLefseCommand", "execute");
314                 exit(1);
315         }
316 }
317 //**********************************************************************************************************************
318 vector<SharedRAbundFloatVector*> MakeLefseCommand::getSharedRelabund(){
319         try {
320                 InputData input(sharedfile, "sharedfile");
321                 vector<SharedRAbundVector*> templookup = input.getSharedRAbundVectors();
322                 string lastLabel = templookup[0]->getLabel();
323         vector<SharedRAbundFloatVector*> lookup;
324                 
325                 if (label == "") { label = lastLabel;  }
326                 else {
327             //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
328             set<string> labels; labels.insert(label);
329             set<string> processedLabels;
330             set<string> userLabels = labels;
331             
332             //as long as you are not at the end of the file or done wih the lines you want
333             while((templookup[0] != NULL) && (userLabels.size() != 0)) {
334                 if (m->control_pressed) {  for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  } return lookup;  }
335                 
336                 if(labels.count(templookup[0]->getLabel()) == 1){
337                     processedLabels.insert(templookup[0]->getLabel());
338                     userLabels.erase(templookup[0]->getLabel());
339                     break;
340                 }
341                 
342                 if ((m->anyLabelsToProcess(templookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
343                     string saveLabel = templookup[0]->getLabel();
344                     
345                     for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  }
346                     templookup = input.getSharedRAbundVectors(lastLabel);
347                     
348                     processedLabels.insert(templookup[0]->getLabel());
349                     userLabels.erase(templookup[0]->getLabel());
350                     
351                     //restore real lastlabel to save below
352                     templookup[0]->setLabel(saveLabel);
353                     break;
354                 }
355                 
356                 lastLabel = templookup[0]->getLabel();
357                 
358                 //get next line to process
359                 //prevent memory leak
360                 for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  }
361                 templookup = input.getSharedRAbundVectors();
362             }
363             
364             
365             if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  } return lookup;  }
366             
367             //output error messages about any remaining user labels
368             set<string>::iterator it;
369             bool needToRun = false;
370             for (it = userLabels.begin(); it != userLabels.end(); it++) {
371                 m->mothurOut("Your file does not include the label " + *it);
372                 if (processedLabels.count(lastLabel) != 1) {
373                     m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
374                     needToRun = true;
375                 }else {
376                     m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
377                 }
378             }
379             
380             //run last label if you need to
381             if (needToRun == true)  {
382                 for (int i = 0; i < templookup.size(); i++) {  if (templookup[i] != NULL) {     delete templookup[i];   } }
383                 templookup = input.getSharedRAbundVectors(lastLabel);
384             }
385                 }
386         
387         for (int i = 0; i < templookup.size(); i++) {
388             SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
389             temp->setLabel(templookup[i]->getLabel());
390             temp->setGroup(templookup[i]->getGroup());
391             lookup.push_back(temp);
392         }
393         
394         //convert to relabund
395         for (int i = 0; i < templookup.size(); i++) {
396                         for (int j = 0; j < templookup[i]->getNumBins(); j++) {
397                 
398                                 if (m->control_pressed) { for (int k = 0; k < templookup.size(); k++) {  delete templookup[k];  } return lookup; }
399                 
400                                 int abund = templookup[i]->getAbundance(j);
401                                 float relabund = 0.0;
402                                 
403                                 if (scale == "totalgroup") {
404                                         relabund = abund / (float) templookup[i]->getNumSeqs();
405                                 }else if (scale == "totalotu") {
406                                         //calc the total in this otu
407                                         int totalOtu = 0;
408                                         for (int l = 0; l < templookup.size(); l++) {  totalOtu += templookup[l]->getAbundance(j); }
409                                         relabund = abund / (float) totalOtu;
410                                 }else if (scale == "averagegroup") {
411                                         relabund = abund / (float) (templookup[i]->getNumSeqs() / (float) templookup[i]->getNumBins());
412                                 }else if (scale == "averageotu") {
413                                         //calc the total in this otu
414                                         int totalOtu = 0;
415                                         for (int l = 0; l < templookup.size(); l++) {  totalOtu += templookup[l]->getAbundance(j); }
416                                         float averageOtu = totalOtu / (float) templookup.size();
417                                         
418                                         relabund = abund / (float) averageOtu;
419                                 }else{ m->mothurOut(scale + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true;  }
420                                 
421                                 lookup[i]->push_back(relabund, lookup[i]->getGroup());
422                         }
423         }
424
425         for (int k = 0; k < templookup.size(); k++) {  delete templookup[k];  }
426         
427                 return lookup;
428         }
429         catch(exception& e) {
430                 m->errorOut(e, "MakeLefseCommand", "getSharedRelabund");
431                 exit(1);
432         }
433 }
434 //**********************************************************************************************************************
435 vector<SharedRAbundFloatVector*> MakeLefseCommand::getRelabund(){
436         try {
437                 InputData input(relabundfile, "relabund");
438                 vector<SharedRAbundFloatVector*> lookupFloat = input.getSharedRAbundFloatVectors();
439                 string lastLabel = lookupFloat[0]->getLabel();
440                 
441                 if (label == "") { label = lastLabel; return lookupFloat; }
442                 
443                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
444                 set<string> labels; labels.insert(label);
445                 set<string> processedLabels;
446                 set<string> userLabels = labels;
447                 
448                 //as long as you are not at the end of the file or done wih the lines you want
449                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
450                         
451                         if (m->control_pressed) {  return lookupFloat;  }
452                         
453                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
454                                 processedLabels.insert(lookupFloat[0]->getLabel());
455                                 userLabels.erase(lookupFloat[0]->getLabel());
456                                 break;
457                         }
458                         
459                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
460                                 string saveLabel = lookupFloat[0]->getLabel();
461                                 
462                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
463                                 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
464                                 
465                                 processedLabels.insert(lookupFloat[0]->getLabel());
466                                 userLabels.erase(lookupFloat[0]->getLabel());
467                                 
468                                 //restore real lastlabel to save below
469                                 lookupFloat[0]->setLabel(saveLabel);
470                                 break;
471                         }
472                         
473                         lastLabel = lookupFloat[0]->getLabel();
474                         
475                         //get next line to process
476                         //prevent memory leak
477                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
478                         lookupFloat = input.getSharedRAbundFloatVectors();
479                 }
480                 
481                 
482                 if (m->control_pressed) { return lookupFloat;  }
483                 
484                 //output error messages about any remaining user labels
485                 set<string>::iterator it;
486                 bool needToRun = false;
487                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
488                         m->mothurOut("Your file does not include the label " + *it);
489                         if (processedLabels.count(lastLabel) != 1) {
490                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
491                                 needToRun = true;
492                         }else {
493                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
494                         }
495                 }
496                 
497                 //run last label if you need to
498                 if (needToRun == true)  {
499                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } }
500                         lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
501                 }
502                 
503                 return lookupFloat;
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "MakeLefseCommand", "getRelabund");
507         exit(1);
508         }
509 }
510
511 //**********************************************************************************************************************
512