]> git.donarmstrong.com Git - mothur.git/blob - makelefsecommand.cpp
added kruskal.wallis command. added worked on make.lefse. working of lefse command...
[mothur.git] / makelefsecommand.cpp
1 //
2 //  makelefse.cpp
3 //  Mothur
4 //
5 //  Created by SarahsWork on 6/3/13.
6 //  Copyright (c) 2013 Schloss Lab. All rights reserved.
7 //
8
9 #include "makelefsecommand.h"
10 #include "designmap.h"
11
12 //**********************************************************************************************************************
13 vector<string> MakeLefseCommand::setParameters(){
14         try {
15         CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(pshared);
16                 CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","lefse",false,false,true); parameters.push_back(prelabund);
17         CommandParameter pconstaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false,false); parameters.push_back(pconstaxonomy);
18         CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,false, true); parameters.push_back(pdesign);
19         CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
20         CommandParameter pscale("scale", "Multiple", "totalgroup-totalotu-averagegroup-averageotu", "totalgroup", "", "", "","",false,false); parameters.push_back(pscale);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "MakeLefseCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string MakeLefseCommand::getHelpString(){
36         try {
37                 string helpString = "";
38                 helpString += "The make.lefse command allows you to create a lefse formatted input file from mothur's output files.\n";
39                 helpString += "The make.lefse command parameters are: shared, relabund, constaxonomy, design, scale, groups and label.  The shared or relabund are required.\n";
40                 helpString += "The shared parameter is used to input your shared file, http://www.wiki.mothur.org/wiki/Shared_file.\n";
41         helpString += "The relabund parameter is used to input your relabund file, http://www.wiki.mothur.org/wiki/Relabund_file.\n";
42         helpString += "The design parameter is used to input your design file, http://www.wiki.mothur.org/wiki/Design_File.\n";
43         helpString += "The constaxonomy parameter is used to input your taxonomy file. http://www.wiki.mothur.org/wiki/Constaxonomy_file. The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. \n";
44         helpString += "The scale parameter allows you to select what scale you would like to use to convert your shared file abundances to relative abundances. Choices are totalgroup, totalotu, averagegroup, averageotu, default is totalgroup.\n";
45                 helpString += "The label parameter allows you to select what distance level you would like used, if none is given the first distance is used.\n";
46                 helpString += "The make.lefse command should be in the following format: make.lefse(list=yourListFile, taxonomy=outputFromClassify.seqsCommand, name=yourNameFile)\n";
47                 helpString += "make.lefse(shared=final.an.list, taxonomy=final.an.taxonomy, name=final.names)\n";
48                 return helpString;
49         }
50         catch(exception& e) {
51                 m->errorOut(e, "MakeLefseCommand", "getHelpString");
52                 exit(1);
53         }
54 }
55 //**********************************************************************************************************************
56 string MakeLefseCommand::getOutputPattern(string type) {
57     try {
58         string pattern = "";
59         
60         if (type == "lefse") {  pattern = "[filename],[distance],lefse"; }
61         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
62         
63         return pattern;
64     }
65     catch(exception& e) {
66         m->errorOut(e, "MakeLefseCommand", "getOutputPattern");
67         exit(1);
68     }
69 }
70 //**********************************************************************************************************************
71 MakeLefseCommand::MakeLefseCommand(){
72         try {
73                 abort = true; calledHelp = true;
74                 setParameters();
75         vector<string> tempOutNames;
76                 outputTypes["lefse"] = tempOutNames; 
77                         }
78         catch(exception& e) {
79                 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 MakeLefseCommand::MakeLefseCommand(string option)  {
85         try {
86                 abort = false; calledHelp = false;
87                 
88                 //allow user to run help
89                 if(option == "help") { help(); abort = true; calledHelp = true; }
90                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
91                 
92                 else {
93                         //valid paramters for this command
94                         vector<string> myArray = setParameters();
95                         
96                         OptionParser parser(option);
97                         map<string,string> parameters = parser.getParameters();
98                         
99                         ValidParameters validParameter;
100                         map<string,string>::iterator it;
101                         //check to make sure all parameters are valid for command
102                         for (it = parameters.begin(); it != parameters.end(); it++) {
103                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
104                         }
105                         
106                         vector<string> tempOutNames;
107             outputTypes["lefse"] = tempOutNames;
108             
109                         //if the user changes the input directory command factory will send this info to us in the output parameter
110                         string inputDir = validParameter.validFile(parameters, "inputdir", false);
111                         if (inputDir == "not found"){   inputDir = "";          }
112                         else {
113                 string path;
114                                 it = parameters.find("shared");
115                                 //user has given a template file
116                                 if(it != parameters.end()){
117                                         path = m->hasPath(it->second);
118                                         //if the user has not given a path then, add inputdir. else leave path alone.
119                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
120                                 }
121                                 
122                                 it = parameters.find("design");
123                                 //user has given a template file
124                                 if(it != parameters.end()){
125                                         path = m->hasPath(it->second);
126                                         //if the user has not given a path then, add inputdir. else leave path alone.
127                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
128                                 }
129                                 
130                                 it = parameters.find("constaxonomy");
131                                 //user has given a template file
132                                 if(it != parameters.end()){
133                                         path = m->hasPath(it->second);
134                                         //if the user has not given a path then, add inputdir. else leave path alone.
135                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
136                                 }
137                 
138                 it = parameters.find("relabund");
139                                 //user has given a template file
140                                 if(it != parameters.end()){
141                                         path = m->hasPath(it->second);
142                                         //if the user has not given a path then, add inputdir. else leave path alone.
143                                         if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
144                                 }
145             }
146                     
147                         //check for parameters
148                         designfile = validParameter.validFile(parameters, "design", true);
149                         if (designfile == "not open") { abort = true; }
150                         else if (designfile == "not found") { designfile = ""; }
151                         else { m->setDesignFile(designfile); }
152             
153             sharedfile = validParameter.validFile(parameters, "shared", true);
154                         if (sharedfile == "not open") { abort = true; }
155                         else if (sharedfile == "not found") { sharedfile = ""; }
156                         else {  m->setSharedFile(sharedfile); }
157                         
158                         relabundfile = validParameter.validFile(parameters, "relabund", true);
159                         if (relabundfile == "not open") { abort = true; }
160                         else if (relabundfile == "not found") { relabundfile = ""; }
161                         else {  m->setRelAbundFile(relabundfile); }
162             
163             constaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
164                         if (constaxonomyfile == "not open") { constaxonomyfile = ""; abort = true; }
165                         else if (constaxonomyfile == "not found") {  constaxonomyfile = "";  }
166                         
167                         label = validParameter.validFile(parameters, "label", false);
168                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
169             
170             string groups = validParameter.validFile(parameters, "groups", false);
171                         if (groups == "not found") { groups = "";  }
172                         else {
173                                 m->splitAtDash(groups, Groups);
174                                 m->setGroups(Groups);
175                         }
176
177
178             if ((relabundfile == "") && (sharedfile == "")) {
179                                 //is there are current file available for either of these?
180                                 //give priority to shared, then relabund
181                                 sharedfile = m->getSharedFile();
182                                 if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
183                                 else {
184                                         relabundfile = m->getRelAbundFile();
185                                         if (relabundfile != "") {   m->mothurOut("Using " + relabundfile + " as input file for the relabund parameter."); m->mothurOutEndLine(); }
186                                         else {
187                                                 m->mothurOut("No valid current files. You must provide a shared or relabund."); m->mothurOutEndLine();
188                                                 abort = true;
189                                         }
190                                 }
191                         }
192                         
193                         if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true;  }
194                         
195             //if the user changes the output directory command factory will send this info to us in the output parameter
196                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){
197                                 outputDir = "";                         }
198             
199             scale = validParameter.validFile(parameters, "scale", false);                               if (scale == "not found") { scale = "totalgroup"; }
200                         
201                         if ((scale != "totalgroup") && (scale != "totalotu") && (scale != "averagegroup") && (scale != "averageotu")) {
202                                 m->mothurOut(scale + " is not a valid scaling option for the get.relabund command. Choices are totalgroup, totalotu, averagegroup, averageotu."); m->mothurOutEndLine(); abort = true;
203                         }
204         }
205                 
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "MakeLefseCommand", "MakeLefseCommand");
209                 exit(1);
210         }
211 }
212 //**********************************************************************************************************************
213
214 int MakeLefseCommand::execute(){
215         try {
216                 
217                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
218       
219         map<string, consTax2> consTax;
220         if (constaxonomyfile != "") {  m->readConsTax(constaxonomyfile, consTax);  }
221         
222         if (m->control_pressed) { return 0; }
223         
224         if (sharedfile != "") {
225             inputFile = sharedfile;
226             vector<SharedRAbundFloatVector*> lookup = getSharedRelabund();
227             runRelabund(consTax, lookup);
228         }else {
229             inputFile = relabundfile;
230             vector<SharedRAbundFloatVector*> lookup = getRelabund();
231             runRelabund(consTax, lookup);
232         }
233         
234         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
235                 
236         //output files created by command
237                 m->mothurOutEndLine();
238                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
239                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
240                 m->mothurOutEndLine();
241         return 0;
242                 
243     }
244         catch(exception& e) {
245                 m->errorOut(e, "MakeLefseCommand", "execute");
246                 exit(1);
247         }
248 }
249 //**********************************************************************************************************************
250 int MakeLefseCommand::runRelabund(map<string, consTax2>& consTax, vector<SharedRAbundFloatVector*>& lookup){
251         try {
252         if (outputDir == "") { outputDir = m->hasPath(inputFile); }
253         map<string, string> variables;
254         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
255         variables["[distance]"] = lookup[0]->getLabel();
256                 string outputFile = getOutputFileName("lefse",variables);
257                 outputNames.push_back(outputFile); outputTypes["lefse"].push_back(outputFile);
258         
259         ofstream out;
260         m->openOutputFile(outputFile, out);
261
262         DesignMap* designMap = NULL;
263         if (designfile != "") {
264             designMap = new DesignMap(designfile);
265             vector<string> categories = designMap->getNamesOfCategories();
266             
267             for (int j = 0; j < categories.size(); j++) {
268                 out << categories[j] << "\t";
269                 for (int i = 0; i < lookup.size(); i++) {
270                     if (m->control_pressed) { out.close(); delete designMap; return 0; }
271                     string value = designMap->get(lookup[i]->getGroup(), categories[j]);
272                     if (value == "not found") {
273                         m->mothurOut("[ERROR]: " + lookup[i]->getGroup() + " is not in your design file, please correct.\n"); m->control_pressed = true;
274                     }else { out << value << '\t'; }
275                 }
276                 out << endl;
277             }
278         }
279         
280         out << "group\t";
281         for (int i = 0; i < lookup.size(); i++) {  out << lookup[i]->getGroup() << '\t'; }
282         out << endl;
283         
284         for (int i = 0; i < lookup[0]->getNumBins(); i++) { //process each otu
285             if (m->control_pressed) { break; }
286             string nameOfOtu = m->currentBinLabels[i];
287             if (constaxonomyfile != "") { //try to find the otuName in consTax to replace with consensus taxonomy
288                 map<string, consTax2>::iterator it = consTax.find(nameOfOtu);
289                 if (it != consTax.end()) {
290                     nameOfOtu = it->second.taxonomy;
291                     //add sanity check abundances here??
292                     string fixedName = "";
293                     //remove confidences and change ; to |
294                     m->removeConfidences(nameOfOtu);
295                     for (int j = 0; j < nameOfOtu.length()-1; j++) {
296                         if (nameOfOtu[j] == ';') { fixedName += "_" + m->currentBinLabels[i] + '|'; }
297                         else { fixedName += nameOfOtu[j]; }
298                     }
299                     nameOfOtu = fixedName;
300                 }else {
301                     m->mothurOut("[ERROR]: can't find " + nameOfOtu + " in constaxonomy file. Do the distances match, did you forget to use the label parameter?\n"); m->control_pressed = true;
302                 }
303                 
304             }
305             //print name
306             out << nameOfOtu << '\t';
307             
308             //print out relabunds for each otu
309             for (int j = 0; j < lookup.size(); j++) { out << lookup[j]->getAbundance(i) << '\t'; }
310             out << endl;
311         }
312         out.close();
313
314         return 0;
315     }
316         catch(exception& e) {
317                 m->errorOut(e, "MakeLefseCommand", "execute");
318                 exit(1);
319         }
320 }
321 //**********************************************************************************************************************
322 vector<SharedRAbundFloatVector*> MakeLefseCommand::getSharedRelabund(){
323         try {
324                 InputData input(sharedfile, "sharedfile");
325                 vector<SharedRAbundVector*> templookup = input.getSharedRAbundVectors();
326                 string lastLabel = templookup[0]->getLabel();
327         vector<SharedRAbundFloatVector*> lookup;
328                 
329                 if (label == "") { label = lastLabel;  }
330                 else {
331             //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
332             set<string> labels; labels.insert(label);
333             set<string> processedLabels;
334             set<string> userLabels = labels;
335             
336             //as long as you are not at the end of the file or done wih the lines you want
337             while((templookup[0] != NULL) && (userLabels.size() != 0)) {
338                 if (m->control_pressed) {  for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  } return lookup;  }
339                 
340                 if(labels.count(templookup[0]->getLabel()) == 1){
341                     processedLabels.insert(templookup[0]->getLabel());
342                     userLabels.erase(templookup[0]->getLabel());
343                     break;
344                 }
345                 
346                 if ((m->anyLabelsToProcess(templookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
347                     string saveLabel = templookup[0]->getLabel();
348                     
349                     for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  }
350                     templookup = input.getSharedRAbundVectors(lastLabel);
351                     
352                     processedLabels.insert(templookup[0]->getLabel());
353                     userLabels.erase(templookup[0]->getLabel());
354                     
355                     //restore real lastlabel to save below
356                     templookup[0]->setLabel(saveLabel);
357                     break;
358                 }
359                 
360                 lastLabel = templookup[0]->getLabel();
361                 
362                 //get next line to process
363                 //prevent memory leak
364                 for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  }
365                 templookup = input.getSharedRAbundVectors();
366             }
367             
368             
369             if (m->control_pressed) { for (int i = 0; i < templookup.size(); i++) {  delete templookup[i];  } return lookup;  }
370             
371             //output error messages about any remaining user labels
372             set<string>::iterator it;
373             bool needToRun = false;
374             for (it = userLabels.begin(); it != userLabels.end(); it++) {
375                 m->mothurOut("Your file does not include the label " + *it);
376                 if (processedLabels.count(lastLabel) != 1) {
377                     m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
378                     needToRun = true;
379                 }else {
380                     m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
381                 }
382             }
383             
384             //run last label if you need to
385             if (needToRun == true)  {
386                 for (int i = 0; i < templookup.size(); i++) {  if (templookup[i] != NULL) {     delete templookup[i];   } }
387                 templookup = input.getSharedRAbundVectors(lastLabel);
388             }
389                 }
390         
391         for (int i = 0; i < templookup.size(); i++) {
392             SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
393             temp->setLabel(templookup[i]->getLabel());
394             temp->setGroup(templookup[i]->getGroup());
395             lookup.push_back(temp);
396         }
397         
398         //convert to relabund
399         for (int i = 0; i < templookup.size(); i++) {
400                         for (int j = 0; j < templookup[i]->getNumBins(); j++) {
401                 
402                                 if (m->control_pressed) { for (int k = 0; k < templookup.size(); k++) {  delete templookup[k];  } return lookup; }
403                 
404                                 int abund = templookup[i]->getAbundance(j);
405                                 float relabund = 0.0;
406                                 
407                                 if (scale == "totalgroup") {
408                                         relabund = abund / (float) templookup[i]->getNumSeqs();
409                                 }else if (scale == "totalotu") {
410                                         //calc the total in this otu
411                                         int totalOtu = 0;
412                                         for (int l = 0; l < templookup.size(); l++) {  totalOtu += templookup[l]->getAbundance(j); }
413                                         relabund = abund / (float) totalOtu;
414                                 }else if (scale == "averagegroup") {
415                                         relabund = abund / (float) (templookup[i]->getNumSeqs() / (float) templookup[i]->getNumBins());
416                                 }else if (scale == "averageotu") {
417                                         //calc the total in this otu
418                                         int totalOtu = 0;
419                                         for (int l = 0; l < templookup.size(); l++) {  totalOtu += templookup[l]->getAbundance(j); }
420                                         float averageOtu = totalOtu / (float) templookup.size();
421                                         
422                                         relabund = abund / (float) averageOtu;
423                                 }else{ m->mothurOut(scale + " is not a valid scaling option."); m->mothurOutEndLine(); m->control_pressed = true;  }
424                                 
425                                 lookup[i]->push_back(relabund, lookup[i]->getGroup());
426                         }
427         }
428
429         for (int k = 0; k < templookup.size(); k++) {  delete templookup[k];  }
430         
431                 return lookup;
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "MakeLefseCommand", "getSharedRelabund");
435                 exit(1);
436         }
437 }
438 //**********************************************************************************************************************
439 vector<SharedRAbundFloatVector*> MakeLefseCommand::getRelabund(){
440         try {
441                 InputData input(relabundfile, "relabund");
442                 vector<SharedRAbundFloatVector*> lookupFloat = input.getSharedRAbundFloatVectors();
443                 string lastLabel = lookupFloat[0]->getLabel();
444                 
445                 if (label == "") { label = lastLabel; return lookupFloat; }
446                 
447                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
448                 set<string> labels; labels.insert(label);
449                 set<string> processedLabels;
450                 set<string> userLabels = labels;
451                 
452                 //as long as you are not at the end of the file or done wih the lines you want
453                 while((lookupFloat[0] != NULL) && (userLabels.size() != 0)) {
454                         
455                         if (m->control_pressed) {  return lookupFloat;  }
456                         
457                         if(labels.count(lookupFloat[0]->getLabel()) == 1){
458                                 processedLabels.insert(lookupFloat[0]->getLabel());
459                                 userLabels.erase(lookupFloat[0]->getLabel());
460                                 break;
461                         }
462                         
463                         if ((m->anyLabelsToProcess(lookupFloat[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
464                                 string saveLabel = lookupFloat[0]->getLabel();
465                                 
466                                 for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
467                                 lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
468                                 
469                                 processedLabels.insert(lookupFloat[0]->getLabel());
470                                 userLabels.erase(lookupFloat[0]->getLabel());
471                                 
472                                 //restore real lastlabel to save below
473                                 lookupFloat[0]->setLabel(saveLabel);
474                                 break;
475                         }
476                         
477                         lastLabel = lookupFloat[0]->getLabel();
478                         
479                         //get next line to process
480                         //prevent memory leak
481                         for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  }
482                         lookupFloat = input.getSharedRAbundFloatVectors();
483                 }
484                 
485                 
486                 if (m->control_pressed) { return lookupFloat;  }
487                 
488                 //output error messages about any remaining user labels
489                 set<string>::iterator it;
490                 bool needToRun = false;
491                 for (it = userLabels.begin(); it != userLabels.end(); it++) {
492                         m->mothurOut("Your file does not include the label " + *it);
493                         if (processedLabels.count(lastLabel) != 1) {
494                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
495                                 needToRun = true;
496                         }else {
497                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
498                         }
499                 }
500                 
501                 //run last label if you need to
502                 if (needToRun == true)  {
503                         for (int i = 0; i < lookupFloat.size(); i++) {  if (lookupFloat[i] != NULL) {   delete lookupFloat[i];  } }
504                         lookupFloat = input.getSharedRAbundFloatVectors(lastLabel);
505                 }
506                 
507                 return lookupFloat;
508         }
509         catch(exception& e) {
510                 m->errorOut(e, "MakeLefseCommand", "getRelabund");
511         exit(1);
512         }
513 }
514
515 //**********************************************************************************************************************
516