]> git.donarmstrong.com Git - mothur.git/blob - phylotypecommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / phylotypecommand.cpp
1 /*
2  *  phylotypecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/20/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylotypecommand.h"
11 #include "phylotree.h"
12 #include "listvector.hpp"
13 #include "rabundvector.hpp"
14 #include "sabundvector.hpp"
15
16 //**********************************************************************************************************************
17 vector<string> PhylotypeCommand::setParameters(){       
18         try {
19                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none","list-rabund-sabund",false,true,true); parameters.push_back(ptaxonomy);
20                 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
21                 CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
22                 CommandParameter pcutoff("cutoff", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pcutoff);
23                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
24                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
25                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "PhylotypeCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string PhylotypeCommand::getHelpString(){       
38         try {
39                 string helpString = "";
40                 helpString += "The phylotype command reads a taxonomy file and outputs a .list, .rabund and .sabund file. \n";
41                 helpString += "The phylotype command parameter options are taxonomy, name, count, cutoff and label. The taxonomy parameter is required.\n";
42                 helpString += "The cutoff parameter allows you to specify the level you want to stop at.  The default is the highest level in your taxonomy file. \n";
43                 helpString += "For example: taxonomy = Bacteria;Bacteroidetes-Chlorobi;Bacteroidetes; - cutoff=2, would truncate the taxonomy to Bacteria;Bacteroidetes-Chlorobi; \n";
44                 helpString += "For the cutoff parameter levels count up from the root of the phylotree. This enables you to look at the grouping down to a specific resolution, say the genus level.\n";
45                 helpString += "The label parameter allows you to specify which level you would like, and are separated by dashes.  The default all levels in your taxonomy file. \n";
46                 helpString += "For the label parameter, levels count down from the root to keep the output similiar to mothur's other commands which report information from finer resolution to coarser resolutions.\n";
47                 helpString += "The phylotype command should be in the following format: \n";
48                 helpString += "phylotype(taxonomy=yourTaxonomyFile, cutoff=yourCutoff, label=yourLabels) \n";
49                 helpString += "Eaxample: phylotype(taxonomy=amazon.taxonomy, cutoff=5, label=1-3-5).\n";
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "PhylotypeCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string PhylotypeCommand::getOutputPattern(string type) {
59     try {
60         string pattern = "";
61         
62         if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
63         else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; }
64         else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
65         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
66         
67         return pattern;
68     }
69     catch(exception& e) {
70         m->errorOut(e, "PhylotypeCommand", "getOutputPattern");
71         exit(1);
72     }
73 }
74 //**********************************************************************************************************************
75 PhylotypeCommand::PhylotypeCommand(){   
76         try {
77                 abort = true; calledHelp = true; 
78                 setParameters();
79                 vector<string> tempOutNames;
80                 outputTypes["list"] = tempOutNames;
81                 outputTypes["sabund"] = tempOutNames;
82                 outputTypes["rabund"] = tempOutNames;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "PhylotypeCommand", "PhylotypeCommand");
86                 exit(1);
87         }
88 }
89 /**********************************************************************************************************************/
90 PhylotypeCommand::PhylotypeCommand(string option)  {
91         try {
92                 abort = false; calledHelp = false;   
93                 
94                 //allow user to run help
95                 if(option == "help") { help(); abort = true; calledHelp = true; }
96                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
97                 
98                 else {
99                         vector<string> myArray = setParameters();
100                         
101                         OptionParser parser(option);
102                         map<string, string> parameters = parser.getParameters(); 
103                         
104                         ValidParameters validParameter;
105                         map<string, string>::iterator it;
106                         
107                         //check to make sure all parameters are valid for command
108                         for (it = parameters.begin(); it != parameters.end(); it++) { 
109                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
110                         }
111                         
112                         //initialize outputTypes
113                         vector<string> tempOutNames;
114                         outputTypes["list"] = tempOutNames;
115                         outputTypes["sabund"] = tempOutNames;
116                         outputTypes["rabund"] = tempOutNames;
117                         
118                         //if the user changes the input directory command factory will send this info to us in the output parameter 
119                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
120                         if (inputDir == "not found"){   inputDir = "";          }
121                         else {
122                                 string path;
123                                 it = parameters.find("taxonomy");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
129                                 }
130                                 
131                                 it = parameters.find("name");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
137                                 }
138                 
139                 it = parameters.find("count");
140                                 //user has given a template file
141                                 if(it != parameters.end()){
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
145                                 }
146                         }
147
148                         taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
149                         if (taxonomyFileName == "not found") { 
150                                 taxonomyFileName = m->getTaxonomyFile(); 
151                                 if (taxonomyFileName != "") {  m->mothurOut("Using " + taxonomyFileName + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
152                                 else { 
153                                         m->mothurOut("No valid current files. taxonomy is a required parameter."); m->mothurOutEndLine(); 
154                                         abort = true; 
155                                 }
156                         }else if (taxonomyFileName == "not open") { taxonomyFileName = ""; abort = true; }      
157                         else { m->setTaxonomyFile(taxonomyFileName); }
158                         
159                         namefile = validParameter.validFile(parameters, "name", true);
160                         if (namefile == "not open") { namefile = ""; abort = true; }
161                         else if (namefile == "not found") { namefile = ""; }
162                         else { readNamesFile(); m->setNameFile(namefile); }
163             
164             countfile = validParameter.validFile(parameters, "count", true);
165                         if (countfile == "not open") { abort = true; countfile = ""; }
166                         else if (countfile == "not found") { countfile = ""; }
167                         else { m->setCountTableFile(countfile); }
168                         
169                         //if the user changes the output directory command factory will send this info to us in the output parameter 
170                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
171                                 outputDir = ""; 
172                                 outputDir += m->hasPath(taxonomyFileName); //if user entered a file with a path then preserve it        
173                         }
174                         
175             if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
176             
177                         string temp = validParameter.validFile(parameters, "cutoff", false);
178                         if (temp == "not found") { temp = "-1"; }
179                         m->mothurConvert(temp, cutoff); 
180                         
181                         label = validParameter.validFile(parameters, "label", false);                   
182                         if (label == "not found") { label = ""; allLines = 1; }
183                         else { 
184                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
185                                 else { allLines = 1;  }
186                         }
187                         
188             if (countfile == "") {
189                 if (namefile == "") {
190                     vector<string> files; files.push_back(taxonomyFileName);
191                     parser.getNameFile(files);
192                 }
193                         }
194                 }
195         }
196         catch(exception& e) {
197                 m->errorOut(e, "PhylotypeCommand", "PhylotypeCommand");
198                 exit(1);
199         }
200 }
201 /**********************************************************************************************************************/
202
203 int PhylotypeCommand::execute(){
204         try {
205         
206                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
207                 
208                 //reads in taxonomy file and makes all the taxonomies the same length 
209                 //by appending the last taxon to a given taxonomy as many times as needed to 
210                 //make it as long as the longest taxonomy in the file 
211                 TaxEqualizer* taxEqual = new TaxEqualizer(taxonomyFileName, cutoff, outputDir);
212                 
213                 if (m->control_pressed) { delete taxEqual; return 0; }
214                 
215                 string equalizedTaxFile = taxEqual->getEqualizedTaxFile();
216                 
217                 delete taxEqual;
218                 
219                 //build taxonomy tree from equalized file
220                 PhyloTree* tree = new PhyloTree(equalizedTaxFile);
221                 vector<int> leaves = tree->getGenusNodes();
222                 
223                 //store leaf nodes in current map
224                 for (int i = 0; i < leaves.size(); i++)         {       currentNodes[leaves[i]] = leaves[i];    }
225                 
226                 bool done = false;
227                 if (tree->get(leaves[0]).parent == -1) {  m->mothurOut("Empty Tree"); m->mothurOutEndLine();    done = true;    }
228                 
229                 if (m->control_pressed) { delete tree; return 0; }
230                 
231                 ofstream outList, outRabund, outSabund;
232         map<string, string> variables;
233         string fileroot = outputDir + m->getRootName(m->getSimpleName(taxonomyFileName));
234         variables["[filename]"] = fileroot;
235         variables["[clustertag]"] = "tx";
236         string sabundFileName = getOutputFileName("sabund", variables);
237         string rabundFileName = getOutputFileName("rabund", variables);
238         if (countfile != "") { variables["[tag2]"] = "unique_list"; }
239         string listFileName = getOutputFileName("list", variables);
240         
241         if (countfile == "") {
242             m->openOutputFile(sabundFileName,   outSabund);
243             m->openOutputFile(rabundFileName,   outRabund);
244             outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
245             outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
246             
247         }
248                 m->openOutputFile(listFileName, outList);
249         outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
250
251         
252                 int count = 1;          
253                 //start at leaves of tree and work towards root, processing the labels the user wants
254                 while((!done) && ((allLines == 1) || (labels.size() != 0))) {
255                 
256                         string level = toString(count); 
257                         count++;
258                         
259                         if (m->control_pressed) { 
260                                 if (countfile == "") { outRabund.close(); outSabund.close(); } outList.close();
261                                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  }
262                                 delete tree; return 0; 
263                         }
264                         
265                         //is this a level the user want output for
266                         if(allLines == 1 || labels.count(level) == 1){  
267                                 
268                                 //output level
269                                 m->mothurOut(level); m->mothurOutEndLine();
270                                 
271                                 ListVector list;
272                                 list.setLabel(level);
273                 
274                                 //go through nodes and build listvector 
275                                 for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
276                         
277                                         //get parents
278                                         TaxNode node = tree->get(itCurrent->first);
279                                         parentNodes[node.parent] = node.parent;
280                                         
281                                         vector<string> names = node.accessions;
282                                         
283                                         //make the names compatable with listvector
284                                         string name = "";
285                                         for (int i = 0; i < names.size(); i++) {  
286                         
287                         if (names[i] != "unknown") {
288                             if (namefile != "") {       
289                                 map<string, string>::iterator itNames = namemap.find(names[i]);  //make sure this name is in namefile
290                                 
291                                 if (itNames != namemap.end()) {  name += namemap[names[i]] + ",";   } //you found it in namefile
292                                 else { m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
293                                 
294                             }else{   name += names[i] + ",";    }
295                         }
296                                         }
297                     
298                     if (m->control_pressed) { break; }
299                     
300                                         name = name.substr(0, name.length()-1);  //rip off extra ','
301                                         //add bin to list vector
302                                         if (name != "") { list.push_back(name); } //caused by unknown
303                                 }       
304                                 
305                                 //print listvector
306                                 list.print(outList);
307                 if (countfile == "") {
308                     //print rabund
309                     list.getRAbundVector().print(outRabund);
310                     //print sabund
311                     list.getSAbundVector().print(outSabund);
312                 }
313                                 labels.erase(level);
314                                 
315                         }else {
316                                 
317                                 //just get parents
318                                 for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
319                                         int parent = tree->get(itCurrent->first).parent;
320                                         parentNodes[parent] = parent;
321                                 }
322                         }
323                         
324                         //move up a level
325                         currentNodes = parentNodes;
326                         parentNodes.clear();
327                         
328                         //have we reached the rootnode
329                         if (tree->get(currentNodes.begin()->first).parent == -1)  {  done = true;  }
330                 }
331                         
332                 outList.close();
333         if (countfile == "") {
334             outSabund.close();
335             outRabund.close();
336         }
337                 
338                 delete tree;
339                 
340                 if (m->control_pressed) { 
341                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]);  }
342                         return 0; 
343                 }
344                 
345                 //set list file as new current listfile
346                 string current = "";
347                 itTypes = outputTypes.find("list");
348                 if (itTypes != outputTypes.end()) {
349                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
350                 }
351                 
352                 //set rabund file as new current rabundfile
353                 itTypes = outputTypes.find("rabund");
354                 if (itTypes != outputTypes.end()) {
355                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
356                 }
357                 
358                 //set sabund file as new current sabundfile
359                 itTypes = outputTypes.find("sabund");
360                 if (itTypes != outputTypes.end()) {
361                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
362                 }               
363                 
364                 m->mothurOutEndLine();
365                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
366                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
367                 m->mothurOutEndLine();
368                 
369                 return 0;               
370         }
371
372         catch(exception& e) {
373                 m->errorOut(e, "PhylotypeCommand", "execute");
374                 exit(1);
375         }
376 }
377 /*****************************************************************/
378 int PhylotypeCommand::readNamesFile() {
379         try {
380                                 
381                 ifstream in;
382                 m->openInputFile(namefile, in);
383                 
384                 string first, second;
385                 map<string, string>::iterator itNames;
386                 
387                 while(!in.eof()) {
388                         in >> first >> second; m->gobble(in);
389                         
390                         itNames = namemap.find(first);
391                         if (itNames == namemap.end()) {  
392                                 namemap[first] = second; 
393                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); namemap.clear(); namefile = ""; return 1; }                   
394                 }
395                 in.close();
396                 
397                 return 0;
398         }
399         catch(exception& e) {
400                 m->errorOut(e, "PhylotypeCommand", "readNamesFile");
401                 exit(1);
402         }
403 }
404
405 /**********************************************************************************************************************/