]> git.donarmstrong.com Git - mothur.git/blob - phylotypecommand.cpp
removed read.dist, read.otu, read.tree and globaldata. added current to defaults...
[mothur.git] / phylotypecommand.cpp
1 /*
2  *  phylotypecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/20/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylotypecommand.h"
11 #include "phylotree.h"
12 #include "listvector.hpp"
13 #include "rabundvector.hpp"
14 #include "sabundvector.hpp"
15
16 //**********************************************************************************************************************
17 vector<string> PhylotypeCommand::setParameters(){       
18         try {
19                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
20                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
21                 CommandParameter pcutoff("cutoff", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pcutoff);
22                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "PhylotypeCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string PhylotypeCommand::getHelpString(){       
37         try {
38                 string helpString = "";
39                 helpString += "The phylotype command reads a taxonomy file and outputs a .list, .rabund and .sabund file. \n";
40                 helpString += "The phylotype command parameter options are taxonomy, cutoff and label. The taxonomy parameter is required.\n";
41                 helpString += "The cutoff parameter allows you to specify the level you want to stop at.  The default is the highest level in your taxonomy file. \n";
42                 helpString += "For example: taxonomy = Bacteria;Bacteroidetes-Chlorobi;Bacteroidetes; - cutoff=2, would truncate the taxonomy to Bacteria;Bacteroidetes-Chlorobi; \n";
43                 helpString += "For the cutoff parameter levels count up from the root of the phylotree. This enables you to look at the grouping down to a specific resolution, say the genus level.\n";
44                 helpString += "The label parameter allows you to specify which level you would like, and are separated by dashes.  The default all levels in your taxonomy file. \n";
45                 helpString += "For the label parameter, levels count down from the root to keep the output similiar to mothur's other commands which report information from finer resolution to coarser resolutions.\n";
46                 helpString += "The phylotype command should be in the following format: \n";
47                 helpString += "phylotype(taxonomy=yourTaxonomyFile, cutoff=yourCutoff, label=yourLabels) \n";
48                 helpString += "Eaxample: phylotype(taxonomy=amazon.taxonomy, cutoff=5, label=1-3-5).\n\n";
49                 return helpString;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "PhylotypeCommand", "getHelpString");
53                 exit(1);
54         }
55 }
56
57 //**********************************************************************************************************************
58 PhylotypeCommand::PhylotypeCommand(){   
59         try {
60                 abort = true; calledHelp = true; 
61                 setParameters();
62                 vector<string> tempOutNames;
63                 outputTypes["list"] = tempOutNames;
64                 outputTypes["sabund"] = tempOutNames;
65                 outputTypes["rabund"] = tempOutNames;
66         }
67         catch(exception& e) {
68                 m->errorOut(e, "PhylotypeCommand", "PhylotypeCommand");
69                 exit(1);
70         }
71 }
72 /**********************************************************************************************************************/
73 PhylotypeCommand::PhylotypeCommand(string option)  {
74         try {
75                 abort = false; calledHelp = false;   
76                 
77                 //allow user to run help
78                 if(option == "help") { help(); abort = true; calledHelp = true; }
79                 
80                 else {
81                         vector<string> myArray = setParameters();
82                         
83                         OptionParser parser(option);
84                         map<string, string> parameters = parser.getParameters(); 
85                         
86                         ValidParameters validParameter;
87                         map<string, string>::iterator it;
88                         
89                         //check to make sure all parameters are valid for command
90                         for (it = parameters.begin(); it != parameters.end(); it++) { 
91                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
92                         }
93                         
94                         //initialize outputTypes
95                         vector<string> tempOutNames;
96                         outputTypes["list"] = tempOutNames;
97                         outputTypes["sabund"] = tempOutNames;
98                         outputTypes["rabund"] = tempOutNames;
99                         
100                         //if the user changes the input directory command factory will send this info to us in the output parameter 
101                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
102                         if (inputDir == "not found"){   inputDir = "";          }
103                         else {
104                                 string path;
105                                 it = parameters.find("taxonomy");
106                                 //user has given a template file
107                                 if(it != parameters.end()){ 
108                                         path = m->hasPath(it->second);
109                                         //if the user has not given a path then, add inputdir. else leave path alone.
110                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
111                                 }
112                                 
113                                 it = parameters.find("name");
114                                 //user has given a template file
115                                 if(it != parameters.end()){ 
116                                         path = m->hasPath(it->second);
117                                         //if the user has not given a path then, add inputdir. else leave path alone.
118                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
119                                 }
120                         }
121
122                         taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
123                         if (taxonomyFileName == "not found") { 
124                                 taxonomyFileName = m->getTaxonomyFile(); 
125                                 if (taxonomyFileName != "") {  m->mothurOut("Using " + taxonomyFileName + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
126                                 else { 
127                                         m->mothurOut("No valid current files. taxonomy is a required parameter."); m->mothurOutEndLine(); 
128                                         abort = true; 
129                                 }
130                         }else if (taxonomyFileName == "not open") { abort = true; }     
131                         
132                         namefile = validParameter.validFile(parameters, "name", true);
133                         if (namefile == "not open") { abort = true; }
134                         else if (namefile == "not found") { namefile = ""; }
135                         else { readNamesFile(); }       
136                         
137                         //if the user changes the output directory command factory will send this info to us in the output parameter 
138                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
139                                 outputDir = ""; 
140                                 outputDir += m->hasPath(taxonomyFileName); //if user entered a file with a path then preserve it        
141                         }
142                         
143                         string temp = validParameter.validFile(parameters, "cutoff", false);
144                         if (temp == "not found") { temp = "-1"; }
145                         convert(temp, cutoff); 
146                         
147                         label = validParameter.validFile(parameters, "label", false);                   
148                         if (label == "not found") { label = ""; allLines = 1; }
149                         else { 
150                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
151                                 else { allLines = 1;  }
152                         }
153                         
154                 }
155         }
156         catch(exception& e) {
157                 m->errorOut(e, "PhylotypeCommand", "PhylotypeCommand");
158                 exit(1);
159         }
160 }
161 /**********************************************************************************************************************/
162
163 int PhylotypeCommand::execute(){
164         try {
165         
166                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
167                 
168                 //reads in taxonomy file and makes all the taxonomies the same length 
169                 //by appending the last taxon to a given taxonomy as many times as needed to 
170                 //make it as long as the longest taxonomy in the file 
171                 TaxEqualizer* taxEqual = new TaxEqualizer(taxonomyFileName, cutoff, outputDir);
172                 
173                 if (m->control_pressed) { delete taxEqual; return 0; }
174                 
175                 string equalizedTaxFile = taxEqual->getEqualizedTaxFile();
176                 
177                 delete taxEqual;
178                 
179                 //build taxonomy tree from equalized file
180                 PhyloTree* tree = new PhyloTree(equalizedTaxFile);
181                 vector<int> leaves = tree->getGenusNodes();
182                 
183                 //store leaf nodes in current map
184                 for (int i = 0; i < leaves.size(); i++)         {       currentNodes[leaves[i]] = leaves[i];    }
185                 
186                 bool done = false;
187                 if (tree->get(leaves[0]).parent == -1) {  m->mothurOut("Empty Tree"); m->mothurOutEndLine();    done = true;    }
188                 
189                 if (m->control_pressed) { delete tree; return 0; }
190                 
191                 string fileroot = outputDir + m->getRootName(m->getSimpleName(taxonomyFileName));
192                 
193                 ofstream outList;
194                 string outputListFile = fileroot + "tx.list";
195                 m->openOutputFile(outputListFile, outList);
196                 ofstream outSabund;
197                 string outputSabundFile = fileroot + "tx.sabund";
198                 m->openOutputFile(outputSabundFile, outSabund);
199                 ofstream outRabund;
200                 string outputRabundFile = fileroot + "tx.rabund";
201                 m->openOutputFile(outputRabundFile, outRabund);
202                 
203                 outputNames.push_back(outputListFile); outputTypes["list"].push_back(outputListFile);
204                 outputNames.push_back(outputSabundFile); outputTypes["sabund"].push_back(outputSabundFile);
205                 outputNames.push_back(outputRabundFile); outputTypes["rabund"].push_back(outputRabundFile);
206                 
207                 int count = 1;          
208                 //start at leaves of tree and work towards root, processing the labels the user wants
209                 while((!done) && ((allLines == 1) || (labels.size() != 0))) {
210                 
211                         string level = toString(count); 
212                         count++;
213                         
214                         if (m->control_pressed) { 
215                                 outRabund.close(); outSabund.close(); outList.close();
216                                 for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
217                                 delete tree; return 0; 
218                         }
219                         
220                         //is this a level the user want output for
221                         if(allLines == 1 || labels.count(level) == 1){  
222                                 
223                                 //output level
224                                 m->mothurOut(level); m->mothurOutEndLine();
225                                 
226                                 ListVector list;
227                                 list.setLabel(level);
228                                 //go through nodes and build listvector 
229                                 for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
230                         
231                                         //get parents
232                                         TaxNode node = tree->get(itCurrent->first);
233                                         parentNodes[node.parent] = node.parent;
234                                         
235                                         vector<string> names = node.accessions;
236                                         
237                                         //make the names compatable with listvector
238                                         string name = "";
239                                         for (int i = 0; i < names.size(); i++) {  
240                                                 if (namefile != "") {   
241                                                         map<string, string>::iterator itNames = namemap.find(names[i]);  //make sure this name is in namefile
242                 
243                                                         if (itNames != namemap.end()) {  name += namemap[names[i]] + ",";   } //you found it in namefile
244                                                         else { m->mothurOut(names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); exit(1);  }
245                                                         
246                                                 }else{   name += names[i] + ",";        }
247                                         }
248                                         name = name.substr(0, name.length()-1);  //rip off extra ','
249                                         
250                                         //add bin to list vector
251                                         list.push_back(name);
252                                 }       
253                                 
254                                 //print listvector
255                                 list.print(outList);
256                                 //print rabund
257                                 list.getRAbundVector().print(outRabund);
258                                 //print sabund
259                                 list.getSAbundVector().print(outSabund);
260                         
261                                 labels.erase(level);
262                                 
263                         }else {
264                                 
265                                 //just get parents
266                                 for (itCurrent = currentNodes.begin(); itCurrent != currentNodes.end(); itCurrent++) {
267                                         int parent = tree->get(itCurrent->first).parent;
268                                         parentNodes[parent] = parent;
269                                 }
270                         }
271                         
272                         //move up a level
273                         currentNodes = parentNodes;
274                         parentNodes.clear();
275                         
276                         //have we reached the rootnode
277                         if (tree->get(currentNodes.begin()->first).parent == -1)  {  done = true;  }
278                 }
279                         
280                 outList.close();
281                 outSabund.close();
282                 outRabund.close();      
283                 
284                 delete tree;
285                 
286                 if (m->control_pressed) { 
287                         for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str());  }
288                         return 0; 
289                 }
290                 
291                 //set list file as new current listfile
292                 string current = "";
293                 itTypes = outputTypes.find("list");
294                 if (itTypes != outputTypes.end()) {
295                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
296                 }
297                 
298                 //set rabund file as new current rabundfile
299                 itTypes = outputTypes.find("rabund");
300                 if (itTypes != outputTypes.end()) {
301                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
302                 }
303                 
304                 //set sabund file as new current sabundfile
305                 itTypes = outputTypes.find("sabund");
306                 if (itTypes != outputTypes.end()) {
307                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
308                 }               
309                 
310                 m->mothurOutEndLine();
311                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
312                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
313                 m->mothurOutEndLine();
314                 
315                 return 0;               
316         }
317
318         catch(exception& e) {
319                 m->errorOut(e, "PhylotypeCommand", "execute");
320                 exit(1);
321         }
322 }
323 /*****************************************************************/
324 int PhylotypeCommand::readNamesFile() {
325         try {
326                                 
327                 ifstream in;
328                 m->openInputFile(namefile, in);
329                 
330                 string first, second;
331                 map<string, string>::iterator itNames;
332                 
333                 while(!in.eof()) {
334                         in >> first >> second; m->gobble(in);
335                         
336                         itNames = namemap.find(first);
337                         if (itNames == namemap.end()) {  
338                                 namemap[first] = second; 
339                         }else {  m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); namemap.clear(); namefile = ""; return 1; }                   
340                 }
341                 in.close();
342                 
343                 return 0;
344         }
345         catch(exception& e) {
346                 m->errorOut(e, "PhylotypeCommand", "readNamesFile");
347                 exit(1);
348         }
349 }
350
351 /**********************************************************************************************************************/