]> git.donarmstrong.com Git - mothur.git/blob - classifytreecommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / classifytreecommand.cpp
1 //
2 //  classifytreecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 2/20/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "classifytreecommand.h"
10 #include "phylotree.h"
11 #include "treereader.h"
12
13 //**********************************************************************************************************************
14 vector<string> ClassifyTreeCommand::setParameters(){    
15         try {
16                 CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptree);
17         CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptaxonomy);
18         CommandParameter pname("name", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pname);
19         CommandParameter pgroup("group", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pgroup);
20         CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
21                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
22                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
23                 
24                 vector<string> myArray;
25                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
26                 return myArray;
27         }
28         catch(exception& e) {
29                 m->errorOut(e, "ClassifyTreeCommand", "setParameters");
30                 exit(1);
31         }
32 }
33 //**********************************************************************************************************************
34 string ClassifyTreeCommand::getHelpString(){    
35         try {
36                 string helpString = "";
37                 helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n";
38                 helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
39                 helpString += "The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n";
40                 helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
41                 helpString += "The classify.tree command parameters are tree, group, name and taxonomy. The tree and taxonomy files are required.\n";
42         helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy.  The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
43         helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n";
44                 helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; 
45                 return helpString;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "ClassifyTreeCommand", "getHelpString");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 string ClassifyTreeCommand::getOutputFileNameTag(string type, string inputName=""){     
54         try {
55         string outputFileName = "";
56                 map<string, vector<string> >::iterator it;
57         
58         //is this a type this command creates
59         it = outputTypes.find(type);
60         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
61         else {
62             if (type == "tree") {  outputFileName =  "taxonomy.tre"; }
63             else if (type == "summary") {  outputFileName =  "taxonomy.summary"; }
64             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
65         }
66         return outputFileName;
67         }
68         catch(exception& e) {
69                 m->errorOut(e, "ClassifyTreeCommand", "getOutputFileNameTag");
70                 exit(1);
71         }
72 }
73 //**********************************************************************************************************************
74 ClassifyTreeCommand::ClassifyTreeCommand(){     
75         try {
76                 abort = true; calledHelp = true; 
77                 setParameters();
78                 vector<string> tempOutNames;
79                 outputTypes["tree"] = tempOutNames;
80                 outputTypes["summary"] = tempOutNames;
81         }
82         catch(exception& e) {
83                 m->errorOut(e, "ClassifyTreeCommand", "ClassifyTreeCommand");
84                 exit(1);
85         }
86 }
87 //**********************************************************************************************************************
88 ClassifyTreeCommand::ClassifyTreeCommand(string option)  {
89         try {
90                 abort = false; calledHelp = false;   
91                 
92                 //allow user to run help
93                 if(option == "help") { help(); abort = true; calledHelp = true; }
94                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
95                 
96                 else {
97                         vector<string> myArray = setParameters();
98                         
99                         OptionParser parser(option);
100                         map<string, string> parameters = parser.getParameters();
101                         
102                         ValidParameters validParameter;
103                         map<string, string>::iterator it;
104                         
105                         //check to make sure all parameters are valid for command
106                         for (it = parameters.begin(); it != parameters.end(); it++) { 
107                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
108                         }
109                         
110                         vector<string> tempOutNames;
111                         outputTypes["tree"] = tempOutNames;
112                         outputTypes["summary"] = tempOutNames;
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("tree");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["tree"] = inputDir + it->second;             }
125                                 }
126                                 
127                                 it = parameters.find("name");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
133                                 }
134                                 
135                                 it = parameters.find("group");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
141                                 }
142                                 
143                                 it = parameters.find("taxonomy");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
149                                 }
150                         }
151                         
152                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
153             
154                         //check for required parameters
155                         treefile = validParameter.validFile(parameters, "tree", true);
156                         if (treefile == "not open") { treefile = ""; abort = true; }
157                         else if (treefile == "not found") { treefile = ""; 
158                 treefile = m->getTreeFile(); 
159                 if (treefile != "") {  m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
160                 else { m->mothurOut("No valid current files. You must provide a tree file."); m->mothurOutEndLine(); abort = true; }
161             }else { m->setTreeFile(treefile); } 
162             
163             taxonomyfile = validParameter.validFile(parameters, "taxonomy", true);
164                         if (taxonomyfile == "not open") { taxonomyfile = ""; abort = true; }
165                         else if (taxonomyfile == "not found") { taxonomyfile = ""; 
166                 taxonomyfile = m->getTaxonomyFile(); 
167                 if (taxonomyfile != "") {  m->mothurOut("Using " + taxonomyfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
168                 else { m->mothurOut("No valid current files. You must provide a taxonomy file."); m->mothurOutEndLine(); abort = true; }
169             }else { m->setTaxonomyFile(taxonomyfile); } 
170                         
171                         namefile = validParameter.validFile(parameters, "name", true);
172                         if (namefile == "not open") { namefile = ""; abort = true; }
173                         else if (namefile == "not found") { namefile = ""; }
174                         else { m->setNameFile(namefile); }
175                         
176                         groupfile = validParameter.validFile(parameters, "group", true);
177                         if (groupfile == "not open") { groupfile = ""; abort = true; }
178                         else if (groupfile == "not found") { groupfile = ""; }
179                         else { m->setGroupFile(groupfile); }
180             
181             string temp = validParameter.validFile(parameters, "cutoff", false);                        if (temp == "not found") { temp = "51"; }
182                         m->mothurConvert(temp, cutoff); 
183                         
184                         if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true;  }
185             
186             if (namefile == "") {
187                                 vector<string> files; files.push_back(treefile);
188                                 parser.getNameFile(files);
189                         }
190                         
191                 }
192         }
193         catch(exception& e) {
194                 m->errorOut(e, "ClassifyTreeCommand", "ClassifyTreeCommand");           
195                 exit(1);
196         }
197 }
198 //**********************************************************************************************************************
199
200 int ClassifyTreeCommand::execute(){
201         try {
202                 
203                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
204                 
205                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
206                 
207                 int start = time(NULL);
208         
209                 /***************************************************/
210                 //    reading tree info                                                    //
211                 /***************************************************/
212         m->setTreeFile(treefile);
213         
214         TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
215         vector<Tree*> T = reader->getTrees();
216         TreeMap* tmap = T[0]->getTreeMap();
217         Tree* outputTree = T[0];
218         delete reader;
219
220         if (namefile != "") { m->readNames(namefile, nameMap, nameCount); }
221                         
222         if (m->control_pressed) { delete tmap;  delete outputTree;  return 0; }
223                 
224         m->readTax(taxonomyfile, taxMap);
225         
226         /***************************************************/
227         //              get concensus taxonomies                    //
228         /***************************************************/
229         getClassifications(outputTree);
230         delete outputTree; delete tmap;
231                         
232                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]);        } return 0; }
233                 
234                 //set tree file as new current treefile
235                 if (treefile != "") {
236                         string current = "";
237                         itTypes = outputTypes.find("tree");
238                         if (itTypes != outputTypes.end()) {
239                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); }
240                         }
241                 }
242                 
243                 m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the concensus taxonomies."); m->mothurOutEndLine();
244                 m->mothurOutEndLine();
245                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
246                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
247                 m->mothurOutEndLine();
248         
249                 return 0;
250         }
251         catch(exception& e) {
252                 m->errorOut(e, "ClassifyTreeCommand", "execute");       
253                 exit(1);
254         }
255 }
256 //**********************************************************************************************************************
257 //traverse tree finding concensus taxonomy at each node
258 //label node with a number to relate to output summary file
259 //report all concensus taxonomies to file 
260 int ClassifyTreeCommand::getClassifications(Tree*& T){
261         try {
262                 
263                 string thisOutputDir = outputDir;
264                 if (outputDir == "") {  thisOutputDir += m->hasPath(treefile);  }
265                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("summary");
266                 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
267                 
268                 ofstream out;
269                 m->openOutputFile(outputFileName, out);
270                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
271                 
272                 //print headings
273                 out << "TreeNode\t";
274                 if (groupfile != "") { out << "Group\t"; } 
275         out << "NumRep\tTaxonomy" << endl; 
276                 
277                 string treeOutputDir = outputDir;
278                 if (outputDir == "") {  treeOutputDir += m->hasPath(treefile);  }
279                 string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("tree");
280                 
281                 //create a map from tree node index to names of descendants, save time later
282                 map<int, map<string, set<string> > > nodeToDescendants; //node# -> (groupName -> groupMembers)
283                 for (int i = 0; i < T->getNumNodes(); i++) {
284                         if (m->control_pressed) { return 0; }
285                         
286                         nodeToDescendants[i] = getDescendantList(T, i, nodeToDescendants);
287                 }
288                 
289                 //for each node
290                 for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) {
291                         
292                         if (m->control_pressed) { out.close(); return 0; }
293             
294                         string tax = "not classifed";
295             int size;
296             if (groupfile != "") {
297                 for (map<string, set<string> >::iterator itGroups = nodeToDescendants[i].begin(); itGroups != nodeToDescendants[i].end(); itGroups++) {
298                     if (itGroups->first != "AllGroups") {
299                         tax = getTaxonomy(itGroups->second, size);
300                         out << (i+1) << '\t' << itGroups->first << '\t' << size << '\t' << tax << endl;
301                     }
302                 }
303             }else {
304                 string group = "AllGroups";
305                 tax = getTaxonomy(nodeToDescendants[i][group], size);
306                 out << (i+1) << '\t' << size << '\t' << tax << endl;
307             }
308                                 
309                         T->tree[i].setLabel((i+1));
310                 }
311                 out.close();
312         
313                 ofstream outTree;
314                 m->openOutputFile(outputTreeFileName, outTree);
315                 outputNames.push_back(outputTreeFileName); outputTypes["tree"].push_back(outputTreeFileName);
316                 T->print(outTree, "both");
317                 outTree.close();
318         
319                 return 0;
320         }
321         catch(exception& e) {
322                 m->errorOut(e, "ClassifyTreeCommand", "GetConcensusTaxonomies");        
323                 exit(1);
324         }
325 }
326 //**********************************************************************************************************************
327 string ClassifyTreeCommand::getTaxonomy(set<string> names, int& size) {
328         try{
329                 string conTax = "";
330         size = 0;
331                         
332                 //create a tree containing sequences from this bin
333                 PhyloTree* phylo = new PhyloTree();
334                 
335                 for (set<string>::iterator it = names.begin(); it != names.end(); it++) {
336             
337             
338                         //if namesfile include the names
339                         if (namefile != "") {
340                 
341                                 //is this sequence in the name file - namemap maps seqName -> repSeqName
342                                 map<string, string>::iterator it2 = nameMap.find(*it);
343                                 
344                                 if (it2 == nameMap.end()) { //this name is not in name file, skip it
345                                         m->mothurOut((*it) + " is not in your name file.  I will not include it in the consensus."); m->mothurOutEndLine();
346                                 }else{
347                                         
348                                         //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
349                                         map<string, string>::iterator itTax = taxMap.find((it2->second));
350                     
351                                         if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
352                         
353                                                 if ((*it) != (it2->second)) { m->mothurOut((*it) + " is represented by " +  it2->second + " and is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
354                                                 else {  m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine(); }
355                                         }else{
356                                                 //add seq to tree
357                         int num = nameCount[(*it)]; // we know its there since we found it in nameMap
358                                                 for (int i = 0; i < num; i++) {  phylo->addSeqToTree((*it)+toString(i), it2->second);  }
359                         size += num;
360                                         }
361                                 }
362                                 
363                         }else{
364                                 //is this sequence in the taxonomy file - look for repSeqName since we are assuming the taxonomy file is unique
365                                 map<string, string>::iterator itTax = taxMap.find((*it));
366                 
367                                 if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
368                                         m->mothurOut((*it) + " is not in your taxonomy file.  I will not include it in the consensus."); m->mothurOutEndLine();
369                                 }else{
370                                         //add seq to tree
371                                         phylo->addSeqToTree((*it), itTax->second);
372                     size++;
373                                 }
374                         }
375             
376                         if (m->control_pressed) { delete phylo; return conTax; }
377                         
378                 }
379                 
380                 //build tree
381                 phylo->assignHeirarchyIDs(0);
382                 
383                 TaxNode currentNode = phylo->get(0);
384                 int myLevel = 0;        
385                 //at each level
386                 while (currentNode.children.size() != 0) { //you still have more to explore
387             
388                         TaxNode bestChild;
389                         int bestChildSize = 0;
390                         
391                         //go through children
392                         for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
393                                 
394                                 TaxNode temp = phylo->get(itChild->second);
395                                 
396                                 //select child with largest accesions - most seqs assigned to it
397                                 if (temp.accessions.size() > bestChildSize) {
398                                         bestChild = phylo->get(itChild->second);
399                                         bestChildSize = temp.accessions.size();
400                                 }
401                                 
402                         }
403             
404                         //is this taxonomy above cutoff
405                         int consensusConfidence = ceil((bestChildSize / (float) size) * 100);
406                         
407                         if (consensusConfidence >= cutoff) { //if yes, add it
408                 conTax += bestChild.name + "(" + toString(consensusConfidence) + ");";
409                                 myLevel++;
410                         }else{ //if no, quit
411                                 break;
412                         }
413                         
414                         //move down a level
415                         currentNode = bestChild;
416                 }
417                 
418                 if (myLevel != phylo->getMaxLevel()) {
419                         while (myLevel != phylo->getMaxLevel()) {
420                                 conTax += "unclassified;";
421                                 myLevel++;
422                         }
423                 }               
424                 if (conTax == "") {  conTax = "no_consensus;";  }
425                 
426                 delete phylo;   
427         
428         return conTax;
429         
430         }
431         catch(exception& e) {
432                 m->errorOut(e, "ClassifyTreeCommand", "getTaxonomy");
433                 exit(1);
434         }
435 }
436
437 //**********************************************************************************************************************
438 map<string, set<string> > ClassifyTreeCommand::getDescendantList(Tree*& T, int i, map<int, map<string, set<string> > > descendants){
439         try {
440                 map<string ,set<string> > names;
441                 
442                 map<string ,set<string> >::iterator it;
443         map<string ,set<string> >::iterator it2;
444                 
445                 int lc = T->tree[i].getLChild();
446                 int rc = T->tree[i].getRChild();
447         TreeMap* tmap = T->getTreeMap();
448                 
449                 if (lc == -1) { //you are a leaf your only descendant is yourself
450             string group = tmap->getGroup(T->tree[i].getName());
451             set<string> mynames; mynames.insert(T->tree[i].getName());
452             names[group] = mynames; //mygroup -> me
453             names["AllGroups"] = mynames;
454                 }else{ //your descedants are the combination of your childrens descendants
455                         names = descendants[lc];
456                         for (it = descendants[rc].begin(); it != descendants[rc].end(); it++) {
457                 it2 = names.find(it->first); //do we already have this group
458                 if (it2 == names.end()) { //nope, so add it
459                     names[it->first] = it->second;
460                 }else {
461                     for (set<string>::iterator it3 = (it->second).begin(); it3 != (it->second).end(); it3++) {
462                         names[it->first].insert(*it3);
463                     }
464                 }
465                                 
466                         }
467                 }
468                 
469                 return names;
470         }
471         catch(exception& e) {
472                 m->errorOut(e, "ClassifyTreeCommand", "getDescendantList");     
473                 exit(1);
474         }
475 }
476 /*****************************************************************/
477
478