]> git.donarmstrong.com Git - mothur.git/blob - phylotree.cpp
1001542ea36548742a2c92b809875fd4e7ec4f89
[mothur.git] / phylotree.cpp
1 /*
2  *  doTaxonomy.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 6/17/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "phylotree.h"
11
12 /**************************************************************************************************/
13
14 PhyloTree::PhyloTree(){
15         try {
16                 m = MothurOut::getInstance();
17                 numNodes = 1;
18                 numSeqs = 0;
19                 tree.push_back(TaxNode("Root"));
20                 tree[0].heirarchyID = "0";
21                 maxLevel = 0;
22         }
23         catch(exception& e) {
24                 m->errorOut(e, "PhyloTree", "PhyloTree");
25                 exit(1);
26         }
27 }
28 /**************************************************************************************************/
29
30 PhyloTree::PhyloTree(string tfile){
31         try {
32                 numNodes = 1;
33                 numSeqs = 0;
34                 tree.push_back(TaxNode("Root"));
35                 tree[0].heirarchyID = "0";
36                 maxLevel = 0;
37                 
38                 ifstream in;
39                 openInputFile(tfile, in);
40                 
41                 //read in users taxonomy file and add sequences to tree
42                 string name, tax;
43                 while(!in.eof()){
44                         in >> name >> tax; gobble(in);
45                         
46                         addSeqToTree(name, tax);
47                 }
48                 in.close();
49         
50                 assignHeirarchyIDs(0);
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "PhyloTree", "PhyloTree");
54                 exit(1);
55         }
56 }
57
58 /**************************************************************************************************/
59
60 string PhyloTree::getNextTaxon(string& heirarchy){
61         try {
62                 string currentLevel = "";
63                 if(heirarchy != ""){
64                         currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
65                         heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
66                 }
67                 return currentLevel;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "PhyloTree", "getNextTaxon");
71                 exit(1);
72         }
73 }
74
75 /**************************************************************************************************/
76
77 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
78         try {
79                 numSeqs++;
80                 
81                 map<string, int>::iterator childPointer;
82                 
83                 int currentNode = 0;
84                 int level = 1;
85                 
86                 tree[0].accessions.push_back(seqName);
87                 string taxon;// = getNextTaxon(seqTaxonomy);
88                 
89                 while(seqTaxonomy != ""){
90                         
91                         level++;
92                         
93                         //somehow the parent is getting one too many accnos
94                         //use print to reassign the taxa id
95                         taxon = getNextTaxon(seqTaxonomy);
96                         
97                         childPointer = tree[currentNode].children.find(taxon);
98                         
99                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
100                                 currentNode = childPointer->second;
101                                 tree[currentNode].accessions.push_back(seqName);
102                                 name2Taxonomy[seqName] = currentNode;
103                         }
104                         else{                                                                                   //otherwise, create it
105                                 tree.push_back(TaxNode(taxon));
106                                 numNodes++;
107                                 tree[currentNode].children[taxon] = numNodes-1;
108                                 tree[numNodes-1].parent = currentNode;
109                                 
110                                 //                      int numChildren = tree[currentNode].children.size();
111                                 //                      string heirarchyID = tree[currentNode].heirarchyID;
112                                 //                      tree[currentNode].accessions.push_back(seqName);
113                                 
114                                 currentNode = tree[currentNode].children[taxon];
115                                 tree[currentNode].accessions.push_back(seqName);
116                                 name2Taxonomy[seqName] = currentNode;
117                                 //                      tree[currentNode].level = level;
118                                 //                      tree[currentNode].childNumber = numChildren;
119                                 //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
120                         }
121                 
122                         if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
123                 }
124
125         }
126         catch(exception& e) {
127                 m->errorOut(e, "PhyloTree", "addSeqToTree");
128                 exit(1);
129         }
130 }
131 /**************************************************************************************************/
132 vector<int> PhyloTree::getGenusNodes()  {
133         try {
134                 genusIndex.clear();
135                 //generate genusIndexes
136                 map<int, int>::iterator it2;
137                 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
138                 
139                 return genusIndex;
140         }
141         catch(exception& e) {
142                 m->errorOut(e, "PhyloTree", "getGenusNodes");
143                 exit(1);
144         }
145 }
146 /**************************************************************************************************/
147
148 void PhyloTree::assignHeirarchyIDs(int index){
149         try {
150                 map<string,int>::iterator it;
151                 int counter = 1;
152                 
153                 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
154                         tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
155                         counter++;
156                         tree[it->second].level = tree[index].level + 1;
157                         
158                         //save maxLevel for binning the unclassified seqs
159                         if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; } 
160                         
161                         assignHeirarchyIDs(it->second);
162                 }
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "PhyloTree", "assignHeirarchyIDs");
166                 exit(1);
167         }
168 }
169 /**************************************************************************************************/
170 void PhyloTree::binUnclassified(){
171         try {
172                 map<string, int>::iterator itBin;
173                 map<string, int>::iterator childPointer;
174                 
175                 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
176                 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
177                 
178                         int level = tree[itBin->second].level;
179                         int currentNode = itBin->second;
180                         
181                         //this sequence is unclassified at some levels
182                         while(level != maxLevel){
183                         
184                                 level++;
185                         
186                                 string taxon = "unclassified";  
187                                 
188                                 //does the parent have a child names 'unclassified'?
189                                 childPointer = tree[currentNode].children.find(taxon);
190                                 
191                                 if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
192                                         currentNode = childPointer->second; //currentNode becomes 'unclassified'
193                                         tree[currentNode].accessions.push_back(itBin->first);  //add this seq
194                                         name2Taxonomy[itBin->first] = currentNode;
195                                 }
196                                 else{                                                                                   //otherwise, create it
197                                         tree.push_back(TaxNode(taxon));
198                                         numNodes++;
199                                         tree[currentNode].children[taxon] = numNodes-1;
200                                         tree[numNodes-1].parent = currentNode;
201                                                                         
202                                         currentNode = tree[currentNode].children[taxon];
203                                         tree[currentNode].accessions.push_back(itBin->first);
204                                         name2Taxonomy[itBin->first] = currentNode;
205                                 }
206                                 
207                                 if (level == maxLevel) {   uniqueTaxonomies[currentNode] = currentNode; }
208                         }
209                 }
210                 
211                 //clear HeirarchyIDs and reset them
212                 for (int i = 1; i < tree.size(); i++) {
213                         tree[i].heirarchyID = "";
214                 }
215                 assignHeirarchyIDs(0);
216                 
217         }
218         catch(exception& e) {
219                 m->errorOut(e, "PhyloTree", "binUnclassified");
220                 exit(1);
221         }
222 }
223 /**************************************************************************************************/
224 string PhyloTree::getFullTaxonomy(string seqName) {
225         try {
226                 string tax = "";
227                 
228                 int currentNode = name2Taxonomy[seqName];
229                 
230                 while (tree[currentNode].parent != -1) {
231                         tax = tree[currentNode].name + ";" + tax;
232                         currentNode = tree[currentNode].parent;
233                 }
234                 
235                 return tax;
236         }
237         catch(exception& e) {
238                 m->errorOut(e, "PhyloTree", "getFullTaxonomy");
239                 exit(1);
240         }
241 }
242 /**************************************************************************************************/
243
244 void PhyloTree::print(ofstream& out){
245         try {
246                 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
247                 print(0, out);
248         }
249         catch(exception& e) {
250                 m->errorOut(e, "PhyloTree", "print");
251                 exit(1);
252         }
253 }
254
255 /**************************************************************************************************/
256
257 void PhyloTree::print(int i, ofstream& out){
258         try {
259                 map<string,int>::iterator it;
260                 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
261                         out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
262                         print(it->second, out);
263                 }
264                 
265                 
266         }
267         catch(exception& e) {
268                 m->errorOut(e, "PhyloTree", "print");
269                 exit(1);
270         }
271 }
272
273 /**************************************************************************************************/
274
275
276