]> git.donarmstrong.com Git - mothur.git/blob - phylotree.cpp
you can now use a distance matrix as input for the heatmap.sim command.
[mothur.git] / phylotree.cpp
1 /*
2  *  doTaxonomy.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 6/17/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "phylotree.h"
11
12 /**************************************************************************************************/
13
14 PhyloTree::PhyloTree(){
15         try {
16                 numNodes = 1;
17                 numSeqs = 0;
18                 tree.push_back(TaxNode("Root"));
19                 tree[0].heirarchyID = "0";
20         }
21         catch(exception& e) {
22                 errorOut(e, "PhyloTree", "PhyloTree");
23                 exit(1);
24         }
25 }
26 /**************************************************************************************************/
27
28 PhyloTree::PhyloTree(string tfile){
29         try {
30                 numNodes = 1;
31                 numSeqs = 0;
32                 tree.push_back(TaxNode("Root"));
33                 tree[0].heirarchyID = "0";
34                 maxLevel = 0;
35                 
36                 ifstream in;
37                 openInputFile(tfile, in);
38                 
39                 //read in users taxonomy file and add sequences to tree
40                 string name, tax;
41                 while(!in.eof()){
42                         in >> name >> tax; gobble(in);
43                         
44                         addSeqToTree(name, tax);
45                 }
46                 in.close();
47         
48                 assignHeirarchyIDs(0);
49         }
50         catch(exception& e) {
51                 errorOut(e, "PhyloTree", "PhyloTree");
52                 exit(1);
53         }
54 }
55
56 /**************************************************************************************************/
57
58 string PhyloTree::getNextTaxon(string& heirarchy){
59         try {
60                 string currentLevel = "";
61                 if(heirarchy != ""){
62                         currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
63                         heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
64                 }
65                 return currentLevel;
66         }
67         catch(exception& e) {
68                 errorOut(e, "PhyloTree", "getNextTaxon");
69                 exit(1);
70         }
71 }
72
73 /**************************************************************************************************/
74
75 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
76         try {
77                 numSeqs++;
78                 
79                 map<string, int>::iterator childPointer;
80                 
81                 int currentNode = 0;
82                 int level = 1;
83                 
84                 tree[0].accessions.push_back(seqName);
85                 string taxon;// = getNextTaxon(seqTaxonomy);
86                 
87                 while(seqTaxonomy != ""){
88                         
89                         level++;
90                         
91                         //somehow the parent is getting one too many accnos
92                         //use print to reassign the taxa id
93                         taxon = getNextTaxon(seqTaxonomy);
94                         
95                         childPointer = tree[currentNode].children.find(taxon);
96                         
97                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
98                                 currentNode = childPointer->second;
99                                 tree[currentNode].accessions.push_back(seqName);
100                                 name2Taxonomy[seqName] = currentNode;
101                         }
102                         else{                                                                                   //otherwise, create it
103                                 tree.push_back(TaxNode(taxon));
104                                 numNodes++;
105                                 tree[currentNode].children[taxon] = numNodes-1;
106                                 tree[numNodes-1].parent = currentNode;
107                                 
108                                 //                      int numChildren = tree[currentNode].children.size();
109                                 //                      string heirarchyID = tree[currentNode].heirarchyID;
110                                 //                      tree[currentNode].accessions.push_back(seqName);
111                                 
112                                 currentNode = tree[currentNode].children[taxon];
113                                 tree[currentNode].accessions.push_back(seqName);
114                                 name2Taxonomy[seqName] = currentNode;
115                                 //                      tree[currentNode].level = level;
116                                 //                      tree[currentNode].childNumber = numChildren;
117                                 //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
118                         }
119                 
120                         if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
121                 }
122
123         }
124         catch(exception& e) {
125                 errorOut(e, "PhyloTree", "addSeqToTree");
126                 exit(1);
127         }
128 }
129 /**************************************************************************************************/
130 vector<int> PhyloTree::getGenusNodes()  {
131         try {
132                 genusIndex.clear();
133                 //generate genusIndexes
134                 map<int, int>::iterator it2;
135                 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
136                 
137                 return genusIndex;
138         }
139         catch(exception& e) {
140                 errorOut(e, "PhyloTree", "getGenusNodes");
141                 exit(1);
142         }
143 }
144 /**************************************************************************************************/
145
146 void PhyloTree::assignHeirarchyIDs(int index){
147         try {
148                 map<string,int>::iterator it;
149                 int counter = 1;
150                 
151                 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
152                         tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
153                         counter++;
154                         tree[it->second].level = tree[index].level + 1;
155                         
156                         //save maxLevel for binning the unclassified seqs
157                         if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; } 
158                         
159                         assignHeirarchyIDs(it->second);
160                 }
161         }
162         catch(exception& e) {
163                 errorOut(e, "PhyloTree", "assignHeirarchyIDs");
164                 exit(1);
165         }
166 }
167 /**************************************************************************************************/
168 void PhyloTree::binUnclassified(){
169         try {
170                 map<string, int>::iterator itBin;
171                 map<string, int>::iterator childPointer;
172                 
173                 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
174                 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
175                 
176                         int level = tree[itBin->second].level;
177                         int currentNode = itBin->second;
178                         
179                         //this sequence is unclassified at some levels
180                         while(level != maxLevel){
181                         
182                                 level++;
183                         
184                                 string taxon = "unclassified";  
185                                 
186                                 //does the parent have a child names 'unclassified'?
187                                 childPointer = tree[currentNode].children.find(taxon);
188                                 
189                                 if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
190                                         currentNode = childPointer->second; //currentNode becomes 'unclassified'
191                                         tree[currentNode].accessions.push_back(itBin->first);  //add this seq
192                                         name2Taxonomy[itBin->first] = currentNode;
193                                 }
194                                 else{                                                                                   //otherwise, create it
195                                         tree.push_back(TaxNode(taxon));
196                                         numNodes++;
197                                         tree[currentNode].children[taxon] = numNodes-1;
198                                         tree[numNodes-1].parent = currentNode;
199                                                                         
200                                         currentNode = tree[currentNode].children[taxon];
201                                         tree[currentNode].accessions.push_back(itBin->first);
202                                         name2Taxonomy[itBin->first] = currentNode;
203                                 }
204                                 
205                                 if (level == maxLevel) {   uniqueTaxonomies[currentNode] = currentNode; }
206                         }
207                 }
208                 
209                 //clear HeirarchyIDs and reset them
210                 for (int i = 1; i < tree.size(); i++) {
211                         tree[i].heirarchyID = "";
212                 }
213                 assignHeirarchyIDs(0);
214                 
215         }
216         catch(exception& e) {
217                 errorOut(e, "PhyloTree", "binUnclassified");
218                 exit(1);
219         }
220 }
221 /**************************************************************************************************/
222
223 void PhyloTree::print(ofstream& out){
224         try {
225                 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
226                 print(0, out);
227         }
228         catch(exception& e) {
229                 errorOut(e, "PhyloTree", "print");
230                 exit(1);
231         }
232 }
233
234 /**************************************************************************************************/
235
236 void PhyloTree::print(int i, ofstream& out){
237         try {
238                 map<string,int>::iterator it;
239                 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
240                         out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
241                         print(it->second, out);
242                 }
243                 
244                 
245         }
246         catch(exception& e) {
247                 errorOut(e, "PhyloTree", "print");
248                 exit(1);
249         }
250 }
251
252 /**************************************************************************************************/
253
254
255