]> git.donarmstrong.com Git - mothur.git/blob - phylotree.cpp
some changes while testing 1.9
[mothur.git] / phylotree.cpp
1 /*
2  *  doTaxonomy.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 6/17/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "phylotree.h"
11
12 /**************************************************************************************************/
13
14 PhyloTree::PhyloTree(){
15         try {
16                 m = MothurOut::getInstance();
17                 numNodes = 1;
18                 numSeqs = 0;
19                 tree.push_back(TaxNode("Root"));
20                 tree[0].heirarchyID = "0";
21                 maxLevel = 0;
22         }
23         catch(exception& e) {
24                 m->errorOut(e, "PhyloTree", "PhyloTree");
25                 exit(1);
26         }
27 }
28 /**************************************************************************************************/
29
30 PhyloTree::PhyloTree(string tfile){
31         try {
32                 m = MothurOut::getInstance();
33                 numNodes = 1;
34                 numSeqs = 0;
35                 tree.push_back(TaxNode("Root"));
36                 tree[0].heirarchyID = "0";
37                 maxLevel = 0;
38                 
39                 ifstream in;
40                 openInputFile(tfile, in);
41                 
42                 //read in users taxonomy file and add sequences to tree
43                 string name, tax;
44                 while(!in.eof()){
45                         in >> name >> tax; gobble(in);
46                         
47                         addSeqToTree(name, tax);
48                 }
49                 in.close();
50         
51                 assignHeirarchyIDs(0);
52         }
53         catch(exception& e) {
54                 m->errorOut(e, "PhyloTree", "PhyloTree");
55                 exit(1);
56         }
57 }
58
59 /**************************************************************************************************/
60
61 string PhyloTree::getNextTaxon(string& heirarchy){
62         try {
63                 string currentLevel = "";
64                 if(heirarchy != ""){
65                         int pos = heirarchy.find_first_of(';');
66                         currentLevel=heirarchy.substr(0,pos);
67                         if (pos != (heirarchy.length()-1)) {  heirarchy=heirarchy.substr(pos+1);  }
68                         else { heirarchy = ""; }
69                 }
70                 return currentLevel;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "PhyloTree", "getNextTaxon");
74                 exit(1);
75         }
76 }
77
78 /**************************************************************************************************/
79
80 int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
81         try {
82                 numSeqs++;
83                 
84                 map<string, int>::iterator childPointer;
85                 
86                 int currentNode = 0;
87                 int level = 1;
88                 
89                 tree[0].accessions.push_back(seqName);
90                 string taxon;// = getNextTaxon(seqTaxonomy);
91                 
92                 while(seqTaxonomy != ""){
93                         
94                         level++;
95                         
96                         if (m->control_pressed) { return 0; }
97                         
98                         //somehow the parent is getting one too many accnos
99                         //use print to reassign the taxa id
100                         taxon = getNextTaxon(seqTaxonomy);
101                         
102                         childPointer = tree[currentNode].children.find(taxon);
103                         
104                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
105                                 currentNode = childPointer->second;
106                                 tree[currentNode].accessions.push_back(seqName);
107                                 name2Taxonomy[seqName] = currentNode;
108                         }
109                         else{                                                                                   //otherwise, create it
110                                 tree.push_back(TaxNode(taxon));
111                                 numNodes++;
112                                 tree[currentNode].children[taxon] = numNodes-1;
113                                 tree[numNodes-1].parent = currentNode;
114                                 
115                                 //                      int numChildren = tree[currentNode].children.size();
116                                 //                      string heirarchyID = tree[currentNode].heirarchyID;
117                                 //                      tree[currentNode].accessions.push_back(seqName);
118                                 
119                                 currentNode = tree[currentNode].children[taxon];
120                                 tree[currentNode].accessions.push_back(seqName);
121                                 name2Taxonomy[seqName] = currentNode;
122                                 //                      tree[currentNode].level = level;
123                                 //                      tree[currentNode].childNumber = numChildren;
124                                 //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
125                         }
126                 
127                         if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
128                 }
129
130         }
131         catch(exception& e) {
132                 m->errorOut(e, "PhyloTree", "addSeqToTree");
133                 exit(1);
134         }
135 }
136 /**************************************************************************************************/
137 vector<int> PhyloTree::getGenusNodes()  {
138         try {
139                 genusIndex.clear();
140                 //generate genusIndexes
141                 map<int, int>::iterator it2;
142                 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
143                 
144                 return genusIndex;
145         }
146         catch(exception& e) {
147                 m->errorOut(e, "PhyloTree", "getGenusNodes");
148                 exit(1);
149         }
150 }
151 /**************************************************************************************************/
152
153 void PhyloTree::assignHeirarchyIDs(int index){
154         try {
155                 map<string,int>::iterator it;
156                 int counter = 1;
157                 
158                 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
159                         tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
160                         counter++;
161                         tree[it->second].level = tree[index].level + 1;
162                                                 
163                         //save maxLevel for binning the unclassified seqs
164                         if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; } 
165                         
166                         assignHeirarchyIDs(it->second);
167                 }
168         }
169         catch(exception& e) {
170                 m->errorOut(e, "PhyloTree", "assignHeirarchyIDs");
171                 exit(1);
172         }
173 }
174 /**************************************************************************************************/
175 void PhyloTree::binUnclassified(){
176         try {
177                 map<string, int>::iterator itBin;
178                 map<string, int>::iterator childPointer;
179                 
180                 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
181                 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
182                 
183                         int level = tree[itBin->second].level;
184                         int currentNode = itBin->second;
185                         
186                         //this sequence is unclassified at some levels
187                         while(level != maxLevel){
188                         
189                                 level++;
190                         
191                                 string taxon = "unclassified";  
192                                 
193                                 //does the parent have a child names 'unclassified'?
194                                 childPointer = tree[currentNode].children.find(taxon);
195                                 
196                                 if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
197                                         currentNode = childPointer->second; //currentNode becomes 'unclassified'
198                                         tree[currentNode].accessions.push_back(itBin->first);  //add this seq
199                                         name2Taxonomy[itBin->first] = currentNode;
200                                 }
201                                 else{                                                                                   //otherwise, create it
202                                         tree.push_back(TaxNode(taxon));
203                                         numNodes++;
204                                         tree[currentNode].children[taxon] = numNodes-1;
205                                         tree[numNodes-1].parent = currentNode;
206                                                                         
207                                         currentNode = tree[currentNode].children[taxon];
208                                         tree[currentNode].accessions.push_back(itBin->first);
209                                         name2Taxonomy[itBin->first] = currentNode;
210                                 }
211                                 
212                                 if (level == maxLevel) {   uniqueTaxonomies[currentNode] = currentNode; }
213                         }
214                 }
215                 
216                 //clear HeirarchyIDs and reset them
217                 for (int i = 1; i < tree.size(); i++) {
218                         tree[i].heirarchyID = "";
219                 }
220                 assignHeirarchyIDs(0);
221                 
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "PhyloTree", "binUnclassified");
225                 exit(1);
226         }
227 }
228 /**************************************************************************************************/
229 string PhyloTree::getFullTaxonomy(string seqName) {
230         try {
231                 string tax = "";
232                 
233                 int currentNode = name2Taxonomy[seqName];
234                 
235                 while (tree[currentNode].parent != -1) {
236                         tax = tree[currentNode].name + ";" + tax;
237                         currentNode = tree[currentNode].parent;
238                 }
239                 
240                 return tax;
241         }
242         catch(exception& e) {
243                 m->errorOut(e, "PhyloTree", "getFullTaxonomy");
244                 exit(1);
245         }
246 }
247 /**************************************************************************************************/
248
249 void PhyloTree::print(ofstream& out){
250         try {
251                 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
252                 print(0, out);
253         }
254         catch(exception& e) {
255                 m->errorOut(e, "PhyloTree", "print");
256                 exit(1);
257         }
258 }
259
260 /**************************************************************************************************/
261
262 void PhyloTree::print(int i, ofstream& out){
263         try {
264                 map<string,int>::iterator it;
265                 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
266                         out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
267                         print(it->second, out);
268                 }
269                 
270                 
271         }
272         catch(exception& e) {
273                 m->errorOut(e, "PhyloTree", "print");
274                 exit(1);
275         }
276 }
277
278 /**************************************************************************************************/
279
280
281