]> git.donarmstrong.com Git - mothur.git/blob - phylotree.cpp
worked on hcluster. made .single command run using a sharedfile. and various other...
[mothur.git] / phylotree.cpp
1 /*
2  *  doTaxonomy.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 6/17/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "phylotree.h"
11
12 /**************************************************************************************************/
13
14 PhyloTree::PhyloTree(){
15         try {
16                 numNodes = 1;
17                 numSeqs = 0;
18                 tree.push_back(TaxNode("Root"));
19                 tree[0].heirarchyID = "0";
20                 maxLevel = 0;
21         }
22         catch(exception& e) {
23                 errorOut(e, "PhyloTree", "PhyloTree");
24                 exit(1);
25         }
26 }
27 /**************************************************************************************************/
28
29 PhyloTree::PhyloTree(string tfile){
30         try {
31                 numNodes = 1;
32                 numSeqs = 0;
33                 tree.push_back(TaxNode("Root"));
34                 tree[0].heirarchyID = "0";
35                 maxLevel = 0;
36                 
37                 ifstream in;
38                 openInputFile(tfile, in);
39                 
40                 //read in users taxonomy file and add sequences to tree
41                 string name, tax;
42                 while(!in.eof()){
43                         in >> name >> tax; gobble(in);
44                         
45                         addSeqToTree(name, tax);
46                 }
47                 in.close();
48         
49                 assignHeirarchyIDs(0);
50         }
51         catch(exception& e) {
52                 errorOut(e, "PhyloTree", "PhyloTree");
53                 exit(1);
54         }
55 }
56
57 /**************************************************************************************************/
58
59 string PhyloTree::getNextTaxon(string& heirarchy){
60         try {
61                 string currentLevel = "";
62                 if(heirarchy != ""){
63                         currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
64                         heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
65                 }
66                 return currentLevel;
67         }
68         catch(exception& e) {
69                 errorOut(e, "PhyloTree", "getNextTaxon");
70                 exit(1);
71         }
72 }
73
74 /**************************************************************************************************/
75
76 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
77         try {
78                 numSeqs++;
79                 
80                 map<string, int>::iterator childPointer;
81                 
82                 int currentNode = 0;
83                 int level = 1;
84                 
85                 tree[0].accessions.push_back(seqName);
86                 string taxon;// = getNextTaxon(seqTaxonomy);
87                 
88                 while(seqTaxonomy != ""){
89                         
90                         level++;
91                         
92                         //somehow the parent is getting one too many accnos
93                         //use print to reassign the taxa id
94                         taxon = getNextTaxon(seqTaxonomy);
95                         
96                         childPointer = tree[currentNode].children.find(taxon);
97                         
98                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
99                                 currentNode = childPointer->second;
100                                 tree[currentNode].accessions.push_back(seqName);
101                                 name2Taxonomy[seqName] = currentNode;
102                         }
103                         else{                                                                                   //otherwise, create it
104                                 tree.push_back(TaxNode(taxon));
105                                 numNodes++;
106                                 tree[currentNode].children[taxon] = numNodes-1;
107                                 tree[numNodes-1].parent = currentNode;
108                                 
109                                 //                      int numChildren = tree[currentNode].children.size();
110                                 //                      string heirarchyID = tree[currentNode].heirarchyID;
111                                 //                      tree[currentNode].accessions.push_back(seqName);
112                                 
113                                 currentNode = tree[currentNode].children[taxon];
114                                 tree[currentNode].accessions.push_back(seqName);
115                                 name2Taxonomy[seqName] = currentNode;
116                                 //                      tree[currentNode].level = level;
117                                 //                      tree[currentNode].childNumber = numChildren;
118                                 //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
119                         }
120                 
121                         if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
122                 }
123
124         }
125         catch(exception& e) {
126                 errorOut(e, "PhyloTree", "addSeqToTree");
127                 exit(1);
128         }
129 }
130 /**************************************************************************************************/
131 vector<int> PhyloTree::getGenusNodes()  {
132         try {
133                 genusIndex.clear();
134                 //generate genusIndexes
135                 map<int, int>::iterator it2;
136                 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
137                 
138                 return genusIndex;
139         }
140         catch(exception& e) {
141                 errorOut(e, "PhyloTree", "getGenusNodes");
142                 exit(1);
143         }
144 }
145 /**************************************************************************************************/
146
147 void PhyloTree::assignHeirarchyIDs(int index){
148         try {
149                 map<string,int>::iterator it;
150                 int counter = 1;
151                 
152                 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
153                         tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
154                         counter++;
155                         tree[it->second].level = tree[index].level + 1;
156                         
157                         //save maxLevel for binning the unclassified seqs
158                         if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; } 
159                         
160                         assignHeirarchyIDs(it->second);
161                 }
162         }
163         catch(exception& e) {
164                 errorOut(e, "PhyloTree", "assignHeirarchyIDs");
165                 exit(1);
166         }
167 }
168 /**************************************************************************************************/
169 void PhyloTree::binUnclassified(){
170         try {
171                 map<string, int>::iterator itBin;
172                 map<string, int>::iterator childPointer;
173                 
174                 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
175                 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
176                 
177                         int level = tree[itBin->second].level;
178                         int currentNode = itBin->second;
179                         
180                         //this sequence is unclassified at some levels
181                         while(level != maxLevel){
182                         
183                                 level++;
184                         
185                                 string taxon = "unclassified";  
186                                 
187                                 //does the parent have a child names 'unclassified'?
188                                 childPointer = tree[currentNode].children.find(taxon);
189                                 
190                                 if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
191                                         currentNode = childPointer->second; //currentNode becomes 'unclassified'
192                                         tree[currentNode].accessions.push_back(itBin->first);  //add this seq
193                                         name2Taxonomy[itBin->first] = currentNode;
194                                 }
195                                 else{                                                                                   //otherwise, create it
196                                         tree.push_back(TaxNode(taxon));
197                                         numNodes++;
198                                         tree[currentNode].children[taxon] = numNodes-1;
199                                         tree[numNodes-1].parent = currentNode;
200                                                                         
201                                         currentNode = tree[currentNode].children[taxon];
202                                         tree[currentNode].accessions.push_back(itBin->first);
203                                         name2Taxonomy[itBin->first] = currentNode;
204                                 }
205                                 
206                                 if (level == maxLevel) {   uniqueTaxonomies[currentNode] = currentNode; }
207                         }
208                 }
209                 
210                 //clear HeirarchyIDs and reset them
211                 for (int i = 1; i < tree.size(); i++) {
212                         tree[i].heirarchyID = "";
213                 }
214                 assignHeirarchyIDs(0);
215                 
216         }
217         catch(exception& e) {
218                 errorOut(e, "PhyloTree", "binUnclassified");
219                 exit(1);
220         }
221 }
222 /**************************************************************************************************/
223 string PhyloTree::getFullTaxonomy(string seqName) {
224         try {
225                 string tax = "";
226                 
227                 int currentNode = name2Taxonomy[seqName];
228                 
229                 while (tree[currentNode].parent != -1) {
230                         tax = tree[currentNode].name + ";" + tax;
231                         currentNode = tree[currentNode].parent;
232                 }
233                 
234                 return tax;
235         }
236         catch(exception& e) {
237                 errorOut(e, "PhyloTree", "getFullTaxonomy");
238                 exit(1);
239         }
240 }
241 /**************************************************************************************************/
242
243 void PhyloTree::print(ofstream& out){
244         try {
245                 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
246                 print(0, out);
247         }
248         catch(exception& e) {
249                 errorOut(e, "PhyloTree", "print");
250                 exit(1);
251         }
252 }
253
254 /**************************************************************************************************/
255
256 void PhyloTree::print(int i, ofstream& out){
257         try {
258                 map<string,int>::iterator it;
259                 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
260                         out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
261                         print(it->second, out);
262                 }
263                 
264                 
265         }
266         catch(exception& e) {
267                 errorOut(e, "PhyloTree", "print");
268                 exit(1);
269         }
270 }
271
272 /**************************************************************************************************/
273
274
275