5 * Created by Pat Schloss on 6/17/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "phylotree.h"
12 /**************************************************************************************************/
14 PhyloTree::PhyloTree(){
18 tree.push_back(TaxNode("Root"));
19 tree[0].heirarchyID = "0";
23 errorOut(e, "PhyloTree", "PhyloTree");
27 /**************************************************************************************************/
29 PhyloTree::PhyloTree(string tfile){
33 tree.push_back(TaxNode("Root"));
34 tree[0].heirarchyID = "0";
38 openInputFile(tfile, in);
40 //read in users taxonomy file and add sequences to tree
43 in >> name >> tax; gobble(in);
45 addSeqToTree(name, tax);
49 assignHeirarchyIDs(0);
52 errorOut(e, "PhyloTree", "PhyloTree");
57 /**************************************************************************************************/
59 string PhyloTree::getNextTaxon(string& heirarchy){
61 string currentLevel = "";
63 currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
64 heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
69 errorOut(e, "PhyloTree", "getNextTaxon");
74 /**************************************************************************************************/
76 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
80 map<string, int>::iterator childPointer;
85 tree[0].accessions.push_back(seqName);
86 string taxon;// = getNextTaxon(seqTaxonomy);
88 while(seqTaxonomy != ""){
92 //somehow the parent is getting one too many accnos
93 //use print to reassign the taxa id
94 taxon = getNextTaxon(seqTaxonomy);
96 childPointer = tree[currentNode].children.find(taxon);
98 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
99 currentNode = childPointer->second;
100 tree[currentNode].accessions.push_back(seqName);
101 name2Taxonomy[seqName] = currentNode;
103 else{ //otherwise, create it
104 tree.push_back(TaxNode(taxon));
106 tree[currentNode].children[taxon] = numNodes-1;
107 tree[numNodes-1].parent = currentNode;
109 // int numChildren = tree[currentNode].children.size();
110 // string heirarchyID = tree[currentNode].heirarchyID;
111 // tree[currentNode].accessions.push_back(seqName);
113 currentNode = tree[currentNode].children[taxon];
114 tree[currentNode].accessions.push_back(seqName);
115 name2Taxonomy[seqName] = currentNode;
116 // tree[currentNode].level = level;
117 // tree[currentNode].childNumber = numChildren;
118 // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
121 if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
125 catch(exception& e) {
126 errorOut(e, "PhyloTree", "addSeqToTree");
130 /**************************************************************************************************/
131 vector<int> PhyloTree::getGenusNodes() {
134 //generate genusIndexes
135 map<int, int>::iterator it2;
136 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
140 catch(exception& e) {
141 errorOut(e, "PhyloTree", "getGenusNodes");
145 /**************************************************************************************************/
147 void PhyloTree::assignHeirarchyIDs(int index){
149 map<string,int>::iterator it;
152 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
153 tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
155 tree[it->second].level = tree[index].level + 1;
157 //save maxLevel for binning the unclassified seqs
158 if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; }
160 assignHeirarchyIDs(it->second);
163 catch(exception& e) {
164 errorOut(e, "PhyloTree", "assignHeirarchyIDs");
168 /**************************************************************************************************/
169 void PhyloTree::binUnclassified(){
171 map<string, int>::iterator itBin;
172 map<string, int>::iterator childPointer;
174 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
175 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
177 int level = tree[itBin->second].level;
178 int currentNode = itBin->second;
180 //this sequence is unclassified at some levels
181 while(level != maxLevel){
185 string taxon = "unclassified";
187 //does the parent have a child names 'unclassified'?
188 childPointer = tree[currentNode].children.find(taxon);
190 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
191 currentNode = childPointer->second; //currentNode becomes 'unclassified'
192 tree[currentNode].accessions.push_back(itBin->first); //add this seq
193 name2Taxonomy[itBin->first] = currentNode;
195 else{ //otherwise, create it
196 tree.push_back(TaxNode(taxon));
198 tree[currentNode].children[taxon] = numNodes-1;
199 tree[numNodes-1].parent = currentNode;
201 currentNode = tree[currentNode].children[taxon];
202 tree[currentNode].accessions.push_back(itBin->first);
203 name2Taxonomy[itBin->first] = currentNode;
206 if (level == maxLevel) { uniqueTaxonomies[currentNode] = currentNode; }
210 //clear HeirarchyIDs and reset them
211 for (int i = 1; i < tree.size(); i++) {
212 tree[i].heirarchyID = "";
214 assignHeirarchyIDs(0);
217 catch(exception& e) {
218 errorOut(e, "PhyloTree", "binUnclassified");
222 /**************************************************************************************************/
223 string PhyloTree::getFullTaxonomy(string seqName) {
227 int currentNode = name2Taxonomy[seqName];
229 while (tree[currentNode].parent != -1) {
230 tax = tree[currentNode].name + ";" + tax;
231 currentNode = tree[currentNode].parent;
236 catch(exception& e) {
237 errorOut(e, "PhyloTree", "getFullTaxonomy");
241 /**************************************************************************************************/
243 void PhyloTree::print(ofstream& out){
245 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
248 catch(exception& e) {
249 errorOut(e, "PhyloTree", "print");
254 /**************************************************************************************************/
256 void PhyloTree::print(int i, ofstream& out){
258 map<string,int>::iterator it;
259 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
260 out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
261 print(it->second, out);
266 catch(exception& e) {
267 errorOut(e, "PhyloTree", "print");
272 /**************************************************************************************************/