5 * Created by Pat Schloss on 6/17/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "phylotree.h"
12 /**************************************************************************************************/
14 PhyloTree::PhyloTree(){
18 tree.push_back(TaxNode("Root"));
19 tree[0].heirarchyID = "0";
22 errorOut(e, "PhyloTree", "PhyloTree");
26 /**************************************************************************************************/
28 PhyloTree::PhyloTree(string tfile){
32 tree.push_back(TaxNode("Root"));
33 tree[0].heirarchyID = "0";
37 openInputFile(tfile, in);
39 //read in users taxonomy file and add sequences to tree
42 in >> name >> tax; gobble(in);
44 addSeqToTree(name, tax);
48 assignHeirarchyIDs(0);
51 errorOut(e, "PhyloTree", "PhyloTree");
56 /**************************************************************************************************/
58 string PhyloTree::getNextTaxon(string& heirarchy){
60 string currentLevel = "";
62 currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
63 heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
68 errorOut(e, "PhyloTree", "getNextTaxon");
73 /**************************************************************************************************/
75 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
79 map<string, int>::iterator childPointer;
84 tree[0].accessions.push_back(seqName);
85 string taxon;// = getNextTaxon(seqTaxonomy);
87 while(seqTaxonomy != ""){
91 //somehow the parent is getting one too many accnos
92 //use print to reassign the taxa id
93 taxon = getNextTaxon(seqTaxonomy);
95 childPointer = tree[currentNode].children.find(taxon);
97 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
98 currentNode = childPointer->second;
99 tree[currentNode].accessions.push_back(seqName);
100 name2Taxonomy[seqName] = currentNode;
102 else{ //otherwise, create it
103 tree.push_back(TaxNode(taxon));
105 tree[currentNode].children[taxon] = numNodes-1;
106 tree[numNodes-1].parent = currentNode;
108 // int numChildren = tree[currentNode].children.size();
109 // string heirarchyID = tree[currentNode].heirarchyID;
110 // tree[currentNode].accessions.push_back(seqName);
112 currentNode = tree[currentNode].children[taxon];
113 tree[currentNode].accessions.push_back(seqName);
114 name2Taxonomy[seqName] = currentNode;
115 // tree[currentNode].level = level;
116 // tree[currentNode].childNumber = numChildren;
117 // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
120 if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
124 catch(exception& e) {
125 errorOut(e, "PhyloTree", "addSeqToTree");
129 /**************************************************************************************************/
130 vector<int> PhyloTree::getGenusNodes() {
133 //generate genusIndexes
134 map<int, int>::iterator it2;
135 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
139 catch(exception& e) {
140 errorOut(e, "PhyloTree", "getGenusNodes");
144 /**************************************************************************************************/
146 void PhyloTree::assignHeirarchyIDs(int index){
148 map<string,int>::iterator it;
151 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
152 tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
154 tree[it->second].level = tree[index].level + 1;
156 //save maxLevel for binning the unclassified seqs
157 if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; }
159 assignHeirarchyIDs(it->second);
162 catch(exception& e) {
163 errorOut(e, "PhyloTree", "assignHeirarchyIDs");
167 /**************************************************************************************************/
168 void PhyloTree::binUnclassified(){
170 map<string, int>::iterator itBin;
171 map<string, int>::iterator childPointer;
173 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
174 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
176 int level = tree[itBin->second].level;
177 int currentNode = itBin->second;
179 //this sequence is unclassified at some levels
180 while(level != maxLevel){
184 string taxon = "unclassified";
186 //does the parent have a child names 'unclassified'?
187 childPointer = tree[currentNode].children.find(taxon);
189 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
190 currentNode = childPointer->second; //currentNode becomes 'unclassified'
191 tree[currentNode].accessions.push_back(itBin->first); //add this seq
192 name2Taxonomy[itBin->first] = currentNode;
194 else{ //otherwise, create it
195 tree.push_back(TaxNode(taxon));
197 tree[currentNode].children[taxon] = numNodes-1;
198 tree[numNodes-1].parent = currentNode;
200 currentNode = tree[currentNode].children[taxon];
201 tree[currentNode].accessions.push_back(itBin->first);
202 name2Taxonomy[itBin->first] = currentNode;
205 if (level == maxLevel) { uniqueTaxonomies[currentNode] = currentNode; }
209 //clear HeirarchyIDs and reset them
210 for (int i = 1; i < tree.size(); i++) {
211 tree[i].heirarchyID = "";
213 assignHeirarchyIDs(0);
216 catch(exception& e) {
217 errorOut(e, "PhyloTree", "binUnclassified");
221 /**************************************************************************************************/
223 void PhyloTree::print(ofstream& out){
225 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
228 catch(exception& e) {
229 errorOut(e, "PhyloTree", "print");
234 /**************************************************************************************************/
236 void PhyloTree::print(int i, ofstream& out){
238 map<string,int>::iterator it;
239 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
240 out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
241 print(it->second, out);
246 catch(exception& e) {
247 errorOut(e, "PhyloTree", "print");
252 /**************************************************************************************************/