5 * Created by Pat Schloss on 6/17/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "phylotree.h"
12 /**************************************************************************************************/
14 PhyloTree::PhyloTree(){
16 m = MothurOut::getInstance();
19 tree.push_back(TaxNode("Root"));
20 tree[0].heirarchyID = "0";
24 m->errorOut(e, "PhyloTree", "PhyloTree");
28 /**************************************************************************************************/
30 PhyloTree::PhyloTree(string tfile){
34 tree.push_back(TaxNode("Root"));
35 tree[0].heirarchyID = "0";
39 openInputFile(tfile, in);
41 //read in users taxonomy file and add sequences to tree
44 in >> name >> tax; gobble(in);
46 addSeqToTree(name, tax);
50 assignHeirarchyIDs(0);
53 m->errorOut(e, "PhyloTree", "PhyloTree");
58 /**************************************************************************************************/
60 string PhyloTree::getNextTaxon(string& heirarchy){
62 string currentLevel = "";
64 currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
65 heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
70 m->errorOut(e, "PhyloTree", "getNextTaxon");
75 /**************************************************************************************************/
77 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
81 map<string, int>::iterator childPointer;
86 tree[0].accessions.push_back(seqName);
87 string taxon;// = getNextTaxon(seqTaxonomy);
89 while(seqTaxonomy != ""){
93 //somehow the parent is getting one too many accnos
94 //use print to reassign the taxa id
95 taxon = getNextTaxon(seqTaxonomy);
97 childPointer = tree[currentNode].children.find(taxon);
99 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
100 currentNode = childPointer->second;
101 tree[currentNode].accessions.push_back(seqName);
102 name2Taxonomy[seqName] = currentNode;
104 else{ //otherwise, create it
105 tree.push_back(TaxNode(taxon));
107 tree[currentNode].children[taxon] = numNodes-1;
108 tree[numNodes-1].parent = currentNode;
110 // int numChildren = tree[currentNode].children.size();
111 // string heirarchyID = tree[currentNode].heirarchyID;
112 // tree[currentNode].accessions.push_back(seqName);
114 currentNode = tree[currentNode].children[taxon];
115 tree[currentNode].accessions.push_back(seqName);
116 name2Taxonomy[seqName] = currentNode;
117 // tree[currentNode].level = level;
118 // tree[currentNode].childNumber = numChildren;
119 // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
122 if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
126 catch(exception& e) {
127 m->errorOut(e, "PhyloTree", "addSeqToTree");
131 /**************************************************************************************************/
132 vector<int> PhyloTree::getGenusNodes() {
135 //generate genusIndexes
136 map<int, int>::iterator it2;
137 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
141 catch(exception& e) {
142 m->errorOut(e, "PhyloTree", "getGenusNodes");
146 /**************************************************************************************************/
148 void PhyloTree::assignHeirarchyIDs(int index){
150 map<string,int>::iterator it;
153 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
154 tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
156 tree[it->second].level = tree[index].level + 1;
158 //save maxLevel for binning the unclassified seqs
159 if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; }
161 assignHeirarchyIDs(it->second);
164 catch(exception& e) {
165 m->errorOut(e, "PhyloTree", "assignHeirarchyIDs");
169 /**************************************************************************************************/
170 void PhyloTree::binUnclassified(){
172 map<string, int>::iterator itBin;
173 map<string, int>::iterator childPointer;
175 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
176 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
178 int level = tree[itBin->second].level;
179 int currentNode = itBin->second;
181 //this sequence is unclassified at some levels
182 while(level != maxLevel){
186 string taxon = "unclassified";
188 //does the parent have a child names 'unclassified'?
189 childPointer = tree[currentNode].children.find(taxon);
191 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
192 currentNode = childPointer->second; //currentNode becomes 'unclassified'
193 tree[currentNode].accessions.push_back(itBin->first); //add this seq
194 name2Taxonomy[itBin->first] = currentNode;
196 else{ //otherwise, create it
197 tree.push_back(TaxNode(taxon));
199 tree[currentNode].children[taxon] = numNodes-1;
200 tree[numNodes-1].parent = currentNode;
202 currentNode = tree[currentNode].children[taxon];
203 tree[currentNode].accessions.push_back(itBin->first);
204 name2Taxonomy[itBin->first] = currentNode;
207 if (level == maxLevel) { uniqueTaxonomies[currentNode] = currentNode; }
211 //clear HeirarchyIDs and reset them
212 for (int i = 1; i < tree.size(); i++) {
213 tree[i].heirarchyID = "";
215 assignHeirarchyIDs(0);
218 catch(exception& e) {
219 m->errorOut(e, "PhyloTree", "binUnclassified");
223 /**************************************************************************************************/
224 string PhyloTree::getFullTaxonomy(string seqName) {
228 int currentNode = name2Taxonomy[seqName];
230 while (tree[currentNode].parent != -1) {
231 tax = tree[currentNode].name + ";" + tax;
232 currentNode = tree[currentNode].parent;
237 catch(exception& e) {
238 m->errorOut(e, "PhyloTree", "getFullTaxonomy");
242 /**************************************************************************************************/
244 void PhyloTree::print(ofstream& out){
246 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
249 catch(exception& e) {
250 m->errorOut(e, "PhyloTree", "print");
255 /**************************************************************************************************/
257 void PhyloTree::print(int i, ofstream& out){
259 map<string,int>::iterator it;
260 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
261 out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
262 print(it->second, out);
267 catch(exception& e) {
268 m->errorOut(e, "PhyloTree", "print");
273 /**************************************************************************************************/