5 * Created by Pat Schloss on 6/17/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "phylotree.h"
12 /**************************************************************************************************/
14 PhyloTree::PhyloTree(){
16 m = MothurOut::getInstance();
19 tree.push_back(TaxNode("Root"));
20 tree[0].heirarchyID = "0";
24 m->errorOut(e, "PhyloTree", "PhyloTree");
28 /**************************************************************************************************/
30 PhyloTree::PhyloTree(string tfile){
32 m = MothurOut::getInstance();
35 tree.push_back(TaxNode("Root"));
36 tree[0].heirarchyID = "0";
40 openInputFile(tfile, in);
42 //read in users taxonomy file and add sequences to tree
45 in >> name >> tax; gobble(in);
47 addSeqToTree(name, tax);
51 assignHeirarchyIDs(0);
54 m->errorOut(e, "PhyloTree", "PhyloTree");
59 /**************************************************************************************************/
61 string PhyloTree::getNextTaxon(string& heirarchy){
63 string currentLevel = "";
65 int pos = heirarchy.find_first_of(';');
66 currentLevel=heirarchy.substr(0,pos);
67 if (pos != (heirarchy.length()-1)) { heirarchy=heirarchy.substr(pos+1); }
68 else { heirarchy = ""; }
73 m->errorOut(e, "PhyloTree", "getNextTaxon");
78 /**************************************************************************************************/
80 int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
84 map<string, int>::iterator childPointer;
89 tree[0].accessions.push_back(seqName);
90 string taxon;// = getNextTaxon(seqTaxonomy);
92 while(seqTaxonomy != ""){
96 if (m->control_pressed) { return 0; }
98 //somehow the parent is getting one too many accnos
99 //use print to reassign the taxa id
100 taxon = getNextTaxon(seqTaxonomy);
102 childPointer = tree[currentNode].children.find(taxon);
104 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
105 currentNode = childPointer->second;
106 tree[currentNode].accessions.push_back(seqName);
107 name2Taxonomy[seqName] = currentNode;
109 else{ //otherwise, create it
110 tree.push_back(TaxNode(taxon));
112 tree[currentNode].children[taxon] = numNodes-1;
113 tree[numNodes-1].parent = currentNode;
115 // int numChildren = tree[currentNode].children.size();
116 // string heirarchyID = tree[currentNode].heirarchyID;
117 // tree[currentNode].accessions.push_back(seqName);
119 currentNode = tree[currentNode].children[taxon];
120 tree[currentNode].accessions.push_back(seqName);
121 name2Taxonomy[seqName] = currentNode;
122 // tree[currentNode].level = level;
123 // tree[currentNode].childNumber = numChildren;
124 // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
127 if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
131 catch(exception& e) {
132 m->errorOut(e, "PhyloTree", "addSeqToTree");
136 /**************************************************************************************************/
137 vector<int> PhyloTree::getGenusNodes() {
140 //generate genusIndexes
141 map<int, int>::iterator it2;
142 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
146 catch(exception& e) {
147 m->errorOut(e, "PhyloTree", "getGenusNodes");
151 /**************************************************************************************************/
153 void PhyloTree::assignHeirarchyIDs(int index){
155 map<string,int>::iterator it;
158 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
159 tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
161 tree[it->second].level = tree[index].level + 1;
163 //save maxLevel for binning the unclassified seqs
164 if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; }
166 assignHeirarchyIDs(it->second);
169 catch(exception& e) {
170 m->errorOut(e, "PhyloTree", "assignHeirarchyIDs");
174 /**************************************************************************************************/
175 void PhyloTree::binUnclassified(){
177 map<string, int>::iterator itBin;
178 map<string, int>::iterator childPointer;
180 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
181 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
183 int level = tree[itBin->second].level;
184 int currentNode = itBin->second;
186 //this sequence is unclassified at some levels
187 while(level != maxLevel){
191 string taxon = "unclassified";
193 //does the parent have a child names 'unclassified'?
194 childPointer = tree[currentNode].children.find(taxon);
196 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
197 currentNode = childPointer->second; //currentNode becomes 'unclassified'
198 tree[currentNode].accessions.push_back(itBin->first); //add this seq
199 name2Taxonomy[itBin->first] = currentNode;
201 else{ //otherwise, create it
202 tree.push_back(TaxNode(taxon));
204 tree[currentNode].children[taxon] = numNodes-1;
205 tree[numNodes-1].parent = currentNode;
207 currentNode = tree[currentNode].children[taxon];
208 tree[currentNode].accessions.push_back(itBin->first);
209 name2Taxonomy[itBin->first] = currentNode;
212 if (level == maxLevel) { uniqueTaxonomies[currentNode] = currentNode; }
216 //clear HeirarchyIDs and reset them
217 for (int i = 1; i < tree.size(); i++) {
218 tree[i].heirarchyID = "";
220 assignHeirarchyIDs(0);
223 catch(exception& e) {
224 m->errorOut(e, "PhyloTree", "binUnclassified");
228 /**************************************************************************************************/
229 string PhyloTree::getFullTaxonomy(string seqName) {
233 int currentNode = name2Taxonomy[seqName];
235 while (tree[currentNode].parent != -1) {
236 tax = tree[currentNode].name + ";" + tax;
237 currentNode = tree[currentNode].parent;
242 catch(exception& e) {
243 m->errorOut(e, "PhyloTree", "getFullTaxonomy");
247 /**************************************************************************************************/
249 void PhyloTree::print(ofstream& out){
251 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
254 catch(exception& e) {
255 m->errorOut(e, "PhyloTree", "print");
260 /**************************************************************************************************/
262 void PhyloTree::print(int i, ofstream& out){
264 map<string,int>::iterator it;
265 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
266 out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
267 print(it->second, out);
272 catch(exception& e) {
273 m->errorOut(e, "PhyloTree", "print");
278 /**************************************************************************************************/