5 * Created by Pat Schloss on 6/17/09.
6 * Copyright 2009 Patrick D. Schloss. All rights reserved.
10 #include "phylotree.h"
12 /**************************************************************************************************/
14 PhyloTree::PhyloTree(){
16 m = MothurOut::getInstance();
19 tree.push_back(TaxNode("Root"));
20 tree[0].heirarchyID = "0";
25 m->errorOut(e, "PhyloTree", "PhyloTree");
29 /**************************************************************************************************/
31 PhyloTree::PhyloTree(ifstream& in){
33 m = MothurOut::getInstance();
36 in >> numNodes; gobble(in);
38 tree.resize(numNodes);
40 for (int i = 0; i < tree.size(); i++) {
41 in >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(in);
47 in >> numGenus; gobble(in);
51 for (int i = 0; i < numGenus; i++) {
52 in >> gnode >> gsize; gobble(in);
54 uniqueTaxonomies[gnode] = gnode;
55 totals.push_back(gsize);
62 m->errorOut(e, "PhyloTree", "PhyloTree");
66 /**************************************************************************************************/
68 PhyloTree::PhyloTree(string tfile){
70 m = MothurOut::getInstance();
73 tree.push_back(TaxNode("Root"));
74 tree[0].heirarchyID = "0";
79 openInputFile(tfile, in);
81 //read in users taxonomy file and add sequences to tree
84 in >> name >> tax; gobble(in);
86 addSeqToTree(name, tax);
90 assignHeirarchyIDs(0);
93 m->errorOut(e, "PhyloTree", "PhyloTree");
98 /**************************************************************************************************/
100 string PhyloTree::getNextTaxon(string& heirarchy){
102 string currentLevel = "";
104 int pos = heirarchy.find_first_of(';');
105 currentLevel=heirarchy.substr(0,pos);
106 if (pos != (heirarchy.length()-1)) { heirarchy=heirarchy.substr(pos+1); }
107 else { heirarchy = ""; }
111 catch(exception& e) {
112 m->errorOut(e, "PhyloTree", "getNextTaxon");
117 /**************************************************************************************************/
119 int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
123 map<string, int>::iterator childPointer;
128 tree[0].accessions.push_back(seqName);
129 string taxon;// = getNextTaxon(seqTaxonomy);
131 while(seqTaxonomy != ""){
135 if (m->control_pressed) { return 0; }
137 //somehow the parent is getting one too many accnos
138 //use print to reassign the taxa id
139 taxon = getNextTaxon(seqTaxonomy);
141 childPointer = tree[currentNode].children.find(taxon);
143 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
144 currentNode = childPointer->second;
145 tree[currentNode].accessions.push_back(seqName);
146 name2Taxonomy[seqName] = currentNode;
148 else{ //otherwise, create it
149 tree.push_back(TaxNode(taxon));
151 tree[currentNode].children[taxon] = numNodes-1;
152 tree[numNodes-1].parent = currentNode;
154 // int numChildren = tree[currentNode].children.size();
155 // string heirarchyID = tree[currentNode].heirarchyID;
156 // tree[currentNode].accessions.push_back(seqName);
158 currentNode = tree[currentNode].children[taxon];
159 tree[currentNode].accessions.push_back(seqName);
160 name2Taxonomy[seqName] = currentNode;
161 // tree[currentNode].level = level;
162 // tree[currentNode].childNumber = numChildren;
163 // tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
166 if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; }
170 catch(exception& e) {
171 m->errorOut(e, "PhyloTree", "addSeqToTree");
175 /**************************************************************************************************/
176 vector<int> PhyloTree::getGenusNodes() {
179 //generate genusIndexes
180 map<int, int>::iterator it2;
181 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); }
185 catch(exception& e) {
186 m->errorOut(e, "PhyloTree", "getGenusNodes");
190 /**************************************************************************************************/
191 vector<int> PhyloTree::getGenusTotals() {
196 //reset counts because we are on a new word
197 for (int j = 0; j < genusIndex.size(); j++) {
198 totals.push_back(tree[genusIndex[j]].accessions.size());
206 catch(exception& e) {
207 m->errorOut(e, "PhyloTree", "getGenusNodes");
211 /**************************************************************************************************/
213 void PhyloTree::assignHeirarchyIDs(int index){
215 map<string,int>::iterator it;
218 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
219 tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
221 tree[it->second].level = tree[index].level + 1;
223 //save maxLevel for binning the unclassified seqs
224 if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; }
226 assignHeirarchyIDs(it->second);
229 catch(exception& e) {
230 m->errorOut(e, "PhyloTree", "assignHeirarchyIDs");
234 /**************************************************************************************************/
235 void PhyloTree::binUnclassified(){
237 map<string, int>::iterator itBin;
238 map<string, int>::iterator childPointer;
240 //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
241 for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
243 int level = tree[itBin->second].level;
244 int currentNode = itBin->second;
246 //this sequence is unclassified at some levels
247 while(level != maxLevel){
251 string taxon = "unclassified";
253 //does the parent have a child names 'unclassified'?
254 childPointer = tree[currentNode].children.find(taxon);
256 if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on
257 currentNode = childPointer->second; //currentNode becomes 'unclassified'
258 tree[currentNode].accessions.push_back(itBin->first); //add this seq
259 name2Taxonomy[itBin->first] = currentNode;
261 else{ //otherwise, create it
262 tree.push_back(TaxNode(taxon));
264 tree[currentNode].children[taxon] = numNodes-1;
265 tree[numNodes-1].parent = currentNode;
267 currentNode = tree[currentNode].children[taxon];
268 tree[currentNode].accessions.push_back(itBin->first);
269 name2Taxonomy[itBin->first] = currentNode;
272 if (level == maxLevel) { uniqueTaxonomies[currentNode] = currentNode; }
276 //clear HeirarchyIDs and reset them
277 for (int i = 1; i < tree.size(); i++) {
278 tree[i].heirarchyID = "";
280 assignHeirarchyIDs(0);
283 catch(exception& e) {
284 m->errorOut(e, "PhyloTree", "binUnclassified");
288 /**************************************************************************************************/
289 string PhyloTree::getFullTaxonomy(string seqName) {
293 int currentNode = name2Taxonomy[seqName];
295 while (tree[currentNode].parent != -1) {
296 tax = tree[currentNode].name + ";" + tax;
297 currentNode = tree[currentNode].parent;
302 catch(exception& e) {
303 m->errorOut(e, "PhyloTree", "getFullTaxonomy");
307 /**************************************************************************************************/
309 void PhyloTree::print(ofstream& out){
311 out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
314 catch(exception& e) {
315 m->errorOut(e, "PhyloTree", "print");
320 /**************************************************************************************************/
322 void PhyloTree::print(int i, ofstream& out){
324 map<string,int>::iterator it;
325 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
326 out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
327 print(it->second, out);
330 catch(exception& e) {
331 m->errorOut(e, "PhyloTree", "print");
335 /**************************************************************************************************/
336 void PhyloTree::printTreeNodes(string treefilename) {
339 openOutputFile(treefilename, outTree);
342 outTree << tree.size() << endl;
343 for (int i = 0; i < tree.size(); i++) {
344 outTree << tree[i].name << '\t' << tree[i].level << '\t' << tree[i].parent << endl;
348 outTree << endl << uniqueTaxonomies.size() << endl;
349 map<int, int>::iterator it2;
350 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl; }
357 catch(exception& e) {
358 m->errorOut(e, "PhyloTree", "printTreeNodes");
362 /**************************************************************************************************/