X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylotree.cpp;h=1001542ea36548742a2c92b809875fd4e7ec4f89;hb=74844a60d80c6dd06e3fb02ee9b928424f9019b0;hp=2a441922cd28ccf1850a6fedbd954f16ebc3ecf3;hpb=7a2154809d332281cf4006943a9bd94b8208c837;p=mothur.git diff --git a/phylotree.cpp b/phylotree.cpp index 2a44192..1001542 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -13,13 +13,15 @@ PhyloTree::PhyloTree(){ try { + m = MothurOut::getInstance(); numNodes = 1; numSeqs = 0; tree.push_back(TaxNode("Root")); tree[0].heirarchyID = "0"; + maxLevel = 0; } catch(exception& e) { - errorOut(e, "PhyloTree", "PhyloTree"); + m->errorOut(e, "PhyloTree", "PhyloTree"); exit(1); } } @@ -31,6 +33,7 @@ PhyloTree::PhyloTree(string tfile){ numSeqs = 0; tree.push_back(TaxNode("Root")); tree[0].heirarchyID = "0"; + maxLevel = 0; ifstream in; openInputFile(tfile, in); @@ -47,7 +50,7 @@ PhyloTree::PhyloTree(string tfile){ assignHeirarchyIDs(0); } catch(exception& e) { - errorOut(e, "PhyloTree", "PhyloTree"); + m->errorOut(e, "PhyloTree", "PhyloTree"); exit(1); } } @@ -64,7 +67,7 @@ string PhyloTree::getNextTaxon(string& heirarchy){ return currentLevel; } catch(exception& e) { - errorOut(e, "PhyloTree", "getNextTaxon"); + m->errorOut(e, "PhyloTree", "getNextTaxon"); exit(1); } } @@ -121,7 +124,7 @@ void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){ } catch(exception& e) { - errorOut(e, "PhyloTree", "addSeqToTree"); + m->errorOut(e, "PhyloTree", "addSeqToTree"); exit(1); } } @@ -136,7 +139,7 @@ vector PhyloTree::getGenusNodes() { return genusIndex; } catch(exception& e) { - errorOut(e, "PhyloTree", "getGenusNodes"); + m->errorOut(e, "PhyloTree", "getGenusNodes"); exit(1); } } @@ -151,15 +154,91 @@ void PhyloTree::assignHeirarchyIDs(int index){ tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter); counter++; tree[it->second].level = tree[index].level + 1; + + //save maxLevel for binning the unclassified seqs + if (tree[it->second].level > maxLevel) { maxLevel = tree[it->second].level; } + assignHeirarchyIDs(it->second); } } catch(exception& e) { - errorOut(e, "PhyloTree", "assignHeirarchyIDs"); + m->errorOut(e, "PhyloTree", "assignHeirarchyIDs"); + exit(1); + } +} +/**************************************************************************************************/ +void PhyloTree::binUnclassified(){ + try { + map::iterator itBin; + map::iterator childPointer; + + //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary + for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) { + + int level = tree[itBin->second].level; + int currentNode = itBin->second; + + //this sequence is unclassified at some levels + while(level != maxLevel){ + + level++; + + string taxon = "unclassified"; + + //does the parent have a child names 'unclassified'? + childPointer = tree[currentNode].children.find(taxon); + + if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on + currentNode = childPointer->second; //currentNode becomes 'unclassified' + tree[currentNode].accessions.push_back(itBin->first); //add this seq + name2Taxonomy[itBin->first] = currentNode; + } + else{ //otherwise, create it + tree.push_back(TaxNode(taxon)); + numNodes++; + tree[currentNode].children[taxon] = numNodes-1; + tree[numNodes-1].parent = currentNode; + + currentNode = tree[currentNode].children[taxon]; + tree[currentNode].accessions.push_back(itBin->first); + name2Taxonomy[itBin->first] = currentNode; + } + + if (level == maxLevel) { uniqueTaxonomies[currentNode] = currentNode; } + } + } + + //clear HeirarchyIDs and reset them + for (int i = 1; i < tree.size(); i++) { + tree[i].heirarchyID = ""; + } + assignHeirarchyIDs(0); + + } + catch(exception& e) { + m->errorOut(e, "PhyloTree", "binUnclassified"); + exit(1); + } +} +/**************************************************************************************************/ +string PhyloTree::getFullTaxonomy(string seqName) { + try { + string tax = ""; + + int currentNode = name2Taxonomy[seqName]; + + while (tree[currentNode].parent != -1) { + tax = tree[currentNode].name + ";" + tax; + currentNode = tree[currentNode].parent; + } + + return tax; + } + catch(exception& e) { + m->errorOut(e, "PhyloTree", "getFullTaxonomy"); exit(1); } } - /**************************************************************************************************/ void PhyloTree::print(ofstream& out){ @@ -168,7 +247,7 @@ void PhyloTree::print(ofstream& out){ print(0, out); } catch(exception& e) { - errorOut(e, "PhyloTree", "print"); + m->errorOut(e, "PhyloTree", "print"); exit(1); } } @@ -182,9 +261,11 @@ void PhyloTree::print(int i, ofstream& out){ out <second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl; print(it->second, out); } + + } catch(exception& e) { - errorOut(e, "PhyloTree", "print"); + m->errorOut(e, "PhyloTree", "print"); exit(1); } }