From: westcott Date: Wed, 5 May 2010 14:10:34 +0000 (+0000) Subject: filled out reference taxonomy with "unclassified" so that if a cutoff is used the... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=62cdb3eb996664abdb8fee21bf4a329cd0694867 filled out reference taxonomy with "unclassified" so that if a cutoff is used the summary file still adds up --- diff --git a/phylotree.cpp b/phylotree.cpp index 855eaf9..399a4bd 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -361,18 +361,31 @@ void PhyloTree::binUnclassified(string file){ map<string, int>::iterator childPointer; vector<TaxNode> copy = tree; - int copyNodes = numNodes; + + //fill out tree + fillOutTree(0, copy); + + //get leaf nodes that may need extension + for (int i = 0; i < copy.size(); i++) { + + if (copy[i].children.size() == 0) { + leafNodes[i] = i; + } + } + + int copyNodes = copy.size(); //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary - for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) { + map<int, int>::iterator itLeaf; + for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) { if (m->control_pressed) { out.close(); break; } - int level = copy[itBin->second].level; - int currentNode = itBin->second; + int level = copy[itLeaf->second].level; + int currentNode = itLeaf->second; //this sequence is unclassified at some levels - while(level != maxLevel){ + while(level <= maxLevel){ level++; @@ -383,7 +396,6 @@ void PhyloTree::binUnclassified(string file){ if(childPointer != copy[currentNode].children.end()){ //if the node already exists, move on currentNode = childPointer->second; //currentNode becomes 'unclassified' - copy[currentNode].accessions.push_back(itBin->first); //add this seq } else{ //otherwise, create it copy.push_back(TaxNode(taxon)); @@ -393,7 +405,6 @@ void PhyloTree::binUnclassified(string file){ copy[copyNodes-1].level = 
copy[currentNode].level + 1; currentNode = copy[currentNode].children[taxon]; - copy[currentNode].accessions.push_back(itBin->first); } } } @@ -410,6 +421,32 @@ void PhyloTree::binUnclassified(string file){ } } /**************************************************************************************************/ +void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) { + try { + map<string, int>::iterator it; + + it = copy[index].children.find("unclassified"); + if (it == copy[index].children.end()) { //no unclassified at this level + string taxon = "unclassified"; + copy.push_back(TaxNode(taxon)); + copy[index].children[taxon] = copy.size()-1; + copy[copy.size()-1].parent = index; + copy[copy.size()-1].level = copy[index].level + 1; + } + + if (tree[index].level <= maxLevel) { + for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children + fillOutTree(it->second, copy); + } + } + + } + catch(exception& e) { + m->errorOut(e, "PhyloTree", "fillOutTree"); + exit(1); + } +} +/**************************************************************************************************/ string PhyloTree::getFullTaxonomy(string seqName) { try { string tax = ""; diff --git a/phylotree.h b/phylotree.h index a57cf25..2f37450 100644 --- a/phylotree.h +++ b/phylotree.h @@ -51,7 +51,8 @@ public: private: string getNextTaxon(string&); - void print(ofstream&, vector&); + void print(ofstream&, vector&); //used to create static reference taxonomy file + void fillOutTree(int, vector<TaxNode>&); //used to create static reference taxonomy file void binUnclassified(string); vector<TaxNode> tree; @@ -59,6 +60,7 @@ private: vector totals; //holds the numSeqs at each genus level taxonomy map<string, int> name2Taxonomy; //maps name to index in tree map uniqueTaxonomies; //map of unique taxonomies + map<int, int> leafNodes; //used to create static reference taxonomy file //void print(int, ofstream&); int numNodes; int numSeqs;