map<string, int>::iterator childPointer;
vector<TaxNode> copy = tree;
- int copyNodes = numNodes;
+
+ //fill out tree
+ fillOutTree(0, copy);
+
+ //get leaf nodes that may need extension
+ for (int i = 0; i < copy.size(); i++) {
+
+ if (copy[i].children.size() == 0) {
+ leafNodes[i] = i;
+ }
+ }
+
+ int copyNodes = copy.size();
//go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
- for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
+ map<int, int>::iterator itLeaf;
+ for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) {
if (m->control_pressed) { out.close(); break; }
- int level = copy[itBin->second].level;
- int currentNode = itBin->second;
+ int level = copy[itLeaf->second].level;
+ int currentNode = itLeaf->second;
//this sequence is unclassified at some levels
- while(level != maxLevel){
+ while(level <= maxLevel){
level++;
if(childPointer != copy[currentNode].children.end()){ //if the node already exists, move on
currentNode = childPointer->second; //currentNode becomes 'unclassified'
- copy[currentNode].accessions.push_back(itBin->first); //add this seq
}
else{ //otherwise, create it
copy.push_back(TaxNode(taxon));
copy[copyNodes-1].level = copy[currentNode].level + 1;
currentNode = copy[currentNode].children[taxon];
- copy[currentNode].accessions.push_back(itBin->first);
}
}
}
}
}
/**************************************************************************************************/
+void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) { //makes sure copy[index] has an "unclassified" child, then recurses into the children listed in tree[index]
+ try { //index: position of the node to process (used to index both copy and the member tree); copy: node vector that is modified in place
+ map<string,int>::iterator it; //reused for the lookup below and for the child loop
+
+ it = copy[index].children.find("unclassified"); //does this node already have an "unclassified" child?
+ if (it == copy[index].children.end()) { //no unclassified at this level, so append one to copy
+ string taxon = "unclassified";
+ copy.push_back(TaxNode(taxon)); //the new node becomes the last element of copy
+ copy[index].children[taxon] = copy.size()-1; //register the new node's index under this node
+ copy[copy.size()-1].parent = index; //link the new node back to this node
+ copy[copy.size()-1].level = copy[index].level + 1; //new node sits one level below this node
+ }
+
+ if (tree[index].level <= maxLevel) { //NOTE(review): the "unclassified" child above is added before this level check, so it is added regardless of level - confirm intended
+ for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children; the loop reads the member tree, so entries added to copy above are not iterated (assumes copy does not alias tree - verify against caller)
+ fillOutTree(it->second, copy); //recurse using the child's index
+ }
+ }
+
+ }
+ catch(exception& e) { //report the error through m->errorOut, then terminate the process
+ m->errorOut(e, "PhyloTree", "fillOutTree");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
string PhyloTree::getFullTaxonomy(string seqName) {
try {
string tax = "";
private:
string getNextTaxon(string&);
- void print(ofstream&, vector<TaxNode>&);
+ void print(ofstream&, vector<TaxNode>&); //used to create static reference taxonomy file
+ void fillOutTree(int, vector<TaxNode>&); //used to create static reference taxonomy file
void binUnclassified(string);
vector<TaxNode> tree;
vector<int> totals; //holds the numSeqs at each genus level taxonomy
map<string, int> name2Taxonomy; //maps name to index in tree
map<int, int> uniqueTaxonomies; //map of unique taxonomies
+ map<int, int> leafNodes; //used to create static reference taxonomy file
//void print(int, ofstream&);
int numNodes;
int numSeqs;