git.donarmstrong.com Git - mothur.git/commitdiff
filled out reference taxonomy with "unclassified" so that if a cutoff is used the...
author westcott <westcott>
Wed, 5 May 2010 14:10:34 +0000 (14:10 +0000)
committer westcott <westcott>
Wed, 5 May 2010 14:10:34 +0000 (14:10 +0000)
phylotree.cpp
phylotree.h

index 855eaf968b796424700f957c2ac78fa32db6d9f6..399a4bd39007f4c94802cdcda45099699ed5010d 100644 (file)
@@ -361,18 +361,31 @@ void PhyloTree::binUnclassified(string file){
                map<string, int>::iterator childPointer;
                
                vector<TaxNode> copy = tree;
-               int copyNodes = numNodes;
+                               
+               //fill out tree
+               fillOutTree(0, copy);
+               
+               //get leaf nodes that may need extension
+               for (int i = 0; i < copy.size(); i++) {  
+
+                       if (copy[i].children.size() == 0) {
+                               leafNodes[i] = i;
+                       }
+               }
+               
+               int copyNodes = copy.size();
                
                //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
-               for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
+               map<int, int>::iterator itLeaf;
+               for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) {
                        
                        if (m->control_pressed) {  out.close(); break;  }
                        
-                       int level = copy[itBin->second].level;
-                       int currentNode = itBin->second;
+                       int level = copy[itLeaf->second].level;
+                       int currentNode = itLeaf->second;
                        
                        //this sequence is unclassified at some levels
-                       while(level != maxLevel){
+                       while(level <= maxLevel){
                
                                level++;
                        
@@ -383,7 +396,6 @@ void PhyloTree::binUnclassified(string file){
                                
                                if(childPointer != copy[currentNode].children.end()){   //if the node already exists, move on
                                        currentNode = childPointer->second; //currentNode becomes 'unclassified'
-                                       copy[currentNode].accessions.push_back(itBin->first);  //add this seq
                                }
                                else{                                                                                   //otherwise, create it
                                        copy.push_back(TaxNode(taxon));
@@ -393,7 +405,6 @@ void PhyloTree::binUnclassified(string file){
                                        copy[copyNodes-1].level = copy[currentNode].level + 1;
                                                                        
                                        currentNode = copy[currentNode].children[taxon];
-                                       copy[currentNode].accessions.push_back(itBin->first);
                                }
                        }
                }
@@ -410,6 +421,32 @@ void PhyloTree::binUnclassified(string file){
        }
 }
 /**************************************************************************************************/
+void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) { //recursively gives each visited node of copy an "unclassified" child when it has none; index is the node to process
+       try {
+               map<string,int>::iterator it; //reused for the lookup below and for walking tree[index].children
+               //look for an existing child keyed "unclassified" under this node in copy
+               it = copy[index].children.find("unclassified");
+               if (it == copy[index].children.end()) { //no unclassified child at this level, so append one to copy
+                       string taxon = "unclassified";
+                       copy.push_back(TaxNode(taxon)); //the new node becomes the last element of copy
+                       copy[index].children[taxon] = copy.size()-1; //register it under index by its position in copy
+                       copy[copy.size()-1].parent = index; //link the new node back to the node it hangs from
+                       copy[copy.size()-1].level = copy[index].level + 1; //one level deeper than its parent
+               }
+               //recursion follows the member tree's child map, not copy's, so nodes appended to copy above are not themselves visited
+               if (tree[index].level <= maxLevel) { //maxLevel is not set in this function - presumably the deepest level in tree; TODO confirm
+                       for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //visit every child index recorded in tree
+                               fillOutTree(it->second, copy);
+                       }
+               }
+               //NOTE(review): assumes copy is a distinct vector from the member tree (the visible caller passes a local copy); growing tree itself here would disturb the loop above - confirm
+       }
+       catch(exception& e) { //report the failure through m, then terminate the process
+               m->errorOut(e, "PhyloTree", "fillOutTree");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 string PhyloTree::getFullTaxonomy(string seqName) {
        try {
                string tax = "";
index a57cf256ecf0e0eded1182681f7ab91601e0a1a5..2f3745074e170210bfc0b8d6b000361b4a410687 100644 (file)
@@ -51,7 +51,8 @@ public:
        
 private:
        string getNextTaxon(string&);
-       void print(ofstream&, vector<TaxNode>&);
+       void print(ofstream&, vector<TaxNode>&); //used to create static reference taxonomy file
+       void fillOutTree(int, vector<TaxNode>&); //used to create static reference taxonomy file
        void binUnclassified(string);
        
        vector<TaxNode> tree;
@@ -59,6 +60,7 @@ private:
        vector<int> totals; //holds the numSeqs at each genus level taxonomy
        map<string, int> name2Taxonomy;  //maps name to index in tree
        map<int, int> uniqueTaxonomies;  //map of unique taxonomies
+       map<int, int> leafNodes; //used to create static reference taxonomy file
        //void print(int, ofstream&);
        int numNodes;
        int numSeqs;