git.donarmstrong.com Git - mothur.git/blobdiff - phylotree.cpp
fixes while testing 1.33.0
[mothur.git] / phylotree.cpp
index 4ed3d8c1c02e1c5a1ee93b9dd6143309f5606f5c..2e3136e1f42e1a93d15fcab921b2e612f55f3997 100644 (file)
@@ -75,7 +75,7 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        for (int i = 0; i < numGenus; i++) {
                                iss >> gnode >> gsize; m->gobble(iss);
                                
-                               uniqueTaxonomies[gnode] = gnode;
+                               uniqueTaxonomies.insert(gnode);
                                totals.push_back(gsize);
                        }
                        
@@ -102,7 +102,7 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        for (int i = 0; i < numGenus; i++) {
                                in >> gnode >> gsize; m->gobble(in);
                                
-                               uniqueTaxonomies[gnode] = gnode;
+                               uniqueTaxonomies.insert(gnode);
                                totals.push_back(gsize);
                        }
                        
@@ -178,16 +178,13 @@ PhyloTree::PhyloTree(string tfile){
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                
                #else
-                       ifstream in;
-                       m->openInputFile(tfile, in);
-                       
-                       //read in users taxonomy file and add sequences to tree
-                       while(!in.eof()){
-                               in >> name >> tax; m->gobble(in);
-                       
-                               addSeqToTree(name, tax);
-                       }
-                       in.close();
+            map<string, string> temp;
+            m->readTax(tfile, temp);
+        
+            for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+                addSeqToTree(itTemp->first, itTemp->second);
+                temp.erase(itTemp++);
+            }
                #endif
        
                assignHeirarchyIDs(0);
@@ -236,6 +233,48 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
                exit(1);
        }
 }
+/**************************************************************************************************/
+
+vector<string> PhyloTree::getSeqs(string seqTaxonomy){ //walk the tree along seqTaxonomy and return the accessions stored at the node it ends on; names is returned empty on abort or error
+       try {
+        string taxCopy = seqTaxonomy; //unmodified copy used in the messages below; seqTaxonomy itself is handed to removeConfidences and getNextTaxon
+        vector<string> names;
+        map<string, int>::iterator childPointer;
+               //the walk starts at tree[0] (presumably the root node - confirm)
+               int currentNode = 0;
+        //NOTE(review): removeConfidences is assumed to strip confidence scores from seqTaxonomy in place - confirm against its definition
+        m->removeConfidences(seqTaxonomy);
+        
+        string taxon;
+        while(seqTaxonomy != ""){
+                       //if control_pressed is set, return names as it stands (nothing has been assigned to it yet)
+                       if (m->control_pressed) { return names; }
+                       //getNextTaxon takes seqTaxonomy by reference; it is expected to consume the leading taxon, otherwise this loop would not end
+                       taxon = getNextTaxon(seqTaxonomy, "");
+            
+            if (m->debug) { m->mothurOut(taxon +'\n'); }
+                       //an empty taxon is reported and ends the walk; names stays empty and control_pressed is not touched
+                       if (taxon == "") {  m->mothurOut(taxCopy + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); break;  }
+                       //look the taxon up among the children of the node we are currently on
+                       childPointer = tree[currentNode].children.find(taxon);
+                       //descend when found; an unknown taxon sets control_pressed and returns the still-empty names
+                       if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
+                               currentNode = childPointer->second;
+                       }
+                       else{                                                                                   //otherwise, error this taxonomy is not in tree
+                               m->mothurOut("[ERROR]: " + taxCopy + " is not in taxonomy tree, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names;
+                       }
+                       //seqTaxonomy is empty once the last taxon has been read, so currentNode is the requested node: copy its accessions
+                       if (seqTaxonomy == "") {   names = tree[currentNode].accessions;        }
+               }
+        
+        return names;
+    }
+       catch(exception& e) { //report the exception through errorOut, then terminate the program
+               m->errorOut(e, "PhyloTree", "getSeqs");
+               exit(1);
+       }
+}
 
 /**************************************************************************************************/
 
@@ -262,8 +301,10 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                        //somehow the parent is getting one too many accnos
                        //use print to reassign the taxa id
                        taxon = getNextTaxon(seqTaxonomy, seqName);
+            
+            if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); }
                        
-                       if (taxon == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) {  uniqueTaxonomies[currentNode] = currentNode; } break;  }
+                       if (taxon == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) {  uniqueTaxonomies.insert(currentNode); } break;  }
                        
                        childPointer = tree[currentNode].children.find(taxon);
                        
@@ -283,7 +324,7 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                                name2Taxonomy[seqName] = currentNode;
                        }
        
-                       if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
+                       if (seqTaxonomy == "") {   uniqueTaxonomies.insert(currentNode);        }
                }
                
                return 0;
@@ -298,9 +339,16 @@ vector<int> PhyloTree::getGenusNodes()     {
        try {
                genusIndex.clear();
                //generate genusIndexes
-               map<int, int>::iterator it2;
-               for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
-               
+               set<int>::iterator it2;
+        map<int, int> temp;
+               for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(*it2);   temp[*it2] = genusIndex.size()-1; }
+               
+        for (map<string, int>::iterator itName = name2Taxonomy.begin(); itName != name2Taxonomy.end(); itName++) {
+            map<int, int>::iterator itTemp = temp.find(itName->second);
+            if (itTemp != temp.end()) { name2GenusNodeIndex[itName->first] = itTemp->second; }
+            else {  m->mothurOut("[ERROR]: trouble making name2GenusNodeIndex, aborting.\n"); m->control_pressed = true; }
+        }
+        
                return genusIndex;
        }
        catch(exception& e) {
@@ -337,6 +385,9 @@ void PhyloTree::assignHeirarchyIDs(int index){
                int counter = 1;
                
                for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+            
+            if (m->debug) { m->mothurOut(toString(index) +'\t' + tree[it->second].name +'\n'); }
+                
                        tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
                        counter++;
                        tree[it->second].level = tree[index].level + 1;
@@ -395,6 +446,8 @@ void PhyloTree::binUnclassified(string file){
                        }
                }
                
+        if (m->debug) { m->mothurOut("maxLevel = " + toString(maxLevel) +'\n'); }
+        
                int copyNodes = copy.size();
        
                //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
@@ -405,11 +458,14 @@ void PhyloTree::binUnclassified(string file){
                        
                        int level = copy[itLeaf->second].level;
                        int currentNode = itLeaf->second;
+            
+            if (m->debug) { m->mothurOut(copy[currentNode].name +'\n'); }
                        
                        //this sequence is unclassified at some levels
                        while(level < maxLevel){
                
                                level++;
+                if (m->debug) { m->mothurOut("level = " + toString(level) +'\n'); }
                        
                                string taxon = "unclassified";  
                                
@@ -544,8 +600,8 @@ void PhyloTree::printTreeNodes(string treefilename) {
                        
                        //print genus nodes
                        outTree << endl << uniqueTaxonomies.size() << endl;
-                       map<int, int>::iterator it2;
-                       for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl;  }
+                       set<int>::iterator it2;
+                       for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  outTree << *it2 << '\t' << tree[*it2].accessions.size() << endl;      }
                        outTree << endl;
                        
                        outTree.close();
@@ -597,12 +653,12 @@ string PhyloTree::getName(int i ){
        }
 }
 /**************************************************************************************************/
-int PhyloTree::getIndex(string seqName){
+int PhyloTree::getGenusIndex(string seqName){
        try {
-               map<string, int>::iterator itFind = name2Taxonomy.find(seqName);
+               map<string, int>::iterator itFind = name2GenusNodeIndex.find(seqName);
        
-               if (itFind != name2Taxonomy.end()) {  return name2Taxonomy[seqName];  }
-               else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
+               if (itFind != name2GenusNodeIndex.end()) {  return itFind->second;  }
+               else { m->mothurOut("Cannot find " + seqName + ". Could be a mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "get");