git.donarmstrong.com Git - mothur.git/blobdiff - phylotree.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / phylotree.cpp
index e430fb9e85622dcd1d425534857fe37dff600da9..b9bab4ec2934a4121305f87111cf2d238f5ec4d5 100644 (file)
@@ -20,6 +20,7 @@ PhyloTree::PhyloTree(){
                tree[0].heirarchyID = "0";
                maxLevel = 0;
                calcTotals = true;
+               addSeqToTree("unknown", "unknown;");
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "PhyloTree");
@@ -74,7 +75,7 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        for (int i = 0; i < numGenus; i++) {
                                iss >> gnode >> gsize; m->gobble(iss);
                                
-                               uniqueTaxonomies[gnode] = gnode;
+                               uniqueTaxonomies.insert(gnode);
                                totals.push_back(gsize);
                        }
                        
@@ -101,7 +102,7 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        for (int i = 0; i < numGenus; i++) {
                                in >> gnode >> gsize; m->gobble(in);
                                
-                               uniqueTaxonomies[gnode] = gnode;
+                               uniqueTaxonomies.insert(gnode);
                                totals.push_back(gsize);
                        }
                        
@@ -127,7 +128,6 @@ PhyloTree::PhyloTree(string tfile){
                maxLevel = 0;
                calcTotals = true;
                string name, tax;
-
                
                #ifdef USE_MPI
                        int pid, num, processors;
@@ -178,20 +178,26 @@ PhyloTree::PhyloTree(string tfile){
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                
                #else
-                       ifstream in;
-                       m->openInputFile(tfile, in);
-                       
-                       //read in users taxonomy file and add sequences to tree
-                       while(!in.eof()){
-                               in >> name >> tax; m->gobble(in);
-                       
-                               addSeqToTree(name, tax);
-                       }
-                       in.close();
+            map<string, string> temp;
+            m->readTax(tfile, temp);
+        
+            for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+                addSeqToTree(itTemp->first, itTemp->second);
+                temp.erase(itTemp++);
+            }
                #endif
        
                assignHeirarchyIDs(0);
-       
+        
+        
+        string unknownTax = "unknown;";
+        //added last taxon until you get desired level
+               for (int i = 1; i < maxLevel; i++) {
+                       unknownTax += "unclassfied;";
+               }
+        
+        addSeqToTree("unknown", unknownTax);
+        
                //create file for summary if needed
                setUp(tfile);
        }
@@ -232,7 +238,6 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
 
 int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
        try {
-                       
                numSeqs++;
                
                map<string, int>::iterator childPointer;
@@ -254,8 +259,10 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                        //somehow the parent is getting one too many accnos
                        //use print to reassign the taxa id
                        taxon = getNextTaxon(seqTaxonomy, seqName);
+            
+            if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); }
                        
-                       if (taxon == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) {  uniqueTaxonomies[currentNode] = currentNode; } break;  }
+                       if (taxon == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) {  uniqueTaxonomies.insert(currentNode); } break;  }
                        
                        childPointer = tree[currentNode].children.find(taxon);
                        
@@ -275,7 +282,7 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                                name2Taxonomy[seqName] = currentNode;
                        }
        
-                       if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
+                       if (seqTaxonomy == "") {   uniqueTaxonomies.insert(currentNode);        }
                }
                
                return 0;
@@ -290,9 +297,16 @@ vector<int> PhyloTree::getGenusNodes()     {
        try {
                genusIndex.clear();
                //generate genusIndexes
-               map<int, int>::iterator it2;
-               for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
-               
+               set<int>::iterator it2;
+        map<int, int> temp;
+               for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(*it2);   temp[*it2] = genusIndex.size()-1; }
+               
+        for (map<string, int>::iterator itName = name2Taxonomy.begin(); itName != name2Taxonomy.end(); itName++) {
+            map<int, int>::iterator itTemp = temp.find(itName->second);
+            if (itTemp != temp.end()) { name2GenusNodeIndex[itName->first] = itTemp->second; }
+            else {  m->mothurOut("[ERROR]: trouble making name2GenusNodeIndex, aborting.\n"); m->control_pressed = true; }
+        }
+        
                return genusIndex;
        }
        catch(exception& e) {
@@ -329,6 +343,9 @@ void PhyloTree::assignHeirarchyIDs(int index){
                int counter = 1;
                
                for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
+            
+            if (m->debug) { m->mothurOut(toString(index) +'\t' + tree[it->second].name +'\n'); }
+                
                        tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
                        counter++;
                        tree[it->second].level = tree[index].level + 1;
@@ -375,7 +392,7 @@ void PhyloTree::binUnclassified(string file){
                map<string, int>::iterator childPointer;
                
                vector<TaxNode> copy = tree;
-                       
+               
                //fill out tree
                fillOutTree(0, copy);
        
@@ -387,6 +404,8 @@ void PhyloTree::binUnclassified(string file){
                        }
                }
                
+        if (m->debug) { m->mothurOut("maxLevel = " + toString(maxLevel) +'\n'); }
+        
                int copyNodes = copy.size();
        
                //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
@@ -397,11 +416,14 @@ void PhyloTree::binUnclassified(string file){
                        
                        int level = copy[itLeaf->second].level;
                        int currentNode = itLeaf->second;
+            
+            if (m->debug) { m->mothurOut(copy[currentNode].name +'\n'); }
                        
                        //this sequence is unclassified at some levels
                        while(level < maxLevel){
                
                                level++;
+                if (m->debug) { m->mothurOut("level = " + toString(level) +'\n'); }
                        
                                string taxon = "unclassified";  
                                
@@ -484,16 +506,16 @@ string PhyloTree::getFullTaxonomy(string seqName) {
 
 void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
        try {
-       
+               
                //output mothur version
                out << "#" << m->getVersion() << endl;
                
                out << copy.size() << endl;
                
                out << maxLevel << endl;
-               
+                               
                for (int i = 0; i < copy.size(); i++) {
-       
+                               
                        out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t';
                        
                        map<string,int>::iterator it;
@@ -536,8 +558,8 @@ void PhyloTree::printTreeNodes(string treefilename) {
                        
                        //print genus nodes
                        outTree << endl << uniqueTaxonomies.size() << endl;
-                       map<int, int>::iterator it2;
-                       for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl;  }
+                       set<int>::iterator it2;
+                       for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  outTree << *it2 << '\t' << tree[*it2].accessions.size() << endl;      }
                        outTree << endl;
                        
                        outTree.close();
@@ -589,12 +611,12 @@ string PhyloTree::getName(int i ){
        }
 }
 /**************************************************************************************************/
-int PhyloTree::getIndex(string seqName){
+int PhyloTree::getGenusIndex(string seqName){
        try {
-               map<string, int>::iterator itFind = name2Taxonomy.find(seqName);
+               map<string, int>::iterator itFind = name2GenusNodeIndex.find(seqName);
        
-               if (itFind != name2Taxonomy.end()) {  return name2Taxonomy[seqName];  }
-               else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
+               if (itFind != name2GenusNodeIndex.end()) {  return itFind->second;  }
+               else { m->mothurOut("Cannot find " + seqName + ". Could be a mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "get");
@@ -606,17 +628,20 @@ bool PhyloTree::ErrorCheck(vector<string> templateFileNames){
        try {
        
                bool okay = true;
+               templateFileNames.push_back("unknown");
                
                map<string, int>::iterator itFind;
                map<string, int> taxonomyFileNames = name2Taxonomy;
                
+        if (m->debug) { m->mothurOut("[DEBUG]: in error check. Numseqs in template = " + toString(templateFileNames.size()) + ". Numseqs in taxonomy = " + toString(taxonomyFileNames.size()) + ".\n"); }
+        
                for (int i = 0; i < templateFileNames.size(); i++) {
                        itFind = taxonomyFileNames.find(templateFileNames[i]);
                        
                        if (itFind != taxonomyFileNames.end()) { //found it so erase it
                                taxonomyFileNames.erase(itFind);
                        }else {
-                               m->mothurOut(templateFileNames[i] + " is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
+                               m->mothurOut("'" +templateFileNames[i] + "' is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
                                okay = false;
                        }