git.donarmstrong.com Git - mothur.git/blobdiff - phylotree.cpp
fixed bug with creation of .tree.sum file
[mothur.git] / phylotree.cpp
index 53492cafe6f17fbc2b167c8393dda6c24c72ea55..e2b0805656064d0930de37933a1e599301573881 100644 (file)
@@ -125,7 +125,7 @@ PhyloTree::PhyloTree(string tfile){
                
                #ifdef USE_MPI
                        int pid, num, processors;
-                       vector<long> positions;
+                       vector<unsigned long int> positions;
                        
                        MPI_Status status; 
                        MPI_File inMPI;
@@ -178,14 +178,14 @@ PhyloTree::PhyloTree(string tfile){
                        //read in users taxonomy file and add sequences to tree
                        while(!in.eof()){
                                in >> name >> tax; gobble(in);
-                               
+                       
                                addSeqToTree(name, tax);
                        }
                        in.close();
                #endif
-               
+       
                assignHeirarchyIDs(0);
-               
+       
                //create file for summary if needed
                setUp(tfile);
        }
@@ -197,14 +197,22 @@ PhyloTree::PhyloTree(string tfile){
 
 /**************************************************************************************************/
 
-string PhyloTree::getNextTaxon(string& heirarchy){
+string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
        try {
                string currentLevel = "";
                if(heirarchy != ""){
                        int pos = heirarchy.find_first_of(';');
-                       currentLevel=heirarchy.substr(0,pos);
-                       if (pos != (heirarchy.length()-1)) {  heirarchy=heirarchy.substr(pos+1);  }
-                       else { heirarchy = ""; }
+                       
+                       if (pos == -1) { //you can't find another ;
+                               currentLevel = heirarchy;
+                               heirarchy = "";
+                               m->mothurOut(seqname + " is missing a ;, please check for other errors."); m->mothurOutEndLine();
+                       }else{
+                               currentLevel=heirarchy.substr(0,pos);
+                               if (pos != (heirarchy.length()-1)) {  heirarchy=heirarchy.substr(pos+1);  }
+                               else { heirarchy = ""; }
+                       }
+                       
                }
                return currentLevel;
        }
@@ -228,16 +236,16 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                
                tree[0].accessions.push_back(seqName);
                string taxon;// = getNextTaxon(seqTaxonomy);
-               
+       
                while(seqTaxonomy != ""){
                        
                        level++;
-                       
+               
                        if (m->control_pressed) { return 0; }
                        
                        //somehow the parent is getting one too many accnos
                        //use print to reassign the taxa id
-                       taxon = getNextTaxon(seqTaxonomy);
+                       taxon = getNextTaxon(seqTaxonomy, seqName);
                        
                        if (taxon == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) {  uniqueTaxonomies[currentNode] = currentNode; } break;  }
                        
@@ -254,21 +262,13 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                                tree[currentNode].children[taxon] = numNodes-1;
                                tree[numNodes-1].parent = currentNode;
                                
-                               //                      int numChildren = tree[currentNode].children.size();
-                               //                      string heirarchyID = tree[currentNode].heirarchyID;
-                               //                      tree[currentNode].accessions.push_back(seqName);
-                               
                                currentNode = tree[currentNode].children[taxon];
                                tree[currentNode].accessions.push_back(seqName);
                                name2Taxonomy[seqName] = currentNode;
-                               //                      tree[currentNode].level = level;
-                               //                      tree[currentNode].childNumber = numChildren;
-                               //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
                        }
        
                        if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
                }
-
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "addSeqToTree");
@@ -346,12 +346,7 @@ void PhyloTree::setUp(string tfile){
                        if (pid == 0) {  binUnclassified(taxFileNameTest);  }
                
                #else
-                       //create file needed for summary if it doesn't exist
-                       ifstream FileTest(taxFileNameTest.c_str());
-                       
-                       if (!FileTest) { 
-                               binUnclassified(taxFileNameTest); 
-                       }
+                       binUnclassified(taxFileNameTest); 
                #endif
        }
        catch(exception& e) {
@@ -370,11 +365,11 @@ void PhyloTree::binUnclassified(string file){
                map<string, int>::iterator childPointer;
                
                vector<TaxNode> copy = tree;
-                               
+                       
                //fill out tree
                fillOutTree(0, copy);
-               
-               //get leaf nodes that may need externsion
+       
+               //get leaf nodes that may need extension
                for (int i = 0; i < copy.size(); i++) {  
 
                        if (copy[i].children.size() == 0) {
@@ -383,7 +378,7 @@ void PhyloTree::binUnclassified(string file){
                }
                
                int copyNodes = copy.size();
-               
+       
                //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
                map<int, int>::iterator itLeaf;
                for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) {
@@ -394,7 +389,7 @@ void PhyloTree::binUnclassified(string file){
                        int currentNode = itLeaf->second;
                        
                        //this sequence is unclassified at some levels
-                       while(level <= maxLevel){
+                       while(level < maxLevel){
                
                                level++;
                        
@@ -432,6 +427,7 @@ void PhyloTree::binUnclassified(string file){
 /**************************************************************************************************/
 void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) {
        try {
+       
                map<string,int>::iterator it;
                
                it = copy[index].children.find("unclassified");
@@ -443,12 +439,12 @@ void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) {
                        copy[copy.size()-1].level = copy[index].level + 1;
                }
                
-               if (tree[index].level <= maxLevel) {
+               if (tree[index].level < maxLevel) {
                        for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children
                                fillOutTree(it->second, copy);
                        }
                }
-               
+
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "fillOutTree");
@@ -480,6 +476,8 @@ void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
        try {
                out << copy.size() << endl;
                
+               out << maxLevel << endl;
+               
                for (int i = 0; i < copy.size(); i++) {
        
                        out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t';