]> git.donarmstrong.com Git - mothur.git/blobdiff - phylotree.cpp
changes to chop.seqs
[mothur.git] / phylotree.cpp
index 855eaf968b796424700f957c2ac78fa32db6d9f6..1c2deba8a303edd692470dd1deb8c4e60062ac15 100644 (file)
@@ -32,6 +32,8 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
        try {
                m = MothurOut::getInstance();
                calcTotals = false;
+               numNodes = 0;
+               numSeqs = 0;
                
                #ifdef USE_MPI
                        MPI_File inMPI;
@@ -122,12 +124,13 @@ PhyloTree::PhyloTree(string tfile){
 
                
                #ifdef USE_MPI
-                       int pid, num;
+                       int pid, num, processors;
                        vector<long> positions;
                        
                        MPI_Status status; 
                        MPI_File inMPI;
                        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
 
                        char inFileName[1024];
                        strcpy(inFileName, tfile.c_str());
@@ -138,12 +141,14 @@ PhyloTree::PhyloTree(string tfile){
                                positions = setFilePosEachLine(tfile, num);
                                
                                //send file positions to all processes
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos 
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&num, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
+                                       MPI_Send(&positions[0], (num+1), MPI_LONG, i, 2001, MPI_COMM_WORLD);
+                               }
                        }else{
-                               MPI_Bcast(&num, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions.resize(num);
-                               MPI_Bcast(&positions[0], (num+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
+                               MPI_Recv(&num, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
+                               positions.resize(num+1);
+                               MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, 2001, MPI_COMM_WORLD, &status);
                        }
                
                        //read file 
@@ -164,6 +169,7 @@ PhyloTree::PhyloTree(string tfile){
                        }
                        
                        MPI_File_close(&inMPI);
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                
                #else
                        ifstream in;
@@ -212,6 +218,7 @@ string PhyloTree::getNextTaxon(string& heirarchy){
 
 int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
        try {
+                       
                numSeqs++;
                
                map<string, int>::iterator childPointer;
@@ -232,6 +239,8 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                        //use print to reassign the taxa id
                        taxon = getNextTaxon(seqTaxonomy);
                        
+                       if (taxon == "") {  m->mothurOut(seqName + " has an error in the taxonomy.  This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) {  uniqueTaxonomies[currentNode] = currentNode; } break;  }
+                       
                        childPointer = tree[currentNode].children.find(taxon);
                        
                        if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
@@ -256,7 +265,7 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                                //                      tree[currentNode].childNumber = numChildren;
                                //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
                        }
-               
+       
                        if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
                }
 
@@ -361,18 +370,31 @@ void PhyloTree::binUnclassified(string file){
                map<string, int>::iterator childPointer;
                
                vector<TaxNode> copy = tree;
-               int copyNodes = numNodes;
+                               
+               //fill out tree
+               fillOutTree(0, copy);
+               
+               //get leaf nodes that may need extension
+               for (int i = 0; i < copy.size(); i++) {  
+
+                       if (copy[i].children.size() == 0) {
+                               leafNodes[i] = i;
+                       }
+               }
+               
+               int copyNodes = copy.size();
                
                //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary
-               for (itBin = name2Taxonomy.begin(); itBin != name2Taxonomy.end(); itBin++) {
+               map<int, int>::iterator itLeaf;
+               for (itLeaf = leafNodes.begin(); itLeaf != leafNodes.end(); itLeaf++) {
                        
                        if (m->control_pressed) {  out.close(); break;  }
                        
-                       int level = copy[itBin->second].level;
-                       int currentNode = itBin->second;
+                       int level = copy[itLeaf->second].level;
+                       int currentNode = itLeaf->second;
                        
                        //this sequence is unclassified at some levels
-                       while(level != maxLevel){
+                       while(level <= maxLevel){
                
                                level++;
                        
@@ -383,7 +405,6 @@ void PhyloTree::binUnclassified(string file){
                                
                                if(childPointer != copy[currentNode].children.end()){   //if the node already exists, move on
                                        currentNode = childPointer->second; //currentNode becomes 'unclassified'
-                                       copy[currentNode].accessions.push_back(itBin->first);  //add this seq
                                }
                                else{                                                                                   //otherwise, create it
                                        copy.push_back(TaxNode(taxon));
@@ -393,7 +414,6 @@ void PhyloTree::binUnclassified(string file){
                                        copy[copyNodes-1].level = copy[currentNode].level + 1;
                                                                        
                                        currentNode = copy[currentNode].children[taxon];
-                                       copy[currentNode].accessions.push_back(itBin->first);
                                }
                        }
                }
@@ -410,6 +430,32 @@ void PhyloTree::binUnclassified(string file){
        }
 }
 /**************************************************************************************************/
+//makes sure the node at index in copy has an "unclassified" child, then repeats for each child of that node.
+//index - position of the node to process; used to index both copy and the member tree.
+//copy - node vector that receives any new "unclassified" nodes; new nodes are appended at the end.
+void PhyloTree::fillOutTree(int index, vector<TaxNode>& copy) {
+       try {
+               string label = "unclassified";
+               
+               //only add the placeholder child when this node does not already have one
+               if (copy[index].children.count(label) == 0) {
+                       copy.push_back(TaxNode(label));
+                       int added = copy.size()-1;
+                       
+                       copy[index].children[label] = added;
+                       copy[added].parent = index;
+                       copy[added].level = copy[index].level + 1;
+               }
+               
+               //nodes whose level in the member tree is past maxLevel are not descended into
+               if (tree[index].level > maxLevel) { return; }
+               
+               //children are read from the member tree; the recursion itself only appends to copy
+               map<string,int>::iterator child;
+               for (child = tree[index].children.begin(); child != tree[index].children.end(); child++) {
+                       fillOutTree(child->second, copy);
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloTree", "fillOutTree");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 string PhyloTree::getFullTaxonomy(string seqName) {
        try {
                string tax = "";
@@ -479,7 +525,6 @@ void PhyloTree::printTreeNodes(string treefilename) {
                        for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl;  }
                        outTree << endl;
                        
-                       
                        outTree.close();
                
                #ifdef USE_MPI
@@ -494,6 +539,93 @@ void PhyloTree::printTreeNodes(string treefilename) {
        }
 }
 /**************************************************************************************************/
+//returns a copy of the node stored at position i of tree.
+//if i is not below tree.size(), prints i and the tree size, reports the mismatch through m and exits with status 1.
+TaxNode PhyloTree::get(int i ){
+       try {
+               if (!(i < tree.size())) {
+                       cout << i << '\t' << tree.size() << endl ;
+                       m->mothurOut("Mismatch with taxonomy and template files. Cannot continue.");
+                       m->mothurOutEndLine();
+                       exit(1);
+               }
+               
+               return tree[i];
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloTree", "get");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//returns a copy of the tree node that name2Taxonomy maps seqName to.
+//if seqName is not a key of name2Taxonomy, reports the mismatch through m and exits with status 1.
+TaxNode PhyloTree::get(string seqName){
+       try {
+               map<string, int>::iterator found = name2Taxonomy.find(seqName);
+               
+               if (found == name2Taxonomy.end()) {
+                       m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue.");
+                       m->mothurOutEndLine();
+                       exit(1);
+               }
+               
+               //the iterator already points at the entry, so its stored index is used directly
+               return tree[found->second];
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloTree", "get");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//returns the name of the node stored at position i of tree.
+//if i is not below tree.size(), reports the mismatch through m and exits with status 1.
+string PhyloTree::getName(int i ){
+       try {
+               if (i < tree.size()) {  return tree[i].name;     }
+               else { m->mothurOut("Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1); }
+       }
+       catch(exception& e) {
+               //report this function's own name so the error points at the right place
+               m->errorOut(e, "PhyloTree", "getName");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//returns the index that name2Taxonomy stores for seqName.
+//if seqName is not a key of name2Taxonomy, reports the mismatch through m and exits with status 1.
+int PhyloTree::getIndex(string seqName){
+       try {
+               map<string, int>::iterator itFind = name2Taxonomy.find(seqName);
+       
+               //use the entry the iterator already points at rather than searching the map a second time
+               if (itFind != name2Taxonomy.end()) {  return itFind->second;  }
+               else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);}
+       }
+       catch(exception& e) {
+               //report this function's own name so the error points at the right place
+               m->errorOut(e, "PhyloTree", "getIndex");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//checks that templateFileNames and the keys of name2Taxonomy name the same sequences.
+//templateFileNames - sequence names to check; passed by value, so the caller's vector is not modified.
+//returns true when every entry of templateFileNames is matched by a distinct key of name2Taxonomy and no key is left unmatched.
+//otherwise reports each mismatch through m and returns false. a name listed twice in templateFileNames is reported the second time, because its key has already been used up.
+bool PhyloTree::ErrorCheck(vector<string> templateFileNames){
+       try {
+       
+               bool okay = true;
+               
+               //work on a copy so that matched names can be removed without touching name2Taxonomy
+               map<string, int> taxonomyFileNames = name2Taxonomy;
+               
+               for (size_t i = 0; i < templateFileNames.size(); i++) {
+                       map<string, int>::iterator itFind = taxonomyFileNames.find(templateFileNames[i]);
+                       
+                       //the iterator must be compared with the end of the map that was searched
+                       if (itFind != taxonomyFileNames.end()) { //found it so erase it
+                               taxonomyFileNames.erase(itFind);
+                       }else {
+                               m->mothurOut(templateFileNames[i] + " is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
+                               okay = false;
+                       }
+               }
+               
+               if (taxonomyFileNames.size() > 0) { //there are names in tax file that are not in template
+                       okay = false;
+                       
+                       map<string, int>::iterator itLeft;
+                       for (itLeft = taxonomyFileNames.begin(); itLeft != taxonomyFileNames.end(); itLeft++) {
+                               m->mothurOut(itLeft->first + " is in your taxonomy file and is not in your template file. Please correct."); m->mothurOutEndLine();
+                       }
+               }
+               
+               return okay;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloTree", "ErrorCheck");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+