git.donarmstrong.com Git - mothur.git/blobdiff - phylotree.cpp
added count file to cluster.classic and cluster.split. modified splitting classes...
[mothur.git] / phylotree.cpp
index 2ea219350c1f8bf8099b7cfc40d28ddcdfc9d872..3dde18680c625eb816230a8d13774ccfc47032cf 100644 (file)
@@ -20,6 +20,7 @@ PhyloTree::PhyloTree(){
                tree[0].heirarchyID = "0";
                maxLevel = 0;
                calcTotals = true;
+               addSeqToTree("unknown", "unknown;");
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "PhyloTree");
@@ -55,24 +56,24 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        delete buffer;
                        
                        //read version
-                       getline(iss); gobble(iss);
+                       m->getline(iss); m->gobble(iss);
                        
-                       iss >> numNodes; gobble(iss);
+                       iss >> numNodes; m->gobble(iss);
                        
                        tree.resize(numNodes);
                        
                        for (int i = 0; i < tree.size(); i++) {
-                               iss >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(iss);
+                               iss >> tree[i].name >> tree[i].level >> tree[i].parent; m->gobble(iss);
                        }
                        
                        //read genus nodes
                        int numGenus = 0;
-                       iss >> numGenus; gobble(iss);
+                       iss >> numGenus; m->gobble(iss);
                        
                        int gnode, gsize;
                        totals.clear();
                        for (int i = 0; i < numGenus; i++) {
-                               iss >> gnode >> gsize; gobble(iss);
+                               iss >> gnode >> gsize; m->gobble(iss);
                                
                                uniqueTaxonomies[gnode] = gnode;
                                totals.push_back(gsize);
@@ -82,24 +83,24 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        
                #else
                        //read version
-                       string line = getline(in); gobble(in);
+                       string line = m->getline(in); m->gobble(in);
                        
-                       in >> numNodes; gobble(in);
+                       in >> numNodes; m->gobble(in);
                        
                        tree.resize(numNodes);
                        
                        for (int i = 0; i < tree.size(); i++) {
-                               in >> tree[i].name >> tree[i].level >> tree[i].parent; gobble(in);
+                               in >> tree[i].name >> tree[i].level >> tree[i].parent; m->gobble(in);
                        }
                        
                        //read genus nodes
                        int numGenus = 0;
-                       in >> numGenus; gobble(in);
+                       in >> numGenus; m->gobble(in);
                        
                        int gnode, gsize;
                        totals.clear();
                        for (int i = 0; i < numGenus; i++) {
-                               in >> gnode >> gsize; gobble(in);
+                               in >> gnode >> gsize; m->gobble(in);
                                
                                uniqueTaxonomies[gnode] = gnode;
                                totals.push_back(gsize);
@@ -127,11 +128,10 @@ PhyloTree::PhyloTree(string tfile){
                maxLevel = 0;
                calcTotals = true;
                string name, tax;
-
                
                #ifdef USE_MPI
                        int pid, num, processors;
-                       vector<unsigned long int> positions;
+                       vector<unsigned long long> positions;
                        
                        MPI_Status status; 
                        MPI_File inMPI;
@@ -144,7 +144,7 @@ PhyloTree::PhyloTree(string tfile){
                        MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
 
                        if (pid == 0) {
-                               positions = setFilePosEachLine(tfile, num);
+                               positions = m->setFilePosEachLine(tfile, num);
                                
                                //send file positions to all processes
                                for(int i = 1; i < processors; i++) { 
@@ -178,20 +178,26 @@ PhyloTree::PhyloTree(string tfile){
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                
                #else
-                       ifstream in;
-                       openInputFile(tfile, in);
-                       
-                       //read in users taxonomy file and add sequences to tree
-                       while(!in.eof()){
-                               in >> name >> tax; gobble(in);
-                       
-                               addSeqToTree(name, tax);
-                       }
-                       in.close();
+            map<string, string> temp;
+            m->readTax(tfile, temp);
+        
+            for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+                addSeqToTree(itTemp->first, itTemp->second);
+                temp.erase(itTemp++);
+            }
                #endif
        
                assignHeirarchyIDs(0);
-       
+        
+        
+        string unknownTax = "unknown;";
+        //added last taxon until you get desired level
+               for (int i = 1; i < maxLevel; i++) {
+                       unknownTax += "unclassfied;";
+               }
+        
+        addSeqToTree("unknown", unknownTax);
+        
                //create file for summary if needed
                setUp(tfile);
        }
@@ -232,7 +238,6 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){
 
 int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
        try {
-                       
                numSeqs++;
                
                map<string, int>::iterator childPointer;
@@ -241,6 +246,8 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
                int level = 1;
                
                tree[0].accessions.push_back(seqName);
+               m->removeConfidences(seqTaxonomy);
+               
                string taxon;// = getNextTaxon(seqTaxonomy);
        
                while(seqTaxonomy != ""){
@@ -275,6 +282,8 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
        
                        if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
                }
+               
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloTree", "addSeqToTree");
@@ -365,13 +374,13 @@ void PhyloTree::binUnclassified(string file){
        try {
        
                ofstream out;
-               openOutputFile(file, out);
+               m->openOutputFile(file, out);
                
                map<string, int>::iterator itBin;
                map<string, int>::iterator childPointer;
                
                vector<TaxNode> copy = tree;
-                       
+               
                //fill out tree
                fillOutTree(0, copy);
        
@@ -480,16 +489,16 @@ string PhyloTree::getFullTaxonomy(string seqName) {
 
 void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
        try {
-       
+               
                //output mothur version
                out << "#" << m->getVersion() << endl;
                
                out << copy.size() << endl;
                
                out << maxLevel << endl;
-               
+                               
                for (int i = 0; i < copy.size(); i++) {
-       
+                               
                        out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t';
                        
                        map<string,int>::iterator it;
@@ -519,7 +528,7 @@ void PhyloTree::printTreeNodes(string treefilename) {
                #endif
 
                        ofstream outTree;
-                       openOutputFile(treefilename, outTree);
+                       m->openOutputFile(treefilename, outTree);
                        
                        //output mothur version
                        outTree << "#" << m->getVersion() << endl;
@@ -602,23 +611,27 @@ bool PhyloTree::ErrorCheck(vector<string> templateFileNames){
        try {
        
                bool okay = true;
+               templateFileNames.push_back("unknown");
                
                map<string, int>::iterator itFind;
                map<string, int> taxonomyFileNames = name2Taxonomy;
                
+        if (m->debug) { m->mothurOut("[DEBUG]: in error check. Numseqs in template = " + toString(templateFileNames.size()) + ". Numseqs in taxonomy = " + toString(taxonomyFileNames.size()) + ".\n"); }
+        
                for (int i = 0; i < templateFileNames.size(); i++) {
                        itFind = taxonomyFileNames.find(templateFileNames[i]);
                        
                        if (itFind != taxonomyFileNames.end()) { //found it so erase it
                                taxonomyFileNames.erase(itFind);
                        }else {
-                               m->mothurOut(templateFileNames[i] + " is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
+                               m->mothurOut("'" +templateFileNames[i] + "' is in your template file and is not in your taxonomy file. Please correct."); m->mothurOutEndLine();
                                okay = false;
                        }
                        
-                       templateFileNames.erase(templateFileNames.begin()+i);
-                       i--;
+                       //templateFileNames.erase(templateFileNames.begin()+i);
+                       //i--;
                }
+               templateFileNames.clear();
                
                if (taxonomyFileNames.size() > 0) { //there are names in tax file that are not in template
                        okay = false;