]> git.donarmstrong.com Git - mothur.git/blobdiff - phylosummary.cpp
added code to format fasta files for uchime. started work on sff.multiple command
[mothur.git] / phylosummary.cpp
index a9a170b86c77dff145079df269fd6b36813848ee..5f7bbc3c73a2161417a1841f567719a4bd1f8c4a 100644 (file)
@@ -25,7 +25,7 @@ PhyloSummary::PhyloSummary(string refTfile, string groupFile){
                }
                                
                //check for necessary files
-               string taxFileNameTest = refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum";
+               string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"));
                ifstream FileTest(taxFileNameTest.c_str());
                
                if (!FileTest) { 
@@ -71,22 +71,18 @@ PhyloSummary::PhyloSummary(string groupFile){
 }
 /**************************************************************************************************/
 
-void PhyloSummary::summarize(string userTfile){
+int PhyloSummary::summarize(string userTfile){
        try {
-               
-               ifstream in;
-               m->openInputFile(userTfile, in);
-               
-               //read in users taxonomy file and add sequences to tree
-               string name, tax;
-               while(!in.eof()){
-                       in >> name >> tax; m->gobble(in);
-                       
-                       addSeqToTree(name, tax);
-                       
-                       if (m->control_pressed) { break;  }
-               }
-               in.close();
+               map<string, string> temp;
+        m->readTax(userTfile, temp);
+        
+        for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
+            addSeqToTree(itTemp->first, itTemp->second);
+                       numSeqs++;
+            temp.erase(itTemp++);
+        }
+        
+        return numSeqs;
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloSummary", "summarize");
@@ -117,6 +113,7 @@ string PhyloSummary::getNextTaxon(string& heirarchy){
 
 int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
        try {
+                               
                numSeqs++;
                
                map<string, int>::iterator childPointer;
@@ -126,6 +123,9 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                
                int level = 0;
                
+               //are there confidence scores, if so remove them
+               if (seqTaxonomy.find_first_of('(') != -1) {  m->removeConfidences(seqTaxonomy); }
+               
                while (seqTaxonomy != "") {
                        
                        if (m->control_pressed) { return 0; }
@@ -141,7 +141,7 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                        //find out the sequences group
                                        string group = groupmap->getGroup(seqName);
                                        
-                                       if (group == "not found") {  m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                                       if (group == "not found") {  m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
                                        
                                        //do you have a count for this group?
                                        map<string, int>::iterator itGroup = tree[childPointer->second].groupCount.find(group);
@@ -168,14 +168,15 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                        
                                        //initialize groupcounts
                                        if (groupmap != NULL) {
-                                               for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
-                                                       tree[index].groupCount[groupmap->namesOfGroups[j]] = 0;
+                                               vector<string> mGroups = groupmap->getNamesOfGroups();
+                                               for (int j = 0; j < mGroups.size(); j++) {
+                                                       tree[index].groupCount[mGroups[j]] = 0;
                                                }
                                                
                                                //find out the sequences group
                                                string group = groupmap->getGroup(seqName);
                                                
-                                               if (group == "not found") {  m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                                               if (group == "not found") {  m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
                                                
                                                //do you have a count for this group?
                                                map<string, int>::iterator itGroup = tree[index].groupCount.find(group);
@@ -200,7 +201,7 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                for (int k = level; k < maxLevel; k++) {  seqTaxonomy += "unclassified;";   }
                        }
                }
-
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloSummary", "addSeqToTree");
@@ -220,6 +221,9 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                
                int level = 0;
                
+               //are there confidence scores, if so remove them
+               if (seqTaxonomy.find_first_of('(') != -1) {  m->removeConfidences(seqTaxonomy); }
+               
                while (seqTaxonomy != "") {
                        
                        if (m->control_pressed) { return 0; }
@@ -234,15 +238,16 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                                if (groupmap != NULL) {
                                        
                                        map<string, bool> containsGroup; 
-                                       for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
-                                               containsGroup[groupmap->namesOfGroups[j]] = false;
+                                       vector<string> mGroups = groupmap->getNamesOfGroups();
+                                       for (int j = 0; j < mGroups.size(); j++) {
+                                               containsGroup[mGroups[j]] = false;
                                        }
                                        
                                        for (int k = 0; k < names.size(); k++) {
                                                //find out the sequences group
                                                string group = groupmap->getGroup(names[k]);
                                        
-                                               if (group == "not found") {  m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                                               if (group == "not found") {  m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
                                                else {
                                                        containsGroup[group] = true;
                                                }
@@ -273,9 +278,10 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                                        //initialize groupcounts
                                        if (groupmap != NULL) {
                                                map<string, bool> containsGroup; 
-                                               for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
-                                                       tree[index].groupCount[groupmap->namesOfGroups[j]] = 0;
-                                                       containsGroup[groupmap->namesOfGroups[j]] = false;
+                                               vector<string> mGroups = groupmap->getNamesOfGroups();
+                                               for (int j = 0; j < mGroups.size(); j++) {
+                                                       tree[index].groupCount[mGroups[j]] = 0;
+                                                       containsGroup[mGroups[j]] = false;
                                                }
                                                
                                                
@@ -283,7 +289,7 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                                                        //find out the sequences group
                                                        string group = groupmap->getGroup(names[k]);
                                                        
-                                                       if (group == "not found") {  m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                                                       if (group == "not found") {  m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
                                                        else {
                                                                containsGroup[group] = true;
                                                        }
@@ -310,7 +316,7 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
                                for (int k = level; k < maxLevel; k++) {  seqTaxonomy += "unclassified;";   }
                        }
                }
-               
+               return 0;
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloSummary", "addSeqToTree");
@@ -343,36 +349,46 @@ void PhyloSummary::print(ofstream& out){
        try {
                
                if (ignore) { assignRank(0); }
-               
+       
                //print labels
                out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
                if (groupmap != NULL) {
                        //so the labels match the counts below, since the map sorts them automatically...
                        //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
-                       
-                       for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {
-                               out << groupmap->namesOfGroups[i] << '\t';
+                       vector<string> mGroups = groupmap->getNamesOfGroups();
+                       for (int i = 0; i < mGroups.size(); i++) {
+                               out << mGroups[i] << '\t';
                        }
                }
                
                out << endl;
                
                int totalChildrenInTree = 0;
+               map<string, int>::iterator itGroup;
                
                map<string,int>::iterator it;
                for(it=tree[0].children.begin();it!=tree[0].children.end();it++){   
-                       if (tree[it->second].total != 0)  {   totalChildrenInTree++; }
+                       if (tree[it->second].total != 0)  {   
+                               totalChildrenInTree++; 
+                               tree[0].total += tree[it->second].total;
+                               
+                               if (groupmap != NULL) {
+                                       vector<string> mGroups = groupmap->getNamesOfGroups();
+                                       for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } 
+                               }
+                       }
                }
                
                //print root
                out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total << "\t";
                
-               map<string, int>::iterator itGroup;
+               
                if (groupmap != NULL) {
                        //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
                        //      out << itGroup->second << '\t';
                        //}
-                       for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {  out << tree[0].groupCount[groupmap->namesOfGroups[i]] << '\t'; } 
+                       vector<string> mGroups = groupmap->getNamesOfGroups();
+                       for (int i = 0; i < mGroups.size(); i++) {  out << tree[0].groupCount[mGroups[i]] << '\t'; } 
                }
                out << endl;
                
@@ -409,9 +425,11 @@ void PhyloSummary::print(int i, ofstream& out){
                                        //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) {
                                        //      out << itGroup->second << '\t';
                                        //}
-                                       for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {  out << tree[it->second].groupCount[groupmap->namesOfGroups[i]] << '\t'; } 
+                                       vector<string> mGroups = groupmap->getNamesOfGroups();
+                                       for (int i = 0; i < mGroups.size(); i++) {  out << tree[it->second].groupCount[mGroups[i]] << '\t'; } 
                                }
                                out << endl;
+                               
                        }
                        
                        print(it->second, out);
@@ -452,8 +470,8 @@ void PhyloSummary::readTreeStruct(ifstream& in){
                        
                        //initialize groupcounts
                        if (groupmap != NULL) {
-                               for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
-                                       tree[i].groupCount[groupmap->namesOfGroups[j]] = 0;
+                               for (int j = 0; j < (groupmap->getNamesOfGroups()).size(); j++) {
+                                       tree[i].groupCount[(groupmap->getNamesOfGroups())[j]] = 0;
                                }
                        }
                        
@@ -466,11 +484,10 @@ void PhyloSummary::readTreeStruct(ifstream& in){
 
        }
        catch(exception& e) {
-               m->errorOut(e, "PhyloSummary", "print");
+               m->errorOut(e, "PhyloSummary", "readTreeStruct");
                exit(1);
        }
 }
-
 /**************************************************************************************************/