git.donarmstrong.com Git - mothur.git/blobdiff - phylosummary.cpp
added summary file to classify.otu
[mothur.git] / phylosummary.cpp
index 4e94605e47caf6e09d099a4df32dfdd7677be676..a9a170b86c77dff145079df269fd6b36813848ee 100644 (file)
@@ -15,6 +15,7 @@ PhyloSummary::PhyloSummary(string refTfile, string groupFile){
        try {
                m = MothurOut::getInstance();
                maxLevel = 0;
+               ignore = false;
                
                if (groupFile != "") {
                        groupmap = new GroupMap(groupFile);
@@ -42,6 +43,32 @@ PhyloSummary::PhyloSummary(string refTfile, string groupFile){
                exit(1);
        }
 }
+
+/**************************************************************************************************/
+
+PhyloSummary::PhyloSummary(string groupFile){
+       try {
+               m = MothurOut::getInstance();
+               maxLevel = 0;
+               ignore = true;
+               
+               if (groupFile != "") {
+                       groupmap = new GroupMap(groupFile);
+                       groupmap->readMap();
+               }else{
+                       groupmap = NULL;
+               }
+               
+               tree.push_back(rawTaxNode("Root"));
+               tree[0].rank = "0";
+               
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloSummary", "PhyloSummary");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 
 void PhyloSummary::summarize(string userTfile){
@@ -128,9 +155,43 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
                                tree[childPointer->second].total++;
 
                                currentNode = childPointer->second;
-                       }else{  //otherwise, error
-                               m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
-                               break;
+                       }else{  
+                               if (ignore) {
+                                               
+                                       tree.push_back(rawTaxNode(taxon));
+                                       int index = tree.size() - 1;
+                               
+                                       tree[index].parent = currentNode;
+                                       tree[index].level = (level+1);
+                                       tree[index].total = 1;
+                                       tree[currentNode].children[taxon] = index;
+                                       
+                                       //initialize groupcounts
+                                       if (groupmap != NULL) {
+                                               for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
+                                                       tree[index].groupCount[groupmap->namesOfGroups[j]] = 0;
+                                               }
+                                               
+                                               //find out the sequences group
+                                               string group = groupmap->getGroup(seqName);
+                                               
+                                               if (group == "not found") {  m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                                               
+                                               //do you have a count for this group?
+                                               map<string, int>::iterator itGroup = tree[index].groupCount.find(group);
+                                               
+                                               //if yes, increment it - there should not be a case where we can't find it since we load group in read
+                                               if (itGroup != tree[index].groupCount.end()) {
+                                                       tree[index].groupCount[group]++;
+                                               }                                               
+                                       }
+                                       
+                                       currentNode = index;
+                                       
+                               }else{ //otherwise, error
+                                       m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
+                                       break;
+                               }
                        }
                        
                        level++;
@@ -148,6 +209,117 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
 }
 /**************************************************************************************************/
 
+int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
+       try {
+               numSeqs++;
+               
+               map<string, int>::iterator childPointer;
+               
+               int currentNode = 0;
+               string taxon;
+               
+               int level = 0;
+               
+               while (seqTaxonomy != "") {
+                       
+                       if (m->control_pressed) { return 0; }
+                       
+                       //somehow the parent is getting one too many accnos
+                       //use print to reassign the taxa id
+                       taxon = getNextTaxon(seqTaxonomy);
+                       
+                       childPointer = tree[currentNode].children.find(taxon);
+                       
+                       if(childPointer != tree[currentNode].children.end()){   //if the node already exists, update count and move on
+                               if (groupmap != NULL) {
+                                       
+                                       map<string, bool> containsGroup; 
+                                       for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
+                                               containsGroup[groupmap->namesOfGroups[j]] = false;
+                                       }
+                                       
+                                       for (int k = 0; k < names.size(); k++) {
+                                               //find out the sequences group
+                                               string group = groupmap->getGroup(names[k]);
+                                       
+                                               if (group == "not found") {  m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                                               else {
+                                                       containsGroup[group] = true;
+                                               }
+                                       }
+                                       
+                                       for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+                                               if (itGroup->second == true) {
+                                                       tree[childPointer->second].groupCount[itGroup->first]++;
+                                               }
+                                       }
+                                       
+                               }
+                               
+                               tree[childPointer->second].total++;
+                               
+                               currentNode = childPointer->second;
+                       }else{  
+                               if (ignore) {
+                                       
+                                       tree.push_back(rawTaxNode(taxon));
+                                       int index = tree.size() - 1;
+                                       
+                                       tree[index].parent = currentNode;
+                                       tree[index].level = (level+1);
+                                       tree[index].total = 1;
+                                       tree[currentNode].children[taxon] = index;
+                                       
+                                       //initialize groupcounts
+                                       if (groupmap != NULL) {
+                                               map<string, bool> containsGroup; 
+                                               for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
+                                                       tree[index].groupCount[groupmap->namesOfGroups[j]] = 0;
+                                                       containsGroup[groupmap->namesOfGroups[j]] = false;
+                                               }
+                                               
+                                               
+                                               for (int k = 0; k < names.size(); k++) {
+                                                       //find out the sequences group
+                                                       string group = groupmap->getGroup(names[k]);
+                                                       
+                                                       if (group == "not found") {  m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
+                                                       else {
+                                                               containsGroup[group] = true;
+                                                       }
+                                               }
+                                               
+                                               for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+                                                       if (itGroup->second == true) {
+                                                               tree[index].groupCount[itGroup->first]++;
+                                                       }
+                                               }
+                                       }
+                                       
+                                       currentNode = index;
+                                       
+                               }else{ //otherwise, error
+                                       m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
+                                       break;
+                               }
+                       }
+                       
+                       level++;
+                       
+                       if ((seqTaxonomy == "") && (level < maxLevel)) {  //if you think you are done and you are not.
+                               for (int k = level; k < maxLevel; k++) {  seqTaxonomy += "unclassified;";   }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PhyloSummary", "addSeqToTree");
+               exit(1);
+       }
+}
+
+/**************************************************************************************************/
+
 void PhyloSummary::assignRank(int index){
        try {
                map<string,int>::iterator it;
@@ -169,6 +341,9 @@ void PhyloSummary::assignRank(int index){
 
 void PhyloSummary::print(ofstream& out){
        try {
+               
+               if (ignore) { assignRank(0); }
+               
                //print labels
                out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
                if (groupmap != NULL) {