try {
m = MothurOut::getInstance();
maxLevel = 0;
+ ignore = false;
if (groupFile != "") {
groupmap = new GroupMap(groupFile);
exit(1);
}
}
+
+/**************************************************************************************************/
+
+PhyloSummary::PhyloSummary(string groupFile){
+ try {
+ m = MothurOut::getInstance();
+ maxLevel = 0;
+ ignore = true;
+
+ if (groupFile != "") {
+ groupmap = new GroupMap(groupFile);
+ groupmap->readMap();
+ }else{
+ groupmap = NULL;
+ }
+
+ tree.push_back(rawTaxNode("Root"));
+ tree[0].rank = "0";
+
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "PhyloSummary");
+ exit(1);
+ }
+}
/**************************************************************************************************/
void PhyloSummary::summarize(string userTfile){
try {
ifstream in;
- openInputFile(userTfile, in);
+ m->openInputFile(userTfile, in);
//read in users taxonomy file and add sequences to tree
string name, tax;
while(!in.eof()){
- in >> name >> tax; gobble(in);
+ in >> name >> tax; m->gobble(in);
addSeqToTree(name, tax);
if (group == "not found") { m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
//do you have a count for this group?
- map<string, int>::iterator itGroup = tree[currentNode].groupCount.find(group);
+ map<string, int>::iterator itGroup = tree[childPointer->second].groupCount.find(group);
//if yes, increment it - there should not be a case where we can't find it since we load group in read
- if (itGroup != tree[currentNode].groupCount.end()) {
- tree[currentNode].groupCount[group]++;
+ if (itGroup != tree[childPointer->second].groupCount.end()) {
+ tree[childPointer->second].groupCount[group]++;
}
}
- tree[currentNode].total++;
+ tree[childPointer->second].total++;
currentNode = childPointer->second;
- }else{ //otherwise, error
- m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
- break;
+ }else{
+ if (ignore) {
+
+ tree.push_back(rawTaxNode(taxon));
+ int index = tree.size() - 1;
+
+ tree[index].parent = currentNode;
+ tree[index].level = (level+1);
+ tree[index].total = 1;
+ tree[currentNode].children[taxon] = index;
+
+ //initialize groupcounts
+ if (groupmap != NULL) {
+ for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
+ tree[index].groupCount[groupmap->namesOfGroups[j]] = 0;
+ }
+
+ //find out the sequences group
+ string group = groupmap->getGroup(seqName);
+
+ if (group == "not found") { m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
+
+ //do you have a count for this group?
+ map<string, int>::iterator itGroup = tree[index].groupCount.find(group);
+
+ //if yes, increment it - there should not be a case where we can't find it since we load group in read
+ if (itGroup != tree[index].groupCount.end()) {
+ tree[index].groupCount[group]++;
+ }
+ }
+
+ currentNode = index;
+
+ }else{ //otherwise, error
+ m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
+ break;
+ }
}
level++;
for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; }
}
}
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "addSeqToTree");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
+ try {
+ numSeqs++;
+
+ map<string, int>::iterator childPointer;
+
+ int currentNode = 0;
+ string taxon;
+
+ int level = 0;
+
+ while (seqTaxonomy != "") {
+
+ if (m->control_pressed) { return 0; }
+
+ //somehow the parent is getting one too many accnos
+ //use print to reassign the taxa id
+ taxon = getNextTaxon(seqTaxonomy);
+
+ childPointer = tree[currentNode].children.find(taxon);
+
+ if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on
+ if (groupmap != NULL) {
+
+ map<string, bool> containsGroup;
+ for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
+ containsGroup[groupmap->namesOfGroups[j]] = false;
+ }
+
+ for (int k = 0; k < names.size(); k++) {
+ //find out the sequences group
+ string group = groupmap->getGroup(names[k]);
+
+ if (group == "not found") { m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
+ else {
+ containsGroup[group] = true;
+ }
+ }
+
+ for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+ if (itGroup->second == true) {
+ tree[childPointer->second].groupCount[itGroup->first]++;
+ }
+ }
+
+ }
+
+ tree[childPointer->second].total++;
+
+ currentNode = childPointer->second;
+ }else{
+ if (ignore) {
+
+ tree.push_back(rawTaxNode(taxon));
+ int index = tree.size() - 1;
+
+ tree[index].parent = currentNode;
+ tree[index].level = (level+1);
+ tree[index].total = 1;
+ tree[currentNode].children[taxon] = index;
+
+ //initialize groupcounts
+ if (groupmap != NULL) {
+ map<string, bool> containsGroup;
+ for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
+ tree[index].groupCount[groupmap->namesOfGroups[j]] = 0;
+ containsGroup[groupmap->namesOfGroups[j]] = false;
+ }
+
+
+ for (int k = 0; k < names.size(); k++) {
+ //find out the sequences group
+ string group = groupmap->getGroup(names[k]);
+
+ if (group == "not found") { m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
+ else {
+ containsGroup[group] = true;
+ }
+ }
+
+ for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+ if (itGroup->second == true) {
+ tree[index].groupCount[itGroup->first]++;
+ }
+ }
+ }
+
+ currentNode = index;
+
+ }else{ //otherwise, error
+ m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
+ break;
+ }
+ }
+
+ level++;
+
+ if ((seqTaxonomy == "") && (level < maxLevel)) { //if you think you are done and you are not.
+ for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; }
+ }
+ }
+ return 0;
}
catch(exception& e) {
m->errorOut(e, "PhyloSummary", "addSeqToTree");
exit(1);
}
}
+
/**************************************************************************************************/
void PhyloSummary::assignRank(int index){
void PhyloSummary::print(ofstream& out){
try {
+
+ if (ignore) { assignRank(0); }
+
//print labels
out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
if (groupmap != NULL) {
+ //so the labels match the counts below, since the map sorts them automatically...
+ //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
+
for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {
out << groupmap->namesOfGroups[i] << '\t';
}
map<string, int>::iterator itGroup;
if (groupmap != NULL) {
- for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
- out << itGroup->second << '\t';
- }
+ //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
+ // out << itGroup->second << '\t';
+ //}
+ for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[0].groupCount[groupmap->namesOfGroups[i]] << '\t'; }
}
out << endl;
map<string, int>::iterator itGroup;
if (groupmap != NULL) {
- for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) {
- out << itGroup->second << '\t';
- }
+ //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) {
+ // out << itGroup->second << '\t';
+ //}
+ for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[it->second].groupCount[groupmap->namesOfGroups[i]] << '\t'; }
}
out << endl;
}
/**************************************************************************************************/
void PhyloSummary::readTreeStruct(ifstream& in){
try {
+
+ //read version
+ string line = m->getline(in); m->gobble(in);
+
int num;
- in >> num; gobble(in);
+ in >> num; m->gobble(in);
tree.resize(num);
+
+ in >> maxLevel; m->gobble(in);
//read the tree file
for (int i = 0; i < tree.size(); i++) {
tree[i].total = 0;
- gobble(in);
+ m->gobble(in);
- if (tree[i].level > maxLevel) { maxLevel = tree[i].level; }
+ //if (tree[i].level > maxLevel) { maxLevel = tree[i].level; }
}
}