X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylosummary.cpp;h=26ae3a83df25e18c8d2d59344b12f0d9c471e401;hb=b663591fb57c9508f017fd0891911fd959530125;hp=1e8d1bc6c6b2497b85838aeebd4c31c30104a70d;hpb=cd9dbd8b53bbe32af3e9c6bead4aa6d796a278a5;p=mothur.git diff --git a/phylosummary.cpp b/phylosummary.cpp index 1e8d1bc..26ae3a8 100644 --- a/phylosummary.cpp +++ b/phylosummary.cpp @@ -15,6 +15,7 @@ PhyloSummary::PhyloSummary(string refTfile, string groupFile){ try { m = MothurOut::getInstance(); maxLevel = 0; + ignore = false; if (groupFile != "") { groupmap = new GroupMap(groupFile); @@ -42,18 +43,44 @@ PhyloSummary::PhyloSummary(string refTfile, string groupFile){ exit(1); } } + +/**************************************************************************************************/ + +PhyloSummary::PhyloSummary(string groupFile){ + try { + m = MothurOut::getInstance(); + maxLevel = 0; + ignore = true; + + if (groupFile != "") { + groupmap = new GroupMap(groupFile); + groupmap->readMap(); + }else{ + groupmap = NULL; + } + + tree.push_back(rawTaxNode("Root")); + tree[0].rank = "0"; + + + } + catch(exception& e) { + m->errorOut(e, "PhyloSummary", "PhyloSummary"); + exit(1); + } +} /**************************************************************************************************/ void PhyloSummary::summarize(string userTfile){ try { ifstream in; - openInputFile(userTfile, in); + m->openInputFile(userTfile, in); //read in users taxonomy file and add sequences to tree string name, tax; while(!in.eof()){ - in >> name >> tax; gobble(in); + in >> name >> tax; m->gobble(in); addSeqToTree(name, tax); @@ -114,21 +141,57 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ //find out the sequences group string group = groupmap->getGroup(seqName); + if (group == "not found") { m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + //do you have a count for this group? - map::iterator itGroup = tree[currentNode].groupCount.find(group); + map::iterator itGroup = tree[childPointer->second].groupCount.find(group); //if yes, increment it - there should not be a case where we can't find it since we load group in read - if (itGroup != tree[currentNode].groupCount.end()) { - tree[currentNode].groupCount[group]++; + if (itGroup != tree[childPointer->second].groupCount.end()) { + tree[childPointer->second].groupCount[group]++; } } - tree[currentNode].total++; + tree[childPointer->second].total++; currentNode = childPointer->second; - }else{ //otherwise, create it - m->mothurOut("Error: cannot find taxonomy in tree for " + seqName + "."); m->mothurOutEndLine(); - seqTaxonomy = ""; + }else{ + if (ignore) { + + tree.push_back(rawTaxNode(taxon)); + int index = tree.size() - 1; + + tree[index].parent = currentNode; + tree[index].level = (level+1); + tree[index].total = 1; + tree[currentNode].children[taxon] = index; + + //initialize groupcounts + if (groupmap != NULL) { + for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { + tree[index].groupCount[groupmap->namesOfGroups[j]] = 0; + } + + //find out the sequences group + string group = groupmap->getGroup(seqName); + + if (group == "not found") { m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + + //do you have a count for this group? + map::iterator itGroup = tree[index].groupCount.find(group); + + //if yes, increment it - there should not be a case where we can't find it since we load group in read + if (itGroup != tree[index].groupCount.end()) { + tree[index].groupCount[group]++; + } + } + + currentNode = index; + + }else{ //otherwise, error + m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine(); + break; + } } level++; @@ -137,13 +200,124 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; } } } + return 0; + } + catch(exception& e) { + m->errorOut(e, "PhyloSummary", "addSeqToTree"); + exit(1); + } +} +/**************************************************************************************************/ +int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ + try { + numSeqs++; + + map::iterator childPointer; + + int currentNode = 0; + string taxon; + + int level = 0; + + while (seqTaxonomy != "") { + + if (m->control_pressed) { return 0; } + + //somehow the parent is getting one too many accnos + //use print to reassign the taxa id + taxon = getNextTaxon(seqTaxonomy); + + childPointer = tree[currentNode].children.find(taxon); + + if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on + if (groupmap != NULL) { + + map containsGroup; + for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { + containsGroup[groupmap->namesOfGroups[j]] = false; + } + + for (int k = 0; k < names.size(); k++) { + //find out the sequences group + string group = groupmap->getGroup(names[k]); + + if (group == "not found") { m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + else { + containsGroup[group] = true; + } + } + + for (map::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { + if (itGroup->second == true) { + tree[childPointer->second].groupCount[itGroup->first]++; + } + } + + } + + tree[childPointer->second].total++; + + currentNode = childPointer->second; + }else{ + if (ignore) { + + tree.push_back(rawTaxNode(taxon)); + int index = tree.size() - 1; + + tree[index].parent = currentNode; + tree[index].level = (level+1); + tree[index].total = 1; + tree[currentNode].children[taxon] = index; + + //initialize groupcounts + if (groupmap != NULL) { + map containsGroup; + for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { + tree[index].groupCount[groupmap->namesOfGroups[j]] = 0; + containsGroup[groupmap->namesOfGroups[j]] = false; + } + + + for (int k = 0; k < names.size(); k++) { + //find out the sequences group + string group = groupmap->getGroup(names[k]); + + if (group == "not found") { m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + else { + containsGroup[group] = true; + } + } + + for (map::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { + if (itGroup->second == true) { + tree[index].groupCount[itGroup->first]++; + } + } + } + + currentNode = index; + + }else{ //otherwise, error + m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine(); + break; + } + } + + level++; + + if ((seqTaxonomy == "") && (level < maxLevel)) { //if you think you are done and you are not. + for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; } + } + } + return 0; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "addSeqToTree"); exit(1); } } + /**************************************************************************************************/ void PhyloSummary::assignRank(int index){ @@ -167,9 +341,15 @@ void PhyloSummary::assignRank(int index){ void PhyloSummary::print(ofstream& out){ try { + + if (ignore) { assignRank(0); } + //print labels - out << "taxlevel\t rank ID\t label\t daughterlevels\t total\t"; + out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t"; if (groupmap != NULL) { + //so the labels match the counts below, since the map sorts them automatically... + //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end()); + for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << groupmap->namesOfGroups[i] << '\t'; } @@ -177,14 +357,22 @@ void PhyloSummary::print(ofstream& out){ out << endl; + int totalChildrenInTree = 0; + + map::iterator it; + for(it=tree[0].children.begin();it!=tree[0].children.end();it++){ + if (tree[it->second].total != 0) { totalChildrenInTree++; } + } + //print root - out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << tree[0].children.size() << "\t" << tree[0].total << "\t"; + out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total << "\t"; map::iterator itGroup; if (groupmap != NULL) { - for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) { - out << itGroup->second << '\t'; - } + //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) { + // out << itGroup->second << '\t'; + //} + for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[0].groupCount[groupmap->namesOfGroups[i]] << '\t'; } } out << endl; @@ -206,13 +394,22 @@ void PhyloSummary::print(int i, ofstream& out){ for(it=tree[i].children.begin();it!=tree[i].children.end();it++){ if (tree[it->second].total != 0) { - out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << tree[it->second].children.size() << "\t" << tree[it->second].total << "\t"; + + int totalChildrenInTree = 0; + + map::iterator it2; + for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){ + if (tree[it2->second].total != 0) { totalChildrenInTree++; } + } + + out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total << "\t"; map::iterator itGroup; if (groupmap != NULL) { - for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) { - out << itGroup->second << '\t'; - } + //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) { + // out << itGroup->second << '\t'; + //} + for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[it->second].groupCount[groupmap->namesOfGroups[i]] << '\t'; } } out << endl; } @@ -228,11 +425,17 @@ void PhyloSummary::print(int i, ofstream& out){ /**************************************************************************************************/ void PhyloSummary::readTreeStruct(ifstream& in){ try { + + //read version + string line = m->getline(in); m->gobble(in); + int num; - in >> num; gobble(in); + in >> num; m->gobble(in); tree.resize(num); + + in >> maxLevel; m->gobble(in); //read the tree file for (int i = 0; i < tree.size(); i++) { @@ -256,9 +459,9 @@ void PhyloSummary::readTreeStruct(ifstream& in){ tree[i].total = 0; - gobble(in); + m->gobble(in); - if (tree[i].level > maxLevel) { maxLevel = tree[i].level; } + //if (tree[i].level > maxLevel) { maxLevel = tree[i].level; } } }