X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylosummary.cpp;h=5f7bbc3c73a2161417a1841f567719a4bd1f8c4a;hb=c7e8c2d15bd7cedcfdf18675cb0ea1a0dcd0e3c0;hp=26ae3a83df25e18c8d2d59344b12f0d9c471e401;hpb=5694c92fbf646fe01abc87bde2af59e14a9a56b6;p=mothur.git diff --git a/phylosummary.cpp b/phylosummary.cpp index 26ae3a8..5f7bbc3 100644 --- a/phylosummary.cpp +++ b/phylosummary.cpp @@ -25,7 +25,7 @@ PhyloSummary::PhyloSummary(string refTfile, string groupFile){ } //check for necessary files - string taxFileNameTest = refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"; + string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum")); ifstream FileTest(taxFileNameTest.c_str()); if (!FileTest) { @@ -71,22 +71,18 @@ PhyloSummary::PhyloSummary(string groupFile){ } /**************************************************************************************************/ -void PhyloSummary::summarize(string userTfile){ +int PhyloSummary::summarize(string userTfile){ try { - - ifstream in; - m->openInputFile(userTfile, in); - - //read in users taxonomy file and add sequences to tree - string name, tax; - while(!in.eof()){ - in >> name >> tax; m->gobble(in); - - addSeqToTree(name, tax); - - if (m->control_pressed) { break; } - } - in.close(); + map temp; + m->readTax(userTfile, temp); + + for (map::iterator itTemp = temp.begin(); itTemp != temp.end();) { + addSeqToTree(itTemp->first, itTemp->second); + numSeqs++; + temp.erase(itTemp++); + } + + return numSeqs; } catch(exception& e) { m->errorOut(e, "PhyloSummary", "summarize"); @@ -117,6 +113,7 @@ string PhyloSummary::getNextTaxon(string& heirarchy){ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ try { + numSeqs++; map::iterator childPointer; @@ -126,6 +123,9 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ int level = 0; + //are there confidence scores, if so remove them + if (seqTaxonomy.find_first_of('(') != -1) { m->removeConfidences(seqTaxonomy); } + while (seqTaxonomy != "") { if (m->control_pressed) { return 0; } @@ -141,7 +141,7 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ //find out the sequences group string group = groupmap->getGroup(seqName); - if (group == "not found") { m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + if (group == "not found") { m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } //do you have a count for this group? map::iterator itGroup = tree[childPointer->second].groupCount.find(group); @@ -168,14 +168,15 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ //initialize groupcounts if (groupmap != NULL) { - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - tree[index].groupCount[groupmap->namesOfGroups[j]] = 0; + vector mGroups = groupmap->getNamesOfGroups(); + for (int j = 0; j < mGroups.size(); j++) { + tree[index].groupCount[mGroups[j]] = 0; } //find out the sequences group string group = groupmap->getGroup(seqName); - if (group == "not found") { m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + if (group == "not found") { m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } //do you have a count for this group? map::iterator itGroup = tree[index].groupCount.find(group); @@ -220,6 +221,9 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ int level = 0; + //are there confidence scores, if so remove them + if (seqTaxonomy.find_first_of('(') != -1) { m->removeConfidences(seqTaxonomy); } + while (seqTaxonomy != "") { if (m->control_pressed) { return 0; } @@ -234,15 +238,16 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ if (groupmap != NULL) { map containsGroup; - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - containsGroup[groupmap->namesOfGroups[j]] = false; + vector mGroups = groupmap->getNamesOfGroups(); + for (int j = 0; j < mGroups.size(); j++) { + containsGroup[mGroups[j]] = false; } for (int k = 0; k < names.size(); k++) { //find out the sequences group string group = groupmap->getGroup(names[k]); - if (group == "not found") { m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } else { containsGroup[group] = true; } @@ -273,9 +278,10 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ //initialize groupcounts if (groupmap != NULL) { map containsGroup; - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - tree[index].groupCount[groupmap->namesOfGroups[j]] = 0; - containsGroup[groupmap->namesOfGroups[j]] = false; + vector mGroups = groupmap->getNamesOfGroups(); + for (int j = 0; j < mGroups.size(); j++) { + tree[index].groupCount[mGroups[j]] = 0; + containsGroup[mGroups[j]] = false; } @@ -283,7 +289,7 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ //find out the sequences group string group = groupmap->getGroup(names[k]); - if (group == "not found") { m->mothurOut(names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } + if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } else { containsGroup[group] = true; } @@ -343,36 +349,46 @@ void PhyloSummary::print(ofstream& out){ try { if (ignore) { assignRank(0); } - + //print labels out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t"; if (groupmap != NULL) { //so the labels match the counts below, since the map sorts them automatically... //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end()); - - for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { - out << groupmap->namesOfGroups[i] << '\t'; + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { + out << mGroups[i] << '\t'; } } out << endl; int totalChildrenInTree = 0; + map::iterator itGroup; map::iterator it; for(it=tree[0].children.begin();it!=tree[0].children.end();it++){ - if (tree[it->second].total != 0) { totalChildrenInTree++; } + if (tree[it->second].total != 0) { + totalChildrenInTree++; + tree[0].total += tree[it->second].total; + + if (groupmap != NULL) { + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } + } + } } //print root out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total << "\t"; - map::iterator itGroup; + if (groupmap != NULL) { //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) { // out << itGroup->second << '\t'; //} - for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[0].groupCount[groupmap->namesOfGroups[i]] << '\t'; } + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } } out << endl; @@ -409,9 +425,11 @@ void PhyloSummary::print(int i, ofstream& out){ //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) { // out << itGroup->second << '\t'; //} - for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[it->second].groupCount[groupmap->namesOfGroups[i]] << '\t'; } + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; } } out << endl; + } print(it->second, out); @@ -452,8 +470,8 @@ void PhyloSummary::readTreeStruct(ifstream& in){ //initialize groupcounts if (groupmap != NULL) { - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - tree[i].groupCount[groupmap->namesOfGroups[j]] = 0; + for (int j = 0; j < (groupmap->getNamesOfGroups()).size(); j++) { + tree[i].groupCount[(groupmap->getNamesOfGroups())[j]] = 0; } } @@ -466,11 +484,10 @@ void PhyloSummary::readTreeStruct(ifstream& in){ } catch(exception& e) { - m->errorOut(e, "PhyloSummary", "print"); + m->errorOut(e, "PhyloSummary", "readTreeStruct"); exit(1); } } - /**************************************************************************************************/