X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=phylosummary.cpp;fp=phylosummary.cpp;h=0000000000000000000000000000000000000000;hb=4a877efa127e56e81a21f53cfdbbfd3bfbe8c4ff;hp=2f565150aabeec35cc8a3c03a459d8b85090550b;hpb=a6cf29fa4dac0909c7582cb1094151d34093ee76;p=mothur.git diff --git a/phylosummary.cpp b/phylosummary.cpp deleted file mode 100644 index 2f56515..0000000 --- a/phylosummary.cpp +++ /dev/null @@ -1,502 +0,0 @@ -/* - * rawTrainingDataMaker.cpp - * Mothur - * - * Created by westcott on 4/21/10. - * Copyright 2010 Schloss Lab. All rights reserved. - * - */ - -#include "phylosummary.h" - -/**************************************************************************************************/ - -PhyloSummary::PhyloSummary(string refTfile, string groupFile){ - try { - m = MothurOut::getInstance(); - maxLevel = 0; - ignore = false; - - if (groupFile != "") { - groupmap = new GroupMap(groupFile); - groupmap->readMap(); - }else{ - groupmap = NULL; - } - - //check for necessary files - string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum")); - ifstream FileTest(taxFileNameTest.c_str()); - - if (!FileTest) { - m->mothurOut("Error: can't find " + taxFileNameTest + "."); m->mothurOutEndLine(); exit(1); - }else{ - readTreeStruct(FileTest); - } - - tree[0].rank = "0"; - assignRank(0); - - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "PhyloSummary"); - exit(1); - } -} - -/**************************************************************************************************/ - -PhyloSummary::PhyloSummary(string groupFile){ - try { - m = MothurOut::getInstance(); - maxLevel = 0; - ignore = true; - - if (groupFile != "") { - groupmap = new GroupMap(groupFile); - groupmap->readMap(); - }else{ - groupmap = NULL; - } - - tree.push_back(rawTaxNode("Root")); - tree[0].rank = "0"; - - - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "PhyloSummary"); - exit(1); - } -} -/**************************************************************************************************/ - -int PhyloSummary::summarize(string userTfile){ - try { - - ifstream in; - m->openInputFile(userTfile, in); - - //read in users taxonomy file and add sequences to tree - string name, tax; - int numSeqs = 0; - while(!in.eof()){ - in >> name >> tax; m->gobble(in); - - addSeqToTree(name, tax); - numSeqs++; - - if (m->control_pressed) { break; } - } - in.close(); - - return numSeqs; - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "summarize"); - exit(1); - } -} - -/**************************************************************************************************/ - -string PhyloSummary::getNextTaxon(string& heirarchy){ - try { - string currentLevel = ""; - if(heirarchy != ""){ - int pos = heirarchy.find_first_of(';'); - currentLevel=heirarchy.substr(0,pos); - if (pos != (heirarchy.length()-1)) { heirarchy=heirarchy.substr(pos+1); } - else { heirarchy = ""; } - } - return currentLevel; - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "getNextTaxon"); - exit(1); - } -} - -/**************************************************************************************************/ - -int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ - try { - - numSeqs++; - - map::iterator childPointer; - - int currentNode = 0; - string taxon; - - int level = 0; - - //are there confidence scores, if so remove them - if (seqTaxonomy.find_first_of('(') != -1) { m->removeConfidences(seqTaxonomy); } - - while (seqTaxonomy != "") { - - if (m->control_pressed) { return 0; } - - //somehow the parent is getting one too many accnos - //use print to reassign the taxa id - taxon = getNextTaxon(seqTaxonomy); - - childPointer = tree[currentNode].children.find(taxon); - - if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on - if (groupmap != NULL) { - //find out the sequences group - string group = groupmap->getGroup(seqName); - - if (group == "not found") { m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } - - //do you have a count for this group? - map::iterator itGroup = tree[childPointer->second].groupCount.find(group); - - //if yes, increment it - there should not be a case where we can't find it since we load group in read - if (itGroup != tree[childPointer->second].groupCount.end()) { - tree[childPointer->second].groupCount[group]++; - } - } - - tree[childPointer->second].total++; - - currentNode = childPointer->second; - }else{ - if (ignore) { - - tree.push_back(rawTaxNode(taxon)); - int index = tree.size() - 1; - - tree[index].parent = currentNode; - tree[index].level = (level+1); - tree[index].total = 1; - tree[currentNode].children[taxon] = index; - - //initialize groupcounts - if (groupmap != NULL) { - vector mGroups = groupmap->getNamesOfGroups(); - for (int j = 0; j < mGroups.size(); j++) { - tree[index].groupCount[mGroups[j]] = 0; - } - - //find out the sequences group - string group = groupmap->getGroup(seqName); - - if (group == "not found") { m->mothurOut("[WARNING]: " + seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } - - //do you have a count for this group? - map::iterator itGroup = tree[index].groupCount.find(group); - - //if yes, increment it - there should not be a case where we can't find it since we load group in read - if (itGroup != tree[index].groupCount.end()) { - tree[index].groupCount[group]++; - } - } - - currentNode = index; - - }else{ //otherwise, error - m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine(); - break; - } - } - - level++; - - if ((seqTaxonomy == "") && (level < maxLevel)) { //if you think you are done and you are not. - for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; } - } - } - return 0; - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "addSeqToTree"); - exit(1); - } -} -/**************************************************************************************************/ - -int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ - try { - numSeqs++; - - map::iterator childPointer; - - int currentNode = 0; - string taxon; - - int level = 0; - - //are there confidence scores, if so remove them - if (seqTaxonomy.find_first_of('(') != -1) { m->removeConfidences(seqTaxonomy); } - - while (seqTaxonomy != "") { - - if (m->control_pressed) { return 0; } - - //somehow the parent is getting one too many accnos - //use print to reassign the taxa id - taxon = getNextTaxon(seqTaxonomy); - - childPointer = tree[currentNode].children.find(taxon); - - if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on - if (groupmap != NULL) { - - map containsGroup; - vector mGroups = groupmap->getNamesOfGroups(); - for (int j = 0; j < mGroups.size(); j++) { - containsGroup[mGroups[j]] = false; - } - - for (int k = 0; k < names.size(); k++) { - //find out the sequences group - string group = groupmap->getGroup(names[k]); - - if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } - else { - containsGroup[group] = true; - } - } - - for (map::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { - if (itGroup->second == true) { - tree[childPointer->second].groupCount[itGroup->first]++; - } - } - - } - - tree[childPointer->second].total++; - - currentNode = childPointer->second; - }else{ - if (ignore) { - - tree.push_back(rawTaxNode(taxon)); - int index = tree.size() - 1; - - tree[index].parent = currentNode; - tree[index].level = (level+1); - tree[index].total = 1; - tree[currentNode].children[taxon] = index; - - //initialize groupcounts - if (groupmap != NULL) { - map containsGroup; - vector mGroups = groupmap->getNamesOfGroups(); - for (int j = 0; j < mGroups.size(); j++) { - tree[index].groupCount[mGroups[j]] = 0; - containsGroup[mGroups[j]] = false; - } - - - for (int k = 0; k < names.size(); k++) { - //find out the sequences group - string group = groupmap->getGroup(names[k]); - - if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); } - else { - containsGroup[group] = true; - } - } - - for (map::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) { - if (itGroup->second == true) { - tree[index].groupCount[itGroup->first]++; - } - } - } - - currentNode = index; - - }else{ //otherwise, error - m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine(); - break; - } - } - - level++; - - if ((seqTaxonomy == "") && (level < maxLevel)) { //if you think you are done and you are not. - for (int k = level; k < maxLevel; k++) { seqTaxonomy += "unclassified;"; } - } - } - return 0; - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "addSeqToTree"); - exit(1); - } -} - -/**************************************************************************************************/ - -void PhyloSummary::assignRank(int index){ - try { - map::iterator it; - int counter = 1; - - for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ - tree[it->second].rank = tree[index].rank + '.' + toString(counter); - counter++; - - assignRank(it->second); - } - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "assignRank"); - exit(1); - } -} -/**************************************************************************************************/ - -void PhyloSummary::print(ofstream& out){ - try { - - if (ignore) { assignRank(0); } - - //print labels - out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t"; - if (groupmap != NULL) { - //so the labels match the counts below, since the map sorts them automatically... - //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end()); - vector mGroups = groupmap->getNamesOfGroups(); - for (int i = 0; i < mGroups.size(); i++) { - out << mGroups[i] << '\t'; - } - } - - out << endl; - - int totalChildrenInTree = 0; - map::iterator itGroup; - - map::iterator it; - for(it=tree[0].children.begin();it!=tree[0].children.end();it++){ - if (tree[it->second].total != 0) { - totalChildrenInTree++; - tree[0].total += tree[it->second].total; - - if (groupmap != NULL) { - vector mGroups = groupmap->getNamesOfGroups(); - for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } - } - } - } - - //print root - out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total << "\t"; - - - if (groupmap != NULL) { - //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) { - // out << itGroup->second << '\t'; - //} - vector mGroups = groupmap->getNamesOfGroups(); - for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } - } - out << endl; - - //print rest - print(0, out); - - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "print"); - exit(1); - } -} - -/**************************************************************************************************/ - -void PhyloSummary::print(int i, ofstream& out){ - try { - map::iterator it; - for(it=tree[i].children.begin();it!=tree[i].children.end();it++){ - - if (tree[it->second].total != 0) { - - int totalChildrenInTree = 0; - - map::iterator it2; - for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){ - if (tree[it2->second].total != 0) { totalChildrenInTree++; } - } - - out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total << "\t"; - - map::iterator itGroup; - if (groupmap != NULL) { - //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) { - // out << itGroup->second << '\t'; - //} - vector mGroups = groupmap->getNamesOfGroups(); - for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; } - } - out << endl; - - } - - print(it->second, out); - } - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "print"); - exit(1); - } -} -/**************************************************************************************************/ -void PhyloSummary::readTreeStruct(ifstream& in){ - try { - - //read version - string line = m->getline(in); m->gobble(in); - - int num; - - in >> num; m->gobble(in); - - tree.resize(num); - - in >> maxLevel; m->gobble(in); - - //read the tree file - for (int i = 0; i < tree.size(); i++) { - - in >> tree[i].level >> tree[i].name >> num; //num contains the number of children tree[i] has - - //set children - string childName; - int childIndex; - for (int j = 0; j < num; j++) { - in >> childName >> childIndex; - tree[i].children[childName] = childIndex; - } - - //initialize groupcounts - if (groupmap != NULL) { - for (int j = 0; j < (groupmap->getNamesOfGroups()).size(); j++) { - tree[i].groupCount[(groupmap->getNamesOfGroups())[j]] = 0; - } - } - - tree[i].total = 0; - - m->gobble(in); - - //if (tree[i].level > maxLevel) { maxLevel = tree[i].level; } - } - - } - catch(exception& e) { - m->errorOut(e, "PhyloSummary", "readTreeStruct"); - exit(1); - } -} -/**************************************************************************************************/ - - -