X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=phylotree.cpp;h=2e3136e1f42e1a93d15fcab921b2e612f55f3997;hp=3dde18680c625eb816230a8d13774ccfc47032cf;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=f687723a8357916e86a05116978e6869b039ce36 diff --git a/phylotree.cpp b/phylotree.cpp index 3dde186..2e3136e 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -75,7 +75,7 @@ PhyloTree::PhyloTree(ifstream& in, string filename){ for (int i = 0; i < numGenus; i++) { iss >> gnode >> gsize; m->gobble(iss); - uniqueTaxonomies[gnode] = gnode; + uniqueTaxonomies.insert(gnode); totals.push_back(gsize); } @@ -102,7 +102,7 @@ PhyloTree::PhyloTree(ifstream& in, string filename){ for (int i = 0; i < numGenus; i++) { in >> gnode >> gsize; m->gobble(in); - uniqueTaxonomies[gnode] = gnode; + uniqueTaxonomies.insert(gnode); totals.push_back(gsize); } @@ -233,6 +233,48 @@ string PhyloTree::getNextTaxon(string& heirarchy, string seqname){ exit(1); } } +/**************************************************************************************************/ + +vector PhyloTree::getSeqs(string seqTaxonomy){ + try { + string taxCopy = seqTaxonomy; + vector names; + map::iterator childPointer; + + int currentNode = 0; + + m->removeConfidences(seqTaxonomy); + + string taxon; + while(seqTaxonomy != ""){ + + if (m->control_pressed) { return names; } + + taxon = getNextTaxon(seqTaxonomy, ""); + + if (m->debug) { m->mothurOut(taxon +'\n'); } + + if (taxon == "") { m->mothurOut(taxCopy + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); break; } + + childPointer = tree[currentNode].children.find(taxon); + + if(childPointer != tree[currentNode].children.end()){ //if the node already exists, move on + currentNode = childPointer->second; + } + else{ //otherwise, error this taxonomy is not in tree + m->mothurOut("[ERROR]: " + taxCopy + " is not in taxonomy tree, please correct."); m->mothurOutEndLine(); m->control_pressed = true; return names; + } + + if (seqTaxonomy == "") { names = tree[currentNode].accessions; } + } + + return names; + } + catch(exception& e) { + m->errorOut(e, "PhyloTree", "getSeqs"); + exit(1); + } +} /**************************************************************************************************/ @@ -259,8 +301,10 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){ //somehow the parent is getting one too many accnos //use print to reassign the taxa id taxon = getNextTaxon(seqTaxonomy, seqName); + + if (m->debug) { m->mothurOut(seqName +'\t' + taxon +'\n'); } - if (taxon == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) { uniqueTaxonomies[currentNode] = currentNode; } break; } + if (taxon == "") { m->mothurOut(seqName + " has an error in the taxonomy. This may be due to a ;;"); m->mothurOutEndLine(); if (currentNode != 0) { uniqueTaxonomies.insert(currentNode); } break; } childPointer = tree[currentNode].children.find(taxon); @@ -280,7 +324,7 @@ int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){ name2Taxonomy[seqName] = currentNode; } - if (seqTaxonomy == "") { uniqueTaxonomies[currentNode] = currentNode; } + if (seqTaxonomy == "") { uniqueTaxonomies.insert(currentNode); } } return 0; @@ -295,9 +339,16 @@ vector PhyloTree::getGenusNodes() { try { genusIndex.clear(); //generate genusIndexes - map::iterator it2; - for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(it2->first); } - + set::iterator it2; + map temp; + for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { genusIndex.push_back(*it2); temp[*it2] = genusIndex.size()-1; } + + for (map::iterator itName = name2Taxonomy.begin(); itName != name2Taxonomy.end(); itName++) { + map::iterator itTemp = temp.find(itName->second); + if (itTemp != temp.end()) { name2GenusNodeIndex[itName->first] = itTemp->second; } + else { m->mothurOut("[ERROR]: trouble making name2GenusNodeIndex, aborting.\n"); m->control_pressed = true; } + } + return genusIndex; } catch(exception& e) { @@ -334,6 +385,9 @@ void PhyloTree::assignHeirarchyIDs(int index){ int counter = 1; for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ + + if (m->debug) { m->mothurOut(toString(index) +'\t' + tree[it->second].name +'\n'); } + tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter); counter++; tree[it->second].level = tree[index].level + 1; @@ -392,6 +446,8 @@ void PhyloTree::binUnclassified(string file){ } } + if (m->debug) { m->mothurOut("maxLevel = " + toString(maxLevel) +'\n'); } + int copyNodes = copy.size(); //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary @@ -402,11 +458,14 @@ void PhyloTree::binUnclassified(string file){ int level = copy[itLeaf->second].level; int currentNode = itLeaf->second; + + if (m->debug) { m->mothurOut(copy[currentNode].name +'\n'); } //this sequence is unclassified at some levels while(level < maxLevel){ level++; + if (m->debug) { m->mothurOut("level = " + toString(level) +'\n'); } string taxon = "unclassified"; @@ -541,8 +600,8 @@ void PhyloTree::printTreeNodes(string treefilename) { //print genus nodes outTree << endl << uniqueTaxonomies.size() << endl; - map::iterator it2; - for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << it2->first << '\t' << tree[it2->first].accessions.size() << endl; } + set::iterator it2; + for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) { outTree << *it2 << '\t' << tree[*it2].accessions.size() << endl; } outTree << endl; outTree.close(); @@ -594,12 +653,12 @@ string PhyloTree::getName(int i ){ } } /**************************************************************************************************/ -int PhyloTree::getIndex(string seqName){ +int PhyloTree::getGenusIndex(string seqName){ try { - map::iterator itFind = name2Taxonomy.find(seqName); + map::iterator itFind = name2GenusNodeIndex.find(seqName); - if (itFind != name2Taxonomy.end()) { return name2Taxonomy[seqName]; } - else { m->mothurOut("Cannot find " + seqName + ". Mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);} + if (itFind != name2GenusNodeIndex.end()) { return itFind->second; } + else { m->mothurOut("Cannot find " + seqName + ". Could be a mismatch with taxonomy and template files. Cannot continue."); m->mothurOutEndLine(); exit(1);} } catch(exception& e) { m->errorOut(e, "PhyloTree", "get");