X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=knn.cpp;h=81b21b265785c2f8a83392ee52e7aeffbc9d4370;hb=372fb21ea66ced432b109225851a1b80ef0491a3;hp=6053b6e47a1f16c1398c2c183cd40eef0aefeda0;hpb=55386dddad84cc1140d736cabaf4dd0ae16f2e01;p=mothur.git diff --git a/knn.cpp b/knn.cpp index 6053b6e..81b21b2 100644 --- a/knn.cpp +++ b/knn.cpp @@ -14,6 +14,7 @@ Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOp : Classify(), num(n), search(method) { try { threadID = tid; + shortcuts = true; //create search database and names vector generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch); @@ -72,11 +73,11 @@ string Knn::getTaxonomy(Sequence* seq) { } if (closestNames.size() == 0) { - m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); - tax = "bad seq"; + m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine(); + tax = "unknown;"; }else{ tax = findCommonTaxonomy(closestNames); - if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); tax = "bad seq"; } + if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; } } simpleTax = tax; @@ -90,7 +91,7 @@ string Knn::getTaxonomy(Sequence* seq) { /**************************************************************************************************/ string Knn::findCommonTaxonomy(vector closest) { try { - vector< vector > taxons; //taxon[0] = vector of taxonomy info for closest[0]. + /*vector< vector > taxons; //taxon[0] = vector of taxonomy info for closest[0]. //so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea; //taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria.... @@ -101,6 +102,7 @@ string Knn::findCommonTaxonomy(vector closest) { if (m->control_pressed) { return "control"; } string tax = taxonomy[closest[i]]; //we know its there since we checked in getTaxonomy + cout << tax << endl; taxons[i] = parseTax(tax); @@ -128,9 +130,54 @@ string Knn::findCommonTaxonomy(vector closest) { } break; } + }*/ + + string conTax; + + //create a tree containing sequences from this bin + PhyloTree* p = new PhyloTree(); + + for (int i = 0; i < closest.size(); i++) { + p->addSeqToTree(closest[i], taxonomy[closest[i]]); } - - return common; + + //build tree + p->assignHeirarchyIDs(0); + + TaxNode currentNode = p->get(0); + + //at each level + while (currentNode.children.size() != 0) { //you still have more to explore + + TaxNode bestChild; + int bestChildSize = 0; + + //go through children + for (map::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) { + + TaxNode temp = p->get(itChild->second); + + //select child with largest accessions - most seqs assigned to it + if (temp.accessions.size() > bestChildSize) { + bestChild = p->get(itChild->second); + bestChildSize = temp.accessions.size(); + } + + } + + if (bestChildSize == closest.size()) { //if yes, add it + conTax += bestChild.name + ";"; + }else{ //if no, quit + break; + } + + //move down a level + currentNode = bestChild; + } + + delete p; + + return conTax; } catch(exception& e) { m->errorOut(e, "Knn", "findCommonTaxonomy");