X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=knn.cpp;h=81b21b265785c2f8a83392ee52e7aeffbc9d4370;hp=64b5b3a288db8ce2f1d6da7e08af90bd8e26e613;hb=df7e3ff9f68ef157b0328a2d353c3258c5d45d89;hpb=6f4b9401f7deb8aaf0d87659298308f4138cc3b0 diff --git a/knn.cpp b/knn.cpp index 64b5b3a..81b21b2 100644 --- a/knn.cpp +++ b/knn.cpp @@ -10,9 +10,12 @@ #include "knn.h" /**************************************************************************************************/ -Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n) -: Classify(), num(n) { +Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid) +: Classify(), num(n), search(method) { try { + threadID = tid; + shortcuts = true; + //create search database and names vector generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch); } @@ -22,13 +25,40 @@ Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOp } } /**************************************************************************************************/ +void Knn::setDistName(string s) { + try { + outDistName = s; + ofstream outDistance; + m->openOutputFile(outDistName, outDistance); + outDistance << "Name\tBestMatch\tDistance" << endl; + outDistance.close(); + } + catch(exception& e) { + m->errorOut(e, "Knn", "setDistName"); + exit(1); + } +} +/**************************************************************************************************/ +Knn::~Knn() { + try { + delete phyloTree; + if (database != NULL) { delete database; } + } + catch(exception& e) { + m->errorOut(e, "Knn", "~Knn"); + exit(1); + } +} +/**************************************************************************************************/ string Knn::getTaxonomy(Sequence* seq) { try { string tax; //use database to find closest seq vector closest = database->findClosestSequences(seq, num); - + + if (search == "distance") { ofstream outDistance; m->openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << database->getSearchScore() << endl; outDistance.close(); } + if (m->control_pressed) { return tax; } vector closestNames; @@ -43,11 +73,11 @@ string Knn::getTaxonomy(Sequence* seq) { } if (closestNames.size() == 0) { - m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); - tax = "bad seq"; + m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine(); + tax = "unknown;"; }else{ tax = findCommonTaxonomy(closestNames); - if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); tax = "bad seq"; } + if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; } } simpleTax = tax; @@ -61,7 +91,7 @@ string Knn::getTaxonomy(Sequence* seq) { /**************************************************************************************************/ string Knn::findCommonTaxonomy(vector closest) { try { - vector< vector > taxons; //taxon[0] = vector of taxonomy info for closest[0]. + /*vector< vector > taxons; //taxon[0] = vector of taxonomy info for closest[0]. //so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea; //taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria.... @@ -72,6 +102,7 @@ string Knn::findCommonTaxonomy(vector closest) { if (m->control_pressed) { return "control"; } string tax = taxonomy[closest[i]]; //we know its there since we checked in getTaxonomy + cout << tax << endl; taxons[i] = parseTax(tax); @@ -99,9 +130,54 @@ string Knn::findCommonTaxonomy(vector closest) { } break; } + }*/ + + string conTax; + + //create a tree containing sequences from this bin + PhyloTree* p = new PhyloTree(); + + for (int i = 0; i < closest.size(); i++) { + p->addSeqToTree(closest[i], taxonomy[closest[i]]); } - - return common; + + //build tree + p->assignHeirarchyIDs(0); + + TaxNode currentNode = p->get(0); + + //at each level + while (currentNode.children.size() != 0) { //you still have more to explore + + TaxNode bestChild; + int bestChildSize = 0; + + //go through children + for (map::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) { + + TaxNode temp = p->get(itChild->second); + + //select child with largest accessions - most seqs assigned to it + if (temp.accessions.size() > bestChildSize) { + bestChild = p->get(itChild->second); + bestChildSize = temp.accessions.size(); + } + + } + + if (bestChildSize == closest.size()) { //if yes, add it + conTax += bestChild.name + ";"; + }else{ //if no, quit + break; + } + + //move down a level + currentNode = bestChild; + } + + delete p; + + return conTax; } catch(exception& e) { m->errorOut(e, "Knn", "findCommonTaxonomy");