X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=knn.cpp;h=81b21b265785c2f8a83392ee52e7aeffbc9d4370;hp=706d2435117513d4a7149e0cc2a670e0e18dc7bb;hb=1a20e24ee786195ab0e1cccd4f5aede7a88f3f4e;hpb=ef3f6d42fe720cd6d91419e5e32f8c04d8765010 diff --git a/knn.cpp b/knn.cpp index 706d243..81b21b2 100644 --- a/knn.cpp +++ b/knn.cpp @@ -10,16 +10,56 @@ #include "knn.h" /**************************************************************************************************/ -Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n) -: Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch), num(n) {} +Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid) +: Classify(), num(n), search(method) { + try { + threadID = tid; + shortcuts = true; + + //create search database and names vector + generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch); + } + catch(exception& e) { + m->errorOut(e, "Knn", "Knn"); + exit(1); + } +} +/**************************************************************************************************/ +void Knn::setDistName(string s) { + try { + outDistName = s; + ofstream outDistance; + m->openOutputFile(outDistName, outDistance); + outDistance << "Name\tBestMatch\tDistance" << endl; + outDistance.close(); + } + catch(exception& e) { + m->errorOut(e, "Knn", "setDistName"); + exit(1); + } +} +/**************************************************************************************************/ +Knn::~Knn() { + try { + delete phyloTree; + if (database != NULL) { delete database; } + } + catch(exception& e) { + m->errorOut(e, "Knn", "~Knn"); + exit(1); + } +} /**************************************************************************************************/ string Knn::getTaxonomy(Sequence* seq) { try { string tax; //use database to find closest seq - vector closest = database->findClosestSequences(seq, num); + + if (search == "distance") { ofstream outDistance; m->openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << database->getSearchScore() << endl; outDistance.close(); } + + if (m->control_pressed) { return tax; } vector closestNames; for (int i = 0; i < closest.size(); i++) { @@ -28,30 +68,30 @@ string Knn::getTaxonomy(Sequence* seq) { //is this sequence in the taxonomy file if (it == taxonomy.end()) { //error not in file - mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + "."); mothurOutEndLine(); + m->mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + "."); m->mothurOutEndLine(); }else{ closestNames.push_back(it->first); } } if (closestNames.size() == 0) { - mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); mothurOutEndLine(); - tax = "bad seq"; + m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine(); + tax = "unknown;"; }else{ tax = findCommonTaxonomy(closestNames); - if (tax == "") { mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); mothurOutEndLine(); tax = "bad seq"; } + if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; } } simpleTax = tax; return tax; } catch(exception& e) { - errorOut(e, "Knn", "getTaxonomy"); + m->errorOut(e, "Knn", "getTaxonomy"); exit(1); } } /**************************************************************************************************/ string Knn::findCommonTaxonomy(vector closest) { try { - vector< vector > taxons; //taxon[0] = vector of taxonomy info for closest[0]. + /*vector< vector > taxons; //taxon[0] = vector of taxonomy info for closest[0]. //so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea; //taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria.... @@ -59,8 +99,10 @@ string Knn::findCommonTaxonomy(vector closest) { int smallest = 100; for (int i = 0; i < closest.size(); i++) { + if (m->control_pressed) { return "control"; } string tax = taxonomy[closest[i]]; //we know its there since we checked in getTaxonomy + cout << tax << endl; taxons[i] = parseTax(tax); @@ -73,6 +115,7 @@ string Knn::findCommonTaxonomy(vector closest) { //start at the highest level all the closest seqs have string common = ""; for (int i = (smallest-1); i >= 0; i--) { + if (m->control_pressed) { return "control"; } string thistax = taxons[0][i]; int num = 0; @@ -87,12 +130,57 @@ string Knn::findCommonTaxonomy(vector closest) { } break; } + }*/ + + string conTax; + + //create a tree containing sequences from this bin + PhyloTree* p = new PhyloTree(); + + for (int i = 0; i < closest.size(); i++) { + p->addSeqToTree(closest[i], taxonomy[closest[i]]); } - - return common; + + //build tree + p->assignHeirarchyIDs(0); + + TaxNode currentNode = p->get(0); + + //at each level + while (currentNode.children.size() != 0) { //you still have more to explore + + TaxNode bestChild; + int bestChildSize = 0; + + //go through children + for (map::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) { + + TaxNode temp = p->get(itChild->second); + + //select child with largest accessions - most seqs assigned to it + if (temp.accessions.size() > bestChildSize) { + bestChild = p->get(itChild->second); + bestChildSize = temp.accessions.size(); + } + + } + + if (bestChildSize == closest.size()) { //if yes, add it + conTax += bestChild.name + ";"; + }else{ //if no, quit + break; + } + + //move down a level + currentNode = bestChild; + } + + delete p; + + return conTax; } catch(exception& e) { - errorOut(e, "Knn", "findCommonTaxonomy"); + m->errorOut(e, "Knn", "findCommonTaxonomy"); exit(1); } }