#include "knn.h"
/**************************************************************************************************/
-Knn::Knn(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch, int n)
-: Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch), num(n) {}
+Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid)
+: Classify(), num(n), search(method) {
+ try {
+ threadID = tid;
+ shortcuts = true;
+
+ //create search database and names vector
+ generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch);
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Knn", "Knn");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+void Knn::setDistName(string s) {
+ try {
+ outDistName = s;
+ ofstream outDistance;
+ m->openOutputFile(outDistName, outDistance);
+ outDistance << "Name\tBestMatch\tDistance" << endl;
+ outDistance.close();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Knn", "setDistName");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+Knn::~Knn() {
+ try {
+ delete phyloTree;
+ if (database != NULL) { delete database; }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Knn", "~Knn");
+ exit(1);
+ }
+}
/**************************************************************************************************/
string Knn::getTaxonomy(Sequence* seq) {
try {
//use database to find closest seq
vector<int> closest = database->findClosestSequences(seq, num);
-
+
+ if (search == "distance") { ofstream outDistance; m->openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << database->getSearchScore() << endl; outDistance.close(); }
+
+ if (m->control_pressed) { return tax; }
+
vector<string> closestNames;
for (int i = 0; i < closest.size(); i++) {
//find that sequences taxonomy in map
//is this sequence in the taxonomy file
if (it == taxonomy.end()) { //error not in file
- mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + "."); mothurOutEndLine();
+ m->mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + "."); m->mothurOutEndLine();
}else{ closestNames.push_back(it->first); }
}
if (closestNames.size() == 0) {
- mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); mothurOutEndLine();
- tax = "bad seq";
+ m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine();
+ tax = "unknown;";
}else{
tax = findCommonTaxonomy(closestNames);
- if (tax == "") { mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); mothurOutEndLine(); tax = "bad seq"; }
+ if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; }
}
+ simpleTax = tax;
return tax;
}
catch(exception& e) {
- errorOut(e, "Knn", "getTaxonomy");
+ m->errorOut(e, "Knn", "getTaxonomy");
exit(1);
}
}
/**************************************************************************************************/
string Knn::findCommonTaxonomy(vector<string> closest) {
try {
- vector< vector<string> > taxons; //taxon[0] = vector of taxonomy info for closest[0].
+ /*vector< vector<string> > taxons; //taxon[0] = vector of taxonomy info for closest[0].
//so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
//taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria....
int smallest = 100;
for (int i = 0; i < closest.size(); i++) {
+ if (m->control_pressed) { return "control"; }
string tax = taxonomy[closest[i]]; //we know its there since we checked in getTaxonomy
+ cout << tax << endl;
- tax = tax.substr(0, tax.length()-1); //get rid of last ';'
-
- //parse taxonomy
- string individual;
- while (tax.find_first_of(';') != -1) {
- individual = tax.substr(0,tax.find_first_of(';'));
- tax = tax.substr(tax.find_first_of(';')+1, tax.length());
- taxons[i].push_back(individual);
-
- }
- //get last one
- taxons[i].push_back(tax);
+ taxons[i] = parseTax(tax);
//figure out who has the shortest taxonomy info. so you can start comparing there
if (taxons[i].size() < smallest) {
//start at the highest level all the closest seqs have
string common = "";
for (int i = (smallest-1); i >= 0; i--) {
+ if (m->control_pressed) { return "control"; }
string thistax = taxons[0][i];
int num = 0;
}
break;
}
+ }*/
+
+ string conTax;
+
+ //create a tree containing sequences from this bin
+ PhyloTree* p = new PhyloTree();
+
+ for (int i = 0; i < closest.size(); i++) {
+ p->addSeqToTree(closest[i], taxonomy[closest[i]]);
}
-
- return common;
+
+ //build tree
+ p->assignHeirarchyIDs(0);
+
+ TaxNode currentNode = p->get(0);
+
+ //at each level
+ while (currentNode.children.size() != 0) { //you still have more to explore
+
+ TaxNode bestChild;
+ int bestChildSize = 0;
+
+ //go through children
+ for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
+
+ TaxNode temp = p->get(itChild->second);
+
+ //select child with largest accessions - most seqs assigned to it
+ if (temp.accessions.size() > bestChildSize) {
+ bestChild = p->get(itChild->second);
+ bestChildSize = temp.accessions.size();
+ }
+
+ }
+
+ if (bestChildSize == closest.size()) { //if yes, add it
+ conTax += bestChild.name + ";";
+ }else{ //if no, quit
+ break;
+ }
+
+ //move down a level
+ currentNode = bestChild;
+ }
+
+ delete p;
+
+ return conTax;
}
catch(exception& e) {
- errorOut(e, "Knn", "findCommonTaxonomy");
+ m->errorOut(e, "Knn", "findCommonTaxonomy");
exit(1);
}
}