5 * Created by westcott on 11/4/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
12 /**************************************************************************************************/
13 Knn::Knn(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch, int n)
14 : Classify(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch), num(n) {}
15 /**************************************************************************************************/
16 string Knn::getTaxonomy(Sequence* seq) {
20 //use database to find closest seq
21 vector<int> closest = database->findClosestSequences(seq, num);
23 vector<string> closestNames;
24 for (int i = 0; i < closest.size(); i++) {
25 //find that sequences taxonomy in map
26 it = taxonomy.find(names[closest[i]]);
28 //is this sequence in the taxonomy file
29 if (it == taxonomy.end()) { //error not in file
30 mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file. It will be eliminated as a match to sequence " + seq->getName() + "."); mothurOutEndLine();
31 }else{ closestNames.push_back(it->first); }
34 if (closestNames.size() == 0) {
35 mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); mothurOutEndLine();
38 tax = findCommonTaxonomy(closestNames);
39 if (tax == "") { mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); mothurOutEndLine(); tax = "bad seq"; }
45 errorOut(e, "Knn", "getTaxonomy");
49 /**************************************************************************************************/
50 string Knn::findCommonTaxonomy(vector<string> closest) {
52 vector< vector<string> > taxons; //taxon[0] = vector of taxonomy info for closest[0].
53 //so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
54 //taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria....
56 taxons.resize(closest.size());
59 for (int i = 0; i < closest.size(); i++) {
61 string tax = taxonomy[closest[i]]; //we know its there since we checked in getTaxonomy
63 tax = tax.substr(0, tax.length()-1); //get rid of last ';'
67 while (tax.find_first_of(';') != -1) {
68 individual = tax.substr(0,tax.find_first_of(';'));
69 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
70 taxons[i].push_back(individual);
74 taxons[i].push_back(tax);
76 //figure out who has the shortest taxonomy info. so you can start comparing there
77 if (taxons[i].size() < smallest) {
78 smallest = taxons[i].size();
82 //start at the highest level all the closest seqs have
84 for (int i = (smallest-1); i >= 0; i--) {
86 string thistax = taxons[0][i];
88 for (int j = 1; j < taxons.size(); j++) {
89 if (taxons[j][i] != thistax) { break; }
93 if (num == (taxons.size()-1)) { //they all match at this level
94 for (int k = 0; k <= i; k++) {
95 common += taxons[0][k] + ';';
103 catch(exception& e) {
104 errorOut(e, "Knn", "findCommonTaxonomy");
108 /**************************************************************************************************/