]> git.donarmstrong.com Git - mothur.git/blob - knn.cpp
moved utilities out of mothur.h and into mothurOut class.
[mothur.git] / knn.cpp
1 /*
2  *  knn.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/4/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "knn.h"
11
12 /**************************************************************************************************/
13 Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n) 
14 : Classify(), num(n), search(method) {
15         try {
16                 //create search database and names vector
17                 generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch);
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "Knn", "Knn");
21                 exit(1);
22         }
23 }
24 /**************************************************************************************************/
25 void Knn::setDistName(string s) {
26         try {
27                 outDistName = s;
28                 ofstream outDistance;
29                 m->openOutputFile(outDistName, outDistance);
30                 outDistance << "Name\tBestMatch\tDistance" << endl;
31                 outDistance.close();
32         }
33         catch(exception& e) {
34                 m->errorOut(e, "Knn", "setDistName");
35                 exit(1);
36         }
37 }
38 /**************************************************************************************************/
39 Knn::~Knn() {
40         try {
41                  delete phyloTree; 
42                  if (database != NULL) {  delete database; }
43         }
44         catch(exception& e) {
45                 m->errorOut(e, "Knn", "~Knn");
46                 exit(1);
47         }
48 }
49 /**************************************************************************************************/
50 string Knn::getTaxonomy(Sequence* seq) {
51         try {
52                 string tax;
53                 
54                 //use database to find closest seq
55                 vector<int> closest = database->findClosestSequences(seq, num);
56         
57                 if (search == "distance") { ofstream outDistance; m->openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << database->getSearchScore() << endl; outDistance.close();  }
58         
59                 if (m->control_pressed) { return tax; }
60
61                 vector<string> closestNames;
62                 for (int i = 0; i < closest.size(); i++) {
63                         //find that sequences taxonomy in map
64                         it = taxonomy.find(names[closest[i]]);
65                 
66                         //is this sequence in the taxonomy file
67                         if (it == taxonomy.end()) { //error not in file
68                                 m->mothurOut("Error: sequence " + names[closest[i]] + " is not in the taxonomy file.  It will be eliminated as a match to sequence " + seq->getName() + "."); m->mothurOutEndLine();
69                         }else{   closestNames.push_back(it->first);     }
70                 }
71                 
72                 if (closestNames.size() == 0) {
73                         m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); m->mothurOutEndLine();
74                         tax = "bad seq";
75                 }else{
76                         tax = findCommonTaxonomy(closestNames);
77                         if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); tax = "bad seq"; }
78                 }
79                 
80                 simpleTax = tax;
81                 return tax;     
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "Knn", "getTaxonomy");
85                 exit(1);
86         }
87 }
88 /**************************************************************************************************/
89 string Knn::findCommonTaxonomy(vector<string> closest)  {
90         try {
91                 vector< vector<string> > taxons;  //taxon[0] = vector of taxonomy info for closest[0].
92                                                                                 //so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
93                                                                                 //taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria....
94                                                                                 
95                 taxons.resize(closest.size());
96                 int smallest = 100;
97                 
98                 for (int i = 0; i < closest.size(); i++) {
99                         if (m->control_pressed) { return "control"; }
100                 
101                         string tax = taxonomy[closest[i]];  //we know its there since we checked in getTaxonomy
102                 
103                         taxons[i] = parseTax(tax);
104                 
105                         //figure out who has the shortest taxonomy info. so you can start comparing there
106                         if (taxons[i].size() < smallest) {
107                                 smallest = taxons[i].size();
108                         }
109                 }
110         
111                 //start at the highest level all the closest seqs have
112                 string common = "";
113                 for (int i = (smallest-1); i >= 0; i--) {
114                         if (m->control_pressed) { return "control"; }
115
116                         string thistax = taxons[0][i];
117                         int num = 0;
118                         for (int j = 1; j < taxons.size(); j++) {
119                                 if (taxons[j][i] != thistax) { break; }
120                                 num = j;
121                         }
122                 
123                         if (num == (taxons.size()-1)) { //they all match at this level
124                                 for (int k = 0; k <= i; k++) {
125                                         common += taxons[0][k] + ';';
126                                 }
127                                 break;
128                         }
129                 }
130         
131                 return common;
132         }
133         catch(exception& e) {
134                 m->errorOut(e, "Knn", "findCommonTaxonomy");
135                 exit(1);
136         }
137 }
138 /**************************************************************************************************/
139