]> git.donarmstrong.com Git - mothur.git/blob - kmerdb.cpp
added mothur.h and fixed includes in many files
[mothur.git] / kmerdb.cpp
1 /*
2  *  kmerdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 using namespace std;
11
12 #include "mothur.h"
13 #include "sequence.hpp"
14 #include "kmer.hpp"
15 #include "database.hpp"
16 #include "kmerdb.hpp"
17
18 /**************************************************************************************************/
19
20 KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) {
21
22         string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
23         ifstream kmerFileTest(kmerDBName.c_str());
24         
25         int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
26         
27         maxKmer = power4s[kmerSize];
28         kmerLocations.resize(maxKmer+1);
29         
30         if(!kmerFileTest){
31                 cout << "Generating the " << kmerDBName << " database...\t";    cout.flush();
32                 generateKmerDB(kmerDBName);     
33         }
34         else{
35                 cout << "Reading in the " << kmerDBName << " database...\t";    cout.flush();
36                 readKmerDB(kmerDBName, kmerFileTest);
37         }
38         cout << "DONE." << endl << endl;        cout.flush();
39
40 }
41
42 /**************************************************************************************************/
43
44 Sequence* KmerDB::findClosestSequence(Sequence* candidateSeq){
45         
46         vector<int> matches(numSeqs, 0);
47         vector<int> timesKmerFound(kmerLocations.size()+1, 0);
48         
49         int maxMatches = 0;
50         int maxSequence = 0;
51         
52         string query = candidateSeq->getUnaligned();
53         
54         int numKmers = query.length() - kmerSize + 1;
55         Kmer kmer(kmerSize);
56         
57         for(int i=0;i<numKmers;i++){
58                 
59                 int kmerNumber = kmer.getKmerNumber(query, i);
60                 
61                 if(timesKmerFound[kmerNumber] == 0){
62                         for(int j=0;j<kmerLocations[kmerNumber].size();j++){
63                                 matches[kmerLocations[kmerNumber][j]]++;
64                         }
65                 }
66                 timesKmerFound[kmerNumber] = 1;
67                 
68         }
69         for(int i=0;i<numSeqs;i++){
70                 if(matches[i] > maxMatches){
71                         maxMatches = matches[i];
72                         maxSequence = i;
73                 }
74         }
75         return templateSequences[maxSequence];
76         
77 }
78
79 /**************************************************************************************************/
80
81 void KmerDB::generateKmerDB(string kmerDBName){
82         
83         
84         Kmer kmer(kmerSize);
85         
86         for(int i=0;i<numSeqs;i++){
87
88                 string seq = templateSequences[i]->getUnaligned();
89                 int numKmers = seq.length() - kmerSize + 1;
90                 
91                 for(int j=0;j<numKmers;j++){
92                         int kmerNumber = kmer.getKmerNumber(seq, j);
93                         kmerLocations[kmerNumber].push_back(i);
94                 }
95         }
96         
97         ofstream kmerFile(kmerDBName.c_str(), ios::trunc);
98         if(!kmerFile) {
99                 cerr << "Error: Could not open " << kmerDBName << endl;
100                 exit(1);
101         }
102         
103         for(int i=0;i<maxKmer;i++){
104                 kmerFile << i << ' ' << kmerLocations[i].size();
105                 for(int j=0;j<kmerLocations[i].size();j++){
106                         kmerFile << ' ' << kmerLocations[i][j];
107                 }
108                 kmerFile << endl;
109         }
110         kmerFile.close();
111         
112 }
113
114 /**************************************************************************************************/
115
116 void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){
117
118         kmerDBFile.seekg(0);
119         
120         string seqName;
121         int seqNumber;
122         
123         for(int i=0;i<numSeqs;i++){
124                 int numValues;
125                 kmerDBFile >> seqName >> numValues;
126                 
127                 for(int j=0;j<numValues;j++){
128                         kmerDBFile >> seqNumber;
129                         kmerLocations[i].push_back(seqNumber);
130                 }
131         }
132         kmerDBFile.close();
133 }
134
135 /**************************************************************************************************/