5 * Created by Pat Schloss on 12/16/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
13 #include "sequence.hpp"
15 #include "database.hpp"
18 /**************************************************************************************************/
20 KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) {
22 string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
23 ifstream kmerFileTest(kmerDBName.c_str());
25 int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
27 maxKmer = power4s[kmerSize];
28 kmerLocations.resize(maxKmer+1);
31 cout << "Generating the " << kmerDBName << " database...\t"; cout.flush();
32 generateKmerDB(kmerDBName);
35 cout << "Reading in the " << kmerDBName << " database...\t"; cout.flush();
36 readKmerDB(kmerDBName, kmerFileTest);
38 cout << "DONE." << endl << endl; cout.flush();
42 /**************************************************************************************************/
44 Sequence* KmerDB::findClosestSequence(Sequence* candidateSeq){
46 vector<int> matches(numSeqs, 0);
47 vector<int> timesKmerFound(kmerLocations.size()+1, 0);
52 string query = candidateSeq->getUnaligned();
54 int numKmers = query.length() - kmerSize + 1;
57 for(int i=0;i<numKmers;i++){
59 int kmerNumber = kmer.getKmerNumber(query, i);
61 if(timesKmerFound[kmerNumber] == 0){
62 for(int j=0;j<kmerLocations[kmerNumber].size();j++){
63 matches[kmerLocations[kmerNumber][j]]++;
66 timesKmerFound[kmerNumber] = 1;
69 for(int i=0;i<numSeqs;i++){
70 if(matches[i] > maxMatches){
71 maxMatches = matches[i];
75 return templateSequences[maxSequence];
79 /**************************************************************************************************/
81 void KmerDB::generateKmerDB(string kmerDBName){
86 for(int i=0;i<numSeqs;i++){
88 string seq = templateSequences[i]->getUnaligned();
89 int numKmers = seq.length() - kmerSize + 1;
91 for(int j=0;j<numKmers;j++){
92 int kmerNumber = kmer.getKmerNumber(seq, j);
93 kmerLocations[kmerNumber].push_back(i);
97 ofstream kmerFile(kmerDBName.c_str(), ios::trunc);
99 cerr << "Error: Could not open " << kmerDBName << endl;
103 for(int i=0;i<maxKmer;i++){
104 kmerFile << i << ' ' << kmerLocations[i].size();
105 for(int j=0;j<kmerLocations[i].size();j++){
106 kmerFile << ' ' << kmerLocations[i][j];
114 /**************************************************************************************************/
116 void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){
123 for(int i=0;i<numSeqs;i++){
125 kmerDBFile >> seqName >> numValues;
127 for(int j=0;j<numValues;j++){
128 kmerDBFile >> seqNumber;
129 kmerLocations[i].push_back(seqNumber);
135 /**************************************************************************************************/