5 * Created by Pat Schloss on 12/16/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
17 #include "sequence.hpp"
19 #include "database.hpp"
22 /**************************************************************************************************/
24 KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) {
26 string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
27 ifstream kmerFileTest(kmerDBName.c_str());
29 int power4s[9] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536 };
31 maxKmer = power4s[kmerSize];
32 kmerLocations.resize(maxKmer+1);
35 cout << "Generating the " << kmerDBName << " database...\t"; cout.flush();
36 generateKmerDB(kmerDBName);
39 cout << "Reading in the " << kmerDBName << " database...\t"; cout.flush();
40 readKmerDB(kmerDBName, kmerFileTest);
42 cout << "DONE." << endl << endl; cout.flush();
46 /**************************************************************************************************/
48 Sequence* KmerDB::findClosestSequence(Sequence* candidateSeq){
50 vector<int> matches(numSeqs, 0);
51 vector<int> timesKmerFound(kmerLocations.size()+1, 0);
56 string query = candidateSeq->getUnaligned();
58 int numKmers = query.length() - kmerSize + 1;
61 for(int i=0;i<numKmers;i++){
63 int kmerNumber = kmer.getKmerNumber(query, i);
65 if(timesKmerFound[kmerNumber] == 0){
66 for(int j=0;j<kmerLocations[kmerNumber].size();j++){
67 matches[kmerLocations[kmerNumber][j]]++;
70 timesKmerFound[kmerNumber] = 1;
73 for(int i=0;i<numSeqs;i++){
74 if(matches[i] > maxMatches){
75 maxMatches = matches[i];
79 return templateSequences[maxSequence];
83 /**************************************************************************************************/
85 void KmerDB::generateKmerDB(string kmerDBName){
90 for(int i=0;i<numSeqs;i++){
92 string seq = templateSequences[i]->getUnaligned();
93 int numKmers = seq.length() - kmerSize + 1;
95 for(int j=0;j<numKmers;j++){
96 int kmerNumber = kmer.getKmerNumber(seq, j);
97 kmerLocations[kmerNumber].push_back(i);
101 ofstream kmerFile(kmerDBName.c_str(), ios::trunc);
103 cerr << "Error: Could not open " << kmerDBName << endl;
107 for(int i=0;i<maxKmer;i++){
108 kmerFile << i << ' ' << kmerLocations[i].size();
109 for(int j=0;j<kmerLocations[i].size();j++){
110 kmerFile << ' ' << kmerLocations[i][j];
118 /**************************************************************************************************/
120 void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){
127 for(int i=0;i<numSeqs;i++){
129 kmerDBFile >> seqName >> numValues;
131 for(int j=0;j<numValues;j++){
132 kmerDBFile >> seqNumber;
133 kmerLocations[i].push_back(seqNumber);
139 /**************************************************************************************************/