X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=kmerdb.cpp;h=33761a8416524fa3a0fd10b7136ea5a097d3fd08;hb=725a3d4ff2442c79bfde0a75ed3e0904edcf03b7;hp=40cc1f6612504aef61f24dff4830611fa770727a;hpb=02909d6cae9963ba00dc746969a370fa8ca934fc;p=mothur.git diff --git a/kmerdb.cpp b/kmerdb.cpp index 40cc1f6..33761a8 100644 --- a/kmerdb.cpp +++ b/kmerdb.cpp @@ -29,124 +29,186 @@ /**************************************************************************************************/ -KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) { - - string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); - - int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; +KmerDB::KmerDB(string fastaFileName, int kSize) : Database(), kmerSize(kSize) { + try { - maxKmer = power4s[kmerSize]; - kmerLocations.resize(maxKmer+1); - - if(!kmerFileTest){ // if we can open the kmer db file, then read it in... - cout << "Generating the " << kmerDBName << " database...\t"; cout.flush(); - generateKmerDB(kmerDBName); - } - else{ // ...otherwise generate it. - cout << "Reading in the " << kmerDBName << " database...\t"; cout.flush(); - readKmerDB(kmerDBName, kmerFileTest); + kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; + + int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; + count = 0; + + maxKmer = power4s[kmerSize]; + kmerLocations.resize(maxKmer+1); + } - cout << "DONE." << endl << endl; cout.flush(); + catch(exception& e) { + m->errorOut(e, "KmerDB", "KmerDB"); + exit(1); + } } /**************************************************************************************************/ +KmerDB::KmerDB() : Database() {} +/**************************************************************************************************/ -KmerDB::~KmerDB(){ - - //for (int i = 0; i < templateSequences.size(); i++) { delete templateSequences[i]; } - // templateSequences.clear(); -} +KmerDB::~KmerDB(){} /**************************************************************************************************/ -Sequence KmerDB::findClosestSequence(Sequence* candidateSeq){ - - Kmer kmer(kmerSize); +vector KmerDB::findClosestSequences(Sequence* candidateSeq, int num){ + try { + vector topMatches; + Kmer kmer(kmerSize); + searchScore = 0; + + vector matches(numSeqs, 0); // a record of the sequences with shared kmers + vector timesKmerFound(kmerLocations.size()+1, 0); // a record of the kmers that we have already found + + int numKmers = candidateSeq->getNumBases() - kmerSize + 1; - searchScore = 0; - int maxSequence = 0; - - vector matches(numSeqs, 0); // a record of the sequences with shared kmers - vector timesKmerFound(kmerLocations.size()+1, 0); // a record of the kmers that we have already found - - int numKmers = candidateSeq->getNumBases() - kmerSize + 1; - - for(int i=0;igetUnaligned(), i); // go through the query sequence and get a kmer number - if(timesKmerFound[kmerNumber] == 0){ // if we haven't seen it before... - for(int j=0;jgetUnaligned(), i); // go through the query sequence and get a kmer number + if(timesKmerFound[kmerNumber] == 0){ // if we haven't seen it before... + for(int j=0;j searchScore){ // the query sequence - searchScore = matches[i]; - maxSequence = i; + + vector seqMatches; + for(int i=0;ierrorOut(e, "KmerDB", "findClosestSequences"); + exit(1); + } } /**************************************************************************************************/ -void KmerDB::generateKmerDB(string kmerDBName){ - - Kmer kmer(kmerSize); +void KmerDB::generateDB(){ + try { + + ofstream kmerFile; // once we have the kmerLocations folder print it out + openOutputFile(kmerDBName, kmerFile); // to a file + + for(int i=0;ierrorOut(e, "KmerDB", "generateDB"); + exit(1); + } - for(int i=0;i seenBefore(maxKmer+1,0); for(int j=0;jerrorOut(e, "KmerDB", "addSequence"); + exit(1); + } } - /**************************************************************************************************/ -void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){ +void KmerDB::readKmerDB(ifstream& kmerDBFile){ + try { + + kmerDBFile.seekg(0); // start at the beginning of the file + + string seqName; + int seqNumber; + + for(int i=0;i> seqName >> numValues; + + for(int j=0;j> seqNumber; // 1. number of sequences with the kmer number + kmerLocations[i].push_back(seqNumber); // 2. sequence indices + } + } + kmerDBFile.close(); + + } + catch(exception& e) { + m->errorOut(e, "KmerDB", "readKmerDB"); + exit(1); + } +} - kmerDBFile.seekg(0); // start at the beginning of the file - - string seqName; - int seqNumber; +/**************************************************************************************************/ +int KmerDB::getCount(int kmer) { + try { + if (kmer < 0) { return 0; } //if user gives negative number + else if (kmer > maxKmer) { return 0; } //or a kmer that is bigger than maxkmer + else { return kmerLocations[kmer].size(); } // kmer is in vector range + } + catch(exception& e) { + m->errorOut(e, "KmerDB", "getCount"); + exit(1); + } +} +/**************************************************************************************************/ +vector KmerDB::getSequencesWithKmer(int kmer) { + try { + + vector seqs; - for(int i=0;i> seqName >> numValues; + if (kmer < 0) { } //if user gives negative number + else if (kmer > maxKmer) { } //or a kmer that is bigger than maxkmer + else { seqs = kmerLocations[kmer]; } - for(int j=0;j> seqNumber; // 1. number of sequences with the kmer number - kmerLocations[i].push_back(seqNumber); // 2. sequence indices - } + return seqs; } - kmerDBFile.close(); + catch(exception& e) { + m->errorOut(e, "KmerDB", "getSequencesWithKmer"); + exit(1); + } } +/**************************************************************************************************/ + /**************************************************************************************************/ + +