X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=kmerdb.cpp;h=9a5d23500be955b459413beee989601f6c0ec78f;hp=86dba09e64b42bb6480b4e5a2d5b99ab88df8303;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=3abb236c602eb168ee112f080b563ebe2c705029 diff --git a/kmerdb.cpp b/kmerdb.cpp index 86dba09..9a5d235 100644 --- a/kmerdb.cpp +++ b/kmerdb.cpp @@ -29,57 +29,46 @@ /**************************************************************************************************/ -KmerDB::KmerDB(string fastaFileName, int kSize) : Database(fastaFileName), kmerSize(kSize) { +KmerDB::KmerDB(string fastaFileName, int kSize) : Database(), kmerSize(kSize) { try { - string kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); + kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; int power4s[14] = { 1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216, 67108864 }; + count = 0; maxKmer = power4s[kmerSize]; kmerLocations.resize(maxKmer+1); - if(!kmerFileTest){ // if we can open the kmer db file, then read it in... - mothurOut("Generating the " + kmerDBName + " database...\t"); cout.flush(); - generateKmerDB(kmerDBName); - } - else{ // ...otherwise generate it. - mothurOut("Reading in the " + kmerDBName + " database...\t"); cout.flush(); - readKmerDB(kmerDBName, kmerFileTest); - } - mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush(); - } catch(exception& e) { - errorOut(e, "KmerDB", "KmerDB"); + m->errorOut(e, "KmerDB", "KmerDB"); exit(1); } } /**************************************************************************************************/ +KmerDB::KmerDB() : Database() {} +/**************************************************************************************************/ -KmerDB::~KmerDB(){ - - //for (int i = 0; i < templateSequences.size(); i++) { delete templateSequences[i]; } - // templateSequences.clear(); -} +KmerDB::~KmerDB(){} /**************************************************************************************************/ -Sequence KmerDB::findClosestSequence(Sequence* candidateSeq){ +vector KmerDB::findClosestSequences(Sequence* candidateSeq, int num){ try { - - Kmer kmer(kmerSize); + if (num > numSeqs) { m->mothurOut("[WARNING]: you requested " + toString(num) + " closest sequences, but the template only contains " + toString(numSeqs) + ", adjusting."); m->mothurOutEndLine(); num = numSeqs; } + vector topMatches; + Kmer kmer(kmerSize); searchScore = 0; - int maxSequence = 0; + Scores.clear(); vector matches(numSeqs, 0); // a record of the sequences with shared kmers vector timesKmerFound(kmerLocations.size()+1, 0); // a record of the kmers that we have already found int numKmers = candidateSeq->getNumBases() - kmerSize + 1; - + for(int i=0;igetUnaligned(), i); // go through the query sequence and get a kmer number if(timesKmerFound[kmerNumber] == 0){ // if we haven't seen it before... @@ -90,48 +79,59 @@ Sequence KmerDB::findClosestSequence(Sequence* candidateSeq){ timesKmerFound[kmerNumber] = 1; // ok, we've seen the kmer now } - for(int i=0;i searchScore){ // the query sequence - searchScore = matches[i]; - maxSequence = i; + if (num != 1) { + vector seqMatches; seqMatches.resize(numSeqs); + for(int i=0;i bestMatch) { + bestIndex = i; + bestMatch = matches[i]; + } + } + + searchScore = bestMatch; + searchScore = 100 * searchScore / (float) numKmers; // return the Sequence object corresponding to the db + topMatches.push_back(bestIndex); + Scores.push_back(searchScore); } - - searchScore = 100 * searchScore / (float) numKmers; // return the Sequence object corresponding to the db - - return templateSequences[maxSequence]; // sequence with the most shared kmers. - + return topMatches; } catch(exception& e) { - errorOut(e, "KmerDB", "findClosestSequence"); + m->errorOut(e, "KmerDB", "findClosestSequences"); exit(1); } } /**************************************************************************************************/ -void KmerDB::generateKmerDB(string kmerDBName){ +void KmerDB::generateDB(){ try { - - Kmer kmer(kmerSize); - - for(int i=0;i seenBefore(maxKmer+1,0); - for(int j=0;jopenOutputFile(kmerDBName, kmerFile); // to a file + + //output version + kmerFile << "#" << m->getVersion() << endl; for(int i=0;ierrorOut(e, "KmerDB", "generateDB"); exit(1); } } - /**************************************************************************************************/ - -void KmerDB::readKmerDB(string kmerDBName, ifstream& kmerDBFile){ +void KmerDB::addSequence(Sequence seq) { try { + Kmer kmer(kmerSize); + + string unaligned = seq.getUnaligned(); // ...take the unaligned sequence... + int numKmers = unaligned.length() - kmerSize + 1; + + vector seenBefore(maxKmer+1,0); + for(int j=0;jerrorOut(e, "KmerDB", "addSequence"); + exit(1); + } +} +/**************************************************************************************************/ + +void KmerDB::readKmerDB(ifstream& kmerDBFile){ + try { + kmerDBFile.seekg(0); // start at the beginning of the file + //read version + string line = m->getline(kmerDBFile); m->gobble(kmerDBFile); + string seqName; int seqNumber; - + for(int i=0;i> seqName >> numValues; for(int j=0;jerrorOut(e, "KmerDB", "readKmerDB"); exit(1); } } /**************************************************************************************************/ +int KmerDB::getCount(int kmer) { + try { + if (kmer < 0) { return 0; } //if user gives negative number + else if (kmer > maxKmer) { return 0; } //or a kmer that is bigger than maxkmer + else { return kmerLocations[kmer].size(); } // kmer is in vector range + } + catch(exception& e) { + m->errorOut(e, "KmerDB", "getCount"); + exit(1); + } +} +/**************************************************************************************************/ +int KmerDB::getReversed(int kmerNumber) { + try { + Kmer kmer(kmerSize); + + if (kmerNumber < 0) { return 0; } //if user gives negative number + else if (kmerNumber > maxKmer) { return 0; } //or a kmer that is bigger than maxkmer + else { return kmer.getReverseKmerNumber(kmerNumber); } // kmer is in vector range + } + catch(exception& e) { + m->errorOut(e, "KmerDB", "getReversed"); + exit(1); + } +} +/**************************************************************************************************/ +vector KmerDB::getSequencesWithKmer(int kmer) { + try { + + vector seqs; + + if (kmer < 0) { } //if user gives negative number + else if (kmer > maxKmer) { } //or a kmer that is bigger than maxkmer + else { seqs = kmerLocations[kmer]; } + + return seqs; + } + catch(exception& e) { + m->errorOut(e, "KmerDB", "getSequencesWithKmer"); + exit(1); + } +} +/**************************************************************************************************/ + + +/**************************************************************************************************/ + +