]> git.donarmstrong.com Git - mothur.git/blobdiff - kmerdb.cpp
fixes while testing 1.33.0
[mothur.git] / kmerdb.cpp
index bf8679e57a932af0567df8a456f038b76fbf0fae..9a5d23500be955b459413beee989601f6c0ec78f 100644 (file)
@@ -42,12 +42,14 @@ KmerDB::KmerDB(string fastaFileName, int kSize) : Database(), kmerSize(kSize) {
                
        }
        catch(exception& e) {
-               errorOut(e, "KmerDB", "KmerDB");
+               m->errorOut(e, "KmerDB", "KmerDB");
                exit(1);
        }       
 
 }
 /**************************************************************************************************/
+KmerDB::KmerDB() : Database() {}
+/**************************************************************************************************/
 
 KmerDB::~KmerDB(){}
 
@@ -55,9 +57,12 @@ KmerDB::~KmerDB(){}
 
 vector<int> KmerDB::findClosestSequences(Sequence* candidateSeq, int num){
        try {
+               if (num > numSeqs) { m->mothurOut("[WARNING]: you requested " + toString(num) + " closest sequences, but the template only contains " + toString(numSeqs) + ", adjusting."); m->mothurOutEndLine(); num = numSeqs; }
+               
                vector<int> topMatches;
                Kmer kmer(kmerSize);
                searchScore = 0;
+               Scores.clear();
                
                vector<int> matches(numSeqs, 0);                                                //      a record of the sequences with shared kmers
                vector<int> timesKmerFound(kmerLocations.size()+1, 0);  //      a record of the kmers that we have already found
@@ -73,28 +78,46 @@ vector<int> KmerDB::findClosestSequences(Sequence* candidateSeq, int num){
                        }
                        timesKmerFound[kmerNumber] = 1;                                         //      ok, we've seen the kmer now
                }
-       
-               vector<seqMatch> seqMatches;
-               for(int i=0;i<numSeqs;i++){             
-                       seqMatch temp(i, matches[i]);
-                       seqMatches.push_back(temp);
-               }
                
-               //sorts putting largest matches first
-               sort(seqMatches.begin(), seqMatches.end(), compareSeqMatches);
+               if (num != 1) {
+                       vector<seqMatch> seqMatches; seqMatches.resize(numSeqs);
+                       for(int i=0;i<numSeqs;i++){             
+                               seqMatches[i].seq = i;
+                               seqMatches[i].match = matches[i];
+                       }
+                       
+                       //sorts putting largest matches first
+                       sort(seqMatches.begin(), seqMatches.end(), compareSeqMatches);
+                       
+                       searchScore = seqMatches[0].match;
+                       searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
                
-               searchScore = seqMatches[0].match;
-               searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
-       
-               //save top matches
-               for (int i = 0; i < num; i++) {
-                       topMatches.push_back(seqMatches[i].seq);
+                       //save top matches
+                       for (int i = 0; i < num; i++) {
+                               topMatches.push_back(seqMatches[i].seq);
+                               float thisScore = 100 * seqMatches[i].match / (float) numKmers;
+                               Scores.push_back(thisScore);
+                       }
+               }else{
+                       int bestIndex = 0;
+                       int bestMatch = -1;
+                       for(int i=0;i<numSeqs;i++){     
+                               
+                               if (matches[i] > bestMatch) {
+                                       bestIndex = i;
+                                       bestMatch = matches[i];
+                               }
+                       }
+                       
+                       searchScore = bestMatch;
+                       searchScore = 100 * searchScore / (float) numKmers;             //      return the Sequence object corresponding to the db
+                       topMatches.push_back(bestIndex);
+                       Scores.push_back(searchScore);
                }
-               
                return topMatches;              
        }
        catch(exception& e) {
-               errorOut(e, "KmerDB", "findClosestSequences");
+               m->errorOut(e, "KmerDB", "findClosestSequences");
                exit(1);
        }       
 }
@@ -105,7 +128,10 @@ void KmerDB::generateDB(){
        try {
                
                ofstream kmerFile;                                                                              //      once we have the kmerLocations folder print it out
-               openOutputFile(kmerDBName, kmerFile);                                   //      to a file
+               m->openOutputFile(kmerDBName, kmerFile);                                        //      to a file
+               
+               //output version
+               kmerFile << "#" << m->getVersion() << endl;
                
                for(int i=0;i<maxKmer;i++){                                                             //      step through all of the possible kmer numbers
                        kmerFile << i << ' ' << kmerLocations[i].size();        //      print the kmer number and the number of sequences with
@@ -118,7 +144,7 @@ void KmerDB::generateDB(){
                
        }
        catch(exception& e) {
-               errorOut(e, "KmerDB", "generateDB");
+               m->errorOut(e, "KmerDB", "generateDB");
                exit(1);
        }       
        
@@ -143,7 +169,7 @@ void KmerDB::addSequence(Sequence seq) {
                count++;
        }
        catch(exception& e) {
-               errorOut(e, "KmerDB", "addSequence");
+               m->errorOut(e, "KmerDB", "addSequence");
                exit(1);
        }       
 }
@@ -154,6 +180,9 @@ void KmerDB::readKmerDB(ifstream& kmerDBFile){
                                        
                kmerDBFile.seekg(0);                                                                    //      start at the beginning of the file
                
+               //read version
+               string line = m->getline(kmerDBFile); m->gobble(kmerDBFile);
+               
                string seqName;
                int seqNumber;
 
@@ -170,9 +199,57 @@ void KmerDB::readKmerDB(ifstream& kmerDBFile){
                
        }
        catch(exception& e) {
-               errorOut(e, "KmerDB", "readKmerDB");
+               m->errorOut(e, "KmerDB", "readKmerDB");
+               exit(1);
+       }       
+}
+
+/**************************************************************************************************/
+int KmerDB::getCount(int kmer) {
+       try {
+               if (kmer < 0) { return 0; }  //if user gives negative number
+               else if (kmer > maxKmer) {      return 0;       }  //or a kmer that is bigger than maxkmer
+               else {  return kmerLocations[kmer].size();      }  // kmer is in vector range
+       }
+       catch(exception& e) {
+               m->errorOut(e, "KmerDB", "getCount");
+               exit(1);
+       }       
+}
+/**************************************************************************************************/
+int KmerDB::getReversed(int kmerNumber) {
+       try {
+               Kmer kmer(kmerSize);
+               
+               if (kmerNumber < 0) { return 0; }  //if user gives negative number
+               else if (kmerNumber > maxKmer) {        return 0;       }  //or a kmer that is bigger than maxkmer
+               else {  return kmer.getReverseKmerNumber(kmerNumber);   }  // kmer is in vector range
+       }
+       catch(exception& e) {
+               m->errorOut(e, "KmerDB", "getReversed");
                exit(1);
        }       
 }
+/**************************************************************************************************/
+vector<int> KmerDB::getSequencesWithKmer(int kmer) {
+       try {
+               
+               vector<int> seqs;
+       
+               if (kmer < 0) { }  //if user gives negative number
+               else if (kmer > maxKmer) {      }  //or a kmer that is bigger than maxkmer
+               else {  seqs = kmerLocations[kmer];     }
+               
+               return seqs;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "KmerDB", "getSequencesWithKmer");
+               exit(1);
+       }       
+}
+/**************************************************************************************************/
+
 
 /**************************************************************************************************/
+
+