]> git.donarmstrong.com Git - mothur.git/blob - suffixdb.cpp
added logfile feature
[mothur.git] / suffixdb.cpp
1 /*
2  *  suffixdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a child class of the Database abstract datatype.  The class is basically a database of suffix trees and an
9  *      encapsulation of the method for finding the most similar tree to an inputted sequence.  the suffixForest objecct
10  *      is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree.  The class also
11  *      provides a method to take an unaligned sequence and find the closest sequence in the suffixForest.  The search
12  *      method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093.  I 
13  *      would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but 
14  *      faster than blast
15  *
16  */
17
18 #include "database.hpp"
19 #include "sequence.hpp"
20 #include "suffixtree.hpp"
21 #include "suffixdb.hpp"
22
23 /**************************************************************************************************/
24
25 SuffixDB::SuffixDB(string fastaFileName) : Database(fastaFileName) {
26
27         suffixForest.resize(numSeqs);
28         mothurOut("Generating the suffix tree database...\t");  cout.flush();
29         for(int i=0;i<numSeqs;i++){                                                             //      The parent class' constructor generates the vector of
30                 suffixForest[i].loadSequence(templateSequences[i]);     //      template Sequence objects.  Here each of these objects
31         }                                                                                                               //      is used to generate a suffix tree, aka the suffix forest
32         mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
33
34 }
35
36 /**************************************************************************************************/
37
38 Sequence SuffixDB::findClosestSequence(Sequence* candidateSeq){
39
40         int minValue = 2000;
41         int closestSequenceNo = 0;
42         string processedSeq = candidateSeq->convert2ints();             //      the candidate sequence needs to be a string of ints
43         for(int i=0;i<suffixForest.size();i++){                                 //      scan through the forest and see what the minimum
44                 int count = suffixForest[i].countSuffixes(processedSeq, minValue);      //      return score is and keep track of the
45                 if(count == minValue){                                                          //      template sequence index that corresponds to that score
46                         closestSequenceNo = i;
47                 }
48         }
49         searchScore = 100 * (1. - minValue / (float)processedSeq.length());
50         return templateSequences[closestSequenceNo];                    //      return the Sequence object that has the minimum score
51         
52 }
53
54 /**************************************************************************************************/
55
56 SuffixDB::~SuffixDB(){                                                                                                          
57         
58         for (int i = (suffixForest.size()-1); i >= 0; i--) {  suffixForest.pop_back();  }
59         // templateSequences.clear();
60
61 }
62 /**************************************************************************************************/