]> git.donarmstrong.com Git - mothur.git/blob - suffixdb.cpp
added alignment code
[mothur.git] / suffixdb.cpp
1 /*
2  *  suffixdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a child class of the Database abstract datatype.  The class is basically a database of suffix trees and an
9  *      encapsulation of the method for finding the most similar tree to an inputted sequence.  the suffixForest objecct
10  *      is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree.  The class also
11  *      provides a method to take an unaligned sequence and find the closest sequence in the suffixForest.  The search
12  *      method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093.  I 
13  *      would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but 
14  *      faster than blast
15  *
16  */
17
18 using namespace std;
19
20 #include "database.hpp"
21 #include "sequence.hpp"
22 #include "suffixtree.hpp"
23 #include "suffixdb.hpp"
24
25 /**************************************************************************************************/
26
27 SuffixDB::SuffixDB(string fastaFileName) : Database(fastaFileName) {
28
29         suffixForest.resize(numSeqs);
30         cout << "Generating the suffix tree database...\t";     cout.flush();
31         for(int i=0;i<numSeqs;i++){                                                             //      The parent class' constructor generates the vector of
32                 suffixForest[i].loadSequence(templateSequences[i]);     //      template Sequence objects.  Here each of these objects
33         }                                                                                                               //      is used to generate a suffix tree, aka the suffix forest
34         cout << "DONE." << endl << endl;        cout.flush();
35
36 }
37
38 /**************************************************************************************************/
39
40 Sequence* SuffixDB::findClosestSequence(Sequence* candidateSeq){
41
42         int minValue = 2000;
43         int closestSequenceNo = 0;
44         string processedSeq = candidateSeq->convert2ints();             //      the candidate sequence needs to be a string of ints
45         for(int i=0;i<suffixForest.size();i++){                                 //      scan through the forest and see what the minimum
46                 int count = suffixForest[i].countSuffixes(processedSeq, minValue);      //      return score is and keep track of the
47                 if(count == minValue){                                                          //      template sequence index that corresponds to that score
48                         closestSequenceNo = i;
49                 }
50         }
51         searchScore = 100 * (1. - minValue / (float)processedSeq.length());
52         return templateSequences[closestSequenceNo];                    //      return the Sequence object that has the minimum score
53         
54 }
55
56 /**************************************************************************************************/