5 * Created by Pat Schloss on 12/16/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
8 * This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an
9 * encapsulation of the method for finding the most similar tree to an inputted sequence. the suffixForest objecct
10 * is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also
11 * provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search
12 * method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I
13 * would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but
18 #include "database.hpp"
19 #include "sequence.hpp"
20 #include "suffixtree.hpp"
21 #include "suffixdb.hpp"
23 /**************************************************************************************************/
25 SuffixDB::SuffixDB(string fastaFileName) : Database(fastaFileName) {
27 suffixForest.resize(numSeqs);
28 mothurOut("Generating the suffix tree database...\t"); cout.flush();
29 for(int i=0;i<numSeqs;i++){ // The parent class' constructor generates the vector of
30 suffixForest[i].loadSequence(templateSequences[i]); // template Sequence objects. Here each of these objects
31 } // is used to generate a suffix tree, aka the suffix forest
32 mothurOut("DONE."); mothurOutEndLine(); mothurOutEndLine(); cout.flush();
36 /**************************************************************************************************/
38 Sequence SuffixDB::findClosestSequence(Sequence* candidateSeq){
41 int closestSequenceNo = 0;
42 string processedSeq = candidateSeq->convert2ints(); // the candidate sequence needs to be a string of ints
43 for(int i=0;i<suffixForest.size();i++){ // scan through the forest and see what the minimum
44 int count = suffixForest[i].countSuffixes(processedSeq, minValue); // return score is and keep track of the
45 if(count == minValue){ // template sequence index that corresponds to that score
46 closestSequenceNo = i;
49 searchScore = 100 * (1. - minValue / (float)processedSeq.length());
50 return templateSequences[closestSequenceNo]; // return the Sequence object that has the minimum score
54 /**************************************************************************************************/
56 SuffixDB::~SuffixDB(){
58 for (int i = (suffixForest.size()-1); i >= 0; i--) { suffixForest.pop_back(); }
59 // templateSequences.clear();
62 /**************************************************************************************************/