5 * Created by Pat Schloss on 12/16/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
8 * This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an
9 * encapsulation of the method for finding the most similar tree to an inputted sequence. the suffixForest objecct
10 * is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also
11 * provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search
12 * method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I
13 * would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but
18 #include "database.hpp"
19 #include "sequence.hpp"
20 #include "suffixtree.hpp"
21 #include "suffixdb.hpp"
23 /**************************************************************************************************/
25 SuffixDB::SuffixDB(int numSeqs) : Database() {
26 suffixForest.resize(numSeqs);
29 /**************************************************************************************************/
30 //assumes sequences have been added using addSequence
31 vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
33 vector<int> topMatches;
34 string processedSeq = candidateSeq->convert2ints(); // the candidate sequence needs to be a string of ints
36 vector<seqMatch> seqMatches;
37 for(int i=0;i<suffixForest.size();i++){ // scan through the forest and see what the minimum
38 int count = suffixForest[i].countSuffixes(processedSeq); // return score is and keep track of the
39 seqMatch temp(i, count);
40 seqMatches.push_back(temp);
43 //sorts putting smallest matches first
44 sort(seqMatches.begin(), seqMatches.end(), compareSeqMatchesReverse);
46 searchScore = seqMatches[0].match;
47 searchScore = 100 * (1. - searchScore / (float)processedSeq.length());
50 for (int i = 0; i < num; i++) {
51 topMatches.push_back(seqMatches[i].seq);
54 // return the Sequence object that has the minimum score
58 errorOut(e, "SuffixDB", "findClosestSequences");
62 /**************************************************************************************************/
63 //adding the sequences generates the db
64 void SuffixDB::addSequence(Sequence seq) {
66 suffixForest[count].loadSequence(seq);
70 errorOut(e, "SuffixDB", "addSequence");
74 /**************************************************************************************************/
76 SuffixDB::~SuffixDB(){
77 for (int i = (suffixForest.size()-1); i >= 0; i--) { suffixForest.pop_back(); }
79 /**************************************************************************************************/