5 * Created by Pat Schloss on 12/16/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
8 * This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an
9 * encapsulation of the method for finding the most similar tree to an inputted sequence. the suffixForest objecct
10 * is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also
11 * provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search
12 * method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I
13 * would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but
18 #include "database.hpp"
19 #include "sequence.hpp"
20 #include "suffixtree.hpp"
21 #include "suffixdb.hpp"
23 /**************************************************************************************************/
25 SuffixDB::SuffixDB(int numSeqs) : Database() {
26 suffixForest.resize(numSeqs);
29 /**************************************************************************************************/
31 SuffixDB::SuffixDB() : Database() {
35 /**************************************************************************************************/
36 //assumes sequences have been added using addSequence
37 vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
39 vector<int> topMatches;
40 string processedSeq = candidateSeq->convert2ints(); // the candidate sequence needs to be a string of ints
42 vector<seqMatch> seqMatches;
43 for(int i=0;i<suffixForest.size();i++){ // scan through the forest and see what the minimum
44 int count = suffixForest[i].countSuffixes(processedSeq); // return score is and keep track of the
45 seqMatch temp(i, count);
46 seqMatches.push_back(temp);
49 //sorts putting smallest matches first
50 sort(seqMatches.begin(), seqMatches.end(), compareSeqMatchesReverse);
52 searchScore = seqMatches[0].match;
53 searchScore = 100 * (1. - searchScore / (float)processedSeq.length());
56 for (int i = 0; i < num; i++) {
57 topMatches.push_back(seqMatches[i].seq);
60 // return the Sequence object that has the minimum score
64 m->errorOut(e, "SuffixDB", "findClosestSequences");
68 /**************************************************************************************************/
69 //adding the sequences generates the db
70 void SuffixDB::addSequence(Sequence seq) {
72 suffixForest[count].loadSequence(seq);
76 m->errorOut(e, "SuffixDB", "addSequence");
80 /**************************************************************************************************/
82 SuffixDB::~SuffixDB(){
83 for (int i = (suffixForest.size()-1); i >= 0; i--) { suffixForest.pop_back(); }
85 /**************************************************************************************************/