]> git.donarmstrong.com Git - mothur.git/blob - suffixdb.cpp
39a9b54945cf1fb0a7d4f8f29ecc323381784d1d
[mothur.git] / suffixdb.cpp
1 /*
2  *  suffixdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a child class of the Database abstract datatype.  The class is basically a database of suffix trees and an
9  *      encapsulation of the method for finding the most similar tree to an inputted sequence.  the suffixForest objecct
10  *      is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree.  The class also
11  *      provides a method to take an unaligned sequence and find the closest sequence in the suffixForest.  The search
12  *      method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093.  I 
13  *      would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but 
14  *      faster than blast
15  *
16  */
17
18 #include "database.hpp"
19 #include "sequence.hpp"
20 #include "suffixtree.hpp"
21 #include "suffixdb.hpp"
22
23 /**************************************************************************************************/
24
25 SuffixDB::SuffixDB(int numSeqs) : Database() {
26         suffixForest.resize(numSeqs);
27         count = 0;
28 }
29 /**************************************************************************************************/
30 //assumes sequences have been added using addSequence
31 vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
32         try {
33                 vector<int> topMatches;
34                 string processedSeq = candidateSeq->convert2ints();             //      the candidate sequence needs to be a string of ints
35                 
36                 vector<seqMatch> seqMatches;
37                 for(int i=0;i<suffixForest.size();i++){                                 //      scan through the forest and see what the minimum
38                         int count = suffixForest[i].countSuffixes(processedSeq);        //      return score is and keep track of the
39                         seqMatch temp(i, count);
40                         seqMatches.push_back(temp);
41                 }
42                 
43                 //sorts putting smallest matches first
44                 sort(seqMatches.begin(), seqMatches.end(), compareSeqMatchesReverse);
45                 
46                 searchScore = seqMatches[0].match;
47                 searchScore = 100 * (1. - searchScore / (float)processedSeq.length());
48                 
49                 //save top matches
50                 for (int i = 0; i < num; i++) {
51                         topMatches.push_back(seqMatches[i].seq);
52                 }
53
54                 //      return the Sequence object that has the minimum score
55                 return topMatches;
56         }
57         catch(exception& e) {
58                 errorOut(e, "SuffixDB", "findClosestSequences");
59                 exit(1);
60         }       
61 }
62 /**************************************************************************************************/
63 //adding the sequences generates the db
64 void SuffixDB::addSequence(Sequence seq) {
65         try {
66                 suffixForest[count].loadSequence(seq);          
67                 count++;
68         }
69         catch(exception& e) {
70                 errorOut(e, "SuffixDB", "addSequence");
71                 exit(1);
72         }
73 }
74 /**************************************************************************************************/
75
76 SuffixDB::~SuffixDB(){                                                                                                          
77         for (int i = (suffixForest.size()-1); i >= 0; i--) {  suffixForest.pop_back();  }
78 }
79 /**************************************************************************************************/