]> git.donarmstrong.com Git - mothur.git/blob - suffixdb.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / suffixdb.cpp
1 /*
2  *  suffixdb.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 12/16/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  *      This is a child class of the Database abstract datatype.  The class is basically a database of suffix trees and an
9  *      encapsulation of the method for finding the most similar tree to an inputted sequence.  the suffixForest objecct
10  *      is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree.  The class also
11  *      provides a method to take an unaligned sequence and find the closest sequence in the suffixForest.  The search
12  *      method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093.  I 
13  *      would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but 
14  *      faster than blast
15  *
16  */
17
18 #include "database.hpp"
19 #include "sequence.hpp"
20 #include "suffixtree.hpp"
21 #include "suffixdb.hpp"
22
23 /**************************************************************************************************/
24
25 SuffixDB::SuffixDB(int numSeqs) : Database() {
26         suffixForest.resize(numSeqs);
27         count = 0;
28 }
29 /**************************************************************************************************/
30
31 SuffixDB::SuffixDB() : Database() {
32         count = 0;
33 }
34
35 /**************************************************************************************************/
36 //assumes sequences have been added using addSequence
37 vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
38         try {
39                 vector<int> topMatches;
40                 string processedSeq = candidateSeq->convert2ints();             //      the candidate sequence needs to be a string of ints
41                 
42                 vector<seqMatch> seqMatches;
43                 for(int i=0;i<suffixForest.size();i++){                                 //      scan through the forest and see what the minimum
44                         int count = suffixForest[i].countSuffixes(processedSeq);        //      return score is and keep track of the
45                         seqMatch temp(i, count);
46                         seqMatches.push_back(temp);
47                 }
48                 
49                 //sorts putting smallest matches first
50                 sort(seqMatches.begin(), seqMatches.end(), compareSeqMatchesReverse);
51                 
52                 searchScore = seqMatches[0].match;
53                 searchScore = 100 * (1. - searchScore / (float)processedSeq.length());
54                 
55                 //save top matches
56                 for (int i = 0; i < num; i++) {
57                         topMatches.push_back(seqMatches[i].seq);
58                 }
59
60                 //      return the Sequence object that has the minimum score
61                 return topMatches;
62         }
63         catch(exception& e) {
64                 m->errorOut(e, "SuffixDB", "findClosestSequences");
65                 exit(1);
66         }       
67 }
68 /**************************************************************************************************/
69 //adding the sequences generates the db
70 void SuffixDB::addSequence(Sequence seq) {
71         try {
72                 suffixForest[count].loadSequence(seq);          
73                 count++;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "SuffixDB", "addSequence");
77                 exit(1);
78         }
79 }
80 /**************************************************************************************************/
81
82 SuffixDB::~SuffixDB(){                                                                                                          
83         for (int i = (suffixForest.size()-1); i >= 0; i--) {  suffixForest.pop_back();  }
84 }
85 /**************************************************************************************************/