+++ /dev/null
-/*
- * suffixdb.cpp
- *
- *
- * Created by Pat Schloss on 12/16/08.
- * Copyright 2008 Patrick D. Schloss. All rights reserved.
- *
- * This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an
- * encapsulation of the method for finding the most similar tree to an inputted sequence. the suffixForest objecct
- * is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also
- * provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search
- * method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I
- * would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but
- * faster than blast
- *
- */
-
-#include "database.hpp"
-#include "sequence.hpp"
-#include "suffixtree.hpp"
-#include "suffixdb.hpp"
-
-/**************************************************************************************************/
-
-SuffixDB::SuffixDB(int numSeqs) : Database() {
- suffixForest.resize(numSeqs);
- count = 0;
-}
-/**************************************************************************************************/
-
-SuffixDB::SuffixDB() : Database() {
- count = 0;
-}
-
-/**************************************************************************************************/
-//assumes sequences have been added using addSequence
-vector<int> SuffixDB::findClosestSequences(Sequence* candidateSeq, int num){
- try {
- vector<int> topMatches;
- string processedSeq = candidateSeq->convert2ints(); // the candidate sequence needs to be a string of ints
-
- vector<seqMatch> seqMatches;
- for(int i=0;i<suffixForest.size();i++){ // scan through the forest and see what the minimum
- int count = suffixForest[i].countSuffixes(processedSeq); // return score is and keep track of the
- seqMatch temp(i, count);
- seqMatches.push_back(temp);
- }
-
- //sorts putting smallest matches first
- sort(seqMatches.begin(), seqMatches.end(), compareSeqMatchesReverse);
-
- searchScore = seqMatches[0].match;
- searchScore = 100 * (1. - searchScore / (float)processedSeq.length());
-
- //save top matches
- for (int i = 0; i < num; i++) {
- topMatches.push_back(seqMatches[i].seq);
- }
-
- // return the Sequence object that has the minimum score
- return topMatches;
- }
- catch(exception& e) {
- m->errorOut(e, "SuffixDB", "findClosestSequences");
- exit(1);
- }
-}
-/**************************************************************************************************/
-//adding the sequences generates the db
-void SuffixDB::addSequence(Sequence seq) {
- try {
- suffixForest[count].loadSequence(seq);
- count++;
- }
- catch(exception& e) {
- m->errorOut(e, "SuffixDB", "addSequence");
- exit(1);
- }
-}
-/**************************************************************************************************/
-
-SuffixDB::~SuffixDB(){
- for (int i = (suffixForest.size()-1); i >= 0; i--) { suffixForest.pop_back(); }
-}
-/**************************************************************************************************/