X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=suffixdb.cpp;fp=suffixdb.cpp;h=b4639f93fb3f025926cf1daef66a9187d6ab8fce;hb=526a868606faa50caf86e7399f7554c0335b39e5;hp=0000000000000000000000000000000000000000;hpb=c35f02a218ce8f430a75850b4d9fabb96b3a022b;p=mothur.git diff --git a/suffixdb.cpp b/suffixdb.cpp new file mode 100644 index 0000000..b4639f9 --- /dev/null +++ b/suffixdb.cpp @@ -0,0 +1,56 @@ +/* + * suffixdb.cpp + * + * + * Created by Pat Schloss on 12/16/08. + * Copyright 2008 Patrick D. Schloss. All rights reserved. + * + * This is a child class of the Database abstract datatype. The class is basically a database of suffix trees and an + * encapsulation of the method for finding the most similar tree to an inputted sequence. The suffixForest object + * is a vector of SuffixTrees, with each template sequence being represented by a different SuffixTree. The class also + * provides a method to take an unaligned sequence and find the closest sequence in the suffixForest. The search + * method is inspired by the article and Perl source code provided at http://www.ddj.com/web-development/184416093. I + * would estimate that the time complexity is O(LN) for each search, which is slower than the kmer searching, but + * faster than BLAST. + * + */ + +using namespace std; + +#include "database.hpp" +#include "sequence.hpp" +#include "suffixtree.hpp" +#include "suffixdb.hpp" + +/**************************************************************************************************/ + +SuffixDB::SuffixDB(string fastaFileName) : Database(fastaFileName) { + + suffixForest.resize(numSeqs); + cout << "Generating the suffix tree database...\t"; cout.flush(); + for(int i=0;iconvert2ints(); // the candidate sequence needs to be a string of ints + for(int i=0;i