5 * Created by westcott on 11/3/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
#include <cstdlib>
11 #include "sequence.hpp"
13 #include "suffixdb.hpp"
14 #include "blastdb.hpp"
15 #include "distancedb.hpp"
17 /**************************************************************************************************/
/*
 * Constructs a classifier: loads the taxonomy via readTaxonomy(), then builds
 * (or, for kmer, reloads) the template search database chosen by `method`.
 *
 * NOTE(review): several original lines are absent from this excerpt (the
 * declarations of numSeqs, kmerDBName, inFASTA and fastaFile, the opening
 * `try`, some `else`/`if` headers and closing braces). The comments below
 * describe only the statements that are visible.
 *
 * Parameters:
 *   tfile      - taxonomy file path; stored in taxFile and handed to readTaxonomy().
 *   tempFile   - template sequence file path; stored in templateFile and read
 *                through the streams inFASTA / fastaFile.
 *   method     - "kmer", "suffix", "blast" or "distance". Per the message
 *                below, any other value is reported as invalid and a
 *                KmerDB with size 8 is created instead.
 *   kmerSize   - passed to KmerDB and used to form the saved kmer file name.
 *   gapOpen, gapExtend, match, misMatch
 *              - forwarded unchanged to the BlastDB constructor.
 */
18 Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {
21 readTaxonomy(taxFile);
// Start time, used for the elapsed-seconds message at the end.
23 int start = time(NULL);
25 //need to know number of template seqs for suffixdb
26 if (method == "suffix") {
28 openInputFile(tempFile, inFASTA);
// The sequence count is taken as the number of '>' characters in the file.
29 numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
33 mothurOut("Generating search database... "); cout.flush();
// Stays true unless a saved kmer database file can be opened below.
35 bool needToGenerate = true;
37 if(method == "kmer") {
38 database = new KmerDB(tempFile, kmerSize);
// Saved-database name: tempFile up to and including its last '.', then one
// character derived from kmerSize, then "mer".
// NOTE(review): char('0'+kmerSize) is a decimal digit only for kmerSize 0-9;
// presumably this must match the name used when the file is written - confirm
// before changing.
40 kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
41 ifstream kmerFileTest(kmerDBName.c_str());
// If the saved file opens, the database does not need to be regenerated.
42 if(kmerFileTest){ needToGenerate = false; }
44 else if(method == "suffix") { database = new SuffixDB(numSeqs); }
45 else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); }
46 else if(method == "distance") { database = new DistanceDB(); }
// Fallback for an unrecognised method (the enclosing `else` is not visible here).
48 mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
50 database = new KmerDB(tempFile, 8);
// Generation path (its `if` header is not visible; presumably guarded by
// needToGenerate): read every template sequence, remember its name, and add
// it to the database before generateDB() is called.
55 openInputFile(tempFile, fastaFile);
57 while (!fastaFile.eof()) {
58 Sequence temp(fastaFile);
61 names.push_back(temp.getName());
63 database->addSequence(temp);
67 database->generateDB();
// Saved kmer database path: load it with readKmerDB(), then re-read the
// template file only to collect the sequence names.
69 }else if ((method == "kmer") && (!needToGenerate)) {
70 ifstream kmerFileTest(kmerDBName.c_str());
71 database->readKmerDB(kmerFileTest);
74 openInputFile(tempFile, fastaFile);
76 while (!fastaFile.eof()) {
77 Sequence temp(fastaFile);
80 names.push_back(temp.getName());
// The database is told how many names were collected on either path.
85 database->setNumSeqs(names.size());
87 mothurOut("DONE."); mothurOutEndLine();
// NOTE(review): the message reads "seconds generate" (no "to"); left as is.
88 mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); mothurOutEndLine();
// Error report for this constructor (the `catch` header is not visible here).
92 errorOut(e, "Classify", "Classify");
96 /**************************************************************************************************/
/*
 * Reads a taxonomy file, storing each record in `taxonomy` (keyed by name)
 * and adding it to a newly allocated PhyloTree.
 *
 * Each record is extracted as two whitespace-delimited tokens: a name
 * followed by its taxonomy string.
 *
 * NOTE(review): the declaration of inTax, the opening `try`, and whatever
 * follows the loop body and the errorOut call are absent from this excerpt.
 *
 * Parameters:
 *   file - path of the taxonomy file, opened through openInputFile().
 */
98 void Classify::readTaxonomy(string file) {
// NOTE(review): no matching delete is visible in this excerpt - confirm
// where phyloTree is released.
101 phyloTree = new PhyloTree();
104 openInputFile(file, inTax);
107 mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
109 string name, taxInfo;
110 //read template seqs and save
// NOTE(review): an eof()-controlled loop processes a stale record if the
// stream ends in trailing whitespace, unless a whitespace-skipping call
// (not visible here) follows each read - confirm.
111 while (!inTax.eof()) {
112 inTax >> name >> taxInfo;
// A repeated name overwrites the earlier entry in `taxonomy`.
114 taxonomy[name] = taxInfo;
116 phyloTree->addSeqToTree(name, taxInfo);
// Called once after the read loop, with 0 as its argument.
121 phyloTree->assignHeirarchyIDs(0);
125 mothurOutEndLine(); cout.flush();
128 catch(exception& e) {
129 errorOut(e, "Classify", "readTaxonomy");
133 /**************************************************************************************************/
135 vector<string> Classify::parseTax(string tax) {
137 vector<string> taxons;
139 tax = tax.substr(0, tax.length()-1); //get rid of last ';'
143 while (tax.find_first_of(';') != -1) {
144 individual = tax.substr(0,tax.find_first_of(';'));
145 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
146 taxons.push_back(individual);
150 taxons.push_back(tax);
154 catch(exception& e) {
155 errorOut(e, "Classify", "parseTax");
159 /**************************************************************************************************/