5 * Created by westcott on 11/3/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
11 #include "sequence.hpp"
13 #include "suffixdb.hpp"
14 #include "blastdb.hpp"
16 /**************************************************************************************************/
// Constructor: loads the taxonomy file, then builds (or reloads) the template
// search database selected by `method` ("kmer", "suffix" or "blast").
//   tfile      - taxonomy file; stored in taxFile and passed to readTaxonomy()
//   tempFile   - template FASTA file; stored in templateFile
//   method     - search method name; an unrecognised value falls back to kmer with size 8
//   kmerSize   - k-mer length handed to KmerDB
//   gapOpen, gapExtend, match, misMatch - scoring values handed to BlastDB
// NOTE(review): this view of the constructor is incomplete - several lines
// (stream declarations such as inFASTA/fastaFile, some branch conditions, the
// try block and closing braces) are not visible here, so the comments below
// describe only the statements that are shown.
18 Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {
// Populate the taxonomy map and the PhyloTree before touching the template sequences.
20 readTaxonomy(taxFile);
// Wall-clock start, used for the elapsed-seconds message at the end.
22 int start = time(NULL);
24 //need to know number of template seqs for suffixdb
25 if (method == "suffix") {
27 openInputFile(tempFile, inFASTA);
// Counts every '>' character in the file; presumably each FASTA record contributes
// exactly one '>' (its header line) - TODO confirm no '>' can appear elsewhere.
28 numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
32 mothurOut("Generating search database... "); cout.flush();
// Stays true unless a previously saved kmer database file can be opened below.
34 bool needToGenerate = true;
36 if(method == "kmer") {
37 database = new KmerDB(tempFile, kmerSize);
// Saved-database file name: the template name up to and including its last '.',
// followed by kmerSize and "mer".
// NOTE(review): char('0'+kmerSize) is a single character, so it is a digit only
// for kmerSize 0-9; larger sizes yield a non-digit character in the name.
39 kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
// If that file opens, the database is read back later instead of being regenerated.
40 ifstream kmerFileTest(kmerDBName.c_str());
41 if(kmerFileTest){ needToGenerate = false; }
43 else if(method == "suffix") { database = new SuffixDB(numSeqs); }
44 else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); }
// Fallback for an unrecognised method name: warn and use a kmer database of size 8.
46 mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
48 database = new KmerDB(tempFile, 8);
// Build path: read every template sequence, remember its name and add it to the
// database, then generate the database.
// NOTE(review): the condition guarding this path is not visible here - presumably
// it tests needToGenerate; confirm against the full file.
53 openInputFile(tempFile, fastaFile);
55 while (!fastaFile.eof()) {
56 Sequence temp(fastaFile);
59 names.push_back(temp.getName());
61 database->addSequence(temp);
65 database->generateDB();
// Reload path: a saved kmer database exists, so read it back from kmerDBName.
67 }else if ((method == "kmer") && (!needToGenerate)) {
68 ifstream kmerFileTest(kmerDBName.c_str());
69 database->readKmerDB(kmerFileTest);
// The template file is still scanned here, but only to collect the sequence names.
72 openInputFile(tempFile, fastaFile);
74 while (!fastaFile.eof()) {
75 Sequence temp(fastaFile);
78 names.push_back(temp.getName());
// Tell the database how many template sequences were read.
83 database->setNumSeqs(names.size());
85 mothurOut("DONE."); mothurOutEndLine();
86 mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); mothurOutEndLine();
// Exception handler body: reports the error with class and function name.
90 errorOut(e, "Classify", "Classify");
94 /**************************************************************************************************/
// Reads a taxonomy file of whitespace-separated (sequence name, taxonomy string)
// pairs, storing each pair in the `taxonomy` map and adding it to a freshly
// allocated PhyloTree.
//   file - taxonomy file name to open
// NOTE(review): some lines of this function (the inTax declaration, the try
// block, closing braces) are not visible in this view; comments cover only the
// statements shown.
96 void Classify::readTaxonomy(string file) {
// Allocated with raw new; where it is released is not visible here.
99 phyloTree = new PhyloTree();
102 openInputFile(file, inTax);
105 mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
107 string name, taxInfo;
108 //read template seqs and save
// NOTE(review): an eof()-controlled loop re-processes the previous pair if the
// final extraction fails; presumably a line not shown here consumes trailing
// whitespace before the next test - confirm.
109 while (!inTax.eof()) {
110 inTax >> name >> taxInfo;
// A repeated name overwrites the taxonomy stored for it earlier.
112 taxonomy[name] = taxInfo;
114 //itTax = taxList.find(taxInfo);
115 //if (itTax == taxList.end()) { //this is new taxonomy
116 //taxList[taxInfo] = 1;
117 //}else { taxList[taxInfo]++; }
118 phyloTree->addSeqToTree(name, taxInfo);
// Called once, after the statements that add sequences to the tree.
123 phyloTree->assignHeirarchyIDs(0);
127 mothurOutEndLine(); cout.flush();
// Any std::exception is reported with the class and function name.
130 catch(exception& e) {
131 errorOut(e, "Classify", "readTaxonomy");
135 /**************************************************************************************************/
137 vector<string> Classify::parseTax(string tax) {
139 vector<string> taxons;
141 tax = tax.substr(0, tax.length()-1); //get rid of last ';'
145 while (tax.find_first_of(';') != -1) {
146 individual = tax.substr(0,tax.find_first_of(';'));
147 tax = tax.substr(tax.find_first_of(';')+1, tax.length());
148 taxons.push_back(individual);
152 taxons.push_back(tax);
156 catch(exception& e) {
157 errorOut(e, "Classify", "parseTax");
161 /**************************************************************************************************/