]> git.donarmstrong.com Git - mothur.git/blob - classify.cpp
started work on classify.seqs command. changed the database class so that it does...
[mothur.git] / classify.cpp
1 /*
2  *  classify.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/3/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classify.h"
11 #include "sequence.hpp"
12 #include "kmerdb.hpp"
13 #include "suffixdb.hpp"
14 #include "blastdb.hpp"
15
16 /**************************************************************************************************/
17
18 Classify::Classify(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) : taxFile(tfile), templateFile(tempFile) {          
19         try {                                                                                   
20                 
21                 readTaxonomy(taxFile);
22                 
23                 int numSeqs = 0;
24                 //need to know number of template seqs for suffixdb
25                 if (method == "suffix") {
26                         ifstream inFASTA;
27                         openInputFile(tempFile, inFASTA);
28                         numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
29                         inFASTA.close();
30                 }
31
32                 mothurOut("Generating search database...    "); cout.flush();
33                                 
34                 bool needToGenerate = true;
35                 string kmerDBName;
36                 if(method == "kmer")                    {       
37                         database = new KmerDB(tempFile, kmerSize);                      
38                         
39                         kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
40                         ifstream kmerFileTest(kmerDBName.c_str());
41                         if(kmerFileTest){       needToGenerate = false;         }
42                 }
43                 else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
44                 else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
45                 else {
46                         mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
47                         mothurOutEndLine();
48                         database = new KmerDB(tempFile, 8);
49                 }
50                 
51                 if (needToGenerate) {
52                         ifstream fastaFile;
53                         openInputFile(tempFile, fastaFile);
54                 
55                         while (!fastaFile.eof()) {
56                                 Sequence temp(fastaFile);
57                                 gobble(fastaFile);
58                         
59                                 names.push_back(temp.getName());
60                                 database->addSequence(temp);    
61                         }
62                         fastaFile.close();
63
64                         database->generateDB();
65                         
66                 }else if ((method == "kmer") && (!needToGenerate)) {    
67                         ifstream kmerFileTest(kmerDBName.c_str());
68                         database->readKmerDB(kmerFileTest);     
69                         
70                         ifstream fastaFile;
71                         openInputFile(tempFile, fastaFile);
72                 
73                         while (!fastaFile.eof()) {
74                                 Sequence temp(fastaFile);
75                                 gobble(fastaFile);
76                         
77                                 names.push_back(temp.getName());
78                         }
79                         fastaFile.close();
80                 }
81                 
82                 database->setNumSeqs(names.size());
83                 
84                 mothurOut("DONE."); mothurOutEndLine();
85
86         }
87         catch(exception& e) {
88                 errorOut(e, "Classify", "Classify");
89                 exit(1);
90         }
91 }
92 /**************************************************************************************************/
93
94 void Classify::readTaxonomy(string file) {
95         try {
96         
97                 ifstream inTax;
98                 openInputFile(file, inTax);
99         
100                 mothurOutEndLine();
101                 mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
102                 
103                 string name, taxInfo;
104                 //read template seqs and save
105                 while (!inTax.eof()) {
106                         inTax >> name >> taxInfo;
107                         
108                         taxonomy[name] = taxInfo;
109                 
110                         gobble(inTax);
111                 }
112                 inTax.close();
113         
114                 mothurOut("DONE.");
115                 mothurOutEndLine();     cout.flush();
116         
117         }
118         catch(exception& e) {
119                 errorOut(e, "Classify", "readTaxonomy");
120                 exit(1);
121         }
122 }
123 /**************************************************************************************************/
124