]> git.donarmstrong.com Git - mothur.git/blob - classify.cpp
created mothurOut class to handle logfiles
[mothur.git] / classify.cpp
1 /*
2  *  classify.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/3/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classify.h"
11 #include "sequence.hpp"
12 #include "kmerdb.hpp"
13 #include "suffixdb.hpp"
14 #include "blastdb.hpp"
15 #include "distancedb.hpp"
16
17 /**************************************************************************************************/
18 Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {          
19         try {   
20                 m = MothurOut::getInstance();                                                                   
21                 readTaxonomy(taxFile);  
22                 
23                 int start = time(NULL);
24                 int numSeqs = 0;
25                 //need to know number of template seqs for suffixdb
26                 if (method == "suffix") {
27                         ifstream inFASTA;
28                         openInputFile(tempFile, inFASTA);
29                         numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
30                         inFASTA.close();
31                 }
32
33                 m->mothurOut("Generating search database...    "); cout.flush();
34                                 
35                 bool needToGenerate = true;
36                 string kmerDBName;
37                 if(method == "kmer")                    {       
38                         database = new KmerDB(tempFile, kmerSize);                      
39                         
40                         kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
41                         ifstream kmerFileTest(kmerDBName.c_str());
42                         if(kmerFileTest){       needToGenerate = false;         }
43                 }
44                 else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
45                 else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
46                 else if(method == "distance")   {       database = new DistanceDB();    }
47                 else {
48                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
49                         m->mothurOutEndLine();
50                         database = new KmerDB(tempFile, 8);
51                 }
52                 
53                 if (needToGenerate) {
54                         ifstream fastaFile;
55                         openInputFile(tempFile, fastaFile);
56                         
57                         while (!fastaFile.eof()) {
58                                 Sequence temp(fastaFile);
59                                 gobble(fastaFile);
60                         
61                                 names.push_back(temp.getName());
62                                                                 
63                                 database->addSequence(temp);    
64                         }
65                         fastaFile.close();
66
67                         database->generateDB();
68                         
69                 }else if ((method == "kmer") && (!needToGenerate)) {    
70                         ifstream kmerFileTest(kmerDBName.c_str());
71                         database->readKmerDB(kmerFileTest);     
72                         
73                         ifstream fastaFile;
74                         openInputFile(tempFile, fastaFile);
75                         
76                         while (!fastaFile.eof()) {
77                                 Sequence temp(fastaFile);
78                                 gobble(fastaFile);
79                         
80                                 names.push_back(temp.getName());
81                         }
82                         fastaFile.close();
83                 }
84                 
85                 database->setNumSeqs(names.size());
86                 
87                 m->mothurOut("DONE."); m->mothurOutEndLine();
88                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
89
90         }
91         catch(exception& e) {
92                 m->errorOut(e, "Classify", "Classify");
93                 exit(1);
94         }
95 }
96 /**************************************************************************************************/
97
98 void Classify::readTaxonomy(string file) {
99         try {
100                 
101                 phyloTree = new PhyloTree();
102                 
103                 ifstream inTax;
104                 openInputFile(file, inTax);
105         
106                 m->mothurOutEndLine();
107                 m->mothurOut("Reading in the " + file + " taxonomy...\t");      cout.flush();
108                 
109                 string name, taxInfo;
110                 //read template seqs and save
111                 while (!inTax.eof()) {
112                         inTax >> name >> taxInfo;
113                         
114                         taxonomy[name] = taxInfo;
115                         
116                         phyloTree->addSeqToTree(name, taxInfo);
117                 
118                         gobble(inTax);
119                 }
120                 
121                 phyloTree->assignHeirarchyIDs(0);
122                 inTax.close();
123         
124                 m->mothurOut("DONE.");
125                 m->mothurOutEndLine();  cout.flush();
126         
127         }
128         catch(exception& e) {
129                 m->errorOut(e, "Classify", "readTaxonomy");
130                 exit(1);
131         }
132 }
133 /**************************************************************************************************/
134
135 vector<string> Classify::parseTax(string tax) {
136         try {
137                 vector<string> taxons;
138                 
139                 tax = tax.substr(0, tax.length()-1);  //get rid of last ';'
140         
141                 //parse taxonomy
142                 string individual;
143                 while (tax.find_first_of(';') != -1) {
144                         individual = tax.substr(0,tax.find_first_of(';'));
145                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
146                         taxons.push_back(individual);
147                         
148                 }
149                 //get last one
150                 taxons.push_back(tax);
151                 
152                 return taxons;
153         }
154         catch(exception& e) {
155                 m->errorOut(e, "Classify", "parseTax");
156                 exit(1);
157         }
158 }
159 /**************************************************************************************************/
160