]> git.donarmstrong.com Git - mothur.git/blob - classify.cpp
finished work on classify.seqs bayesian method and various bug fixes
[mothur.git] / classify.cpp
1 /*
2  *  classify.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/3/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classify.h"
11 #include "sequence.hpp"
12 #include "kmerdb.hpp"
13 #include "suffixdb.hpp"
14 #include "blastdb.hpp"
15
16 /**************************************************************************************************/
17
18 Classify::Classify(string tfile, string tempFile, string method, int kmerSize, int gapOpen, int gapExtend, int match, int misMatch) : taxFile(tfile), templateFile(tempFile) {          
19         try {                                                                                   
20                 readTaxonomy(taxFile);  
21                 
22                 int start = time(NULL);
23                 int numSeqs = 0;
24                 //need to know number of template seqs for suffixdb
25                 if (method == "suffix") {
26                         ifstream inFASTA;
27                         openInputFile(tempFile, inFASTA);
28                         numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
29                         inFASTA.close();
30                 }
31
32                 mothurOut("Generating search database...    "); cout.flush();
33                                 
34                 bool needToGenerate = true;
35                 string kmerDBName;
36                 if(method == "kmer")                    {       
37                         database = new KmerDB(tempFile, kmerSize);                      
38                         
39                         kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
40                         ifstream kmerFileTest(kmerDBName.c_str());
41                         if(kmerFileTest){       needToGenerate = false;         }
42                 }
43                 else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
44                 else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
45                 else {
46                         mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
47                         mothurOutEndLine();
48                         database = new KmerDB(tempFile, 8);
49                 }
50                 
51                 if (needToGenerate) {
52                         ifstream fastaFile;
53                         openInputFile(tempFile, fastaFile);
54                         
55                         while (!fastaFile.eof()) {
56                                 Sequence temp(fastaFile);
57                                 gobble(fastaFile);
58                         
59                                 names.push_back(temp.getName());
60                                                                 
61                                 database->addSequence(temp);    
62                         }
63                         fastaFile.close();
64
65                         database->generateDB();
66                         
67                 }else if ((method == "kmer") && (!needToGenerate)) {    
68                         ifstream kmerFileTest(kmerDBName.c_str());
69                         database->readKmerDB(kmerFileTest);     
70                         
71                         ifstream fastaFile;
72                         openInputFile(tempFile, fastaFile);
73                         
74                         while (!fastaFile.eof()) {
75                                 Sequence temp(fastaFile);
76                                 gobble(fastaFile);
77                         
78                                 names.push_back(temp.getName());
79                         }
80                         fastaFile.close();
81                 }
82                 
83                 database->setNumSeqs(names.size());
84                 
85                 mothurOut("DONE."); mothurOutEndLine();
86                 mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); mothurOutEndLine();
87
88         }
89         catch(exception& e) {
90                 errorOut(e, "Classify", "Classify");
91                 exit(1);
92         }
93 }
94 /**************************************************************************************************/
95
96 void Classify::readTaxonomy(string file) {
97         try {
98                 
99                 phyloTree = new PhyloTree();
100                 
101                 ifstream inTax;
102                 openInputFile(file, inTax);
103         
104                 mothurOutEndLine();
105                 mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
106                 
107                 string name, taxInfo;
108                 //read template seqs and save
109                 while (!inTax.eof()) {
110                         inTax >> name >> taxInfo;
111                         
112                         taxonomy[name] = taxInfo;
113                         
114                         //itTax = taxList.find(taxInfo);
115                         //if (itTax == taxList.end()) { //this is new taxonomy
116                                 //taxList[taxInfo] = 1;
117                         //}else { taxList[taxInfo]++;   }
118                         phyloTree->addSeqToTree(name, taxInfo);
119                 
120                         gobble(inTax);
121                 }
122                 
123                 phyloTree->assignHeirarchyIDs(0);
124                 inTax.close();
125         
126                 mothurOut("DONE.");
127                 mothurOutEndLine();     cout.flush();
128         
129         }
130         catch(exception& e) {
131                 errorOut(e, "Classify", "readTaxonomy");
132                 exit(1);
133         }
134 }
135 /**************************************************************************************************/
136
137 vector<string> Classify::parseTax(string tax) {
138         try {
139                 vector<string> taxons;
140                 
141                 tax = tax.substr(0, tax.length()-1);  //get rid of last ';'
142         
143                 //parse taxonomy
144                 string individual;
145                 while (tax.find_first_of(';') != -1) {
146                         individual = tax.substr(0,tax.find_first_of(';'));
147                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
148                         taxons.push_back(individual);
149                         
150                 }
151                 //get last one
152                 taxons.push_back(tax);
153                 
154                 return taxons;
155         }
156         catch(exception& e) {
157                 errorOut(e, "Classify", "parseTax");
158                 exit(1);
159         }
160 }
161 /**************************************************************************************************/
162