]> git.donarmstrong.com Git - mothur.git/blob - classify.cpp
worked on hcluster. made .single command run using a sharedfile. and various other...
[mothur.git] / classify.cpp
1 /*
2  *  classify.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/3/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "classify.h"
11 #include "sequence.hpp"
12 #include "kmerdb.hpp"
13 #include "suffixdb.hpp"
14 #include "blastdb.hpp"
15
16 /**************************************************************************************************/
17 Classify::Classify(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) : taxFile(tfile), templateFile(tempFile) {          
18         try {   
19                                                                                         
20                 readTaxonomy(taxFile);  
21                 
22                 int start = time(NULL);
23                 int numSeqs = 0;
24                 //need to know number of template seqs for suffixdb
25                 if (method == "suffix") {
26                         ifstream inFASTA;
27                         openInputFile(tempFile, inFASTA);
28                         numSeqs = count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
29                         inFASTA.close();
30                 }
31
32                 mothurOut("Generating search database...    "); cout.flush();
33                                 
34                 bool needToGenerate = true;
35                 string kmerDBName;
36                 if(method == "kmer")                    {       
37                         database = new KmerDB(tempFile, kmerSize);                      
38                         
39                         kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
40                         ifstream kmerFileTest(kmerDBName.c_str());
41                         if(kmerFileTest){       needToGenerate = false;         }
42                 }
43                 else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
44                 else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
45                 else {
46                         mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
47                         mothurOutEndLine();
48                         database = new KmerDB(tempFile, 8);
49                 }
50                 
51                 if (needToGenerate) {
52                         ifstream fastaFile;
53                         openInputFile(tempFile, fastaFile);
54                         
55                         while (!fastaFile.eof()) {
56                                 Sequence temp(fastaFile);
57                                 gobble(fastaFile);
58                         
59                                 names.push_back(temp.getName());
60                                                                 
61                                 database->addSequence(temp);    
62                         }
63                         fastaFile.close();
64
65                         database->generateDB();
66                         
67                 }else if ((method == "kmer") && (!needToGenerate)) {    
68                         ifstream kmerFileTest(kmerDBName.c_str());
69                         database->readKmerDB(kmerFileTest);     
70                         
71                         ifstream fastaFile;
72                         openInputFile(tempFile, fastaFile);
73                         
74                         while (!fastaFile.eof()) {
75                                 Sequence temp(fastaFile);
76                                 gobble(fastaFile);
77                         
78                                 names.push_back(temp.getName());
79                         }
80                         fastaFile.close();
81                 }
82                 
83                 database->setNumSeqs(names.size());
84                 
85                 mothurOut("DONE."); mothurOutEndLine();
86                 mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); mothurOutEndLine();
87
88         }
89         catch(exception& e) {
90                 errorOut(e, "Classify", "Classify");
91                 exit(1);
92         }
93 }
94 /**************************************************************************************************/
95
96 void Classify::readTaxonomy(string file) {
97         try {
98                 
99                 phyloTree = new PhyloTree();
100                 
101                 ifstream inTax;
102                 openInputFile(file, inTax);
103         
104                 mothurOutEndLine();
105                 mothurOut("Reading in the " + file + " taxonomy...\t"); cout.flush();
106                 
107                 string name, taxInfo;
108                 //read template seqs and save
109                 while (!inTax.eof()) {
110                         inTax >> name >> taxInfo;
111                         
112                         taxonomy[name] = taxInfo;
113                         
114                         phyloTree->addSeqToTree(name, taxInfo);
115                 
116                         gobble(inTax);
117                 }
118                 
119                 phyloTree->assignHeirarchyIDs(0);
120                 inTax.close();
121         
122                 mothurOut("DONE.");
123                 mothurOutEndLine();     cout.flush();
124         
125         }
126         catch(exception& e) {
127                 errorOut(e, "Classify", "readTaxonomy");
128                 exit(1);
129         }
130 }
131 /**************************************************************************************************/
132
133 vector<string> Classify::parseTax(string tax) {
134         try {
135                 vector<string> taxons;
136                 
137                 tax = tax.substr(0, tax.length()-1);  //get rid of last ';'
138         
139                 //parse taxonomy
140                 string individual;
141                 while (tax.find_first_of(';') != -1) {
142                         individual = tax.substr(0,tax.find_first_of(';'));
143                         tax = tax.substr(tax.find_first_of(';')+1, tax.length());
144                         taxons.push_back(individual);
145                         
146                 }
147                 //get last one
148                 taxons.push_back(tax);
149                 
150                 return taxons;
151         }
152         catch(exception& e) {
153                 errorOut(e, "Classify", "parseTax");
154                 exit(1);
155         }
156 }
157 /**************************************************************************************************/
158