]> git.donarmstrong.com Git - mothur.git/blob - bayesian.cpp
added save parameter to align.seqs, chimera commands, classify.seqs, and seq.error...
[mothur.git] / bayesian.cpp
1 /*
2  *  bayesian.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/3/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "bayesian.h"
11 #include "kmer.hpp"
12 #include "phylosummary.h"
13 #include "referencedb.h"
14 /**************************************************************************************************/
15 Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i) : 
16 Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
17         try {
18                 ReferenceDB* rdb = ReferenceDB::getInstance();
19                 
20                 string baseName = tempFile;
21                 if (baseName == "saved") { baseName = rdb->getSavedReference(); }
22                 
23                 string baseTName = tfile;
24                 if (baseTName == "saved") { baseTName = rdb->getSavedTaxonomy(); }
25                 
26                 /************calculate the probablity that each word will be in a specific taxonomy*************/
27                 string tfileroot = baseTName.substr(0,baseTName.find_last_of(".")+1);
28                 string tempfileroot = m->getRootName(m->getSimpleName(baseName));
29                 string phyloTreeName = tfileroot + "tree.train";
30                 string phyloTreeSumName = tfileroot + "tree.sum";
31                 string probFileName = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.prob";
32                 string probFileName2 = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.numNonZero";
33                 
34                 ofstream out;
35                 ofstream out2;
36                 
37                 ifstream phyloTreeTest(phyloTreeName.c_str());
38                 ifstream probFileTest2(probFileName2.c_str());
39                 ifstream probFileTest(probFileName.c_str());
40                 ifstream probFileTest3(phyloTreeSumName.c_str());
41                 
42                 int start = time(NULL);
43                 
44                 //if they are there make sure they were created after this release date
45                 bool FilesGood = false;
46                 if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){
47                         FilesGood = checkReleaseDate(probFileTest, probFileTest2, phyloTreeTest, probFileTest3);
48                 }
49                 
50                 //if you want to save, but you dont need to calculate then just read
51                 if (rdb->save && probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood) {  
52                         ifstream saveIn;
53                         m->openInputFile(tempFile, saveIn);
54                         
55                         while (!saveIn.eof()) {
56                                 Sequence temp(saveIn);
57                                 m->gobble(saveIn);
58                                 
59                                 rdb->referenceSeqs.push_back(temp); 
60                         }
61                         saveIn.close();                 
62                 }
63                 
64                 if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){       
65                         if (tempFile == "saved") { m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");     m->mothurOutEndLine(); }
66                         
67                         m->mothurOut("Reading template taxonomy...     "); cout.flush();
68                         
69                         phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
70                         
71                         m->mothurOut("DONE."); m->mothurOutEndLine();
72                         
73                         genusNodes = phyloTree->getGenusNodes(); 
74                         genusTotals = phyloTree->getGenusTotals();
75                         
76                         if (tfile == "saved") { 
77                                 m->mothurOutEndLine();  m->mothurOut("Using probabilties from " + rdb->getSavedTaxonomy() + " that are saved in memory...    ");        cout.flush();; 
78                                 wordGenusProb = rdb->wordGenusProb;
79                         }else {
80                                 m->mothurOut("Reading template probabilities...     "); cout.flush();
81                                 readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
82                         }       
83                         
84                         //save probabilities
85                         if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
86                 }else{
87                 
88                         //create search database and names vector
89                         generateDatabaseAndNames(tfile, tempFile, method, ksize, 0.0, 0.0, 0.0, 0.0);
90                         
91                         //prevents errors caused by creating shortcut files if you had an error in the sanity check.
92                         if (m->control_pressed) {  remove(phyloTreeName.c_str());  remove(probFileName.c_str()); remove(probFileName2.c_str()); }
93                         else{ 
94                                 genusNodes = phyloTree->getGenusNodes(); 
95                                 genusTotals = phyloTree->getGenusTotals();
96                                 
97                                 m->mothurOut("Calculating template taxonomy tree...     "); cout.flush();
98                                 
99                                 phyloTree->printTreeNodes(phyloTreeName);
100                                                         
101                                 m->mothurOut("DONE."); m->mothurOutEndLine();
102                                 
103                                 m->mothurOut("Calculating template probabilities...     "); cout.flush();
104                                 
105                                 numKmers = database->getMaxKmer() + 1;
106                         
107                                 //initialze probabilities
108                                 wordGenusProb.resize(numKmers);
109                         //cout << numKmers << '\t' << genusNodes.size() << endl;
110                                 for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
111                         //cout << numKmers << '\t' << genusNodes.size() << endl;        
112                                 ofstream out;
113                                 ofstream out2;
114                                 
115                                 #ifdef USE_MPI
116                                         int pid;
117                                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
118
119                                         if (pid == 0) {  
120                                 #endif
121
122                                 
123                                 m->openOutputFile(probFileName, out);
124                                 
125                                 //output mothur version
126                                 out << "#" << m->getVersion() << endl;
127                                 
128                                 out << numKmers << endl;
129                                 
130                                 m->openOutputFile(probFileName2, out2);
131                                 
132                                 //output mothur version
133                                 out2 << "#" << m->getVersion() << endl;
134                                 
135                                 #ifdef USE_MPI
136                                         }
137                                 #endif
138
139                                 
140                                 //for each word
141                                 for (int i = 0; i < numKmers; i++) {
142                                         if (m->control_pressed) {  break; }
143                                         
144                                         #ifdef USE_MPI
145                                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
146
147                                                 if (pid == 0) {  
148                                         #endif
149
150                                         out << i << '\t';
151                                         
152                                         #ifdef USE_MPI
153                                                 }
154                                         #endif
155                                         
156                                         vector<int> seqsWithWordi = database->getSequencesWithKmer(i);
157                                         
158                                         map<int, int> count;
159                                         for (int k = 0; k < genusNodes.size(); k++) {  count[genusNodes[k]] = 0;  }                     
160                                                         
161                                         //for each sequence with that word
162                                         for (int j = 0; j < seqsWithWordi.size(); j++) {
163                                                 int temp = phyloTree->getIndex(names[seqsWithWordi[j]]);
164                                                 count[temp]++;  //increment count of seq in this genus who have this word
165                                         }
166                                         
167                                         //probabilityInTemplate = (# of seqs with that word in template + 0.50) / (total number of seqs in template + 1);
168                                         float probabilityInTemplate = (seqsWithWordi.size() + 0.50) / (float) (names.size() + 1);
169                                         
170                                         int numNotZero = 0;
171                                         for (int k = 0; k < genusNodes.size(); k++) {
172                                                 //probabilityInThisTaxonomy = (# of seqs with that word in this taxonomy + probabilityInTemplate) / (total number of seqs in this taxonomy + 1);
173                                                 
174                                                 
175                                                 wordGenusProb[i][k] = log((count[genusNodes[k]] + probabilityInTemplate) / (float) (genusTotals[k] + 1));  
176                                                                         
177                                                 if (count[genusNodes[k]] != 0) { 
178                                                         #ifdef USE_MPI
179                                                                 int pid;
180                                                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
181                                                 
182                                                                 if (pid == 0) {  
183                                                         #endif
184
185                                                         out << k << '\t' << wordGenusProb[i][k] << '\t'; 
186                                                         
187                                                         #ifdef USE_MPI
188                                                                 }
189                                                         #endif
190
191                                                         numNotZero++;  
192                                                 }
193                                         }
194                                         
195                                         #ifdef USE_MPI
196                                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
197                                 
198                                                 if (pid == 0) {  
199                                         #endif
200                                         
201                                         out << endl;
202                                         out2 << probabilityInTemplate << '\t' << numNotZero << endl;
203                                         
204                                         #ifdef USE_MPI
205                                                 }
206                                         #endif
207                                 }
208                                 
209                                 #ifdef USE_MPI
210                                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
211                                 
212                                         if (pid == 0) {  
213                                 #endif
214                                 
215                                 out.close();
216                                 out2.close();
217                                 
218                                 #ifdef USE_MPI
219                                         }
220                                 #endif
221                                 
222                                 //read in new phylotree with less info. - its faster
223                                 ifstream phyloTreeTest(phyloTreeName.c_str());
224                                 delete phyloTree;
225                                 
226                                 phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
227                                 
228                                 //save probabilities
229                                 if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
230                         }
231                 }
232         
233                 m->mothurOut("DONE."); m->mothurOutEndLine();
234                 m->mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); m->mothurOutEndLine();
235         }
236         catch(exception& e) {
237                 m->errorOut(e, "Bayesian", "Bayesian");
238                 exit(1);
239         }
240 }
241 /**************************************************************************************************/
242 Bayesian::~Bayesian() {
243         try {
244                 
245                  delete phyloTree; 
246                  if (database != NULL) {  delete database; }
247         }
248         catch(exception& e) {
249                 m->errorOut(e, "Bayesian", "~Bayesian");
250                 exit(1);
251         }
252 }
253
254 /**************************************************************************************************/
255 string Bayesian::getTaxonomy(Sequence* seq) {
256         try {
257                 string tax = "";
258                 Kmer kmer(kmerSize);
259                 
260                 //get words contained in query
261                 //getKmerString returns a string where the index in the string is hte kmer number 
262                 //and the character at that index can be converted to be the number of times that kmer was seen
263                 
264                 string queryKmerString = kmer.getKmerString(seq->getUnaligned()); 
265
266                 vector<int> queryKmers;
267                 for (int i = 0; i < queryKmerString.length()-1; i++) {  // the -1 is to ignore any kmer with an N in it
268                         if (queryKmerString[i] != '!') { //this kmer is in the query
269                                 queryKmers.push_back(i);
270                         }
271                 }
272                 
273                 if (queryKmers.size() == 0) {  m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); return "bad seq"; }
274                 
275                 
276                 int index = getMostProbableTaxonomy(queryKmers);
277                 
278                 if (m->control_pressed) { return tax; }
279                                         
280                 //bootstrap - to set confidenceScore
281                 int numToSelect = queryKmers.size() / 8;
282         
283                 tax = bootstrapResults(queryKmers, index, numToSelect);
284                                 
285                 return tax;     
286         }
287         catch(exception& e) {
288                 m->errorOut(e, "Bayesian", "getTaxonomy");
289                 exit(1);
290         }
291 }
292 /**************************************************************************************************/
293 string Bayesian::bootstrapResults(vector<int> kmers, int tax, int numToSelect) {
294         try {
295                                 
296                 map<int, int> confidenceScores; 
297                 
298                 //initialize confidences to 0 
299                 int seqIndex = tax;
300                 TaxNode seq = phyloTree->get(tax);
301                 confidenceScores[tax] = 0;
302                 
303                 while (seq.level != 0) { //while you are not at the root
304                         seqIndex = seq.parent;
305                         confidenceScores[seqIndex] = 0;
306                         seq = phyloTree->get(seq.parent);
307                 }
308                                 
309                 map<int, int>::iterator itBoot;
310                 map<int, int>::iterator itBoot2;
311                 map<int, int>::iterator itConvert;
312                         
313                 for (int i = 0; i < iters; i++) {
314                         if (m->control_pressed) { return "control"; }
315                         
316                         vector<int> temp;
317                         for (int j = 0; j < numToSelect; j++) {
318                                 int index = int(rand() % kmers.size());
319                                 
320                                 //add word to temp
321                                 temp.push_back(kmers[index]);
322                         }
323                         
324                         //get taxonomy
325                         int newTax = getMostProbableTaxonomy(temp);
326                         //int newTax = 1;
327                         TaxNode taxonomyTemp = phyloTree->get(newTax);
328                         
329                         //add to confidence results
330                         while (taxonomyTemp.level != 0) { //while you are not at the root
331                                 itBoot2 = confidenceScores.find(newTax); //is this a classification we already have a count on
332                                 
333                                 if (itBoot2 != confidenceScores.end()) { //this is a classification we need a confidence for
334                                         (itBoot2->second)++;
335                                 }
336                                 
337                                 newTax = taxonomyTemp.parent;
338                                 taxonomyTemp = phyloTree->get(newTax);
339                         }
340         
341                 }
342                 
343                 string confidenceTax = "";
344                 simpleTax = "";
345                 
346                 int seqTaxIndex = tax;
347                 TaxNode seqTax = phyloTree->get(tax);
348                 
349                 while (seqTax.level != 0) { //while you are not at the root
350                                         
351                                 itBoot2 = confidenceScores.find(seqTaxIndex); //is this a classification we already have a count on
352                                 
353                                 int confidence = 0;
354                                 if (itBoot2 != confidenceScores.end()) { //already in confidence scores
355                                         confidence = itBoot2->second;
356                                 }
357                                 
358                                 if (((confidence/(float)iters) * 100) >= confidenceThreshold) {
359                                         confidenceTax = seqTax.name + "(" + toString(((confidence/(float)iters) * 100)) + ");" + confidenceTax;
360                                         simpleTax = seqTax.name + ";" + simpleTax;
361                                 }
362                                 
363                                 seqTaxIndex = seqTax.parent;
364                                 seqTax = phyloTree->get(seqTax.parent);
365                 }
366                 
367                 if (confidenceTax == "") { confidenceTax = "unclassified;"; simpleTax = "unclassified;"; }
368                 return confidenceTax;
369                 
370         }
371         catch(exception& e) {
372                 m->errorOut(e, "Bayesian", "bootstrapResults");
373                 exit(1);
374         }
375 }
376 /**************************************************************************************************/
377 int Bayesian::getMostProbableTaxonomy(vector<int> queryKmer) {
378         try {
379                 int indexofGenus = 0;
380                 
381                 double maxProbability = -1000000.0;
382                 //find taxonomy with highest probability that this sequence is from it
383                 
384                 
385 //              cout << genusNodes.size() << endl;
386                 
387                 
388                 for (int k = 0; k < genusNodes.size(); k++) {
389                         //for each taxonomy calc its probability
390                         
391                         double prob = 0.0000;
392                         for (int i = 0; i < queryKmer.size(); i++) {
393                                 prob += wordGenusProb[queryKmer[i]][k];
394                         }
395                         
396 //                      cout << phyloTree->get(genusNodes[k]).name << '\t' << prob << endl;
397
398                         //is this the taxonomy with the greatest probability?
399                         if (prob > maxProbability) { 
400                                 indexofGenus = genusNodes[k];
401                                 maxProbability = prob;
402                         }
403                 }
404                 
405                         
406                 return indexofGenus;
407         }
408         catch(exception& e) {
409                 m->errorOut(e, "Bayesian", "getMostProbableTaxonomy");
410                 exit(1);
411         }
412 }
413 /*************************************************************************************************
414 map<string, int> Bayesian::parseTaxMap(string newTax) {
415         try{
416         
417                 map<string, int> parsed;
418                 
419                 newTax = newTax.substr(0, newTax.length()-1);  //get rid of last ';'
420         
421                 //parse taxonomy
422                 string individual;
423                 while (newTax.find_first_of(';') != -1) {
424                         individual = newTax.substr(0,newTax.find_first_of(';'));
425                         newTax = newTax.substr(newTax.find_first_of(';')+1, newTax.length());
426                         parsed[individual] = 1;
427                 }
428                 
429                 //get last one
430                 parsed[newTax] = 1;
431
432                 return parsed;
433                 
434         }
435         catch(exception& e) {
436                 m->errorOut(e, "Bayesian", "parseTax");
437                 exit(1);
438         }
439 }
440 /**************************************************************************************************/
441 void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string inNumName) {
442         try{
443                 
444                 #ifdef USE_MPI
445                         
446                         int pid, num, num2, processors;
447                         vector<unsigned long int> positions;
448                         vector<unsigned long int> positions2;
449                         
450                         MPI_Status status; 
451                         MPI_File inMPI;
452                         MPI_File inMPI2;
453                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
454                         MPI_Comm_size(MPI_COMM_WORLD, &processors);
455                         int tag = 2001;
456
457                         char inFileName[1024];
458                         strcpy(inFileName, inNumName.c_str());
459                         
460                         char inFileName2[1024];
461                         strcpy(inFileName2, inName.c_str());
462
463                         MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
464                         MPI_File_open(MPI_COMM_WORLD, inFileName2, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI2);  //comm, filename, mode, info, filepointer
465
466                         if (pid == 0) {
467                                 positions = m->setFilePosEachLine(inNumName, num);
468                                 positions2 = m->setFilePosEachLine(inName, num2);
469                                 
470                                 for(int i = 1; i < processors; i++) { 
471                                         MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
472                                         MPI_Send(&positions[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
473                                         
474                                         MPI_Send(&num2, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
475                                         MPI_Send(&positions2[0], (num2+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
476                                 }
477
478                         }else{
479                                 MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
480                                 positions.resize(num+1);
481                                 MPI_Recv(&positions[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
482                                 
483                                 MPI_Recv(&num2, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
484                                 positions2.resize(num2+1);
485                                 MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
486                         }
487                         
488                         //read version
489                         int length = positions2[1] - positions2[0];
490                         char* buf5 = new char[length];
491
492                         MPI_File_read_at(inMPI2, positions2[0], buf5, length, MPI_CHAR, &status);
493                         delete buf5;
494
495                         //read numKmers
496                         length = positions2[2] - positions2[1];
497                         char* buf = new char[length];
498
499                         MPI_File_read_at(inMPI2, positions2[1], buf, length, MPI_CHAR, &status);
500
501                         string tempBuf = buf;
502                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
503                         delete buf;
504
505                         istringstream iss (tempBuf,istringstream::in);
506                         iss >> numKmers;  
507                         
508                         //initialze probabilities
509                         wordGenusProb.resize(numKmers);
510                         
511                         for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
512                         
513                         int kmer, name;  
514                         vector<int> numbers; numbers.resize(numKmers);
515                         float prob;
516                         vector<float> zeroCountProb; zeroCountProb.resize(numKmers);    
517                         
518                         //read version
519                         length = positions[1] - positions[0];
520                         char* buf6 = new char[length];
521
522                         MPI_File_read_at(inMPI2, positions[0], buf6, length, MPI_CHAR, &status);
523                         delete buf6;
524                         
525                         //read file 
526                         for(int i=1;i<num;i++){
527                                 //read next sequence
528                                 length = positions[i+1] - positions[i];
529                                 char* buf4 = new char[length];
530
531                                 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
532
533                                 tempBuf = buf4;
534                                 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
535                                 delete buf4;
536
537                                 istringstream iss (tempBuf,istringstream::in);
538                                 iss >> zeroCountProb[i] >> numbers[i];  
539                         }
540                         
541                         MPI_File_close(&inMPI);
542                         
543                         for(int i=2;i<num2;i++){
544                                 //read next sequence
545                                 length = positions2[i+1] - positions2[i];
546                                 char* buf4 = new char[length];
547
548                                 MPI_File_read_at(inMPI2, positions2[i], buf4, length, MPI_CHAR, &status);
549
550                                 tempBuf = buf4;
551                                 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
552                                 delete buf4;
553
554                                 istringstream iss (tempBuf,istringstream::in);
555                                 
556                                 iss >> kmer;
557                                 
558                                 //set them all to zero value
559                                 for (int i = 0; i < genusNodes.size(); i++) {
560                                         wordGenusProb[kmer][i] = log(zeroCountProb[kmer] / (float) (genusTotals[i]+1));
561                                 }
562                                 
563                                 //get probs for nonzero values
564                                 for (int i = 0; i < numbers[kmer]; i++) {
565                                         iss >> name >> prob;
566                                         wordGenusProb[kmer][name] = prob;
567                                 }
568                                 
569                         }
570                         MPI_File_close(&inMPI2);
571                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
572                 #else
573                         //read version
574                         string line = m->getline(in); m->gobble(in);
575                         
576                         in >> numKmers; m->gobble(in);
577                         
578                         //initialze probabilities
579                         wordGenusProb.resize(numKmers);
580                         
581                         for (int j = 0; j < wordGenusProb.size(); j++) {        wordGenusProb[j].resize(genusNodes.size());             }
582                         
583                         int kmer, name, count;  count = 0;
584                         vector<int> num; num.resize(numKmers);
585                         float prob;
586                         vector<float> zeroCountProb; zeroCountProb.resize(numKmers);            
587                         
588                         //read version
589                         string line2 = m->getline(inNum); m->gobble(inNum);
590                         
591                         while (inNum) {
592                                 inNum >> zeroCountProb[count] >> num[count];  
593                                 count++;
594                                 m->gobble(inNum);
595                         }
596                         inNum.close();
597                 
598                         while(in) {
599                                 in >> kmer;
600                                 
601                                 //set them all to zero value
602                                 for (int i = 0; i < genusNodes.size(); i++) {
603                                         wordGenusProb[kmer][i] = log(zeroCountProb[kmer] / (float) (genusTotals[i]+1));
604                                 }
605                                 
606                                 //get probs for nonzero values
607                                 for (int i = 0; i < num[kmer]; i++) {
608                                         in >> name >> prob;
609                                         wordGenusProb[kmer][name] = prob;
610                                 }
611                                 
612                                 m->gobble(in);
613                         }
614                         in.close();
615                         
616                 #endif
617         }
618         catch(exception& e) {
619                 m->errorOut(e, "Bayesian", "readProbFile");
620                 exit(1);
621         }
622 }
623 /**************************************************************************************************/
624 bool Bayesian::checkReleaseDate(ifstream& file1, ifstream& file2, ifstream& file3, ifstream& file4) {
625         try {
626                 
627                 bool good = true;
628                 
629                 vector<string> lines;
630                 lines.push_back(m->getline(file1));  
631                 lines.push_back(m->getline(file2)); 
632                 lines.push_back(m->getline(file3)); 
633                 lines.push_back(m->getline(file4)); 
634
635                 //before we added this check
636                 if ((lines[0][0] != '#') || (lines[1][0] != '#') || (lines[2][0] != '#') || (lines[3][0] != '#')) {  good = false;  }
637                 else {
638                         //rip off #
639                         for (int i = 0; i < lines.size(); i++) { lines[i] = lines[i].substr(1);  }
640                         
641                         //get mothurs current version
642                         string version = m->getVersion();
643                         
644                         vector<string> versionVector;
645                         m->splitAtChar(version, versionVector, '.');
646                         
647                         //check each files version
648                         for (int i = 0; i < lines.size(); i++) { 
649                                 vector<string> linesVector;
650                                 m->splitAtChar(lines[i], linesVector, '.');
651                         
652                                 if (versionVector.size() != linesVector.size()) { good = false; break; }
653                                 else {
654                                         for (int j = 0; j < versionVector.size(); j++) {
655                                                 int num1, num2;
656                                                 convert(versionVector[j], num1);
657                                                 convert(linesVector[j], num2);
658                                                 
659                                                 //if mothurs version is newer than this files version, then we want to remake it
660                                                 if (num1 > num2) {  good = false; break;  }
661                                         }
662                                 }
663                                 
664                                 if (!good) { break; }
665                         }
666                 }
667                 
668                 if (!good) {  file1.close(); file2.close(); file3.close(); file4.close();  }
669                 else { file1.seekg(0);  file2.seekg(0);  file3.seekg(0);  file4.seekg(0);  }
670                 
671                 return good;
672         }
673         catch(exception& e) {
674                 m->errorOut(e, "Bayesian", "checkReleaseDate");
675                 exit(1);
676         }
677 }
678 /**************************************************************************************************/
679
680
681
682
683
684