git.donarmstrong.com Git - mothur.git/commitdiff
added versioning info to all shortcut files mothur makes.
author westcott <westcott>
Fri, 6 Aug 2010 13:36:36 +0000 (13:36 +0000)
committer westcott <westcott>
Fri, 6 Aug 2010 13:36:36 +0000 (13:36 +0000)
22 files changed:
alignmentdb.cpp
bayesian.cpp
bayesian.h
chimerapintailcommand.cpp
chimeraslayer.cpp
classify.cpp
classify.h
classifyseqscommand.cpp
database.hpp
decalc.cpp
distancedb.cpp
distancedb.hpp
kmerdb.cpp
knn.cpp
knn.h
makefile
mothur.cpp
mothur.h
mothurout.h
phylosummary.cpp
phylotree.cpp
pintail.cpp

index bf46bec168858853a58730fd80b4309798982fa6..8febfe286c6b14b6d8dd2d373cec860a6f099897 100644 (file)
-/*\r
- *  alignmentdb.cpp\r
- *  Mothur\r
- *\r
- *  Created by westcott on 11/4/09.\r
- *  Copyright 2009 Schloss Lab. All rights reserved.\r
- *\r
- */\r
-\r
-#include "alignmentdb.h"\r
-#include "kmerdb.hpp"\r
-#include "suffixdb.hpp"\r
-#include "blastdb.hpp"\r
-\r
-\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){           //      This assumes that the template database is in fasta format, may \r
-       try {                                                                                   //      need to alter this in the future?\r
-               m = MothurOut::getInstance();\r
-               longest = 0;\r
-               method = s;\r
-               bool needToGenerate = true;\r
-               \r
-               m->mothurOutEndLine();\r
-               m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();\r
-               \r
-               #ifdef USE_MPI  \r
-                       int pid, processors;\r
-                       vector<unsigned long int> positions;\r
-               \r
-                       MPI_Status status; \r
-                       MPI_File inMPI;\r
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors);\r
-                       int tag = 2001;\r
-       \r
-                       char inFileName[1024];\r
-                       strcpy(inFileName, fastaFileName.c_str());\r
-       \r
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer\r
-                       \r
-                       if (pid == 0) {\r
-                               positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
-\r
-                               //send file positions to all processes\r
-                               for(int i = 1; i < processors; i++) { \r
-                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
-                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
-                               }\r
-                       }else{\r
-                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
-                               positions.resize(numSeqs+1);\r
-                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
-                       }\r
-               \r
-                       //read file \r
-                       for(int i=0;i<numSeqs;i++){\r
-                               \r
-                               if (m->control_pressed) {  templateSequences.clear(); break;  }\r
-                               \r
-                               //read next sequence\r
-                               int length = positions[i+1] - positions[i];\r
-                               char* buf4 = new char[length];\r
-                       \r
-                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);\r
-               \r
-                               string tempBuf = buf4;\r
-                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }\r
-                               delete buf4;\r
-\r
-                               istringstream iss (tempBuf,istringstream::in);\r
-               \r
-                               Sequence temp(iss);  \r
-                               if (temp.getName() != "") {\r
-                                       templateSequences.push_back(temp);\r
-                                       //save longest base\r
-                                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
-                               }\r
-                       }\r
-                       \r
-                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case\r
-                       \r
-                       MPI_File_close(&inMPI);\r
-               \r
-       #else\r
-               ifstream fastaFile;\r
-               openInputFile(fastaFileName, fastaFile);\r
-\r
-               while (!fastaFile.eof()) {\r
-                       Sequence temp(fastaFile);  gobble(fastaFile);\r
-                       \r
-                       if (m->control_pressed) {  templateSequences.clear(); break;  }\r
-                       \r
-                       if (temp.getName() != "") {\r
-                               templateSequences.push_back(temp);\r
-                               //save longest base\r
-                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
-                       }\r
-               }\r
-               fastaFile.close();\r
-               \r
-       #endif\r
-       \r
-               numSeqs = templateSequences.size();\r
-               //all of this is elsewhere already!\r
-               \r
-               m->mothurOut("DONE.");\r
-               m->mothurOutEndLine();  cout.flush();\r
-               \r
-               //in case you delete the seqs and then ask for them\r
-               emptySequence = Sequence();\r
-               emptySequence.setName("no_match");\r
-               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               \r
-               \r
-               string kmerDBName;\r
-               if(method == "kmer")                    {       \r
-                       search = new KmerDB(fastaFileName, kmerSize);                   \r
-                       \r
-                       #ifdef USE_MPI\r
-                       #else\r
-                               kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";\r
-                               ifstream kmerFileTest(kmerDBName.c_str());\r
-                       \r
-                               if(kmerFileTest){       needToGenerate = false;         }\r
-                       #endif\r
-               }\r
-               else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }\r
-               else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }\r
-               else {\r
-                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");\r
-                       m->mothurOutEndLine();\r
-                       search = new KmerDB(fastaFileName, 8);\r
-               }\r
-               \r
-               if (!(m->control_pressed)) {\r
-                       if (needToGenerate) {\r
-                               //add sequences to search \r
-                               for (int i = 0; i < templateSequences.size(); i++) {\r
-                                       search->addSequence(templateSequences[i]);\r
-                                       \r
-                                       if (m->control_pressed) {  templateSequences.clear(); break;  }\r
-                               }\r
-                               \r
-                               if (m->control_pressed) {  templateSequences.clear();  }\r
-                               \r
-                               search->generateDB();\r
-                               \r
-                       }else if ((method == "kmer") && (!needToGenerate)) {\r
-                               ifstream kmerFileTest(kmerDBName.c_str());\r
-                               search->readKmerDB(kmerFileTest);       \r
-                       }\r
-                       \r
-                       search->setNumSeqs(numSeqs);\r
-               }\r
-       }\r
-       catch(exception& e) {\r
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
-               exit(1);\r
-       }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string s){             \r
-       try {                                                                                   \r
-               m = MothurOut::getInstance();\r
-               method = s;\r
-               \r
-               if(method == "suffix")          {       search = new SuffixDB();        }\r
-               else if(method == "blast")      {       search = new BlastDB();         }\r
-               else                                            {       search = new KmerDB();          }\r
-\r
-                               \r
-               //in case you delete the seqs and then ask for them\r
-               emptySequence = Sequence();\r
-               emptySequence.setName("no_match");\r
-               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               \r
-       }\r
-       catch(exception& e) {\r
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
-               exit(1);\r
-       }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::~AlignmentDB() {  delete search;  }\r
-/**************************************************************************************************/\r
-Sequence AlignmentDB::findClosestSequence(Sequence* seq) {\r
-       try{\r
-       \r
-               vector<int> spot = search->findClosestSequences(seq, 1);\r
-\r
-               if (spot.size() != 0)   {               return templateSequences[spot[0]];      }\r
-               else                                    {               return emptySequence;                           }\r
-               \r
-       }\r
-       catch(exception& e) {\r
-               m->errorOut(e, "AlignmentDB", "findClosestSequence");\r
-               exit(1);\r
-       }\r
-}\r
-/**************************************************************************************************/\r
-\r
-\r
-\r
-\r
-\r
-\r
+/*
+ *  alignmentdb.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/4/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "alignmentdb.h"
+#include "kmerdb.hpp"
+#include "suffixdb.hpp"
+#include "blastdb.hpp"
+
+
+/**************************************************************************************************/
+//Loads every sequence in fastaFileName into templateSequences and builds the search database chosen by s.
+//  fastaFileName - template file, read as fasta
+//  s             - "kmer", "suffix" or "blast"; any other value is reported and replaced by a kmer search with ksize=8
+//  kmerSize      - word size handed to KmerDB when s is "kmer"
+//  gapOpen, gapExtend, match, misMatch - scoring values handed to BlastDB when s is "blast"
+//Without MPI, an existing <root>.<kmerSize>mer shortcut file that passes checkReleaseVersion is read
+//instead of regenerating the kmer database.
+//Any exception is reported through m->errorOut and the program exits.
+AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){           //      This assumes that the template database is in fasta format, may 
+       try {                                                                                   //      need to alter this in the future?
+               m = MothurOut::getInstance();
+               longest = 0;
+               method = s;
+               bool needToGenerate = true;
+
+               m->mothurOutEndLine();
+               m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
+
+               #ifdef USE_MPI  
+                       int pid, processors;
+                       vector<unsigned long int> positions;
+
+                       MPI_Status status; 
+                       MPI_File inMPI;
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
+                       int tag = 2001;
+
+                       //mutable, null terminated copy of the name sized from the string itself,
+                       //so a long path can no longer overflow a fixed 1024 byte array
+                       vector<char> inFileName(fastaFileName.begin(), fastaFileName.end());
+                       inFileName.push_back('\0');
+
+                       MPI_File_open(MPI_COMM_WORLD, &inFileName[0], MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+
+                       if (pid == 0) {
+                               positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+
+                               //send file positions to all processes
+                               for(int i = 1; i < processors; i++) { 
+                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                               }
+                       }else{
+                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                               positions.resize(numSeqs+1);
+                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                       }
+
+                       //read file 
+                       for(int i=0;i<numSeqs;i++){
+
+                               if (m->control_pressed) {  templateSequences.clear(); break;  }
+
+                               //read next sequence
+                               int length = positions[i+1] - positions[i];
+                               char* buf4 = new char[length];
+
+                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+
+                               //buf4 is not null terminated, so build the string from an explicit length
+                               string tempBuf(buf4, length);
+                               delete[] buf4;  //allocated with new[], must be released with delete[]
+
+                               istringstream iss (tempBuf,istringstream::in);
+
+                               Sequence temp(iss);  
+                               if (temp.getName() != "") {
+                                       templateSequences.push_back(temp);
+                                       //save longest base
+                                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
+                               }
+                       }
+
+                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
+                       MPI_File_close(&inMPI);
+
+       #else
+               ifstream fastaFile;
+               openInputFile(fastaFileName, fastaFile);
+
+               while (!fastaFile.eof()) {
+                       Sequence temp(fastaFile);  gobble(fastaFile);
+
+                       if (m->control_pressed) {  templateSequences.clear(); break;  }
+
+                       if (temp.getName() != "") {
+                               templateSequences.push_back(temp);
+                               //save longest base
+                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
+                       }
+               }
+               fastaFile.close();
+
+       #endif
+
+               numSeqs = templateSequences.size();
+               //all of this is elsewhere already!
+
+               m->mothurOut("DONE.");
+               m->mothurOutEndLine();  cout.flush();
+
+               //in case you delete the seqs and then ask for them
+               emptySequence = Sequence();
+               emptySequence.setName("no_match");
+               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+
+
+               string kmerDBName;
+               if(method == "kmer")                    {       
+                       search = new KmerDB(fastaFileName, kmerSize);                   
+
+                       #ifdef USE_MPI
+                       #else
+                               //NOTE(review): char('0'+ kmerSize) is only a digit for kmerSize 0-9; left unchanged because
+                               //chimeraslayer.cpp builds the shortcut name with the same expression
+                               kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+                               ifstream kmerFileTest(kmerDBName.c_str());
+
+                               //only reuse the shortcut file when its version stamp is accepted
+                               if(kmerFileTest){       
+                                       bool GoodFile = checkReleaseVersion(kmerFileTest, m->getVersion());
+                                       if (GoodFile) {  needToGenerate = false;        }
+                               }
+                       #endif
+               }
+               else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
+               else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }
+               else {
+                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+                       m->mothurOutEndLine();
+                       search = new KmerDB(fastaFileName, 8);
+               }
+
+               if (!(m->control_pressed)) {
+                       if (needToGenerate) {
+                               //add sequences to search 
+                               for (int i = 0; i < templateSequences.size(); i++) {
+                                       search->addSequence(templateSequences[i]);
+
+                                       if (m->control_pressed) {  templateSequences.clear(); break;  }
+                               }
+
+                               if (m->control_pressed) {  templateSequences.clear();  }
+
+                               search->generateDB();
+
+                       }else if ((method == "kmer") && (!needToGenerate)) {
+                               //fresh stream so the shortcut file is read from its beginning
+                               ifstream kmerFileTest(kmerDBName.c_str());
+                               search->readKmerDB(kmerFileTest);       
+                       }
+
+                       search->setNumSeqs(numSeqs);
+               }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "AlignmentDB");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//Lightweight constructor: only selects the search implementation named by s ("suffix", "blast",
+//anything else gives a KmerDB) and prepares the "no_match" placeholder; no template file is read here.
+AlignmentDB::AlignmentDB(string s){
+       try {
+               m = MothurOut::getInstance();
+               method = s;
+
+               //default constructed search object for the requested method
+               if (method == "blast")          {       search = new BlastDB();         }
+               else if (method == "suffix")    {       search = new SuffixDB();        }
+               else                            {       search = new KmerDB();          }
+
+               //in case you delete the seqs and then ask for them
+               const string placeholderBases = "XXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
+
+               emptySequence = Sequence();
+               emptySequence.setName("no_match");
+               emptySequence.setUnaligned(placeholderBases);
+               emptySequence.setAligned(placeholderBases);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "AlignmentDB");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+//Releases the search object that the constructor allocated with new.
+AlignmentDB::~AlignmentDB() {
+       delete search;
+}
+/**************************************************************************************************/
+//Asks the search object for the single closest template to seq and returns that template;
+//when the search yields no index the emptySequence placeholder is returned instead.
+Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
+       try{
+               vector<int> hits = search->findClosestSequences(seq, 1);
+
+               //nothing came back, hand out the placeholder
+               if (hits.size() == 0)   {  return emptySequence;  }
+
+               return templateSequences[hits[0]];
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "findClosestSequence");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+
+
+
+
+
index ae96a220ae2646c59a45a36152ed2d0b099c0c64..e596f672ed45f8f9f977e5264417e6781b959c38 100644 (file)
@@ -34,7 +34,13 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                
                int start = time(NULL);
                
-               if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){    
+               //if they are there make sure they were created after this release date
+               bool FilesGood = false;
+               if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){
+                       FilesGood = checkReleaseDate(probFileTest, probFileTest2, phyloTreeTest, probFileTest3);
+               }
+               
+               if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){       
                        m->mothurOut("Reading template taxonomy...     "); cout.flush();
                        
                        phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
@@ -86,10 +92,16 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                
                                openOutputFile(probFileName, out);
                                
+                               //output mothur version
+                               out << "#" << m->getVersion() << endl;
+                               
                                out << numKmers << endl;
                                
                                openOutputFile(probFileName2, out2);
                                
+                               //output mothur version
+                               out2 << "#" << m->getVersion() << endl;
+                               
                                #ifdef USE_MPI
                                        }
                                #endif
@@ -416,12 +428,19 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                                positions2.resize(num2+1);
                                MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                        }
-               
-                       //read numKmers
+                       
+                       //read version
                        int length = positions2[1] - positions2[0];
+                       char* buf5 = new char[length];
+
+                       MPI_File_read_at(inMPI2, positions2[0], buf5, length, MPI_CHAR, &status);
+                       delete buf5;
+
+                       //read numKmers
+                       length = positions2[2] - positions2[1];
                        char* buf = new char[length];
 
-                       MPI_File_read_at(inMPI2, positions2[0], buf, length, MPI_CHAR, &status);
+                       MPI_File_read_at(inMPI2, positions2[1], buf, length, MPI_CHAR, &status);
 
                        string tempBuf = buf;
                        if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
@@ -438,10 +457,17 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        int kmer, name;  
                        vector<int> numbers; numbers.resize(numKmers);
                        float prob;
-                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);            
+                       vector<float> zeroCountProb; zeroCountProb.resize(numKmers);    
+                       
+                       //read version
+                       length = positions[1] - positions[0];
+                       char* buf6 = new char[length];
 
+                       MPI_File_read_at(inMPI2, positions[0], buf6, length, MPI_CHAR, &status);
+                       delete buf6;
+                       
                        //read file 
-                       for(int i=0;i<num;i++){
+                       for(int i=1;i<num;i++){
                                //read next sequence
                                length = positions[i+1] - positions[i];
                                char* buf4 = new char[length];
@@ -458,7 +484,7 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        
                        MPI_File_close(&inMPI);
                        
-                       for(int i=1;i<num2;i++){
+                       for(int i=2;i<num2;i++){
                                //read next sequence
                                length = positions2[i+1] - positions2[i];
                                char* buf4 = new char[length];
@@ -488,7 +514,9 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        MPI_File_close(&inMPI2);
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
-               
+                       //read version
+                       string line = getline(in); gobble(in);
+                       
                        in >> numKmers; gobble(in);
                        
                        //initialze probabilities
@@ -500,7 +528,10 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
                        vector<int> num; num.resize(numKmers);
                        float prob;
                        vector<float> zeroCountProb; zeroCountProb.resize(numKmers);            
-               
+                       
+                       //read version
+                       string line2 = getline(inNum); gobble(inNum);
+                       
                        while (inNum) {
                                inNum >> zeroCountProb[count] >> num[count];  
                                count++;
@@ -534,6 +565,61 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string
        }
 }
 /**************************************************************************************************/
+//Returns true only if each of the four shortcut files starts with a "#<version>" line that is not older than
+//m->getVersion(). The streams are rewound to their start on success and closed on failure so they can be remade.
+bool Bayesian::checkReleaseDate(ifstream& file1, ifstream& file2, ifstream& file3, ifstream& file4) {
+       try {
+               bool good = true;
+               vector<string> lines;
+               lines.push_back(getline(file1));  
+               lines.push_back(getline(file2)); 
+               lines.push_back(getline(file3)); 
+               lines.push_back(getline(file4)); 
+
+               //files made before we added this check have no leading '#'; empty() guards the [0] access
+               for (size_t i = 0; i < lines.size(); i++) {
+                       if (lines[i].empty() || (lines[i][0] != '#')) {  good = false; break;  }
+               }
+               
+               if (good) {
+                       //rip off #
+                       for (size_t i = 0; i < lines.size(); i++) { lines[i] = lines[i].substr(1);  }
+                       //get mothurs current version
+                       string version = m->getVersion();
+                       vector<string> versionVector;
+                       splitAtChar(version, versionVector, '.');
+                       
+                       //check each files version
+                       for (size_t i = 0; (i < lines.size()) && good; i++) { 
+                               vector<string> linesVector;
+                               splitAtChar(lines[i], linesVector, '.');
+                               if (versionVector.size() != linesVector.size()) { good = false; break; }
+                               
+                               //compare from the most significant part and stop at the first part that differs
+                               for (size_t j = 0; j < versionVector.size(); j++) {
+                                       int num1, num2;
+                                       convert(versionVector[j], num1);
+                                       convert(linesVector[j], num2);
+                                       if (num1 > num2) {  good = false; break;  }  //mothur is newer than the file, remake it
+                                       if (num1 < num2) {  break;  }                //file is newer, later parts do not matter
+                               }
+                       }
+               }
+               
+               if (!good) {  file1.close(); file2.close(); file3.close(); file4.close();  }
+               else {
+                       //clear any eof/fail bits left by getline so the rewind is not ignored
+                       file1.clear();  file2.clear();  file3.clear();  file4.clear();
+                       file1.seekg(0);  file2.seekg(0);  file3.seekg(0);  file4.seekg(0);
+               }
+               return good;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Bayesian", "checkReleaseDate");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 
 
 
index fa6590ab98a393d5d4907102db4401704071e5f9..012def96d8d2d04c8c258963668e261f125a9871 100644 (file)
@@ -35,6 +35,7 @@ private:
        string bootstrapResults(vector<int>, int, int);
        int getMostProbableTaxonomy(vector<int>);
        void readProbFile(ifstream&, ifstream&, string, string);
+       bool checkReleaseDate(ifstream&, ifstream&, ifstream&, ifstream&);
        
 };
 
index f11e1cafe9b330aebd0edd1d12611255daebbf42..5818ab22c0bfc3e0245372de038e4c43d95a2289 100644 (file)
@@ -173,12 +173,17 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                                //check for consfile
                                string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq";
                                ifstream FileTest(tempConsFile.c_str());
-                               if(FileTest){   m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     }
+                               if(FileTest){   
+                                       bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+                                       if (GoodFile) {  
+                                               m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
+                                       }
+                               }
                        }       
                        
                        quanfile = validParameter.validFile(parameters, "quantile", true);
                        if (quanfile == "not open") { abort = true; }
-                       else if (quanfile == "not found") { quanfile = "";  }
+                       else if (quanfile == "not found") { quanfile = ""; }
                }
        }
        catch(exception& e) {
@@ -251,7 +256,12 @@ int ChimeraPintailCommand::execute(){
                        }
                        
                        ifstream FileTest(tempQuan.c_str());
-                       if(FileTest){   m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     }
+                       if(FileTest){   
+                               bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+                               if (GoodFile) {  
+                                       m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
+                               }
+                       }
                        
                        chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
                        
index b92e8c8d355f6d105c141857bd96c19538ba194b..ff13590cd3a91cb8960e26af803ca7fa63918035 100644 (file)
@@ -102,8 +102,14 @@ int ChimeraSlayer::doPrep() {
                        //leftside
                        kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
+                       bool needToGenerateLeft = true;
                        
-                       if(!kmerFileTestLeft){  
+                       if(kmerFileTestLeft){   
+                               bool GoodFile = checkReleaseVersion(kmerFileTestLeft, m->getVersion());
+                               if (GoodFile) {  needToGenerateLeft = false;    }
+                       }
+                       
+                       if(needToGenerateLeft){ 
                        
                                for (int i = 0; i < templateSeqs.size(); i++) {
                                        
@@ -127,8 +133,14 @@ int ChimeraSlayer::doPrep() {
                        //rightside
                        kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTestRight(kmerDBNameRight.c_str());
+                       bool needToGenerateRight = true;
+                       
+                       if(kmerFileTestRight){  
+                               bool GoodFile = checkReleaseVersion(kmerFileTestRight, m->getVersion());
+                               if (GoodFile) {  needToGenerateRight = false;   }
+                       }
                        
-                       if(!kmerFileTestRight){ 
+                       if(needToGenerateRight){        
                        
                                for (int i = 0; i < templateSeqs.size(); i++) {
                                        if (m->control_pressed) { return 0; } 
index 6bf2cb57ee2833d91ba0520f8e1e67a5c05ec348..59a615881418f25ace4a0db84b1a0d35f9d9fcd5 100644 (file)
@@ -108,7 +108,10 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me
                        
                        kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
                        ifstream kmerFileTest(kmerDBName.c_str());
-                       if(kmerFileTest){       needToGenerate = false;         }
+                       if(kmerFileTest){       
+                               bool GoodFile = checkReleaseVersion(kmerFileTest, m->getVersion());
+                               if (GoodFile) {  needToGenerate = false;        }
+                       }
                }
                else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
                else if(method == "blast")              {       database = new BlastDB(gapOpen, gapExtend, match, misMatch);    }
index bd9f34bc7eb3f51b6b360ce4b4f18251f194e155..e92569d6f27defcf14b0e6d2527bf9d743bd0988 100644 (file)
@@ -31,6 +31,7 @@ public:
        virtual string getTaxonomy(Sequence*) = 0;
        virtual string getSimpleTax()  { return simpleTax;      }
        virtual void generateDatabaseAndNames(string, string, string, int, float, float, float, float);
+       virtual void setDistName(string s) {} //no-op by default; Knn overrides this so that, when knn is run with the distance search, its best-match distance file is written at the path the caller supplies.
        
 protected:
 
index a95f436dd7ffdf4403d316be77a1f8ab580f21f5..6c42418c2ae788939206a41ee650d1d3866ca4c3 100644 (file)
@@ -409,6 +409,11 @@ int ClassifySeqsCommand::execute(){
                        string tempTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
                        string taxSummary = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
                        
+                       if ((method == "knn") && (search == "distance")) { 
+                               string DistName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "match.dist";
+                               classify->setDistName(DistName);  outputNames.push_back(DistName);
+                       }
+                       
                        outputNames.push_back(newTaxonomyFile);
                        outputNames.push_back(taxSummary);
                        
index 3191fdfb02d5323129af949cf1eb50aea50ff3c8..9293f13d5f091251d77115d2f3dc5344868d2c5c 100644 (file)
@@ -47,6 +47,7 @@ public:
        virtual ~Database();
        virtual void generateDB() = 0; 
        virtual void addSequence(Sequence) = 0;  //add sequence to search engine
+       virtual string getName(int) { return ""; }  //default: returns an empty string; DistanceDB supplies a version that returns the name of its stored sequence at the given index
        virtual vector<int> findClosestSequences(Sequence*, int) = 0;  // returns indexes of n closest sequences to query
        virtual vector<int> findClosestMegaBlast(Sequence*, int){return results;}
        virtual float getSearchScore();
index 2e214bffe969181c9f83061edc69a491610fe8a8..94b6c938009bb412ad62420bf6dfea2b6428e6e7 100644 (file)
@@ -295,6 +295,8 @@ vector<float> DeCalculator::calcFreq(vector<Sequence*> seqs, string filename) {
                
                openOutputFile(freqfile, outFreq);
                
+               outFreq << "#" << m->getVersion() << endl;
+               
                string length = toString(seqs.size());  //if there are 5000 seqs in the template then set precision to 3
                int precision = length.length() - 1;
                
index 5c49a936e9293c629e05ea5565f334a43e5cc37c..ca6ffe8ba61217ac0b66b353e8f33461f4916e52 100644 (file)
@@ -48,6 +48,8 @@ vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
                bool templateSameLength = true;
                string sequence = query->getAligned();
                vector<seqDist> dists;
+               
+               searchScore = -1.0;
        
                if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); }
                
@@ -66,6 +68,9 @@ vector<int> DistanceDB::findClosestSequences(Sequence* query, int numWanted){
                        
                        sort(dists.begin(), dists.end(), compareSequenceDistance);  //sorts by distance lowest to highest
                        
+                       //save distance of best match
+                       searchScore = dists[0].dist;
+                       
                        //fill topmatches with numwanted closest sequences indexes
                        for (int i = 0; i < numWanted; i++) {
                                topMatches.push_back(dists[i].seq2);
index bfb5090bcda99d7396bfcf0092048c6d3e10eb49..2624d6d6440190520e02af09c43264bf365c9f33 100644 (file)
@@ -22,7 +22,8 @@ public:
        ~DistanceDB() { delete distCalculator; }
        
        void generateDB() {} //doesn't generate a search db 
-       void addSequence(Sequence);  
+       void addSequence(Sequence); 
+       string getName(int i) { return data[i].getName(); } //name of the i-th sequence stored in data; i is not range-checked here
        vector<int> findClosestSequences(Sequence*, int);  // returns indexes of n closest sequences to query
        
        #ifdef USE_MPI  
index 33761a8416524fa3a0fd10b7136ea5a097d3fd08..bd5b9762b41dbc63e116172ad00e419755fdbbf1 100644 (file)
@@ -109,6 +109,9 @@ void KmerDB::generateDB(){
                ofstream kmerFile;                                                                              //      once we have the kmerLocations folder print it out
                openOutputFile(kmerDBName, kmerFile);                                   //      to a file
                
+               //output version, prefixed with '#' because checkReleaseVersion() rejects any file whose first line does not start with '#'
+               kmerFile << "#" << m->getVersion() << endl;
+               
                for(int i=0;i<maxKmer;i++){                                                             //      step through all of the possible kmer numbers
                        kmerFile << i << ' ' << kmerLocations[i].size();        //      print the kmer number and the number of sequences with
                        for(int j=0;j<kmerLocations[i].size();j++){                     //      that kmer.  then print out the indices of the sequences
@@ -156,6 +159,9 @@ void KmerDB::readKmerDB(ifstream& kmerDBFile){
                                        
                kmerDBFile.seekg(0);                                                                    //      start at the beginning of the file
                
+               //read version
+               string line = getline(kmerDBFile); gobble(kmerDBFile);
+               
                string seqName;
                int seqNumber;
 
diff --git a/knn.cpp b/knn.cpp
index beee4aff06b91ff23c55fbc7253c0616b6a7f995..a8430723fdd653899e9239b22adaa941af113012 100644 (file)
--- a/knn.cpp
+++ b/knn.cpp
@@ -11,7 +11,7 @@
 
 /**************************************************************************************************/
 Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n) 
-: Classify(), num(n)  {
+: Classify(), num(n), search(method) {
        try {
                //create search database and names vector
                generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch);
@@ -22,6 +22,20 @@ Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOp
        }
 }
 /**************************************************************************************************/
+void Knn::setDistName(string s) {
+       try {
+               //remember the file name; getTaxonomy() appends one row per query to this file
+               outDistName = s;
+               
+               //write the column headings
+               ofstream headerOut;
+               openOutputFile(outDistName, headerOut);
+               headerOut << "Name\tBestMatch\tDistance" << endl;
+               headerOut.close();
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Knn", "setDistName");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 Knn::~Knn() {
        try {
                 delete phyloTree; 
@@ -39,7 +53,9 @@ string Knn::getTaxonomy(Sequence* seq) {
                
                //use database to find closest seq
                vector<int> closest = database->findClosestSequences(seq, num);
-               
+       
+               if (search == "distance") { ofstream outDistance; openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << database->getSearchScore() << endl; outDistance.close();  }
+       
                if (m->control_pressed) { return tax; }
 
                vector<string> closestNames;
diff --git a/knn.h b/knn.h
index 45cb0f1cf63b5a084417c5d7cb0fd0f59970f98f..19653821d0b1b367f4509e6d74816d96c968b9b7 100644 (file)
--- a/knn.h
+++ b/knn.h
@@ -21,11 +21,14 @@ public:
        Knn(string, string, string, int, float, float, float, float, int);
        ~Knn();
        
+       void setDistName(string s);
        string getTaxonomy(Sequence*);
        
 private:
        int num;
        string findCommonTaxonomy(vector<string>);
+       string search, outDistName;
+       
 };
 
 /**************************************************************************************************/
index 46ccb9d106edee81bdfdea884576c39d37d09c56..dc5065db04a20db83b186cfcc3fe81e2d5901356 100644 (file)
--- a/makefile
+++ b/makefile
 CXXFLAGS += -O3
 
 MOTHUR_FILES = "\"../Release\""
+
+RELEASE_DATE = "\"8/5/2010\""
+VERSION = "\"1.12.3\""
+
+CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION}
+
 ifeq  ($(strip $(MOTHUR_FILES)),"\"Enter_your_default_path_here\"")
 else
        CXXFLAGS += -DMOTHUR_FILES=${MOTHUR_FILES}
index ea3ea3e521cf6f00c48156df3a6c57e70ef97bda..eb5e8f92c105f708d56a6d4ce681e94aaa0453c3 100644 (file)
@@ -97,10 +97,16 @@ int main(int argc, char *argv[]){
                        m->mothurOutEndLine(); m->mothurOutEndLine();
                #endif
                
+               //get releaseDate from Make
+               string releaseDate = RELEASE_DATE; 
+               string mothurVersion = VERSION; 
+               m->setReleaseDate(releaseDate);
+               m->setVersion(mothurVersion);
+               
                //header
-               m->mothurOut("mothur v.1.12.2");
+               m->mothurOut("mothur v." + mothurVersion);
                m->mothurOutEndLine();          
-               m->mothurOut("Last updated: 7/30/2010");
+               m->mothurOut("Last updated: " + releaseDate);
                m->mothurOutEndLine();  
                m->mothurOutEndLine();          
                m->mothurOut("by");
index f1b13920d0a4c8941214dbf3e42c96ed4ce39486..0c4429efd468d3d1cbcf2ecc64aefe89cdf8369e 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -222,7 +222,7 @@ inline void gobble(istream& f){
 }
 /***********************************************************************/
 
-inline string getline(ifstream& fileHandle) {
+inline string getline(istringstream& fileHandle) {
        try {
        
                string line = "";
@@ -244,7 +244,30 @@ inline string getline(ifstream& fileHandle) {
                exit(1);
        }
 }
+/***********************************************************************/
 
+//Reads one line from fileHandle and returns it without its terminator.
+//A line ends at the first '\n', '\r' or '\f' (that one character is consumed) or at end of file.
+//Nothing after the terminator is consumed, which is why callers follow this with gobble().
+inline string getline(ifstream& fileHandle) {
+       try {
+       
+               string line = "";
+               char c;
+               
+               //get(c) fails at end of file or on a bad stream, so the loop stops there
+               //instead of appending the EOF marker to the line or spinning forever
+               while (fileHandle.get(c))       {
+                       //are you at the end of the line
+                       if ((c == '\n') || (c == '\r') || (c == '\f')){  break; }       
+                       else {          line += c;              }
+               }
+               
+               return line;
+               
+       }
+       catch(exception& e) {
+               cout << "Error in mothur function getline" << endl;
+               exit(1);
+       }
+}
 /***********************************************************************/
 
 inline bool isTrue(string f){
@@ -1098,7 +1121,51 @@ inline vector<unsigned long int> setFilePosEachLine(string filename, int& num) {
                
                        return positions;
 }
+/**************************************************************************************************/
+//Decides whether a shortcut file written by mothur may be reused by the running mothur.
+//  file    - freshly opened stream on the shortcut file
+//  version - version of the running mothur, for example "1.12.3"
+//The first line of the file must be '#' followed by the version that wrote the file.
+//Returns true and rewinds the stream when the file's version is not older than version.
+//Returns false and closes the stream when the first line does not start with '#', when the two
+//versions have a different number of '.'-separated parts, or when the file's version is older.
+inline bool checkReleaseVersion(ifstream& file, string version) {
+       try {
+               
+               bool good = true;
+               
+               string line = getline(file);  
 
+               //before we added this check (an empty first line counts as unversioned too)
+               if (line.empty() || (line[0] != '#')) {  good = false;  }
+               else {
+                       //rip off #
+                       line = line.substr(1);
+                       
+                       vector<string> versionVector;
+                       splitAtChar(version, versionVector, '.');
+                       
+                       //check file version
+                       vector<string> linesVector;
+                       splitAtChar(line, linesVector, '.');
+                       
+                       if (versionVector.size() != linesVector.size()) { good = false; }
+                       else {
+                               for (int j = 0; j < versionVector.size(); j++) {
+                                       int num1, num2;
+                                       convert(versionVector[j], num1);
+                                       convert(linesVector[j], num2);
+                                       
+                                       //the first part that differs decides the outcome:
+                                       //if mothur's version is newer than this file's version, then we want to remake it
+                                       if (num1 > num2) {  good = false; break;  }
+                                       //if the file's version is newer, stop so later parts are not compared (1.12.3 against 1.13.0)
+                                       if (num1 < num2) {  break;  }
+                               }
+                       }
+                       
+               }
+               
+               if (!good) {  file.close();  }
+               else { file.seekg(0);  }
+               
+               return good;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function checkReleaseVersion. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 #endif
 
index 1b89caf933ea4c436743ac992206904e36b51d9e..92363f5b6f26996b2e83badf681befa1ae9e3e0a 100644 (file)
@@ -27,6 +27,12 @@ class MothurOut {
                void closeLog();
                string getDefaultPath() { return defaultPath; }
                void setDefaultPath(string);
+               
+               string getReleaseDate() { return releaseDate; } //release date string stored by setReleaseDate()
+               void setReleaseDate(string r) { releaseDate = r; } //main() passes the RELEASE_DATE value defined in the makefile
+               string getVersion() { return version; } //version string stored by setVersion(); it is written as the first line of the shortcut files
+               void setVersion(string r) { version = r; } //main() passes the VERSION value defined in the makefile
+
 
                int control_pressed;
                bool executing;
@@ -41,6 +47,8 @@ class MothurOut {
 
                string logFileName;
                string defaultPath;
+               string releaseDate, version;
+               
                ofstream out;
                
                int mem_usage(double&, double&);
index 08cedeb26cbc134c2527b67abb0a5289c4013206..870f35fa3b994a78ed165d1179c8ee2d6daf207b 100644 (file)
@@ -250,6 +250,10 @@ void PhyloSummary::print(int i, ofstream& out){
 /**************************************************************************************************/
 void PhyloSummary::readTreeStruct(ifstream& in){
        try {
+       
+               //read version
+               string line = getline(in); gobble(in);
+               
                int num;
                
                in >> num; gobble(in);
index e2b0805656064d0930de37933a1e599301573881..2ea219350c1f8bf8099b7cfc40d28ddcdfc9d872 100644 (file)
@@ -54,6 +54,9 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        istringstream iss (tempBuf,istringstream::in);
                        delete buffer;
                        
+                       //read version
+                       getline(iss); gobble(iss);
+                       
                        iss >> numNodes; gobble(iss);
                        
                        tree.resize(numNodes);
@@ -78,6 +81,9 @@ PhyloTree::PhyloTree(ifstream& in, string filename){
                        MPI_File_close(&inMPI);
                        
                #else
+                       //read version
+                       string line = getline(in); gobble(in);
+                       
                        in >> numNodes; gobble(in);
                        
                        tree.resize(numNodes);
@@ -474,6 +480,10 @@ string PhyloTree::getFullTaxonomy(string seqName) {
 
 void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
        try {
+       
+               //output mothur version
+               out << "#" << m->getVersion() << endl;
+               
                out << copy.size() << endl;
                
                out << maxLevel << endl;
@@ -511,6 +521,9 @@ void PhyloTree::printTreeNodes(string treefilename) {
                        ofstream outTree;
                        openOutputFile(treefilename, outTree);
                        
+                       //output mothur version
+                       outTree << "#" << m->getVersion() << endl;
+                       
                        //print treenodes
                        outTree << tree.size() << endl;
                        for (int i = 0; i < tree.size(); i++) {
index 5bfdc44128dd300cc22a33e8fa696d032dcab774..8a159ca381bd3378ee3f2f63d9dabdc692c1676d 100644 (file)
@@ -183,7 +183,7 @@ int Pintail::doPrep() {
                        
                        if (m->control_pressed) {  return 0;  }
                
-                       string outputString = "";
+                       string outputString = "#" + m->getVersion() + "\n";
                        
                        //adjust quantiles
                        for (int i = 0; i < quantilesMembers.size(); i++) {
@@ -449,6 +449,9 @@ vector<float> Pintail::readFreq() {
                if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
                istringstream iss (tempBuf,istringstream::in);
                
+               //read version
+               string line = getline(iss); gobble(iss);
+               
                while(!iss.eof()) {
                        iss >> pos >> num;
        
@@ -472,6 +475,9 @@ vector<float> Pintail::readFreq() {
 
                ifstream in;
                openInputFile(consfile, in);
+               
+               //read version
+               string line = getline(in); gobble(in);
                                
                while(!in.eof()){
                        
@@ -648,6 +654,9 @@ vector< vector<float> > Pintail::readQuantiles() {
                istringstream iss (tempBuf,istringstream::in);
                delete buffer;
                
+               //read version
+               string line = getline(iss); gobble(iss);
+               
                while(!iss.eof()) {
                        iss >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; 
                        
@@ -671,6 +680,9 @@ vector< vector<float> > Pintail::readQuantiles() {
 
                ifstream in;
                openInputFile(quanfile, in);
+               
+               //read version
+               string line = getline(in); gobble(in);
                        
                while(!in.eof()){