From 050220fe7822cc660615972a0054cf4a83eefbe4 Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 6 Aug 2010 13:36:36 +0000 Subject: [PATCH] added versioning info to all shortcut files mothur makes. --- alignmentdb.cpp | 421 +++++++++++++++++++------------------- bayesian.cpp | 104 +++++++++- bayesian.h | 1 + chimerapintailcommand.cpp | 16 +- chimeraslayer.cpp | 16 +- classify.cpp | 5 +- classify.h | 1 + classifyseqscommand.cpp | 5 + database.hpp | 1 + decalc.cpp | 2 + distancedb.cpp | 5 + distancedb.hpp | 3 +- kmerdb.cpp | 6 + knn.cpp | 20 +- knn.h | 3 + makefile | 6 + mothur.cpp | 10 +- mothur.h | 69 ++++++- mothurout.h | 8 + phylosummary.cpp | 4 + phylotree.cpp | 13 ++ pintail.cpp | 14 +- 22 files changed, 502 insertions(+), 231 deletions(-) diff --git a/alignmentdb.cpp b/alignmentdb.cpp index bf46bec..8febfe2 100644 --- a/alignmentdb.cpp +++ b/alignmentdb.cpp @@ -1,209 +1,212 @@ -/* - * alignmentdb.cpp - * Mothur - * - * Created by westcott on 11/4/09. - * Copyright 2009 Schloss Lab. All rights reserved. - * - */ - -#include "alignmentdb.h" -#include "kmerdb.hpp" -#include "suffixdb.hpp" -#include "blastdb.hpp" - - -/**************************************************************************************************/ -AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may - try { // need to alter this in the future? - m = MothurOut::getInstance(); - longest = 0; - method = s; - bool needToGenerate = true; - - m->mothurOutEndLine(); - m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush(); - - #ifdef USE_MPI - int pid, processors; - vector positions; - - MPI_Status status; - MPI_File inMPI; - MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are - MPI_Comm_size(MPI_COMM_WORLD, &processors); - int tag = 2001; - - char inFileName[1024]; - strcpy(inFileName, fastaFileName.c_str()); - - MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer - - if (pid == 0) { - positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs - - //send file positions to all processes - for(int i = 1; i < processors; i++) { - MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); - MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); - } - }else{ - MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); - positions.resize(numSeqs+1); - MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); - } - - //read file - for(int i=0;icontrol_pressed) { templateSequences.clear(); break; } - - //read next sequence - int length = positions[i+1] - positions[i]; - char* buf4 = new char[length]; - - MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status); - - string tempBuf = buf4; - if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } - delete buf4; - - istringstream iss (tempBuf,istringstream::in); - - Sequence temp(iss); - if (temp.getName() != "") { - templateSequences.push_back(temp); - //save longest base - if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } - } - } - - MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case - - MPI_File_close(&inMPI); - - #else - ifstream fastaFile; - openInputFile(fastaFileName, fastaFile); - - while (!fastaFile.eof()) { - Sequence temp(fastaFile); gobble(fastaFile); - - if (m->control_pressed) { templateSequences.clear(); break; } - - if (temp.getName() != "") { - templateSequences.push_back(temp); - //save longest base - if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } - } - } - fastaFile.close(); - - #endif - - numSeqs = templateSequences.size(); - //all of this is elsewhere already! - - m->mothurOut("DONE."); - m->mothurOutEndLine(); cout.flush(); - - //in case you delete the seqs and then ask for them - emptySequence = Sequence(); - emptySequence.setName("no_match"); - emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - - - string kmerDBName; - if(method == "kmer") { - search = new KmerDB(fastaFileName, kmerSize); - - #ifdef USE_MPI - #else - kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; - ifstream kmerFileTest(kmerDBName.c_str()); - - if(kmerFileTest){ needToGenerate = false; } - #endif - } - else if(method == "suffix") { search = new SuffixDB(numSeqs); } - else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); } - else { - m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); - m->mothurOutEndLine(); - search = new KmerDB(fastaFileName, 8); - } - - if (!(m->control_pressed)) { - if (needToGenerate) { - //add sequences to search - for (int i = 0; i < templateSequences.size(); i++) { - search->addSequence(templateSequences[i]); - - if (m->control_pressed) { templateSequences.clear(); break; } - } - - if (m->control_pressed) { templateSequences.clear(); } - - search->generateDB(); - - }else if ((method == "kmer") && (!needToGenerate)) { - ifstream kmerFileTest(kmerDBName.c_str()); - search->readKmerDB(kmerFileTest); - } - - search->setNumSeqs(numSeqs); - } - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "AlignmentDB"); - exit(1); - } -} -/**************************************************************************************************/ -AlignmentDB::AlignmentDB(string s){ - try { - m = MothurOut::getInstance(); - method = s; - - if(method == "suffix") { search = new SuffixDB(); } - else if(method == "blast") { search = new BlastDB(); } - else { search = new KmerDB(); } - - - //in case you delete the seqs and then ask for them - emptySequence = Sequence(); - emptySequence.setName("no_match"); - emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "AlignmentDB"); - exit(1); - } -} -/**************************************************************************************************/ -AlignmentDB::~AlignmentDB() { delete search; } -/**************************************************************************************************/ -Sequence AlignmentDB::findClosestSequence(Sequence* seq) { - try{ - - vector spot = search->findClosestSequences(seq, 1); - - if (spot.size() != 0) { return templateSequences[spot[0]]; } - else { return emptySequence; } - - } - catch(exception& e) { - m->errorOut(e, "AlignmentDB", "findClosestSequence"); - exit(1); - } -} -/**************************************************************************************************/ - - - - - - +/* + * alignmentdb.cpp + * Mothur + * + * Created by westcott on 11/4/09. + * Copyright 2009 Schloss Lab. All rights reserved. + * + */ + +#include "alignmentdb.h" +#include "kmerdb.hpp" +#include "suffixdb.hpp" +#include "blastdb.hpp" + + +/**************************************************************************************************/ +AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may + try { // need to alter this in the future? + m = MothurOut::getInstance(); + longest = 0; + method = s; + bool needToGenerate = true; + + m->mothurOutEndLine(); + m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush(); + + #ifdef USE_MPI + int pid, processors; + vector positions; + + MPI_Status status; + MPI_File inMPI; + MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are + MPI_Comm_size(MPI_COMM_WORLD, &processors); + int tag = 2001; + + char inFileName[1024]; + strcpy(inFileName, fastaFileName.c_str()); + + MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer + + if (pid == 0) { + positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs + + //send file positions to all processes + for(int i = 1; i < processors; i++) { + MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD); + MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD); + } + }else{ + MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status); + positions.resize(numSeqs+1); + MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); + } + + //read file + for(int i=0;icontrol_pressed) { templateSequences.clear(); break; } + + //read next sequence + int length = positions[i+1] - positions[i]; + char* buf4 = new char[length]; + + MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status); + + string tempBuf = buf4; + if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } + delete buf4; + + istringstream iss (tempBuf,istringstream::in); + + Sequence temp(iss); + if (temp.getName() != "") { + templateSequences.push_back(temp); + //save longest base + if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } + } + } + + MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case + + MPI_File_close(&inMPI); + + #else + ifstream fastaFile; + openInputFile(fastaFileName, fastaFile); + + while (!fastaFile.eof()) { + Sequence temp(fastaFile); gobble(fastaFile); + + if (m->control_pressed) { templateSequences.clear(); break; } + + if (temp.getName() != "") { + templateSequences.push_back(temp); + //save longest base + if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; } + } + } + fastaFile.close(); + + #endif + + numSeqs = templateSequences.size(); + //all of this is elsewhere already! + + m->mothurOut("DONE."); + m->mothurOutEndLine(); cout.flush(); + + //in case you delete the seqs and then ask for them + emptySequence = Sequence(); + emptySequence.setName("no_match"); + emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + + + string kmerDBName; + if(method == "kmer") { + search = new KmerDB(fastaFileName, kmerSize); + + #ifdef USE_MPI + #else + kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; + ifstream kmerFileTest(kmerDBName.c_str()); + + if(kmerFileTest){ + bool GoodFile = checkReleaseVersion(kmerFileTest, m->getVersion()); + if (GoodFile) { needToGenerate = false; } + } + #endif + } + else if(method == "suffix") { search = new SuffixDB(numSeqs); } + else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); } + else { + m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); + m->mothurOutEndLine(); + search = new KmerDB(fastaFileName, 8); + } + + if (!(m->control_pressed)) { + if (needToGenerate) { + //add sequences to search + for (int i = 0; i < templateSequences.size(); i++) { + search->addSequence(templateSequences[i]); + + if (m->control_pressed) { templateSequences.clear(); break; } + } + + if (m->control_pressed) { templateSequences.clear(); } + + search->generateDB(); + + }else if ((method == "kmer") && (!needToGenerate)) { + ifstream kmerFileTest(kmerDBName.c_str()); + search->readKmerDB(kmerFileTest); + } + + search->setNumSeqs(numSeqs); + } + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "AlignmentDB"); + exit(1); + } +} +/**************************************************************************************************/ +AlignmentDB::AlignmentDB(string s){ + try { + m = MothurOut::getInstance(); + method = s; + + if(method == "suffix") { search = new SuffixDB(); } + else if(method == "blast") { search = new BlastDB(); } + else { search = new KmerDB(); } + + + //in case you delete the seqs and then ask for them + emptySequence = Sequence(); + emptySequence.setName("no_match"); + emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); + + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "AlignmentDB"); + exit(1); + } +} +/**************************************************************************************************/ +AlignmentDB::~AlignmentDB() { delete search; } +/**************************************************************************************************/ +Sequence AlignmentDB::findClosestSequence(Sequence* seq) { + try{ + + vector spot = search->findClosestSequences(seq, 1); + + if (spot.size() != 0) { return templateSequences[spot[0]]; } + else { return emptySequence; } + + } + catch(exception& e) { + m->errorOut(e, "AlignmentDB", "findClosestSequence"); + exit(1); + } +} +/**************************************************************************************************/ + + + + + + diff --git a/bayesian.cpp b/bayesian.cpp index ae96a22..e596f67 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -34,7 +34,13 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { int start = time(NULL); - if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){ + //if they are there make sure they were created after this release date + bool FilesGood = false; + if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){ + FilesGood = checkReleaseDate(probFileTest, probFileTest2, phyloTreeTest, probFileTest3); + } + + if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){ m->mothurOut("Reading template taxonomy... "); cout.flush(); phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName); @@ -86,10 +92,16 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { openOutputFile(probFileName, out); + //output mothur version + out << "#" << m->getVersion() << endl; + out << numKmers << endl; openOutputFile(probFileName2, out2); + //output mothur version + out2 << "#" << m->getVersion() << endl; + #ifdef USE_MPI } #endif @@ -416,12 +428,19 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string positions2.resize(num2+1); MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status); } - - //read numKmers + + //read version int length = positions2[1] - positions2[0]; + char* buf5 = new char[length]; + + MPI_File_read_at(inMPI2, positions2[0], buf5, length, MPI_CHAR, &status); + delete buf5; + + //read numKmers + length = positions2[2] - positions2[1]; char* buf = new char[length]; - MPI_File_read_at(inMPI2, positions2[0], buf, length, MPI_CHAR, &status); + MPI_File_read_at(inMPI2, positions2[1], buf, length, MPI_CHAR, &status); string tempBuf = buf; if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); } @@ -438,10 +457,17 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string int kmer, name; vector numbers; numbers.resize(numKmers); float prob; - vector zeroCountProb; zeroCountProb.resize(numKmers); + vector zeroCountProb; zeroCountProb.resize(numKmers); + + //read version + length = positions[1] - positions[0]; + char* buf6 = new char[length]; + MPI_File_read_at(inMPI2, positions[0], buf6, length, MPI_CHAR, &status); + delete buf6; + //read file - for(int i=0;i> numKmers; gobble(in); //initialze probabilities @@ -500,7 +528,10 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string vector num; num.resize(numKmers); float prob; vector zeroCountProb; zeroCountProb.resize(numKmers); - + + //read version + string line2 = getline(inNum); gobble(inNum); + while (inNum) { inNum >> zeroCountProb[count] >> num[count]; count++; @@ -534,6 +565,61 @@ void Bayesian::readProbFile(ifstream& in, ifstream& inNum, string inName, string } } /**************************************************************************************************/ +bool Bayesian::checkReleaseDate(ifstream& file1, ifstream& file2, ifstream& file3, ifstream& file4) { + try { + + bool good = true; + + vector lines; + lines.push_back(getline(file1)); + lines.push_back(getline(file2)); + lines.push_back(getline(file3)); + lines.push_back(getline(file4)); + + //before we added this check + if ((lines[0][0] != '#') || (lines[1][0] != '#') || (lines[2][0] != '#') || (lines[3][0] != '#')) { good = false; } + else { + //rip off # + for (int i = 0; i < lines.size(); i++) { lines[i] = lines[i].substr(1); } + + //get mothurs current version + string version = m->getVersion(); + + vector versionVector; + splitAtChar(version, versionVector, '.'); + + //check each files version + for (int i = 0; i < lines.size(); i++) { + vector linesVector; + splitAtChar(lines[i], linesVector, '.'); + + if (versionVector.size() != linesVector.size()) { good = false; break; } + else { + for (int j = 0; j < versionVector.size(); j++) { + int num1, num2; + convert(versionVector[j], num1); + convert(linesVector[j], num2); + + //if mothurs version is newer than this files version, then we want to remake it + if (num1 > num2) { good = false; break; } + } + } + + if (!good) { break; } + } + } + + if (!good) { file1.close(); file2.close(); file3.close(); file4.close(); } + else { file1.seekg(0); file2.seekg(0); file3.seekg(0); file4.seekg(0); } + + return good; + } + catch(exception& e) { + m->errorOut(e, "Bayesian", "checkReleaseDate"); + exit(1); + } +} +/**************************************************************************************************/ diff --git a/bayesian.h b/bayesian.h index fa6590a..012def9 100644 --- a/bayesian.h +++ b/bayesian.h @@ -35,6 +35,7 @@ private: string bootstrapResults(vector, int, int); int getMostProbableTaxonomy(vector); void readProbFile(ifstream&, ifstream&, string, string); + bool checkReleaseDate(ifstream&, ifstream&, ifstream&, ifstream&); }; diff --git a/chimerapintailcommand.cpp b/chimerapintailcommand.cpp index f11e1ca..5818ab2 100644 --- a/chimerapintailcommand.cpp +++ b/chimerapintailcommand.cpp @@ -173,12 +173,17 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option) { //check for consfile string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq"; ifstream FileTest(tempConsFile.c_str()); - if(FileTest){ m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close(); } + if(FileTest){ + bool GoodFile = checkReleaseVersion(FileTest, m->getVersion()); + if (GoodFile) { + m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close(); + } + } } quanfile = validParameter.validFile(parameters, "quantile", true); if (quanfile == "not open") { abort = true; } - else if (quanfile == "not found") { quanfile = ""; } + else if (quanfile == "not found") { quanfile = ""; } } } catch(exception& e) { @@ -251,7 +256,12 @@ int ChimeraPintailCommand::execute(){ } ifstream FileTest(tempQuan.c_str()); - if(FileTest){ m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close(); } + if(FileTest){ + bool GoodFile = checkReleaseVersion(FileTest, m->getVersion()); + if (GoodFile) { + m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close(); + } + } chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir); diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index b92e8c8..ff13590 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -102,8 +102,14 @@ int ChimeraSlayer::doPrep() { //leftside kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTestLeft(kmerDBNameLeft.c_str()); + bool needToGenerateLeft = true; - if(!kmerFileTestLeft){ + if(kmerFileTestLeft){ + bool GoodFile = checkReleaseVersion(kmerFileTestLeft, m->getVersion()); + if (GoodFile) { needToGenerateLeft = false; } + } + + if(needToGenerateLeft){ for (int i = 0; i < templateSeqs.size(); i++) { @@ -127,8 +133,14 @@ int ChimeraSlayer::doPrep() { //rightside kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTestRight(kmerDBNameRight.c_str()); + bool needToGenerateRight = true; + + if(kmerFileTestRight){ + bool GoodFile = checkReleaseVersion(kmerFileTestRight, m->getVersion()); + if (GoodFile) { needToGenerateRight = false; } + } - if(!kmerFileTestRight){ + if(needToGenerateRight){ for (int i = 0; i < templateSeqs.size(); i++) { if (m->control_pressed) { return 0; } diff --git a/classify.cpp b/classify.cpp index 6bf2cb5..59a6158 100644 --- a/classify.cpp +++ b/classify.cpp @@ -108,7 +108,10 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer"; ifstream kmerFileTest(kmerDBName.c_str()); - if(kmerFileTest){ needToGenerate = false; } + if(kmerFileTest){ + bool GoodFile = checkReleaseVersion(kmerFileTest, m->getVersion()); + if (GoodFile) { needToGenerate = false; } + } } else if(method == "suffix") { database = new SuffixDB(numSeqs); } else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); } diff --git a/classify.h b/classify.h index bd9f34b..e92569d 100644 --- a/classify.h +++ b/classify.h @@ -31,6 +31,7 @@ public: virtual string getTaxonomy(Sequence*) = 0; virtual string getSimpleTax() { return simpleTax; } virtual void generateDatabaseAndNames(string, string, string, int, float, float, float, float); + virtual void setDistName(string s) {} //for knn, so if distance method is selected with knn you can create the smallest distance file in the right place. protected: diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index a95f436..6c42418 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -409,6 +409,11 @@ int ClassifySeqsCommand::execute(){ string tempTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "taxonomy.temp"; string taxSummary = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary"; + if ((method == "knn") && (search == "distance")) { + string DistName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "match.dist"; + classify->setDistName(DistName); outputNames.push_back(DistName); + } + outputNames.push_back(newTaxonomyFile); outputNames.push_back(taxSummary); diff --git a/database.hpp b/database.hpp index 3191fdf..9293f13 100644 --- a/database.hpp +++ b/database.hpp @@ -47,6 +47,7 @@ public: virtual ~Database(); virtual void generateDB() = 0; virtual void addSequence(Sequence) = 0; //add sequence to search engine + virtual string getName(int) { return ""; } virtual vector findClosestSequences(Sequence*, int) = 0; // returns indexes of n closest sequences to query virtual vector findClosestMegaBlast(Sequence*, int){return results;} virtual float getSearchScore(); diff --git a/decalc.cpp b/decalc.cpp index 2e214bf..94b6c93 100644 --- a/decalc.cpp +++ b/decalc.cpp @@ -295,6 +295,8 @@ vector DeCalculator::calcFreq(vector seqs, string filename) { openOutputFile(freqfile, outFreq); + outFreq << "#" << m->getVersion() << endl; + string length = toString(seqs.size()); //if there are 5000 seqs in the template then set precision to 3 int precision = length.length() - 1; diff --git a/distancedb.cpp b/distancedb.cpp index 5c49a93..ca6ffe8 100644 --- a/distancedb.cpp +++ b/distancedb.cpp @@ -48,6 +48,8 @@ vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ bool templateSameLength = true; string sequence = query->getAligned(); vector dists; + + searchScore = -1.0; if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); } @@ -66,6 +68,9 @@ vector DistanceDB::findClosestSequences(Sequence* query, int numWanted){ sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest + //save distance of best match + searchScore = dists[0].dist; + //fill topmatches with numwanted closest sequences indexes for (int i = 0; i < numWanted; i++) { topMatches.push_back(dists[i].seq2); diff --git a/distancedb.hpp b/distancedb.hpp index bfb5090..2624d6d 100644 --- a/distancedb.hpp +++ b/distancedb.hpp @@ -22,7 +22,8 @@ public: ~DistanceDB() { delete distCalculator; } void generateDB() {} //doesn't generate a search db - void addSequence(Sequence); + void addSequence(Sequence); + string getName(int i) { return data[i].getName(); } vector findClosestSequences(Sequence*, int); // returns indexes of n closest sequences to query #ifdef USE_MPI diff --git a/kmerdb.cpp b/kmerdb.cpp index 33761a8..bd5b976 100644 --- a/kmerdb.cpp +++ b/kmerdb.cpp @@ -109,6 +109,9 @@ void KmerDB::generateDB(){ ofstream kmerFile; // once we have the kmerLocations folder print it out openOutputFile(kmerDBName, kmerFile); // to a file + //output version + kmerFile << m->getVersion() << endl; + for(int i=0;ierrorOut(e, "Knn", "setDistName"); + exit(1); + } +} +/**************************************************************************************************/ Knn::~Knn() { try { delete phyloTree; @@ -39,7 +53,9 @@ string Knn::getTaxonomy(Sequence* seq) { //use database to find closest seq vector closest = database->findClosestSequences(seq, num); - + + if (search == "distance") { ofstream outDistance; openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << database->getSearchScore() << endl; outDistance.close(); } + if (m->control_pressed) { return tax; } vector closestNames; diff --git a/knn.h b/knn.h index 45cb0f1..1965382 100644 --- a/knn.h +++ b/knn.h @@ -21,11 +21,14 @@ public: Knn(string, string, string, int, float, float, float, float, int); ~Knn(); + void setDistName(string s); string getTaxonomy(Sequence*); private: int num; string findCommonTaxonomy(vector); + string search, outDistName; + }; /**************************************************************************************************/ diff --git a/makefile b/makefile index 46ccb9d..dc5065d 100644 --- a/makefile +++ b/makefile @@ -14,6 +14,12 @@ CXXFLAGS += -O3 MOTHUR_FILES = "\"../Release\"" + +RELEASE_DATE = "\"8/5/2010\"" +VERSION = "\"1.12.3\"" + +CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION} + ifeq ($(strip $(MOTHUR_FILES)),"\"Enter_your_default_path_here\"") else CXXFLAGS += -DMOTHUR_FILES=${MOTHUR_FILES} diff --git a/mothur.cpp b/mothur.cpp index ea3ea3e..eb5e8f9 100644 --- a/mothur.cpp +++ b/mothur.cpp @@ -97,10 +97,16 @@ int main(int argc, char *argv[]){ m->mothurOutEndLine(); m->mothurOutEndLine(); #endif + //get releaseDate from Make + string releaseDate = RELEASE_DATE; + string mothurVersion = VERSION; + m->setReleaseDate(releaseDate); + m->setVersion(mothurVersion); + //header - m->mothurOut("mothur v.1.12.2"); + m->mothurOut("mothur v." + mothurVersion); m->mothurOutEndLine(); - m->mothurOut("Last updated: 7/30/2010"); + m->mothurOut("Last updated: " + releaseDate); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("by"); diff --git a/mothur.h b/mothur.h index f1b1392..0c4429e 100644 --- a/mothur.h +++ b/mothur.h @@ -222,7 +222,7 @@ inline void gobble(istream& f){ } /***********************************************************************/ -inline string getline(ifstream& fileHandle) { +inline string getline(istringstream& fileHandle) { try { string line = ""; @@ -244,7 +244,30 @@ inline string getline(ifstream& fileHandle) { exit(1); } } +/***********************************************************************/ +inline string getline(ifstream& fileHandle) { + try { + + string line = ""; + + while (!fileHandle.eof()) { + //get next character + char c = fileHandle.get(); + + //are you at the end of the line + if ((c == '\n') || (c == '\r') || (c == '\f')){ break; } + else { line += c; } + } + + return line; + + } + catch(exception& e) { + cout << "Error in mothur function getline" << endl; + exit(1); + } +} /***********************************************************************/ inline bool isTrue(string f){ @@ -1098,7 +1121,51 @@ inline vector setFilePosEachLine(string filename, int& num) { return positions; } +/**************************************************************************************************/ +inline bool checkReleaseVersion(ifstream& file, string version) { + try { + + bool good = true; + + string line = getline(file); + //before we added this check + if (line[0] != '#') { good = false; } + else { + //rip off # + line = line.substr(1); + + vector versionVector; + splitAtChar(version, versionVector, '.'); + + //check file version + vector linesVector; + splitAtChar(line, linesVector, '.'); + + if (versionVector.size() != linesVector.size()) { good = false; } + else { + for (int j = 0; j < versionVector.size(); j++) { + int num1, num2; + convert(versionVector[j], num1); + convert(linesVector[j], num2); + + //if mothurs version is newer than this files version, then we want to remake it + if (num1 > num2) { good = false; break; } + } + } + + } + + if (!good) { file.close(); } + else { file.seekg(0); } + + return good; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function checkReleaseVersion. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n"; + exit(1); + } +} /**************************************************************************************************/ #endif diff --git a/mothurout.h b/mothurout.h index 1b89caf..92363f5 100644 --- a/mothurout.h +++ b/mothurout.h @@ -27,6 +27,12 @@ class MothurOut { void closeLog(); string getDefaultPath() { return defaultPath; } void setDefaultPath(string); + + string getReleaseDate() { return releaseDate; } + void setReleaseDate(string r) { releaseDate = r; } + string getVersion() { return version; } + void setVersion(string r) { version = r; } + int control_pressed; bool executing; @@ -41,6 +47,8 @@ class MothurOut { string logFileName; string defaultPath; + string releaseDate, version; + ofstream out; int mem_usage(double&, double&); diff --git a/phylosummary.cpp b/phylosummary.cpp index 08cedeb..870f35f 100644 --- a/phylosummary.cpp +++ b/phylosummary.cpp @@ -250,6 +250,10 @@ void PhyloSummary::print(int i, ofstream& out){ /**************************************************************************************************/ void PhyloSummary::readTreeStruct(ifstream& in){ try { + + //read version + string line = getline(in); gobble(in); + int num; in >> num; gobble(in); diff --git a/phylotree.cpp b/phylotree.cpp index e2b0805..2ea2193 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -54,6 +54,9 @@ PhyloTree::PhyloTree(ifstream& in, string filename){ istringstream iss (tempBuf,istringstream::in); delete buffer; + //read version + getline(iss); gobble(iss); + iss >> numNodes; gobble(iss); tree.resize(numNodes); @@ -78,6 +81,9 @@ PhyloTree::PhyloTree(ifstream& in, string filename){ MPI_File_close(&inMPI); #else + //read version + string line = getline(in); gobble(in); + in >> numNodes; gobble(in); tree.resize(numNodes); @@ -474,6 +480,10 @@ string PhyloTree::getFullTaxonomy(string seqName) { void PhyloTree::print(ofstream& out, vector& copy){ try { + + //output mothur version + out << "#" << m->getVersion() << endl; + out << copy.size() << endl; out << maxLevel << endl; @@ -511,6 +521,9 @@ void PhyloTree::printTreeNodes(string treefilename) { ofstream outTree; openOutputFile(treefilename, outTree); + //output mothur version + outTree << "#" << m->getVersion() << endl; + //print treenodes outTree << tree.size() << endl; for (int i = 0; i < tree.size(); i++) { diff --git a/pintail.cpp b/pintail.cpp index 5bfdc44..8a159ca 100644 --- a/pintail.cpp +++ b/pintail.cpp @@ -183,7 +183,7 @@ int Pintail::doPrep() { if (m->control_pressed) { return 0; } - string outputString = ""; + string outputString = "#" + m->getVersion() + "\n"; //adjust quantiles for (int i = 0; i < quantilesMembers.size(); i++) { @@ -449,6 +449,9 @@ vector Pintail::readFreq() { if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); } istringstream iss (tempBuf,istringstream::in); + //read version + string line = getline(iss); gobble(iss); + while(!iss.eof()) { iss >> pos >> num; @@ -472,6 +475,9 @@ vector Pintail::readFreq() { ifstream in; openInputFile(consfile, in); + + //read version + string line = getline(in); gobble(in); while(!in.eof()){ @@ -648,6 +654,9 @@ vector< vector > Pintail::readQuantiles() { istringstream iss (tempBuf,istringstream::in); delete buffer; + //read version + string line = getline(iss); gobble(iss); + while(!iss.eof()) { iss >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine; @@ -671,6 +680,9 @@ vector< vector > Pintail::readQuantiles() { ifstream in; openInputFile(quanfile, in); + + //read version + string line = getline(in); gobble(in); while(!in.eof()){ -- 2.39.2