-/*\r
- * alignmentdb.cpp\r
- * Mothur\r
- *\r
- * Created by westcott on 11/4/09.\r
- * Copyright 2009 Schloss Lab. All rights reserved.\r
- *\r
- */\r
-\r
-#include "alignmentdb.h"\r
-#include "kmerdb.hpp"\r
-#include "suffixdb.hpp"\r
-#include "blastdb.hpp"\r
-\r
-\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may \r
- try { // need to alter this in the future?\r
- m = MothurOut::getInstance();\r
- longest = 0;\r
- method = s;\r
- bool needToGenerate = true;\r
- \r
- m->mothurOutEndLine();\r
- m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();\r
- \r
- #ifdef USE_MPI \r
- int pid, processors;\r
- vector<unsigned long int> positions;\r
- \r
- MPI_Status status; \r
- MPI_File inMPI;\r
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
- MPI_Comm_size(MPI_COMM_WORLD, &processors);\r
- int tag = 2001;\r
- \r
- char inFileName[1024];\r
- strcpy(inFileName, fastaFileName.c_str());\r
- \r
- MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer\r
- \r
- if (pid == 0) {\r
- positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
-\r
- //send file positions to all processes\r
- for(int i = 1; i < processors; i++) { \r
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
- MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
- }\r
- }else{\r
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
- positions.resize(numSeqs+1);\r
- MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
- }\r
- \r
- //read file \r
- for(int i=0;i<numSeqs;i++){\r
- \r
- if (m->control_pressed) { templateSequences.clear(); break; }\r
- \r
- //read next sequence\r
- int length = positions[i+1] - positions[i];\r
- char* buf4 = new char[length];\r
- \r
- MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);\r
- \r
- string tempBuf = buf4;\r
- if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }\r
- delete buf4;\r
-\r
- istringstream iss (tempBuf,istringstream::in);\r
- \r
- Sequence temp(iss); \r
- if (temp.getName() != "") {\r
- templateSequences.push_back(temp);\r
- //save longest base\r
- if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }\r
- }\r
- }\r
- \r
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case\r
- \r
- MPI_File_close(&inMPI);\r
- \r
- #else\r
- ifstream fastaFile;\r
- openInputFile(fastaFileName, fastaFile);\r
-\r
- while (!fastaFile.eof()) {\r
- Sequence temp(fastaFile); gobble(fastaFile);\r
- \r
- if (m->control_pressed) { templateSequences.clear(); break; }\r
- \r
- if (temp.getName() != "") {\r
- templateSequences.push_back(temp);\r
- //save longest base\r
- if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }\r
- }\r
- }\r
- fastaFile.close();\r
- \r
- #endif\r
- \r
- numSeqs = templateSequences.size();\r
- //all of this is elsewhere already!\r
- \r
- m->mothurOut("DONE.");\r
- m->mothurOutEndLine(); cout.flush();\r
- \r
- //in case you delete the seqs and then ask for them\r
- emptySequence = Sequence();\r
- emptySequence.setName("no_match");\r
- emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- \r
- \r
- string kmerDBName;\r
- if(method == "kmer") { \r
- search = new KmerDB(fastaFileName, kmerSize); \r
- \r
- #ifdef USE_MPI\r
- #else\r
- kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";\r
- ifstream kmerFileTest(kmerDBName.c_str());\r
- \r
- if(kmerFileTest){ needToGenerate = false; }\r
- #endif\r
- }\r
- else if(method == "suffix") { search = new SuffixDB(numSeqs); }\r
- else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); }\r
- else {\r
- m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");\r
- m->mothurOutEndLine();\r
- search = new KmerDB(fastaFileName, 8);\r
- }\r
- \r
- if (!(m->control_pressed)) {\r
- if (needToGenerate) {\r
- //add sequences to search \r
- for (int i = 0; i < templateSequences.size(); i++) {\r
- search->addSequence(templateSequences[i]);\r
- \r
- if (m->control_pressed) { templateSequences.clear(); break; }\r
- }\r
- \r
- if (m->control_pressed) { templateSequences.clear(); }\r
- \r
- search->generateDB();\r
- \r
- }else if ((method == "kmer") && (!needToGenerate)) {\r
- ifstream kmerFileTest(kmerDBName.c_str());\r
- search->readKmerDB(kmerFileTest); \r
- }\r
- \r
- search->setNumSeqs(numSeqs);\r
- }\r
- }\r
- catch(exception& e) {\r
- m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
- exit(1);\r
- }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string s){ \r
- try { \r
- m = MothurOut::getInstance();\r
- method = s;\r
- \r
- if(method == "suffix") { search = new SuffixDB(); }\r
- else if(method == "blast") { search = new BlastDB(); }\r
- else { search = new KmerDB(); }\r
-\r
- \r
- //in case you delete the seqs and then ask for them\r
- emptySequence = Sequence();\r
- emptySequence.setName("no_match");\r
- emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- \r
- }\r
- catch(exception& e) {\r
- m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
- exit(1);\r
- }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::~AlignmentDB() { delete search; }\r
-/**************************************************************************************************/\r
-Sequence AlignmentDB::findClosestSequence(Sequence* seq) {\r
- try{\r
- \r
- vector<int> spot = search->findClosestSequences(seq, 1);\r
-\r
- if (spot.size() != 0) { return templateSequences[spot[0]]; }\r
- else { return emptySequence; }\r
- \r
- }\r
- catch(exception& e) {\r
- m->errorOut(e, "AlignmentDB", "findClosestSequence");\r
- exit(1);\r
- }\r
-}\r
-/**************************************************************************************************/\r
-\r
-\r
-\r
-\r
-\r
-\r
+/*
+ * alignmentdb.cpp
+ * Mothur
+ *
+ * Created by westcott on 11/4/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "alignmentdb.h"
+#include "kmerdb.hpp"
+#include "suffixdb.hpp"
+#include "blastdb.hpp"
+
+
+/**************************************************************************************************/
+AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may
+ try { // need to alter this in the future?
+ m = MothurOut::getInstance();
+ longest = 0;
+ method = s;
+ bool needToGenerate = true;
+
+ m->mothurOutEndLine();
+ m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
+
+ #ifdef USE_MPI
+ int pid, processors;
+ vector<unsigned long int> positions;
+
+ MPI_Status status;
+ MPI_File inMPI;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+ int tag = 2001;
+
+ char inFileName[1024];
+ strcpy(inFileName, fastaFileName.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+
+ if (pid == 0) {
+ positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ }
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+
+ //read next sequence
+ int length = positions[i+1] - positions[i];
+ char* buf4 = new char[length];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+
+ string tempBuf = buf4;
+ if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+
+ Sequence temp(iss);
+ if (temp.getName() != "") {
+ templateSequences.push_back(temp);
+ //save longest base
+ if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
+ }
+ }
+
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
+ MPI_File_close(&inMPI);
+
+ #else
+ ifstream fastaFile;
+ openInputFile(fastaFileName, fastaFile);
+
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile); gobble(fastaFile);
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+
+ if (temp.getName() != "") {
+ templateSequences.push_back(temp);
+ //save longest base
+ if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
+ }
+ }
+ fastaFile.close();
+
+ #endif
+
+ numSeqs = templateSequences.size();
+ //all of this is elsewhere already!
+
+ m->mothurOut("DONE.");
+ m->mothurOutEndLine(); cout.flush();
+
+ //in case you delete the seqs and then ask for them
+ emptySequence = Sequence();
+ emptySequence.setName("no_match");
+ emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+ emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+
+
+ string kmerDBName;
+ if(method == "kmer") {
+ search = new KmerDB(fastaFileName, kmerSize);
+
+ #ifdef USE_MPI
+ #else
+ kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+ ifstream kmerFileTest(kmerDBName.c_str());
+
+ if(kmerFileTest){
+ bool GoodFile = checkReleaseVersion(kmerFileTest, m->getVersion());
+ if (GoodFile) { needToGenerate = false; }
+ }
+ #endif
+ }
+ else if(method == "suffix") { search = new SuffixDB(numSeqs); }
+ else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); }
+ else {
+ m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+ m->mothurOutEndLine();
+ search = new KmerDB(fastaFileName, 8);
+ }
+
+ if (!(m->control_pressed)) {
+ if (needToGenerate) {
+ //add sequences to search
+ for (int i = 0; i < templateSequences.size(); i++) {
+ search->addSequence(templateSequences[i]);
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+ }
+
+ if (m->control_pressed) { templateSequences.clear(); }
+
+ search->generateDB();
+
+ }else if ((method == "kmer") && (!needToGenerate)) {
+ ifstream kmerFileTest(kmerDBName.c_str());
+ search->readKmerDB(kmerFileTest);
+ }
+
+ search->setNumSeqs(numSeqs);
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignmentDB", "AlignmentDB");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+AlignmentDB::AlignmentDB(string s){
+ try {
+ m = MothurOut::getInstance();
+ method = s;
+
+ if(method == "suffix") { search = new SuffixDB(); }
+ else if(method == "blast") { search = new BlastDB(); }
+ else { search = new KmerDB(); }
+
+
+ //in case you delete the seqs and then ask for them
+ emptySequence = Sequence();
+ emptySequence.setName("no_match");
+ emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+ emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignmentDB", "AlignmentDB");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+AlignmentDB::~AlignmentDB() { delete search; }
+/**************************************************************************************************/
+Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
+ try{
+
+ vector<int> spot = search->findClosestSequences(seq, 1);
+
+ if (spot.size() != 0) { return templateSequences[spot[0]]; }
+ else { return emptySequence; }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignmentDB", "findClosestSequence");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+
+
+
+
+
int start = time(NULL);
- if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){
+ //if they are there make sure they were created after this release date
+ bool FilesGood = false;
+ if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3){
+ FilesGood = checkReleaseDate(probFileTest, probFileTest2, phyloTreeTest, probFileTest3);
+ }
+
+ if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){
m->mothurOut("Reading template taxonomy... "); cout.flush();
phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
openOutputFile(probFileName, out);
+ //output mothur version
+ out << "#" << m->getVersion() << endl;
+
out << numKmers << endl;
openOutputFile(probFileName2, out2);
+ //output mothur version
+ out2 << "#" << m->getVersion() << endl;
+
#ifdef USE_MPI
}
#endif
positions2.resize(num2+1);
MPI_Recv(&positions2[0], (num2+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
}
-
- //read numKmers
+
+ //read version
int length = positions2[1] - positions2[0];
+ char* buf5 = new char[length];
+
+ MPI_File_read_at(inMPI2, positions2[0], buf5, length, MPI_CHAR, &status);
+ delete buf5;
+
+ //read numKmers
+ length = positions2[2] - positions2[1];
char* buf = new char[length];
- MPI_File_read_at(inMPI2, positions2[0], buf, length, MPI_CHAR, &status);
+ MPI_File_read_at(inMPI2, positions2[1], buf, length, MPI_CHAR, &status);
string tempBuf = buf;
if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
int kmer, name;
vector<int> numbers; numbers.resize(numKmers);
float prob;
- vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+ vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+
+ //read version
+ length = positions[1] - positions[0];
+ char* buf6 = new char[length];
+ MPI_File_read_at(inMPI2, positions[0], buf6, length, MPI_CHAR, &status);
+ delete buf6;
+
//read file
- for(int i=0;i<num;i++){
+ for(int i=1;i<num;i++){
//read next sequence
length = positions[i+1] - positions[i];
char* buf4 = new char[length];
MPI_File_close(&inMPI);
- for(int i=1;i<num2;i++){
+ for(int i=2;i<num2;i++){
//read next sequence
length = positions2[i+1] - positions2[i];
char* buf4 = new char[length];
MPI_File_close(&inMPI2);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
-
+ //read version
+ string line = getline(in); gobble(in);
+
in >> numKmers; gobble(in);
//initialze probabilities
vector<int> num; num.resize(numKmers);
float prob;
vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
-
+
+ //read version
+ string line2 = getline(inNum); gobble(inNum);
+
while (inNum) {
inNum >> zeroCountProb[count] >> num[count];
count++;
}
}
/**************************************************************************************************/
+bool Bayesian::checkReleaseDate(ifstream& file1, ifstream& file2, ifstream& file3, ifstream& file4) {
+ try {
+
+ bool good = true;
+
+ vector<string> lines;
+ lines.push_back(getline(file1));
+ lines.push_back(getline(file2));
+ lines.push_back(getline(file3));
+ lines.push_back(getline(file4));
+
+ //before we added this check
+ if ((lines[0][0] != '#') || (lines[1][0] != '#') || (lines[2][0] != '#') || (lines[3][0] != '#')) { good = false; }
+ else {
+ //rip off #
+ for (int i = 0; i < lines.size(); i++) { lines[i] = lines[i].substr(1); }
+
+ //get mothurs current version
+ string version = m->getVersion();
+
+ vector<string> versionVector;
+ splitAtChar(version, versionVector, '.');
+
+ //check each files version
+ for (int i = 0; i < lines.size(); i++) {
+ vector<string> linesVector;
+ splitAtChar(lines[i], linesVector, '.');
+
+ if (versionVector.size() != linesVector.size()) { good = false; break; }
+ else {
+ for (int j = 0; j < versionVector.size(); j++) {
+ int num1, num2;
+ convert(versionVector[j], num1);
+ convert(linesVector[j], num2);
+
+ //if mothurs version is newer than this files version, then we want to remake it
+ if (num1 > num2) { good = false; break; }
+ }
+ }
+
+ if (!good) { break; }
+ }
+ }
+
+ if (!good) { file1.close(); file2.close(); file3.close(); file4.close(); }
+ else { file1.seekg(0); file2.seekg(0); file3.seekg(0); file4.seekg(0); }
+
+ return good;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bayesian", "checkReleaseDate");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
string bootstrapResults(vector<int>, int, int);
int getMostProbableTaxonomy(vector<int>);
void readProbFile(ifstream&, ifstream&, string, string);
+ bool checkReleaseDate(ifstream&, ifstream&, ifstream&, ifstream&);
};
//check for consfile
string tempConsFile = getRootName(inputDir + getSimpleName(templatefile)) + "freq";
ifstream FileTest(tempConsFile.c_str());
- if(FileTest){ m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close(); }
+ if(FileTest){
+ bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+ if (GoodFile) {
+ m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
+ }
+ }
}
quanfile = validParameter.validFile(parameters, "quantile", true);
if (quanfile == "not open") { abort = true; }
- else if (quanfile == "not found") { quanfile = ""; }
+ else if (quanfile == "not found") { quanfile = ""; }
}
}
catch(exception& e) {
}
ifstream FileTest(tempQuan.c_str());
- if(FileTest){ m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close(); }
+ if(FileTest){
+ bool GoodFile = checkReleaseVersion(FileTest, m->getVersion());
+ if (GoodFile) {
+ m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
+ }
+ }
chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
//leftside
kmerDBNameLeft = leftTemplateFileName.substr(0,leftTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
ifstream kmerFileTestLeft(kmerDBNameLeft.c_str());
+ bool needToGenerateLeft = true;
- if(!kmerFileTestLeft){
+ if(kmerFileTestLeft){
+ bool GoodFile = checkReleaseVersion(kmerFileTestLeft, m->getVersion());
+ if (GoodFile) { needToGenerateLeft = false; }
+ }
+
+ if(needToGenerateLeft){
for (int i = 0; i < templateSeqs.size(); i++) {
//rightside
kmerDBNameRight = rightTemplateFileName.substr(0,rightTemplateFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
ifstream kmerFileTestRight(kmerDBNameRight.c_str());
+ bool needToGenerateRight = true;
+
+ if(kmerFileTestRight){
+ bool GoodFile = checkReleaseVersion(kmerFileTestRight, m->getVersion());
+ if (GoodFile) { needToGenerateRight = false; }
+ }
- if(!kmerFileTestRight){
+ if(needToGenerateRight){
for (int i = 0; i < templateSeqs.size(); i++) {
if (m->control_pressed) { return 0; }
kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
ifstream kmerFileTest(kmerDBName.c_str());
- if(kmerFileTest){ needToGenerate = false; }
+ if(kmerFileTest){
+ bool GoodFile = checkReleaseVersion(kmerFileTest, m->getVersion());
+ if (GoodFile) { needToGenerate = false; }
+ }
}
else if(method == "suffix") { database = new SuffixDB(numSeqs); }
else if(method == "blast") { database = new BlastDB(gapOpen, gapExtend, match, misMatch); }
virtual string getTaxonomy(Sequence*) = 0;
virtual string getSimpleTax() { return simpleTax; }
virtual void generateDatabaseAndNames(string, string, string, int, float, float, float, float);
+ virtual void setDistName(string s) {} //for knn, so if distance method is selected with knn you can create the smallest distance file in the right place.
protected:
string tempTaxonomyFile = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
string taxSummary = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
+ if ((method == "knn") && (search == "distance")) {
+ string DistName = outputDir + getRootName(getSimpleName(fastaFileNames[s])) + "match.dist";
+ classify->setDistName(DistName); outputNames.push_back(DistName);
+ }
+
outputNames.push_back(newTaxonomyFile);
outputNames.push_back(taxSummary);
virtual ~Database();
virtual void generateDB() = 0;
virtual void addSequence(Sequence) = 0; //add sequence to search engine
+ virtual string getName(int) { return ""; }
virtual vector<int> findClosestSequences(Sequence*, int) = 0; // returns indexes of n closest sequences to query
virtual vector<int> findClosestMegaBlast(Sequence*, int){return results;}
virtual float getSearchScore();
openOutputFile(freqfile, outFreq);
+ outFreq << "#" << m->getVersion() << endl;
+
string length = toString(seqs.size()); //if there are 5000 seqs in the template then set precision to 3
int precision = length.length() - 1;
bool templateSameLength = true;
string sequence = query->getAligned();
vector<seqDist> dists;
+
+ searchScore = -1.0;
if (numWanted > data.size()) { m->mothurOut("numwanted is larger than the number of template sequences, using "+ toString(data.size()) + "."); m->mothurOutEndLine(); numWanted = data.size(); }
sort(dists.begin(), dists.end(), compareSequenceDistance); //sorts by distance lowest to highest
+ //save distance of best match
+ searchScore = dists[0].dist;
+
//fill topmatches with numwanted closest sequences indexes
for (int i = 0; i < numWanted; i++) {
topMatches.push_back(dists[i].seq2);
~DistanceDB() { delete distCalculator; }
void generateDB() {} //doesn't generate a search db
- void addSequence(Sequence);
+ void addSequence(Sequence);
+ string getName(int i) { return data[i].getName(); }
vector<int> findClosestSequences(Sequence*, int); // returns indexes of n closest sequences to query
#ifdef USE_MPI
ofstream kmerFile; // once we have the kmerLocations folder print it out
openOutputFile(kmerDBName, kmerFile); // to a file
+ //output version
+ kmerFile << m->getVersion() << endl;
+
for(int i=0;i<maxKmer;i++){ // step through all of the possible kmer numbers
kmerFile << i << ' ' << kmerLocations[i].size(); // print the kmer number and the number of sequences with
for(int j=0;j<kmerLocations[i].size();j++){ // that kmer. then print out the indices of the sequences
kmerDBFile.seekg(0); // start at the beginning of the file
+ //read version
+ string line = getline(kmerDBFile); gobble(kmerDBFile);
+
string seqName;
int seqNumber;
/**************************************************************************************************/
Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n)
-: Classify(), num(n) {
+: Classify(), num(n), search(method) {
try {
//create search database and names vector
generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch);
}
}
/**************************************************************************************************/
+void Knn::setDistName(string s) {
+ try {
+ outDistName = s;
+ ofstream outDistance;
+ openOutputFile(outDistName, outDistance);
+ outDistance << "Name\tBestMatch\tDistance" << endl;
+ outDistance.close();
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Knn", "setDistName");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
Knn::~Knn() {
try {
delete phyloTree;
//use database to find closest seq
vector<int> closest = database->findClosestSequences(seq, num);
-
+
+ if (search == "distance") { ofstream outDistance; openOutputFileAppend(outDistName, outDistance); outDistance << seq->getName() << '\t' << database->getName(closest[0]) << '\t' << database->getSearchScore() << endl; outDistance.close(); }
+
if (m->control_pressed) { return tax; }
vector<string> closestNames;
Knn(string, string, string, int, float, float, float, float, int);
~Knn();
+ void setDistName(string s);
string getTaxonomy(Sequence*);
private:
int num;
string findCommonTaxonomy(vector<string>);
+ string search, outDistName;
+
};
/**************************************************************************************************/
CXXFLAGS += -O3
MOTHUR_FILES = "\"../Release\""
+
+RELEASE_DATE = "\"8/5/2010\""
+VERSION = "\"1.12.3\""
+
+CXXFLAGS += -DRELEASE_DATE=${RELEASE_DATE} -DVERSION=${VERSION}
+
ifeq ($(strip $(MOTHUR_FILES)),"\"Enter_your_default_path_here\"")
else
CXXFLAGS += -DMOTHUR_FILES=${MOTHUR_FILES}
m->mothurOutEndLine(); m->mothurOutEndLine();
#endif
+ //get releaseDate from Make
+ string releaseDate = RELEASE_DATE;
+ string mothurVersion = VERSION;
+ m->setReleaseDate(releaseDate);
+ m->setVersion(mothurVersion);
+
//header
- m->mothurOut("mothur v.1.12.2");
+ m->mothurOut("mothur v." + mothurVersion);
m->mothurOutEndLine();
- m->mothurOut("Last updated: 7/30/2010");
+ m->mothurOut("Last updated: " + releaseDate);
m->mothurOutEndLine();
m->mothurOutEndLine();
m->mothurOut("by");
}
/***********************************************************************/
-inline string getline(ifstream& fileHandle) {
+inline string getline(istringstream& fileHandle) {
try {
string line = "";
exit(1);
}
}
+/***********************************************************************/
+inline string getline(ifstream& fileHandle) {
+ try {
+
+ string line = "";
+
+ while (!fileHandle.eof()) {
+ //get next character
+ char c = fileHandle.get();
+
+ //are you at the end of the line
+ if ((c == '\n') || (c == '\r') || (c == '\f')){ break; }
+ else { line += c; }
+ }
+
+ return line;
+
+ }
+ catch(exception& e) {
+ cout << "Error in mothur function getline" << endl;
+ exit(1);
+ }
+}
/***********************************************************************/
inline bool isTrue(string f){
return positions;
}
+/**************************************************************************************************/
+inline bool checkReleaseVersion(ifstream& file, string version) {
+ try {
+
+ bool good = true;
+
+ string line = getline(file);
+ //before we added this check
+ if (line[0] != '#') { good = false; }
+ else {
+ //rip off #
+ line = line.substr(1);
+
+ vector<string> versionVector;
+ splitAtChar(version, versionVector, '.');
+
+ //check file version
+ vector<string> linesVector;
+ splitAtChar(line, linesVector, '.');
+
+ if (versionVector.size() != linesVector.size()) { good = false; }
+ else {
+ for (int j = 0; j < versionVector.size(); j++) {
+ int num1, num2;
+ convert(versionVector[j], num1);
+ convert(linesVector[j], num2);
+
+ //if mothurs version is newer than this files version, then we want to remake it
+ if (num1 > num2) { good = false; break; }
+ }
+ }
+
+ }
+
+ if (!good) { file.close(); }
+ else { file.seekg(0); }
+
+ return good;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function checkReleaseVersion. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+ exit(1);
+ }
+}
/**************************************************************************************************/
#endif
void closeLog();
string getDefaultPath() { return defaultPath; }
void setDefaultPath(string);
+
+ string getReleaseDate() { return releaseDate; }
+ void setReleaseDate(string r) { releaseDate = r; }
+ string getVersion() { return version; }
+ void setVersion(string r) { version = r; }
+
int control_pressed;
bool executing;
string logFileName;
string defaultPath;
+ string releaseDate, version;
+
ofstream out;
int mem_usage(double&, double&);
/**************************************************************************************************/
void PhyloSummary::readTreeStruct(ifstream& in){
try {
+
+ //read version
+ string line = getline(in); gobble(in);
+
int num;
in >> num; gobble(in);
istringstream iss (tempBuf,istringstream::in);
delete buffer;
+ //read version
+ getline(iss); gobble(iss);
+
iss >> numNodes; gobble(iss);
tree.resize(numNodes);
MPI_File_close(&inMPI);
#else
+ //read version
+ string line = getline(in); gobble(in);
+
in >> numNodes; gobble(in);
tree.resize(numNodes);
void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
try {
+
+ //output mothur version
+ out << "#" << m->getVersion() << endl;
+
out << copy.size() << endl;
out << maxLevel << endl;
ofstream outTree;
openOutputFile(treefilename, outTree);
+ //output mothur version
+ outTree << "#" << m->getVersion() << endl;
+
//print treenodes
outTree << tree.size() << endl;
for (int i = 0; i < tree.size(); i++) {
if (m->control_pressed) { return 0; }
- string outputString = "";
+ string outputString = "#" + m->getVersion() + "\n";
//adjust quantiles
for (int i = 0; i < quantilesMembers.size(); i++) {
if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size); }
istringstream iss (tempBuf,istringstream::in);
+ //read version
+ string line = getline(iss); gobble(iss);
+
while(!iss.eof()) {
iss >> pos >> num;
ifstream in;
openInputFile(consfile, in);
+
+ //read version
+ string line = getline(in); gobble(in);
while(!in.eof()){
istringstream iss (tempBuf,istringstream::in);
delete buffer;
+ //read version
+ string line = getline(iss); gobble(iss);
+
while(!iss.eof()) {
iss >> num >> ten >> twentyfive >> fifty >> seventyfive >> ninetyfive >> ninetynine;
ifstream in;
openInputFile(quanfile, in);
+
+ //read version
+ string line = getline(in); gobble(in);
while(!in.eof()){