-/*\r
- * alignmentdb.cpp\r
- * Mothur\r
- *\r
- * Created by westcott on 11/4/09.\r
- * Copyright 2009 Schloss Lab. All rights reserved.\r
- *\r
- */\r
-\r
-#include "alignmentdb.h"\r
-#include "kmerdb.hpp"\r
-#include "suffixdb.hpp"\r
-#include "blastdb.hpp"\r
-\r
-\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may \r
- try { // need to alter this in the future?\r
- m = MothurOut::getInstance();\r
- longest = 0;\r
- method = s;\r
- bool needToGenerate = true;\r
- \r
- m->mothurOutEndLine();\r
- m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();\r
- \r
- #ifdef USE_MPI \r
- int pid, processors;\r
- vector<unsigned long int> positions;\r
- \r
- MPI_Status status; \r
- MPI_File inMPI;\r
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
- MPI_Comm_size(MPI_COMM_WORLD, &processors);\r
- int tag = 2001;\r
- \r
- char inFileName[1024];\r
- strcpy(inFileName, fastaFileName.c_str());\r
- \r
- MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer\r
- \r
- if (pid == 0) {\r
- positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
-\r
- //send file positions to all processes\r
- for(int i = 1; i < processors; i++) { \r
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
- MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
- }\r
- }else{\r
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
- positions.resize(numSeqs+1);\r
- MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
- }\r
- \r
- //read file \r
- for(int i=0;i<numSeqs;i++){\r
- \r
- if (m->control_pressed) { templateSequences.clear(); break; }\r
- \r
- //read next sequence\r
- int length = positions[i+1] - positions[i];\r
- char* buf4 = new char[length];\r
- \r
- MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);\r
- \r
- string tempBuf = buf4;\r
- if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }\r
- delete buf4;\r
-\r
- istringstream iss (tempBuf,istringstream::in);\r
- \r
- Sequence temp(iss); \r
- if (temp.getName() != "") {\r
- templateSequences.push_back(temp);\r
- //save longest base\r
- if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }\r
- }\r
- }\r
- \r
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case\r
- \r
- MPI_File_close(&inMPI);\r
- \r
- #else\r
- ifstream fastaFile;\r
- openInputFile(fastaFileName, fastaFile);\r
-\r
- while (!fastaFile.eof()) {\r
- Sequence temp(fastaFile); gobble(fastaFile);\r
- \r
- if (m->control_pressed) { templateSequences.clear(); break; }\r
- \r
- if (temp.getName() != "") {\r
- templateSequences.push_back(temp);\r
- //save longest base\r
- if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }\r
- }\r
- }\r
- fastaFile.close();\r
- \r
- #endif\r
- \r
- numSeqs = templateSequences.size();\r
- //all of this is elsewhere already!\r
- \r
- m->mothurOut("DONE.");\r
- m->mothurOutEndLine(); cout.flush();\r
- \r
- //in case you delete the seqs and then ask for them\r
- emptySequence = Sequence();\r
- emptySequence.setName("no_match");\r
- emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- \r
- \r
- string kmerDBName;\r
- if(method == "kmer") { \r
- search = new KmerDB(fastaFileName, kmerSize); \r
- \r
- #ifdef USE_MPI\r
- #else\r
- kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";\r
- ifstream kmerFileTest(kmerDBName.c_str());\r
- \r
- if(kmerFileTest){ needToGenerate = false; }\r
- #endif\r
- }\r
- else if(method == "suffix") { search = new SuffixDB(numSeqs); }\r
- else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); }\r
- else {\r
- m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");\r
- m->mothurOutEndLine();\r
- search = new KmerDB(fastaFileName, 8);\r
- }\r
- \r
- if (!(m->control_pressed)) {\r
- if (needToGenerate) {\r
- //add sequences to search \r
- for (int i = 0; i < templateSequences.size(); i++) {\r
- search->addSequence(templateSequences[i]);\r
- \r
- if (m->control_pressed) { templateSequences.clear(); break; }\r
- }\r
- \r
- if (m->control_pressed) { templateSequences.clear(); }\r
- \r
- search->generateDB();\r
- \r
- }else if ((method == "kmer") && (!needToGenerate)) {\r
- ifstream kmerFileTest(kmerDBName.c_str());\r
- search->readKmerDB(kmerFileTest); \r
- }\r
- \r
- search->setNumSeqs(numSeqs);\r
- }\r
- }\r
- catch(exception& e) {\r
- m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
- exit(1);\r
- }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string s){ \r
- try { \r
- m = MothurOut::getInstance();\r
- method = s;\r
- \r
- if(method == "suffix") { search = new SuffixDB(); }\r
- else if(method == "blast") { search = new BlastDB(); }\r
- else { search = new KmerDB(); }\r
-\r
- \r
- //in case you delete the seqs and then ask for them\r
- emptySequence = Sequence();\r
- emptySequence.setName("no_match");\r
- emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
- \r
- }\r
- catch(exception& e) {\r
- m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
- exit(1);\r
- }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::~AlignmentDB() { delete search; }\r
-/**************************************************************************************************/\r
-Sequence AlignmentDB::findClosestSequence(Sequence* seq) {\r
- try{\r
- \r
- vector<int> spot = search->findClosestSequences(seq, 1);\r
-\r
- if (spot.size() != 0) { return templateSequences[spot[0]]; }\r
- else { return emptySequence; }\r
- \r
- }\r
- catch(exception& e) {\r
- m->errorOut(e, "AlignmentDB", "findClosestSequence");\r
- exit(1);\r
- }\r
-}\r
-/**************************************************************************************************/\r
-\r
-\r
-\r
-\r
-\r
-\r
+/*
+ * alignmentdb.cpp
+ * Mothur
+ *
+ * Created by westcott on 11/4/09.
+ * Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "alignmentdb.h"
+#include "kmerdb.hpp"
+#include "suffixdb.hpp"
+#include "blastdb.hpp"
+#include "referencedb.h"
+
+/**************************************************************************************************/
+AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){ // This assumes that the template database is in fasta format, may
+ try { // need to alter this in the future?
+ m = MothurOut::getInstance();
+ longest = 0;
+ method = s;
+ bool needToGenerate = true;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
+ bool silent = false;
+ threadID = tid;
+
+ if (fastaFileName == "saved-silent") {
+ fastaFileName = "saved"; silent = true;
+ }
+
+ if (fastaFileName == "saved") {
+ int start = time(NULL);
+
+ if (!silent) { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
+
+ for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+ templateSequences.push_back(rdb->referenceSeqs[i]);
+ //save longest base
+ if (rdb->referenceSeqs[i].getUnaligned().length() >= longest) { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
+ }
+ fastaFileName = rdb->getSavedReference();
+
+ numSeqs = templateSequences.size();
+ if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine(); }
+
+ }else {
+ int start = time(NULL);
+ m->mothurOutEndLine();
+ m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
+
+ #ifdef USE_MPI
+ int pid, processors;
+ vector<unsigned long long> positions;
+
+ MPI_Status status;
+ MPI_File inMPI;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+ int tag = 2001;
+
+ char inFileName[1024];
+ strcpy(inFileName, fastaFileName.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+
+ if (pid == 0) {
+ positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ }
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+
+ //read next sequence
+ int length = positions[i+1] - positions[i];
+ char* buf4 = new char[length];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+
+ string tempBuf = buf4;
+ if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+
+ Sequence temp(iss);
+ if (temp.getName() != "") {
+ templateSequences.push_back(temp);
+
+ if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+
+ //save longest base
+ if (temp.getUnaligned().length() >= longest) { longest = temp.getUnaligned().length()+1; }
+ }
+ }
+
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+
+ MPI_File_close(&inMPI);
+
+ #else
+ ifstream fastaFile;
+ m->openInputFile(fastaFileName, fastaFile);
+
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile); m->gobble(fastaFile);
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+
+ if (temp.getName() != "") {
+ templateSequences.push_back(temp);
+
+ if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+
+ //save longest base
+ if (temp.getUnaligned().length() >= longest) { longest = (temp.getUnaligned().length()+1); }
+ }
+ }
+ fastaFile.close();
+ #endif
+
+ numSeqs = templateSequences.size();
+ //all of this is elsewhere already!
+
+ m->mothurOut("DONE.");
+ m->mothurOutEndLine(); cout.flush();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to read " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();
+
+ }
+
+
+ //in case you delete the seqs and then ask for them
+ emptySequence = Sequence();
+ emptySequence.setName("no_match");
+ emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+ emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+
+
+ string kmerDBName;
+ if(method == "kmer") {
+ search = new KmerDB(fastaFileName, kmerSize);
+
+ #ifdef USE_MPI
+ #else
+ kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+ ifstream kmerFileTest(kmerDBName.c_str());
+
+ if(kmerFileTest){
+ bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+ if (GoodFile) { needToGenerate = false; }
+ }
+ #endif
+ }
+ else if(method == "suffix") { search = new SuffixDB(numSeqs); }
+ else if(method == "blast") { search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); }
+ else {
+ method = "kmer";
+ m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+ m->mothurOutEndLine();
+ search = new KmerDB(fastaFileName, 8);
+ }
+
+ if (!(m->control_pressed)) {
+ if (needToGenerate) {
+ //add sequences to search
+ for (int i = 0; i < templateSequences.size(); i++) {
+ search->addSequence(templateSequences[i]);
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+ }
+
+ if (m->control_pressed) { templateSequences.clear(); }
+
+ search->generateDB();
+
+ }else if ((method == "kmer") && (!needToGenerate)) {
+ ifstream kmerFileTest(kmerDBName.c_str());
+ search->readKmerDB(kmerFileTest);
+ }
+
+ search->setNumSeqs(numSeqs);
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignmentDB", "AlignmentDB");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+AlignmentDB::AlignmentDB(string s){
+ try {
+ m = MothurOut::getInstance();
+ method = s;
+
+ if(method == "suffix") { search = new SuffixDB(); }
+ else if(method == "blast") { search = new BlastDB("", 0); }
+ else { search = new KmerDB(); }
+
+
+ //in case you delete the seqs and then ask for them
+ emptySequence = Sequence();
+ emptySequence.setName("no_match");
+ emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+ emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignmentDB", "AlignmentDB");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+AlignmentDB::~AlignmentDB() { delete search; }
+/**************************************************************************************************/
+Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
+ try{
+
+ vector<int> spot = search->findClosestSequences(seq, 1);
+
+ if (spot.size() != 0) { return templateSequences[spot[0]]; }
+ else { return emptySequence; }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "AlignmentDB", "findClosestSequence");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+
+
+
+
+