]> git.donarmstrong.com Git - mothur.git/blobdiff - alignmentdb.cpp
changes while testing
[mothur.git] / alignmentdb.cpp
index 364ef12588223c0300eccd767aac6f17e6630aa1..9fec737778e01704e98f5c813e3c0554e7ab0981 100644 (file)
-/*\r
- *  alignmentdb.cpp\r
- *  Mothur\r
- *\r
- *  Created by westcott on 11/4/09.\r
- *  Copyright 2009 Schloss Lab. All rights reserved.\r
- *\r
- */\r
-\r
-#include "alignmentdb.h"\r
-#include "kmerdb.hpp"\r
-#include "suffixdb.hpp"\r
-#include "blastdb.hpp"\r
-\r
-\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){           //      This assumes that the template database is in fasta format, may \r
-       try {                                                                                   //      need to alter this in the future?\r
-               m = MothurOut::getInstance();\r
-               longest = 0;\r
-               method = s;\r
-               bool needToGenerate = true;\r
-               \r
-               m->mothurOutEndLine();\r
-               m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();\r
-               \r
-               #ifdef USE_MPI  \r
-                       int pid;\r
-                       vector<long> positions;\r
-               \r
-                       MPI_Status status; \r
-                       MPI_File inMPI;\r
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
-       \r
-                       char inFileName[1024];\r
-                       strcpy(inFileName, fastaFileName.c_str());\r
-       \r
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer\r
-                       \r
-                       if (pid == 0) {\r
-                               positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
-\r
-                               //send file positions to all processes\r
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs\r
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     \r
-                       }else{\r
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs\r
-                               positions.resize(numSeqs+1);\r
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions\r
-                       }\r
-               \r
-                       //read file \r
-                       for(int i=0;i<numSeqs;i++){\r
-                               \r
-                               if (m->control_pressed) {  templateSequences.clear(); break;  }\r
-                               \r
-                               //read next sequence\r
-                               int length = positions[i+1] - positions[i];\r
-                               char* buf4 = new char[length];\r
-                       \r
-                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);\r
-               \r
-                               string tempBuf = buf4;\r
-                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }\r
-                               delete buf4;\r
-\r
-                               istringstream iss (tempBuf,istringstream::in);\r
-               \r
-                               Sequence temp(iss);  \r
-                               if (temp.getName() != "") {\r
-                                       templateSequences.push_back(temp);\r
-                                       //save longest base\r
-                                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
-                               }\r
-                       }\r
-               \r
-                       MPI_File_close(&inMPI);\r
-               \r
-       #else\r
-               ifstream fastaFile;\r
-               openInputFile(fastaFileName, fastaFile);\r
-\r
-               while (!fastaFile.eof()) {\r
-                       Sequence temp(fastaFile);  gobble(fastaFile);\r
-                       \r
-                       if (m->control_pressed) {  templateSequences.clear(); break;  }\r
-                       \r
-                       if (temp.getName() != "") {\r
-                               templateSequences.push_back(temp);\r
-                               //save longest base\r
-                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
-                       }\r
-               }\r
-               fastaFile.close();\r
-               \r
-       #endif\r
-       \r
-               numSeqs = templateSequences.size();\r
-               //all of this is elsewhere already!\r
-               \r
-               m->mothurOut("DONE.");\r
-               m->mothurOutEndLine();  cout.flush();\r
-               \r
-               //in case you delete the seqs and then ask for them\r
-               emptySequence = Sequence();\r
-               emptySequence.setName("no_match");\r
-               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               \r
-               \r
-               string kmerDBName;\r
-               if(method == "kmer")                    {       \r
-                       search = new KmerDB(fastaFileName, kmerSize);                   \r
-                       \r
-                       #ifdef USE_MPI\r
-                       #else\r
-                               kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";\r
-                               ifstream kmerFileTest(kmerDBName.c_str());\r
-                       \r
-                               if(kmerFileTest){       needToGenerate = false;         }\r
-                       #endif\r
-               }\r
-               else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }\r
-               else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }\r
-               else {\r
-                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");\r
-                       m->mothurOutEndLine();\r
-                       search = new KmerDB(fastaFileName, 8);\r
-               }\r
-               \r
-               if (!(m->control_pressed)) {\r
-                       if (needToGenerate) {\r
-                               //add sequences to search \r
-                               for (int i = 0; i < templateSequences.size(); i++) {\r
-                                       search->addSequence(templateSequences[i]);\r
-                                       \r
-                                       if (m->control_pressed) {  templateSequences.clear(); break;  }\r
-                               }\r
-                               \r
-                               if (m->control_pressed) {  templateSequences.clear();  }\r
-                               \r
-                               search->generateDB();\r
-                               \r
-                       }else if ((method == "kmer") && (!needToGenerate)) {\r
-                               ifstream kmerFileTest(kmerDBName.c_str());\r
-                               search->readKmerDB(kmerFileTest);       \r
-                       }\r
-                       \r
-                       search->setNumSeqs(numSeqs);\r
-               }\r
-       }\r
-       catch(exception& e) {\r
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
-               exit(1);\r
-       }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::AlignmentDB(string s){             \r
-       try {                                                                                   \r
-               m = MothurOut::getInstance();\r
-               method = s;\r
-               \r
-               if(method == "suffix")          {       search = new SuffixDB();        }\r
-               else if(method == "blast")      {       search = new BlastDB();         }\r
-               else                                            {       search = new KmerDB();          }\r
-\r
-                               \r
-               //in case you delete the seqs and then ask for them\r
-               emptySequence = Sequence();\r
-               emptySequence.setName("no_match");\r
-               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
-               \r
-       }\r
-       catch(exception& e) {\r
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
-               exit(1);\r
-       }\r
-}\r
-/**************************************************************************************************/\r
-AlignmentDB::~AlignmentDB() {  delete search;  }\r
-/**************************************************************************************************/\r
-Sequence AlignmentDB::findClosestSequence(Sequence* seq) {\r
-       try{\r
-       \r
-               vector<int> spot = search->findClosestSequences(seq, 1);\r
-\r
-               if (spot.size() != 0)   {               return templateSequences[spot[0]];      }\r
-               else                                    {               return emptySequence;                           }\r
-               \r
-       }\r
-       catch(exception& e) {\r
-               m->errorOut(e, "AlignmentDB", "findClosestSequence");\r
-               exit(1);\r
-       }\r
-}\r
-/**************************************************************************************************/\r
-\r
-\r
-\r
-\r
-\r
-\r
+/*
+ *  alignmentdb.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/4/09.
+ *  Copyright 2009 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "alignmentdb.h"
+#include "kmerdb.hpp"
+#include "suffixdb.hpp"
+#include "blastdb.hpp"
+#include "referencedb.h"
+
+/**************************************************************************************************/
+AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){          //      This assumes that the template database is in fasta format, may 
+       try {                                                                                   //      need to alter this in the future?
+               m = MothurOut::getInstance();
+               longest = 0;
+               method = s;
+               bool needToGenerate = true;
+               ReferenceDB* rdb = ReferenceDB::getInstance();
+               bool silent = false;
+               threadID = tid;
+               
+               if (fastaFileName == "saved-silent") {
+                       fastaFileName = "saved"; silent = true;
+               }
+               
+               if (fastaFileName == "saved") {
+                       int start = time(NULL);
+                       
+                       if (!silent) { m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
+
+                       for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+                               templateSequences.push_back(rdb->referenceSeqs[i]);
+                               //save longest base
+                               if (rdb->referenceSeqs[i].getUnaligned().length() >= longest)  { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
+                       }
+                       fastaFileName = rdb->getSavedReference();
+                       
+                       numSeqs = templateSequences.size();
+                       if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  }
+            
+               }else {
+                       int start = time(NULL);
+                       m->mothurOutEndLine();
+                       m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
+                       //bool aligned = false;
+            int tempLength = 0;
+            
+                       #ifdef USE_MPI  
+                               int pid, processors;
+                               vector<unsigned long long> positions;
+                       
+                               MPI_Status status; 
+                               MPI_File inMPI;
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors);
+                               int tag = 2001;
+               
+                               char inFileName[1024];
+                               strcpy(inFileName, fastaFileName.c_str());
+               
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               
+                               if (pid == 0) {
+                                       positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+
+                                       //send file positions to all processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       }
+                               }else{
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       positions.resize(numSeqs+1);
+                                       MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                               }
+                       
+                               //read file 
+                               for(int i=0;i<numSeqs;i++){
+                                       
+                                       if (m->control_pressed) {  templateSequences.clear(); break;  }
+                                       
+                                       //read next sequence
+                                       int length = positions[i+1] - positions[i];
+                                       char* buf4 = new char[length];
+                               
+                                       MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+                       
+                                       string tempBuf = buf4;
+                                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+                                       delete buf4;
+
+                                       istringstream iss (tempBuf,istringstream::in);
+                       
+                                       Sequence temp(iss);  
+                                       if (temp.getName() != "") {
+                                               templateSequences.push_back(temp);
+                                               
+                                               if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+                                               
+                                               //save longest base
+                                               if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
+                        if (tempLength != 0) {
+                            if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
+                        }else { tempLength = temp.getAligned().length(); }
+                                       }
+                               }
+                               
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+                               
+                               MPI_File_close(&inMPI);
+                       
+               #else
+                       ifstream fastaFile;
+                       m->openInputFile(fastaFileName, fastaFile);
+
+                       while (!fastaFile.eof()) {
+                               Sequence temp(fastaFile);  m->gobble(fastaFile);
+                               
+                               if (m->control_pressed) {  templateSequences.clear(); break;  }
+                               
+                               if (temp.getName() != "") {
+                                       templateSequences.push_back(temp);
+                                       
+                                       if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+                                       
+                                       //save longest base
+                                       if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
+                    
+                    if (tempLength != 0) {
+                        if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
+                    }else { tempLength = temp.getAligned().length(); }
+                               }
+                       }
+                       fastaFile.close();
+               #endif
+               
+                       numSeqs = templateSequences.size();
+                       //all of this is elsewhere already!
+                       
+                       m->mothurOut("DONE.");
+                       m->mothurOutEndLine();  cout.flush();
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " to read  " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();  
+
+               }
+               
+               
+               //in case you delete the seqs and then ask for them
+               emptySequence = Sequence();
+               emptySequence.setName("no_match");
+               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+               
+               
+               string kmerDBName;
+               if(method == "kmer")                    {       
+                       search = new KmerDB(fastaFileName, kmerSize);                   
+                       
+                       #ifdef USE_MPI
+                       #else
+                               kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+                               ifstream kmerFileTest(kmerDBName.c_str());
+                               
+                               if(kmerFileTest){       
+                                       bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+                                       if (GoodFile) {  needToGenerate = false;        }
+                               }
+                       #endif
+               }
+               else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
+               else if(method == "blast")              {       search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID);     }
+               else {
+                       method = "kmer";
+                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+                       m->mothurOutEndLine();
+                       search = new KmerDB(fastaFileName, 8);
+               }
+               
+               if (!(m->control_pressed)) {
+                       if (needToGenerate) {
+                               //add sequences to search 
+                               for (int i = 0; i < templateSequences.size(); i++) {
+                                       search->addSequence(templateSequences[i]);
+                                       
+                                       if (m->control_pressed) {  templateSequences.clear(); break;  }
+                               }
+                               
+                               if (m->control_pressed) {  templateSequences.clear();  }
+                               
+                               search->generateDB();
+                               
+                       }else if ((method == "kmer") && (!needToGenerate)) {
+                               ifstream kmerFileTest(kmerDBName.c_str());
+                               search->readKmerDB(kmerFileTest);       
+                       }
+               
+                       search->setNumSeqs(numSeqs);
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "AlignmentDB");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+AlignmentDB::AlignmentDB(string s){             
+       try {                                                                                   
+               m = MothurOut::getInstance();
+               method = s;
+               
+               if(method == "suffix")          {       search = new SuffixDB();        }
+               else if(method == "blast")      {       search = new BlastDB("", 0);            }
+               else                                            {       search = new KmerDB();          }
+
+                               
+               //in case you delete the seqs and then ask for them
+               emptySequence = Sequence();
+               emptySequence.setName("no_match");
+               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "AlignmentDB");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+AlignmentDB::~AlignmentDB() {  delete search;  }
+/**************************************************************************************************/
+Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
+       try{
+       
+               vector<int> spot = search->findClosestSequences(seq, 1);
+       
+               if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
+               else                                    {               return emptySequence;                           }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "AlignmentDB", "findClosestSequence");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+
+
+
+
+