]> git.donarmstrong.com Git - mothur.git/blobdiff - alignmentdb.cpp
1.9
[mothur.git] / alignmentdb.cpp
index 4b324b49c0f081356ab84a119e9657b05199dcb4..1ea1b9803e16b306bc2ea9deeb310eeff1d5fc06 100644 (file)
-/*
- *  alignmentdb.cpp
- *  Mothur
- *
- *  Created by westcott on 11/4/09.
- *  Copyright 2009 Schloss Lab. All rights reserved.
- *
- */
-
-#include "alignmentdb.h"
-#include "kmerdb.hpp"
-#include "suffixdb.hpp"
-#include "blastdb.hpp"
-
-
-/**************************************************************************************************/
-AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){           //      This assumes that the template database is in fasta format, may 
-       try {                                                                                   //      need to alter this in the future?
-               m = MothurOut::getInstance();
-               longest = 0;
-               method = s;
-               bool needToGenerate = true;
-               
-               m->mothurOutEndLine();
-               m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
-               
-               #ifdef USE_MPI  
-                       int pid;
-                       vector<long> positions;
-               
-                       MPI_Status status; 
-                       MPI_File inMPI;
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-       
-                       char inFileName[fastaFileName.length()];
-                       strcpy(inFileName, fastaFileName.c_str());
-       
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                       
-                       if (pid == 0) {
-                               positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
-
-                               //send file positions to all processes
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
-                       }else{
-                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
-                               positions.resize(numSeqs+1);
-                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
-                       }
-                       
-                       //read file 
-                       for(int i=0;i<numSeqs;i++){
-                               
-                               if (m->control_pressed) {  templateSequences.clear(); break;  }
-                               
-                               //read next sequence
-                               int length = positions[i+1] - positions[i];
-                               char buf4[length];
-                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
-                               
-                               string tempBuf = buf4;
-                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
-                               
-                               istringstream iss (tempBuf,istringstream::in);
-               
-                               Sequence temp(iss);  
-                               if (temp.getName() != "") {
-                                       templateSequences.push_back(temp);
-                                       //save longest base
-                                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
-                               }
-                       }
-                       
-                       MPI_File_close(&inMPI);
-       #else
-               ifstream fastaFile;
-               openInputFile(fastaFileName, fastaFile);
-
-               while (!fastaFile.eof()) {
-                       Sequence temp(fastaFile);  gobble(fastaFile);
-                       
-                       if (m->control_pressed) {  templateSequences.clear(); break;  }
-                       
-                       if (temp.getName() != "") {
-                               templateSequences.push_back(temp);
-                               //save longest base
-                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
-                       }
-               }
-               fastaFile.close();
-               
-       #endif
-       
-               numSeqs = templateSequences.size();
-               //all of this is elsewhere already!
-               
-               m->mothurOut("DONE.");
-               m->mothurOutEndLine();  cout.flush();
-               
-               //in case you delete the seqs and then ask for them
-               emptySequence = Sequence();
-               emptySequence.setName("no_match");
-               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
-               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
-               
-               
-               string kmerDBName;
-               if(method == "kmer")                    {       
-                       search = new KmerDB(fastaFileName, kmerSize);                   
-                       
-                       #ifdef USE_MPI
-                       #else
-                               kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
-                               ifstream kmerFileTest(kmerDBName.c_str());
-                       
-                               if(kmerFileTest){       needToGenerate = false;         }
-                       #endif
-               }
-               else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
-               else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }
-               else {
-                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
-                       m->mothurOutEndLine();
-                       search = new KmerDB(fastaFileName, 8);
-               }
-               
-               if (!(m->control_pressed)) {
-                       if (needToGenerate) {
-                               //add sequences to search 
-                               for (int i = 0; i < templateSequences.size(); i++) {
-                                       search->addSequence(templateSequences[i]);
-                                       
-                                       if (m->control_pressed) {  templateSequences.clear(); break;  }
-                               }
-                               
-                               if (m->control_pressed) {  templateSequences.clear();  }
-                               
-                               search->generateDB();
-                               
-                       }else if ((method == "kmer") && (!needToGenerate)) {
-                               ifstream kmerFileTest(kmerDBName.c_str());
-                               search->readKmerDB(kmerFileTest);       
-                       }
-                       
-                       search->setNumSeqs(numSeqs);
-               }
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-AlignmentDB::AlignmentDB(string s){             
-       try {                                                                                   
-               m = MothurOut::getInstance();
-               method = s;
-               
-               if(method == "suffix")          {       search = new SuffixDB();        }
-               else if(method == "blast")      {       search = new BlastDB();         }
-               else                                            {       search = new KmerDB();          }
-
-                               
-               //in case you delete the seqs and then ask for them
-               emptySequence = Sequence();
-               emptySequence.setName("no_match");
-               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
-               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignmentDB", "AlignmentDB");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-AlignmentDB::~AlignmentDB() {  delete search;  }
-/**************************************************************************************************/
-Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
-       try{
-       
-               vector<int> spot = search->findClosestSequences(seq, 1);
-
-               if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
-               else                                    {               return emptySequence;                           }
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignmentDB", "findClosestSequence");
-               exit(1);
-       }
-}
-#ifdef USE_MPI 
-/**************************************************************************************************/
-int AlignmentDB::MPISend(int receiver) {
-       try {
-               
-               //send numSeqs - int
-               MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
-                                                                       
-               //send longest - int
-               MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
-       
-               //send templateSequences
-               for (int i = 0; i < templateSequences.size(); i++) {
-                       templateSequences[i].MPISend(receiver);
-               }
-               
-               //send Database
-               search->MPISend(receiver);
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignmentDB", "MPISend");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-int AlignmentDB::MPIRecv(int sender) {
-       try {
-               MPI_Status status;
-               //receive numSeqs - int
-               MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
-               
-               //receive longest - int
-               MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
-
-               //receive templateSequences
-               templateSequences.resize(numSeqs);
-               for (int i = 0; i < templateSequences.size(); i++) {
-                       templateSequences[i].MPIRecv(sender);
-               }
-
-               //receive Database
-               search->MPIRecv(sender);
-       
-               for (int i = 0; i < templateSequences.size(); i++) {
-                       search->addSequence(templateSequences[i]);
-               }
-               search->generateDB();
-               search->setNumSeqs(numSeqs);
-
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "AlignmentDB", "MPIRecv");
-               exit(1);
-       }
-}
-#endif
-/**************************************************************************************************/
-
-
-
-
-
-
+/*\r
+ *  alignmentdb.cpp\r
+ *  Mothur\r
+ *\r
+ *  Created by westcott on 11/4/09.\r
+ *  Copyright 2009 Schloss Lab. All rights reserved.\r
+ *\r
+ */\r
+\r
+#include "alignmentdb.h"\r
+#include "kmerdb.hpp"\r
+#include "suffixdb.hpp"\r
+#include "blastdb.hpp"\r
+\r
+\r
+/**************************************************************************************************/\r
+AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){           //      This assumes that the template database is in fasta format, may \r
+       try {                                                                                   //      need to alter this in the future?\r
+               m = MothurOut::getInstance();\r
+               longest = 0;\r
+               method = s;\r
+               bool needToGenerate = true;\r
+               \r
+               m->mothurOutEndLine();\r
+               m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();\r
+               \r
+               #ifdef USE_MPI  \r
+                       int pid;\r
+                       vector<long> positions;\r
+               \r
+                       MPI_Status status; \r
+                       MPI_File inMPI;\r
+                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
+       \r
+                       char inFileName[1024];\r
+                       strcpy(inFileName, fastaFileName.c_str());\r
+       \r
+                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer\r
+                       \r
+                       if (pid == 0) {\r
+                               positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
+\r
+                               //send file positions to all processes\r
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs\r
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     \r
+                       }else{\r
+                               MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs\r
+                               positions.resize(numSeqs+1);\r
+                               MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions\r
+                       }\r
+               \r
+                       //read file \r
+                       for(int i=0;i<numSeqs;i++){\r
+                               \r
+                               if (m->control_pressed) {  templateSequences.clear(); break;  }\r
+                               \r
+                               //read next sequence\r
+                               int length = positions[i+1] - positions[i];\r
+                               char* buf4 = new char[length];\r
+                       \r
+                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);\r
+               \r
+                               string tempBuf = buf4;\r
+                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }\r
+                               delete buf4;\r
+\r
+                               istringstream iss (tempBuf,istringstream::in);\r
+               \r
+                               Sequence temp(iss);  \r
+                               if (temp.getName() != "") {\r
+                                       templateSequences.push_back(temp);\r
+                                       //save longest base\r
+                                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
+                               }\r
+                       }\r
+               \r
+                       MPI_File_close(&inMPI);\r
+               \r
+       #else\r
+               ifstream fastaFile;\r
+               openInputFile(fastaFileName, fastaFile);\r
+\r
+               while (!fastaFile.eof()) {\r
+                       Sequence temp(fastaFile);  gobble(fastaFile);\r
+                       \r
+                       if (m->control_pressed) {  templateSequences.clear(); break;  }\r
+                       \r
+                       if (temp.getName() != "") {\r
+                               templateSequences.push_back(temp);\r
+                               //save longest base\r
+                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
+                       }\r
+               }\r
+               fastaFile.close();\r
+               \r
+       #endif\r
+       \r
+               numSeqs = templateSequences.size();\r
+               //all of this is elsewhere already!\r
+               \r
+               m->mothurOut("DONE.");\r
+               m->mothurOutEndLine();  cout.flush();\r
+               \r
+               //in case you delete the seqs and then ask for them\r
+               emptySequence = Sequence();\r
+               emptySequence.setName("no_match");\r
+               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
+               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
+               \r
+               \r
+               string kmerDBName;\r
+               if(method == "kmer")                    {       \r
+                       search = new KmerDB(fastaFileName, kmerSize);                   \r
+                       \r
+                       #ifdef USE_MPI\r
+                       #else\r
+                               kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";\r
+                               ifstream kmerFileTest(kmerDBName.c_str());\r
+                       \r
+                               if(kmerFileTest){       needToGenerate = false;         }\r
+                       #endif\r
+               }\r
+               else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }\r
+               else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }\r
+               else {\r
+                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");\r
+                       m->mothurOutEndLine();\r
+                       search = new KmerDB(fastaFileName, 8);\r
+               }\r
+               \r
+               if (!(m->control_pressed)) {\r
+                       if (needToGenerate) {\r
+                               //add sequences to search \r
+                               for (int i = 0; i < templateSequences.size(); i++) {\r
+                                       search->addSequence(templateSequences[i]);\r
+                                       \r
+                                       if (m->control_pressed) {  templateSequences.clear(); break;  }\r
+                               }\r
+                               \r
+                               if (m->control_pressed) {  templateSequences.clear();  }\r
+                               \r
+                               search->generateDB();\r
+                               \r
+                       }else if ((method == "kmer") && (!needToGenerate)) {\r
+                               ifstream kmerFileTest(kmerDBName.c_str());\r
+                               search->readKmerDB(kmerFileTest);       \r
+                       }\r
+                       \r
+                       search->setNumSeqs(numSeqs);\r
+               }\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
+               exit(1);\r
+       }\r
+}\r
+/**************************************************************************************************/\r
+AlignmentDB::AlignmentDB(string s){             \r
+       try {                                                                                   \r
+               m = MothurOut::getInstance();\r
+               method = s;\r
+               \r
+               if(method == "suffix")          {       search = new SuffixDB();        }\r
+               else if(method == "blast")      {       search = new BlastDB();         }\r
+               else                                            {       search = new KmerDB();          }\r
+\r
+                               \r
+               //in case you delete the seqs and then ask for them\r
+               emptySequence = Sequence();\r
+               emptySequence.setName("no_match");\r
+               emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
+               emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
+               \r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
+               exit(1);\r
+       }\r
+}\r
+/**************************************************************************************************/\r
+AlignmentDB::~AlignmentDB() {  delete search;  }\r
+/**************************************************************************************************/\r
+Sequence AlignmentDB::findClosestSequence(Sequence* seq) {\r
+       try{\r
+       \r
+               vector<int> spot = search->findClosestSequences(seq, 1);\r
+\r
+               if (spot.size() != 0)   {               return templateSequences[spot[0]];      }\r
+               else                                    {               return emptySequence;                           }\r
+               \r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "AlignmentDB", "findClosestSequence");\r
+               exit(1);\r
+       }\r
+}\r
+#ifdef USE_MPI \r
+/**************************************************************************************************/\r
+int AlignmentDB::MPISend(int receiver) {\r
+       try {\r
+               \r
+               //send numSeqs - int\r
+               MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); \r
+                                                                       \r
+               //send longest - int\r
+               MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); \r
+       \r
+               //send templateSequences\r
+               for (int i = 0; i < templateSequences.size(); i++) {\r
+                       templateSequences[i].MPISend(receiver);\r
+               }\r
+               \r
+               //send Database\r
+               search->MPISend(receiver);\r
+               \r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "AlignmentDB", "MPISend");\r
+               exit(1);\r
+       }\r
+}\r
+/**************************************************************************************************/\r
+int AlignmentDB::MPIRecv(int sender) {\r
+       try {\r
+               MPI_Status status;\r
+               //receive numSeqs - int\r
+               MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);\r
+               \r
+               //receive longest - int\r
+               MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);\r
+\r
+               //receive templateSequences\r
+               templateSequences.resize(numSeqs);\r
+               for (int i = 0; i < templateSequences.size(); i++) {\r
+                       templateSequences[i].MPIRecv(sender);\r
+               }\r
+\r
+               //receive Database\r
+               search->MPIRecv(sender);\r
+       \r
+               for (int i = 0; i < templateSequences.size(); i++) {\r
+                       search->addSequence(templateSequences[i]);\r
+               }\r
+               search->generateDB();\r
+               search->setNumSeqs(numSeqs);\r
+\r
+               return 0;\r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "AlignmentDB", "MPIRecv");\r
+               exit(1);\r
+       }\r
+}\r
+#endif\r
+/**************************************************************************************************/\r
+\r
+\r
+\r
+\r
+\r
+\r