5 * Created by westcott on 11/4/09.
\r
6 * Copyright 2009 Schloss Lab. All rights reserved.
\r
10 #include "alignmentdb.h"
\r
11 #include "kmerdb.hpp"
\r
12 #include "suffixdb.hpp"
\r
13 #include "blastdb.hpp"
\r
16 /**************************************************************************************************/
\r
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may
\r
18 try { // need to alter this in the future?
\r
19 m = MothurOut::getInstance();
\r
22 bool needToGenerate = true;
\r
24 m->mothurOutEndLine();
\r
25 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
\r
29 vector<long> positions;
\r
33 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
\r
35 char inFileName[1024];
\r
36 strcpy(inFileName, fastaFileName.c_str());
\r
38 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
\r
41 positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
\r
43 //send file positions to all processes
\r
44 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
\r
45 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
\r
47 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
\r
48 positions.resize(numSeqs+1);
\r
49 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
\r
53 for(int i=0;i<numSeqs;i++){
\r
55 if (m->control_pressed) { templateSequences.clear(); break; }
\r
57 //read next sequence
\r
58 int length = positions[i+1] - positions[i];
\r
59 char* buf4 = new char[length];
\r
61 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
\r
63 string tempBuf = buf4;
\r
64 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
\r
67 istringstream iss (tempBuf,istringstream::in);
\r
69 Sequence temp(iss);
\r
70 if (temp.getName() != "") {
\r
71 templateSequences.push_back(temp);
\r
73 if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
\r
77 MPI_File_close(&inMPI);
\r
81 openInputFile(fastaFileName, fastaFile);
\r
83 while (!fastaFile.eof()) {
\r
84 Sequence temp(fastaFile); gobble(fastaFile);
\r
86 if (m->control_pressed) { templateSequences.clear(); break; }
\r
88 if (temp.getName() != "") {
\r
89 templateSequences.push_back(temp);
\r
91 if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
\r
98 numSeqs = templateSequences.size();
\r
99 //all of this is elsewhere already!
\r
101 m->mothurOut("DONE.");
\r
102 m->mothurOutEndLine(); cout.flush();
\r
104 //in case you delete the seqs and then ask for them
\r
105 emptySequence = Sequence();
\r
106 emptySequence.setName("no_match");
\r
107 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
\r
108 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
\r
112 if(method == "kmer") {
\r
113 search = new KmerDB(fastaFileName, kmerSize);
\r
117 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
\r
118 ifstream kmerFileTest(kmerDBName.c_str());
\r
120 if(kmerFileTest){ needToGenerate = false; }
\r
123 else if(method == "suffix") { search = new SuffixDB(numSeqs); }
\r
124 else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); }
\r
126 m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
\r
127 m->mothurOutEndLine();
\r
128 search = new KmerDB(fastaFileName, 8);
\r
131 if (!(m->control_pressed)) {
\r
132 if (needToGenerate) {
\r
133 //add sequences to search
\r
134 for (int i = 0; i < templateSequences.size(); i++) {
\r
135 search->addSequence(templateSequences[i]);
\r
137 if (m->control_pressed) { templateSequences.clear(); break; }
\r
140 if (m->control_pressed) { templateSequences.clear(); }
\r
142 search->generateDB();
\r
144 }else if ((method == "kmer") && (!needToGenerate)) {
\r
145 ifstream kmerFileTest(kmerDBName.c_str());
\r
146 search->readKmerDB(kmerFileTest);
\r
149 search->setNumSeqs(numSeqs);
\r
152 catch(exception& e) {
\r
153 m->errorOut(e, "AlignmentDB", "AlignmentDB");
\r
157 /**************************************************************************************************/
\r
158 AlignmentDB::AlignmentDB(string s){
\r
160 m = MothurOut::getInstance();
\r
163 if(method == "suffix") { search = new SuffixDB(); }
\r
164 else if(method == "blast") { search = new BlastDB(); }
\r
165 else { search = new KmerDB(); }
\r
168 //in case you delete the seqs and then ask for them
\r
169 emptySequence = Sequence();
\r
170 emptySequence.setName("no_match");
\r
171 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
\r
172 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
\r
175 catch(exception& e) {
\r
176 m->errorOut(e, "AlignmentDB", "AlignmentDB");
\r
180 /**************************************************************************************************/
\r
181 AlignmentDB::~AlignmentDB() { delete search; }
\r
182 /**************************************************************************************************/
\r
183 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
\r
186 vector<int> spot = search->findClosestSequences(seq, 1);
\r
188 if (spot.size() != 0) { return templateSequences[spot[0]]; }
\r
189 else { return emptySequence; }
\r
192 catch(exception& e) {
\r
193 m->errorOut(e, "AlignmentDB", "findClosestSequence");
\r
198 /**************************************************************************************************/
\r
199 int AlignmentDB::MPISend(int receiver) {
\r
202 //send numSeqs - int
\r
203 MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD);
\r
205 //send longest - int
\r
206 MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD);
\r
208 //send templateSequences
\r
209 for (int i = 0; i < templateSequences.size(); i++) {
\r
210 templateSequences[i].MPISend(receiver);
\r
214 search->MPISend(receiver);
\r
218 catch(exception& e) {
\r
219 m->errorOut(e, "AlignmentDB", "MPISend");
\r
223 /**************************************************************************************************/
\r
224 int AlignmentDB::MPIRecv(int sender) {
\r
227 //receive numSeqs - int
\r
228 MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
\r
230 //receive longest - int
\r
231 MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
\r
233 //receive templateSequences
\r
234 templateSequences.resize(numSeqs);
\r
235 for (int i = 0; i < templateSequences.size(); i++) {
\r
236 templateSequences[i].MPIRecv(sender);
\r
240 search->MPIRecv(sender);
\r
242 for (int i = 0; i < templateSequences.size(); i++) {
\r
243 search->addSequence(templateSequences[i]);
\r
245 search->generateDB();
\r
246 search->setNumSeqs(numSeqs);
\r
250 catch(exception& e) {
\r
251 m->errorOut(e, "AlignmentDB", "MPIRecv");
\r
256 /**************************************************************************************************/
\r