5 * Created by westcott on 11/4/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "alignmentdb.h"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
16 /**************************************************************************************************/
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may
18 try { // need to alter this in the future?
19 m = MothurOut::getInstance();
22 bool needToGenerate = true;
24 m->mothurOutEndLine();
25 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
29 vector<long> positions;
33 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
35 char inFileName[fastaFileName.length()];
36 strcpy(inFileName, fastaFileName.c_str());
38 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
41 positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
43 //send file positions to all processes
44 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //send numSeqs
45 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos
47 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
48 positions.resize(numSeqs+1);
49 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
53 for(int i=0;i<numSeqs;i++){
55 if (m->control_pressed) { templateSequences.clear(); break; }
58 int length = positions[i+1] - positions[i];
60 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
62 string tempBuf = buf4;
63 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
65 istringstream iss (tempBuf,istringstream::in);
68 if (temp.getName() != "") {
69 templateSequences.push_back(temp);
71 if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
75 MPI_File_close(&inMPI);
78 openInputFile(fastaFileName, fastaFile);
80 while (!fastaFile.eof()) {
81 Sequence temp(fastaFile); gobble(fastaFile);
83 if (m->control_pressed) { templateSequences.clear(); break; }
85 if (temp.getName() != "") {
86 templateSequences.push_back(temp);
88 if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
95 numSeqs = templateSequences.size();
96 //all of this is elsewhere already!
98 m->mothurOut("DONE.");
99 m->mothurOutEndLine(); cout.flush();
101 //in case you delete the seqs and then ask for them
102 emptySequence = Sequence();
103 emptySequence.setName("no_match");
104 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
105 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
109 if(method == "kmer") {
110 search = new KmerDB(fastaFileName, kmerSize);
114 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
115 ifstream kmerFileTest(kmerDBName.c_str());
117 if(kmerFileTest){ needToGenerate = false; }
120 else if(method == "suffix") { search = new SuffixDB(numSeqs); }
121 else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); }
123 m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
124 m->mothurOutEndLine();
125 search = new KmerDB(fastaFileName, 8);
128 if (!(m->control_pressed)) {
129 if (needToGenerate) {
130 //add sequences to search
131 for (int i = 0; i < templateSequences.size(); i++) {
132 search->addSequence(templateSequences[i]);
134 if (m->control_pressed) { templateSequences.clear(); break; }
137 if (m->control_pressed) { templateSequences.clear(); }
139 search->generateDB();
141 }else if ((method == "kmer") && (!needToGenerate)) {
142 ifstream kmerFileTest(kmerDBName.c_str());
143 search->readKmerDB(kmerFileTest);
146 search->setNumSeqs(numSeqs);
149 catch(exception& e) {
150 m->errorOut(e, "AlignmentDB", "AlignmentDB");
154 /**************************************************************************************************/
155 AlignmentDB::AlignmentDB(string s){
157 m = MothurOut::getInstance();
160 if(method == "suffix") { search = new SuffixDB(); }
161 else if(method == "blast") { search = new BlastDB(); }
162 else { search = new KmerDB(); }
165 //in case you delete the seqs and then ask for them
166 emptySequence = Sequence();
167 emptySequence.setName("no_match");
168 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
169 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
172 catch(exception& e) {
173 m->errorOut(e, "AlignmentDB", "AlignmentDB");
177 /**************************************************************************************************/
178 AlignmentDB::~AlignmentDB() { delete search; }
179 /**************************************************************************************************/
180 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
183 vector<int> spot = search->findClosestSequences(seq, 1);
185 if (spot.size() != 0) { return templateSequences[spot[0]]; }
186 else { return emptySequence; }
189 catch(exception& e) {
190 m->errorOut(e, "AlignmentDB", "findClosestSequence");
195 /**************************************************************************************************/
196 int AlignmentDB::MPISend(int receiver) {
200 MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD);
203 MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD);
205 //send templateSequences
206 for (int i = 0; i < templateSequences.size(); i++) {
207 templateSequences[i].MPISend(receiver);
211 search->MPISend(receiver);
215 catch(exception& e) {
216 m->errorOut(e, "AlignmentDB", "MPISend");
220 /**************************************************************************************************/
221 int AlignmentDB::MPIRecv(int sender) {
224 //receive numSeqs - int
225 MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
227 //receive longest - int
228 MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
230 //receive templateSequences
231 templateSequences.resize(numSeqs);
232 for (int i = 0; i < templateSequences.size(); i++) {
233 templateSequences[i].MPIRecv(sender);
237 search->MPIRecv(sender);
239 for (int i = 0; i < templateSequences.size(); i++) {
240 search->addSequence(templateSequences[i]);
242 search->generateDB();
243 search->setNumSeqs(numSeqs);
247 catch(exception& e) {
248 m->errorOut(e, "AlignmentDB", "MPIRecv");
253 /**************************************************************************************************/