5 * Created by westcott on 11/4/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "alignmentdb.h"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
14 #include "referencedb.h"
16 /**************************************************************************************************/
18 AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest(adb.longest), method(adb.method), emptySequence(adb.emptySequence), threadID(adb.threadID) {
21 m = MothurOut::getInstance();
22 if (adb.method == "blast") {
23 search = new BlastDB(*((BlastDB*)adb.search));
24 }else if(adb.method == "kmer") {
25 search = new KmerDB(*((KmerDB*)adb.search));
26 }else if(adb.method == "suffix") {
27 search = new SuffixDB(*((SuffixDB*)adb.search));
29 m->mothurOut("[ERROR]: cannot create copy of alignment database, unrecognized method - " + adb.method); m->mothurOutEndLine();
32 for (int i = 0; i < adb.templateSequences.size(); i++) {
33 Sequence temp(adb.templateSequences[i]);
34 templateSequences.push_back(temp);
38 m->errorOut(e, "AlignmentDB", "AlignmentDB");
43 /**************************************************************************************************/
44 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){ // This assumes that the template database is in fasta format, may
45 try { // need to alter this in the future?
46 m = MothurOut::getInstance();
49 bool needToGenerate = true;
50 ReferenceDB* rdb = ReferenceDB::getInstance();
54 if (fastaFileName == "saved-silent") {
55 fastaFileName = "saved"; silent = true;
58 if (fastaFileName == "saved") {
59 int start = time(NULL);
61 if (!silent) { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
63 for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
64 templateSequences.push_back(rdb->referenceSeqs[i]);
66 if (rdb->referenceSeqs[i].getUnaligned().length() >= longest) { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
68 fastaFileName = rdb->getSavedReference();
70 numSeqs = templateSequences.size();
71 if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine(); }
74 int start = time(NULL);
75 m->mothurOutEndLine();
76 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
80 vector<unsigned long long> positions;
84 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
85 MPI_Comm_size(MPI_COMM_WORLD, &processors);
88 char inFileName[1024];
89 strcpy(inFileName, fastaFileName.c_str());
91 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
94 positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
96 //send file positions to all processes
97 for(int i = 1; i < processors; i++) {
98 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
99 MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
102 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
103 positions.resize(numSeqs+1);
104 MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
108 for(int i=0;i<numSeqs;i++){
110 if (m->control_pressed) { templateSequences.clear(); break; }
113 int length = positions[i+1] - positions[i];
114 char* buf4 = new char[length];
116 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
118 string tempBuf = buf4;
119 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
122 istringstream iss (tempBuf,istringstream::in);
125 if (temp.getName() != "") {
126 templateSequences.push_back(temp);
128 if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
131 if (temp.getUnaligned().length() >= longest) { longest = temp.getUnaligned().length()+1; }
135 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
137 MPI_File_close(&inMPI);
141 m->openInputFile(fastaFileName, fastaFile);
143 while (!fastaFile.eof()) {
144 Sequence temp(fastaFile); m->gobble(fastaFile);
146 if (m->control_pressed) { templateSequences.clear(); break; }
148 if (temp.getName() != "") {
149 templateSequences.push_back(temp);
151 if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
154 if (temp.getUnaligned().length() >= longest) { longest = (temp.getUnaligned().length()+1); }
160 numSeqs = templateSequences.size();
161 //all of this is elsewhere already!
163 m->mothurOut("DONE.");
164 m->mothurOutEndLine(); cout.flush();
165 m->mothurOut("It took " + toString(time(NULL) - start) + " to read " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();
170 //in case you delete the seqs and then ask for them
171 emptySequence = Sequence();
172 emptySequence.setName("no_match");
173 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
174 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
178 if(method == "kmer") {
179 search = new KmerDB(fastaFileName, kmerSize);
183 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
184 ifstream kmerFileTest(kmerDBName.c_str());
187 bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
188 if (GoodFile) { needToGenerate = false; }
192 else if(method == "suffix") { search = new SuffixDB(numSeqs); }
193 else if(method == "blast") { search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); }
196 m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
197 m->mothurOutEndLine();
198 search = new KmerDB(fastaFileName, 8);
201 if (!(m->control_pressed)) {
202 if (needToGenerate) {
203 //add sequences to search
204 for (int i = 0; i < templateSequences.size(); i++) {
205 search->addSequence(templateSequences[i]);
207 if (m->control_pressed) { templateSequences.clear(); break; }
210 if (m->control_pressed) { templateSequences.clear(); }
212 search->generateDB();
214 }else if ((method == "kmer") && (!needToGenerate)) {
215 ifstream kmerFileTest(kmerDBName.c_str());
216 search->readKmerDB(kmerFileTest);
219 search->setNumSeqs(numSeqs);
223 catch(exception& e) {
224 m->errorOut(e, "AlignmentDB", "AlignmentDB");
228 /**************************************************************************************************/
229 AlignmentDB::AlignmentDB(string s){
231 m = MothurOut::getInstance();
234 if(method == "suffix") { search = new SuffixDB(); }
235 else if(method == "blast") { search = new BlastDB("", 0); }
236 else { search = new KmerDB(); }
239 //in case you delete the seqs and then ask for them
240 emptySequence = Sequence();
241 emptySequence.setName("no_match");
242 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
243 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
246 catch(exception& e) {
247 m->errorOut(e, "AlignmentDB", "AlignmentDB");
251 /**************************************************************************************************/
252 AlignmentDB::~AlignmentDB() { delete search; }
253 /**************************************************************************************************/
254 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
257 vector<int> spot = search->findClosestSequences(seq, 1);
259 if (spot.size() != 0) { return templateSequences[spot[0]]; }
260 else { return emptySequence; }
263 catch(exception& e) {
264 m->errorOut(e, "AlignmentDB", "findClosestSequence");
268 /**************************************************************************************************/