5 * Created by westcott on 11/4/09.
6 * Copyright 2009 Schloss Lab. All rights reserved.
10 #include "alignmentdb.h"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
16 /**************************************************************************************************/
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may
18 try { // need to alter this in the future?
19 m = MothurOut::getInstance();
22 bool needToGenerate = true;
24 m->mothurOutEndLine();
25 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
29 vector<unsigned long int> positions;
33 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
34 MPI_Comm_size(MPI_COMM_WORLD, &processors);
37 char inFileName[1024];
38 strcpy(inFileName, fastaFileName.c_str());
40 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
43 positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
45 //send file positions to all processes
46 for(int i = 1; i < processors; i++) {
47 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
48 MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
51 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
52 positions.resize(numSeqs+1);
53 MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
57 for(int i=0;i<numSeqs;i++){
59 if (m->control_pressed) { templateSequences.clear(); break; }
62 int length = positions[i+1] - positions[i];
63 char* buf4 = new char[length];
65 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
67 string tempBuf = buf4;
68 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
71 istringstream iss (tempBuf,istringstream::in);
74 if (temp.getName() != "") {
75 templateSequences.push_back(temp);
77 if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
81 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
83 MPI_File_close(&inMPI);
87 openInputFile(fastaFileName, fastaFile);
89 while (!fastaFile.eof()) {
90 Sequence temp(fastaFile); gobble(fastaFile);
92 if (m->control_pressed) { templateSequences.clear(); break; }
94 if (temp.getName() != "") {
95 templateSequences.push_back(temp);
97 if (temp.getUnaligned().length() > longest) { longest = temp.getUnaligned().length()+1; }
104 numSeqs = templateSequences.size();
105 //all of this is elsewhere already!
107 m->mothurOut("DONE.");
108 m->mothurOutEndLine(); cout.flush();
110 //in case you delete the seqs and then ask for them
111 emptySequence = Sequence();
112 emptySequence.setName("no_match");
113 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
114 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
118 if(method == "kmer") {
119 search = new KmerDB(fastaFileName, kmerSize);
123 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
124 ifstream kmerFileTest(kmerDBName.c_str());
127 bool GoodFile = checkReleaseVersion(kmerFileTest, m->getVersion());
128 if (GoodFile) { needToGenerate = false; }
132 else if(method == "suffix") { search = new SuffixDB(numSeqs); }
133 else if(method == "blast") { search = new BlastDB(gapOpen, gapExtend, match, misMatch); }
135 m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
136 m->mothurOutEndLine();
137 search = new KmerDB(fastaFileName, 8);
140 if (!(m->control_pressed)) {
141 if (needToGenerate) {
142 //add sequences to search
143 for (int i = 0; i < templateSequences.size(); i++) {
144 search->addSequence(templateSequences[i]);
146 if (m->control_pressed) { templateSequences.clear(); break; }
149 if (m->control_pressed) { templateSequences.clear(); }
151 search->generateDB();
153 }else if ((method == "kmer") && (!needToGenerate)) {
154 ifstream kmerFileTest(kmerDBName.c_str());
155 search->readKmerDB(kmerFileTest);
158 search->setNumSeqs(numSeqs);
161 catch(exception& e) {
162 m->errorOut(e, "AlignmentDB", "AlignmentDB");
166 /**************************************************************************************************/
167 AlignmentDB::AlignmentDB(string s){
169 m = MothurOut::getInstance();
172 if(method == "suffix") { search = new SuffixDB(); }
173 else if(method == "blast") { search = new BlastDB(); }
174 else { search = new KmerDB(); }
177 //in case you delete the seqs and then ask for them
178 emptySequence = Sequence();
179 emptySequence.setName("no_match");
180 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
181 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
184 catch(exception& e) {
185 m->errorOut(e, "AlignmentDB", "AlignmentDB");
189 /**************************************************************************************************/
190 AlignmentDB::~AlignmentDB() { delete search; }
191 /**************************************************************************************************/
192 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
195 vector<int> spot = search->findClosestSequences(seq, 1);
197 if (spot.size() != 0) { return templateSequences[spot[0]]; }
198 else { return emptySequence; }
201 catch(exception& e) {
202 m->errorOut(e, "AlignmentDB", "findClosestSequence");
206 /**************************************************************************************************/