]> git.donarmstrong.com Git - mothur.git/blob - alignmentdb.cpp
working on windows paralellization, added trimOligos class to be used by trim.flows...
[mothur.git] / alignmentdb.cpp
1 /*
2  *  alignmentdb.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/4/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "alignmentdb.h"
11 #include "kmerdb.hpp"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
14 #include "referencedb.h"
15
16 /**************************************************************************************************/
17 //deep copy
18 AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest(adb.longest), method(adb.method), emptySequence(adb.emptySequence), threadID(adb.threadID) {
19         try {
20                 
21                 m = MothurOut::getInstance();
22                 if (adb.method == "blast") {
23                         search = new BlastDB(*((BlastDB*)adb.search));
24                 }else if(adb.method == "kmer") {
25                         search = new KmerDB(*((KmerDB*)adb.search));
26                 }else if(adb.method == "suffix") {
27                         search = new SuffixDB(*((SuffixDB*)adb.search));
28                 }else {
29                         m->mothurOut("[ERROR]: cannot create copy of alignment database, unrecognized method - " + adb.method); m->mothurOutEndLine();
30                 }
31                 
32                 for (int i = 0; i < adb.templateSequences.size(); i++) {
33                         Sequence temp(adb.templateSequences[i]);
34                         templateSequences.push_back(temp);
35                 }
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
39                 exit(1);
40         }
41         
42 }
43 /**************************************************************************************************/
44 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){           //      This assumes that the template database is in fasta format, may 
45         try {                                                                                   //      need to alter this in the future?
46                 m = MothurOut::getInstance();
47                 longest = 0;
48                 method = s;
49                 bool needToGenerate = true;
50                 ReferenceDB* rdb = ReferenceDB::getInstance();
51                 bool silent = false;
52                 threadID = tid;
53                 
54                 if (fastaFileName == "saved-silent") {
55                         fastaFileName = "saved"; silent = true;
56                 }
57                 
58                 if (fastaFileName == "saved") {
59                         int start = time(NULL);
60                         
61                         if (!silent) { m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
62
63                         for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
64                                 templateSequences.push_back(rdb->referenceSeqs[i]);
65                                 //save longest base
66                                 if (rdb->referenceSeqs[i].getUnaligned().length() >= longest)  { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
67                         }
68                         fastaFileName = rdb->getSavedReference();
69                         
70                         numSeqs = templateSequences.size();
71                         if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  }
72
73                 }else {
74                         int start = time(NULL);
75                         m->mothurOutEndLine();
76                         m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
77                         
78                         #ifdef USE_MPI  
79                                 int pid, processors;
80                                 vector<unsigned long long> positions;
81                         
82                                 MPI_Status status; 
83                                 MPI_File inMPI;
84                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
85                                 MPI_Comm_size(MPI_COMM_WORLD, &processors);
86                                 int tag = 2001;
87                 
88                                 char inFileName[1024];
89                                 strcpy(inFileName, fastaFileName.c_str());
90                 
91                                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
92                                 
93                                 if (pid == 0) {
94                                         positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
95
96                                         //send file positions to all processes
97                                         for(int i = 1; i < processors; i++) { 
98                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
99                                                 MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
100                                         }
101                                 }else{
102                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
103                                         positions.resize(numSeqs+1);
104                                         MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
105                                 }
106                         
107                                 //read file 
108                                 for(int i=0;i<numSeqs;i++){
109                                         
110                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
111                                         
112                                         //read next sequence
113                                         int length = positions[i+1] - positions[i];
114                                         char* buf4 = new char[length];
115                                 
116                                         MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
117                         
118                                         string tempBuf = buf4;
119                                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
120                                         delete buf4;
121
122                                         istringstream iss (tempBuf,istringstream::in);
123                         
124                                         Sequence temp(iss);  
125                                         if (temp.getName() != "") {
126                                                 templateSequences.push_back(temp);
127                                                 
128                                                 if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
129                                                 
130                                                 //save longest base
131                                                 if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
132                                         }
133                                 }
134                                 
135                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
136                                 
137                                 MPI_File_close(&inMPI);
138                         
139                 #else
140                         ifstream fastaFile;
141                         m->openInputFile(fastaFileName, fastaFile);
142
143                         while (!fastaFile.eof()) {
144                                 Sequence temp(fastaFile);  m->gobble(fastaFile);
145                                 
146                                 if (m->control_pressed) {  templateSequences.clear(); break;  }
147                                 
148                                 if (temp.getName() != "") {
149                                         templateSequences.push_back(temp);
150                                         
151                                         if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
152                                         
153                                         //save longest base
154                                         if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
155                                 }
156                         }
157                         fastaFile.close();
158                 #endif
159                 
160                         numSeqs = templateSequences.size();
161                         //all of this is elsewhere already!
162                         
163                         m->mothurOut("DONE.");
164                         m->mothurOutEndLine();  cout.flush();
165                         m->mothurOut("It took " + toString(time(NULL) - start) + " to read  " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();  
166
167                 }
168                 
169                 
170                 //in case you delete the seqs and then ask for them
171                 emptySequence = Sequence();
172                 emptySequence.setName("no_match");
173                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
174                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
175                 
176                 
177                 string kmerDBName;
178                 if(method == "kmer")                    {       
179                         search = new KmerDB(fastaFileName, kmerSize);                   
180                         
181                         #ifdef USE_MPI
182                         #else
183                                 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
184                                 ifstream kmerFileTest(kmerDBName.c_str());
185                                 
186                                 if(kmerFileTest){       
187                                         bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
188                                         if (GoodFile) {  needToGenerate = false;        }
189                                 }
190                         #endif
191                 }
192                 else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
193                 else if(method == "blast")              {       search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID);     }
194                 else {
195                         method = "kmer";
196                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
197                         m->mothurOutEndLine();
198                         search = new KmerDB(fastaFileName, 8);
199                 }
200                 
201                 if (!(m->control_pressed)) {
202                         if (needToGenerate) {
203                                 //add sequences to search 
204                                 for (int i = 0; i < templateSequences.size(); i++) {
205                                         search->addSequence(templateSequences[i]);
206                                         
207                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
208                                 }
209                                 
210                                 if (m->control_pressed) {  templateSequences.clear();  }
211                                 
212                                 search->generateDB();
213                                 
214                         }else if ((method == "kmer") && (!needToGenerate)) {
215                                 ifstream kmerFileTest(kmerDBName.c_str());
216                                 search->readKmerDB(kmerFileTest);       
217                         }
218                 
219                         search->setNumSeqs(numSeqs);
220                 }
221                 
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
225                 exit(1);
226         }
227 }
228 /**************************************************************************************************/
229 AlignmentDB::AlignmentDB(string s){              
230         try {                                                                                   
231                 m = MothurOut::getInstance();
232                 method = s;
233                 
234                 if(method == "suffix")          {       search = new SuffixDB();        }
235                 else if(method == "blast")      {       search = new BlastDB("", 0);            }
236                 else                                            {       search = new KmerDB();          }
237
238                                 
239                 //in case you delete the seqs and then ask for them
240                 emptySequence = Sequence();
241                 emptySequence.setName("no_match");
242                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
243                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
244                 
245         }
246         catch(exception& e) {
247                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
248                 exit(1);
249         }
250 }
251 /**************************************************************************************************/
252 AlignmentDB::~AlignmentDB() {  delete search;   }
253 /**************************************************************************************************/
254 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
255         try{
256         
257                 vector<int> spot = search->findClosestSequences(seq, 1);
258         
259                 if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
260                 else                                    {               return emptySequence;                           }
261                 
262         }
263         catch(exception& e) {
264                 m->errorOut(e, "AlignmentDB", "findClosestSequence");
265                 exit(1);
266         }
267 }
268 /**************************************************************************************************/
269
270
271
272
273
274