]> git.donarmstrong.com Git - mothur.git/blob - alignmentdb.cpp
added MPI code, broke up chimera.seqs into 5 separated commands, added parse.sff...
[mothur.git] / alignmentdb.cpp
1 /*
2  *  alignmentdb.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/4/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "alignmentdb.h"
11 #include "kmerdb.hpp"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
14
15
16 /**************************************************************************************************/
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){            //      This assumes that the template database is in fasta format, may 
18         try {                                                                                   //      need to alter this in the future?
19                 m = MothurOut::getInstance();
20                 longest = 0;
21                 method = s;
22                 bool needToGenerate = true;
23                 
24                 m->mothurOutEndLine();
25                 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
26                 
27                 #ifdef USE_MPI  
28                         int pid;
29                         vector<long> positions;
30                 
31                         MPI_Status status; 
32                         MPI_File inMPI;
33                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
34         
35                         char inFileName[fastaFileName.length()];
36                         strcpy(inFileName, fastaFileName.c_str());
37         
38                         MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
39                         
40                         if (pid == 0) {
41                                 positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
42
43                                 //send file positions to all processes
44                                 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs
45                                 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     
46                         }else{
47                                 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs
48                                 positions.resize(numSeqs+1);
49                                 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions
50                         }
51                         
52                         //read file 
53                         for(int i=0;i<numSeqs;i++){
54                                 
55                                 if (m->control_pressed) {  templateSequences.clear(); break;  }
56                                 
57                                 //read next sequence
58                                 int length = positions[i+1] - positions[i];
59                                 char buf4[length];
60                                 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
61                                 
62                                 string tempBuf = buf4;
63                                 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
64                                 
65                                 istringstream iss (tempBuf,istringstream::in);
66                 
67                                 Sequence temp(iss);  
68                                 if (temp.getName() != "") {
69                                         templateSequences.push_back(temp);
70                                         //save longest base
71                                         if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
72                                 }
73                         }
74                         
75                         MPI_File_close(&inMPI);
76         #else
77                 ifstream fastaFile;
78                 openInputFile(fastaFileName, fastaFile);
79
80                 while (!fastaFile.eof()) {
81                         Sequence temp(fastaFile);  gobble(fastaFile);
82                         
83                         if (m->control_pressed) {  templateSequences.clear(); break;  }
84                         
85                         if (temp.getName() != "") {
86                                 templateSequences.push_back(temp);
87                                 //save longest base
88                                 if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
89                         }
90                 }
91                 fastaFile.close();
92                 
93         #endif
94         
95                 numSeqs = templateSequences.size();
96                 //all of this is elsewhere already!
97                 
98                 m->mothurOut("DONE.");
99                 m->mothurOutEndLine();  cout.flush();
100                 
101                 //in case you delete the seqs and then ask for them
102                 emptySequence = Sequence();
103                 emptySequence.setName("no_match");
104                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
105                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
106                 
107                 
108                 string kmerDBName;
109                 if(method == "kmer")                    {       
110                         search = new KmerDB(fastaFileName, kmerSize);                   
111                         
112                         #ifdef USE_MPI
113                         #else
114                                 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
115                                 ifstream kmerFileTest(kmerDBName.c_str());
116                         
117                                 if(kmerFileTest){       needToGenerate = false;         }
118                         #endif
119                 }
120                 else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
121                 else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }
122                 else {
123                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
124                         m->mothurOutEndLine();
125                         search = new KmerDB(fastaFileName, 8);
126                 }
127                 
128                 if (!(m->control_pressed)) {
129                         if (needToGenerate) {
130                                 //add sequences to search 
131                                 for (int i = 0; i < templateSequences.size(); i++) {
132                                         search->addSequence(templateSequences[i]);
133                                         
134                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
135                                 }
136                                 
137                                 if (m->control_pressed) {  templateSequences.clear();  }
138                                 
139                                 search->generateDB();
140                                 
141                         }else if ((method == "kmer") && (!needToGenerate)) {
142                                 ifstream kmerFileTest(kmerDBName.c_str());
143                                 search->readKmerDB(kmerFileTest);       
144                         }
145                         
146                         search->setNumSeqs(numSeqs);
147                 }
148         }
149         catch(exception& e) {
150                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
151                 exit(1);
152         }
153 }
154 /**************************************************************************************************/
155 AlignmentDB::AlignmentDB(string s){              
156         try {                                                                                   
157                 m = MothurOut::getInstance();
158                 method = s;
159                 
160                 if(method == "suffix")          {       search = new SuffixDB();        }
161                 else if(method == "blast")      {       search = new BlastDB();         }
162                 else                                            {       search = new KmerDB();          }
163
164                                 
165                 //in case you delete the seqs and then ask for them
166                 emptySequence = Sequence();
167                 emptySequence.setName("no_match");
168                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
169                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
170                 
171         }
172         catch(exception& e) {
173                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
174                 exit(1);
175         }
176 }
177 /**************************************************************************************************/
178 AlignmentDB::~AlignmentDB() {  delete search;   }
179 /**************************************************************************************************/
180 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
181         try{
182         
183                 vector<int> spot = search->findClosestSequences(seq, 1);
184
185                 if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
186                 else                                    {               return emptySequence;                           }
187                 
188         }
189         catch(exception& e) {
190                 m->errorOut(e, "AlignmentDB", "findClosestSequence");
191                 exit(1);
192         }
193 }
194 #ifdef USE_MPI  
195 /**************************************************************************************************/
196 int AlignmentDB::MPISend(int receiver) {
197         try {
198                 
199                 //send numSeqs - int
200                 MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
201                                                                         
202                 //send longest - int
203                 MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); 
204         
205                 //send templateSequences
206                 for (int i = 0; i < templateSequences.size(); i++) {
207                         templateSequences[i].MPISend(receiver);
208                 }
209                 
210                 //send Database
211                 search->MPISend(receiver);
212                 
213                 return 0;
214         }
215         catch(exception& e) {
216                 m->errorOut(e, "AlignmentDB", "MPISend");
217                 exit(1);
218         }
219 }
220 /**************************************************************************************************/
221 int AlignmentDB::MPIRecv(int sender) {
222         try {
223                 MPI_Status status;
224                 //receive numSeqs - int
225                 MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
226                 
227                 //receive longest - int
228                 MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);
229
230                 //receive templateSequences
231                 templateSequences.resize(numSeqs);
232                 for (int i = 0; i < templateSequences.size(); i++) {
233                         templateSequences[i].MPIRecv(sender);
234                 }
235
236                 //receive Database
237                 search->MPIRecv(sender);
238         
239                 for (int i = 0; i < templateSequences.size(); i++) {
240                         search->addSequence(templateSequences[i]);
241                 }
242                 search->generateDB();
243                 search->setNumSeqs(numSeqs);
244
245                 return 0;
246         }
247         catch(exception& e) {
248                 m->errorOut(e, "AlignmentDB", "MPIRecv");
249                 exit(1);
250         }
251 }
252 #endif
253 /**************************************************************************************************/
254
255
256
257
258
259