]> git.donarmstrong.com Git - mothur.git/blob - alignmentdb.cpp
1.9
[mothur.git] / alignmentdb.cpp
1 /*\r
2  *  alignmentdb.cpp\r
3  *  Mothur\r
4  *\r
5  *  Created by westcott on 11/4/09.\r
6  *  Copyright 2009 Schloss Lab. All rights reserved.\r
7  *\r
8  */\r
9 \r
10 #include "alignmentdb.h"\r
11 #include "kmerdb.hpp"\r
12 #include "suffixdb.hpp"\r
13 #include "blastdb.hpp"\r
14 \r
15 \r
16 /**************************************************************************************************/\r
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){            //      This assumes that the template database is in fasta format, may \r
18         try {                                                                                   //      need to alter this in the future?\r
19                 m = MothurOut::getInstance();\r
20                 longest = 0;\r
21                 method = s;\r
22                 bool needToGenerate = true;\r
23                 \r
24                 m->mothurOutEndLine();\r
25                 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();\r
26                 \r
27                 #ifdef USE_MPI  \r
28                         int pid;\r
29                         vector<long> positions;\r
30                 \r
31                         MPI_Status status; \r
32                         MPI_File inMPI;\r
33                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
34         \r
35                         char inFileName[1024];\r
36                         strcpy(inFileName, fastaFileName.c_str());\r
37         \r
38                         MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer\r
39                         \r
40                         if (pid == 0) {\r
41                                 positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
42 \r
43                                 //send file positions to all processes\r
44                                 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD);  //send numSeqs\r
45                                 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //send file pos     \r
46                         }else{\r
47                                 MPI_Bcast(&numSeqs, 1, MPI_INT, 0, MPI_COMM_WORLD); //get numSeqs\r
48                                 positions.resize(numSeqs+1);\r
49                                 MPI_Bcast(&positions[0], (numSeqs+1), MPI_LONG, 0, MPI_COMM_WORLD); //get file positions\r
50                         }\r
51                 \r
52                         //read file \r
53                         for(int i=0;i<numSeqs;i++){\r
54                                 \r
55                                 if (m->control_pressed) {  templateSequences.clear(); break;  }\r
56                                 \r
57                                 //read next sequence\r
58                                 int length = positions[i+1] - positions[i];\r
59                                 char* buf4 = new char[length];\r
60                         \r
61                                 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);\r
62                 \r
63                                 string tempBuf = buf4;\r
64                                 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }\r
65                                 delete buf4;\r
66 \r
67                                 istringstream iss (tempBuf,istringstream::in);\r
68                 \r
69                                 Sequence temp(iss);  \r
70                                 if (temp.getName() != "") {\r
71                                         templateSequences.push_back(temp);\r
72                                         //save longest base\r
73                                         if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
74                                 }\r
75                         }\r
76                 \r
77                         MPI_File_close(&inMPI);\r
78                 \r
79         #else\r
80                 ifstream fastaFile;\r
81                 openInputFile(fastaFileName, fastaFile);\r
82 \r
83                 while (!fastaFile.eof()) {\r
84                         Sequence temp(fastaFile);  gobble(fastaFile);\r
85                         \r
86                         if (m->control_pressed) {  templateSequences.clear(); break;  }\r
87                         \r
88                         if (temp.getName() != "") {\r
89                                 templateSequences.push_back(temp);\r
90                                 //save longest base\r
91                                 if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
92                         }\r
93                 }\r
94                 fastaFile.close();\r
95                 \r
96         #endif\r
97         \r
98                 numSeqs = templateSequences.size();\r
99                 //all of this is elsewhere already!\r
100                 \r
101                 m->mothurOut("DONE.");\r
102                 m->mothurOutEndLine();  cout.flush();\r
103                 \r
104                 //in case you delete the seqs and then ask for them\r
105                 emptySequence = Sequence();\r
106                 emptySequence.setName("no_match");\r
107                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
108                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
109                 \r
110                 \r
111                 string kmerDBName;\r
112                 if(method == "kmer")                    {       \r
113                         search = new KmerDB(fastaFileName, kmerSize);                   \r
114                         \r
115                         #ifdef USE_MPI\r
116                         #else\r
117                                 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";\r
118                                 ifstream kmerFileTest(kmerDBName.c_str());\r
119                         \r
120                                 if(kmerFileTest){       needToGenerate = false;         }\r
121                         #endif\r
122                 }\r
123                 else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }\r
124                 else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }\r
125                 else {\r
126                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");\r
127                         m->mothurOutEndLine();\r
128                         search = new KmerDB(fastaFileName, 8);\r
129                 }\r
130                 \r
131                 if (!(m->control_pressed)) {\r
132                         if (needToGenerate) {\r
133                                 //add sequences to search \r
134                                 for (int i = 0; i < templateSequences.size(); i++) {\r
135                                         search->addSequence(templateSequences[i]);\r
136                                         \r
137                                         if (m->control_pressed) {  templateSequences.clear(); break;  }\r
138                                 }\r
139                                 \r
140                                 if (m->control_pressed) {  templateSequences.clear();  }\r
141                                 \r
142                                 search->generateDB();\r
143                                 \r
144                         }else if ((method == "kmer") && (!needToGenerate)) {\r
145                                 ifstream kmerFileTest(kmerDBName.c_str());\r
146                                 search->readKmerDB(kmerFileTest);       \r
147                         }\r
148                         \r
149                         search->setNumSeqs(numSeqs);\r
150                 }\r
151         }\r
152         catch(exception& e) {\r
153                 m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
154                 exit(1);\r
155         }\r
156 }\r
157 /**************************************************************************************************/\r
158 AlignmentDB::AlignmentDB(string s){              \r
159         try {                                                                                   \r
160                 m = MothurOut::getInstance();\r
161                 method = s;\r
162                 \r
163                 if(method == "suffix")          {       search = new SuffixDB();        }\r
164                 else if(method == "blast")      {       search = new BlastDB();         }\r
165                 else                                            {       search = new KmerDB();          }\r
166 \r
167                                 \r
168                 //in case you delete the seqs and then ask for them\r
169                 emptySequence = Sequence();\r
170                 emptySequence.setName("no_match");\r
171                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
172                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
173                 \r
174         }\r
175         catch(exception& e) {\r
176                 m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
177                 exit(1);\r
178         }\r
179 }\r
180 /**************************************************************************************************/\r
181 AlignmentDB::~AlignmentDB() {  delete search;   }\r
182 /**************************************************************************************************/\r
183 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {\r
184         try{\r
185         \r
186                 vector<int> spot = search->findClosestSequences(seq, 1);\r
187 \r
188                 if (spot.size() != 0)   {               return templateSequences[spot[0]];      }\r
189                 else                                    {               return emptySequence;                           }\r
190                 \r
191         }\r
192         catch(exception& e) {\r
193                 m->errorOut(e, "AlignmentDB", "findClosestSequence");\r
194                 exit(1);\r
195         }\r
196 }\r
197 #ifdef USE_MPI  \r
198 /**************************************************************************************************/\r
199 int AlignmentDB::MPISend(int receiver) {\r
200         try {\r
201                 \r
202                 //send numSeqs - int\r
203                 MPI_Send(&numSeqs, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); \r
204                                                                         \r
205                 //send longest - int\r
206                 MPI_Send(&longest, 1, MPI_INT, receiver, 2001, MPI_COMM_WORLD); \r
207         \r
208                 //send templateSequences\r
209                 for (int i = 0; i < templateSequences.size(); i++) {\r
210                         templateSequences[i].MPISend(receiver);\r
211                 }\r
212                 \r
213                 //send Database\r
214                 search->MPISend(receiver);\r
215                 \r
216                 return 0;\r
217         }\r
218         catch(exception& e) {\r
219                 m->errorOut(e, "AlignmentDB", "MPISend");\r
220                 exit(1);\r
221         }\r
222 }\r
223 /**************************************************************************************************/\r
224 int AlignmentDB::MPIRecv(int sender) {\r
225         try {\r
226                 MPI_Status status;\r
227                 //receive numSeqs - int\r
228                 MPI_Recv(&numSeqs, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);\r
229                 \r
230                 //receive longest - int\r
231                 MPI_Recv(&longest, 1, MPI_INT, sender, 2001, MPI_COMM_WORLD, &status);\r
232 \r
233                 //receive templateSequences\r
234                 templateSequences.resize(numSeqs);\r
235                 for (int i = 0; i < templateSequences.size(); i++) {\r
236                         templateSequences[i].MPIRecv(sender);\r
237                 }\r
238 \r
239                 //receive Database\r
240                 search->MPIRecv(sender);\r
241         \r
242                 for (int i = 0; i < templateSequences.size(); i++) {\r
243                         search->addSequence(templateSequences[i]);\r
244                 }\r
245                 search->generateDB();\r
246                 search->setNumSeqs(numSeqs);\r
247 \r
248                 return 0;\r
249         }\r
250         catch(exception& e) {\r
251                 m->errorOut(e, "AlignmentDB", "MPIRecv");\r
252                 exit(1);\r
253         }\r
254 }\r
255 #endif\r
256 /**************************************************************************************************/\r
257 \r
258 \r
259 \r
260 \r
261 \r
262 \r