]> git.donarmstrong.com Git - mothur.git/blob - alignmentdb.cpp
1.21.0
[mothur.git] / alignmentdb.cpp
1 /*
2  *  alignmentdb.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/4/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "alignmentdb.h"
11 #include "kmerdb.hpp"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
14 #include "referencedb.h"
15
16
17 /**************************************************************************************************/
18 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){            //      This assumes that the template database is in fasta format, may 
19         try {                                                                                   //      need to alter this in the future?
20                 m = MothurOut::getInstance();
21                 longest = 0;
22                 method = s;
23                 bool needToGenerate = true;
24                 ReferenceDB* rdb = ReferenceDB::getInstance();
25                 
26                 if (fastaFileName == "saved") {
27                         int start = time(NULL);
28                         m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");        m->mothurOutEndLine();
29
30                         for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
31                                 templateSequences.push_back(rdb->referenceSeqs[i]);
32                                 //save longest base
33                                 if (rdb->referenceSeqs[i].getUnaligned().length() >= longest)  { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
34                         }
35                         fastaFileName = rdb->getSavedReference();
36                         
37                         numSeqs = templateSequences.size();
38                         m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  
39
40                 }else {
41                         int start = time(NULL);
42                         m->mothurOutEndLine();
43                         m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
44                         
45                         #ifdef USE_MPI  
46                                 int pid, processors;
47                                 vector<unsigned long int> positions;
48                         
49                                 MPI_Status status; 
50                                 MPI_File inMPI;
51                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
52                                 MPI_Comm_size(MPI_COMM_WORLD, &processors);
53                                 int tag = 2001;
54                 
55                                 char inFileName[1024];
56                                 strcpy(inFileName, fastaFileName.c_str());
57                 
58                                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
59                                 
60                                 if (pid == 0) {
61                                         positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
62
63                                         //send file positions to all processes
64                                         for(int i = 1; i < processors; i++) { 
65                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
66                                                 MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
67                                         }
68                                 }else{
69                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
70                                         positions.resize(numSeqs+1);
71                                         MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
72                                 }
73                         
74                                 //read file 
75                                 for(int i=0;i<numSeqs;i++){
76                                         
77                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
78                                         
79                                         //read next sequence
80                                         int length = positions[i+1] - positions[i];
81                                         char* buf4 = new char[length];
82                                 
83                                         MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
84                         
85                                         string tempBuf = buf4;
86                                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
87                                         delete buf4;
88
89                                         istringstream iss (tempBuf,istringstream::in);
90                         
91                                         Sequence temp(iss);  
92                                         if (temp.getName() != "") {
93                                                 templateSequences.push_back(temp);
94                                                 
95                                                 if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
96                                                 
97                                                 //save longest base
98                                                 if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
99                                         }
100                                 }
101                                 
102                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
103                                 
104                                 MPI_File_close(&inMPI);
105                         
106                 #else
107                         ifstream fastaFile;
108                         m->openInputFile(fastaFileName, fastaFile);
109
110                         while (!fastaFile.eof()) {
111                                 Sequence temp(fastaFile);  m->gobble(fastaFile);
112                                 
113                                 if (m->control_pressed) {  templateSequences.clear(); break;  }
114                                 
115                                 if (temp.getName() != "") {
116                                         templateSequences.push_back(temp);
117                                         
118                                         if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
119                                         
120                                         //save longest base
121                                         if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
122                                 }
123                         }
124                         fastaFile.close();
125                 #endif
126                 
127                         numSeqs = templateSequences.size();
128                         //all of this is elsewhere already!
129                         
130                         m->mothurOut("DONE.");
131                         m->mothurOutEndLine();  cout.flush();
132                         m->mothurOut("It took " + toString(time(NULL) - start) + " to read  " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();  
133
134                 }
135                 
136                 
137                 //in case you delete the seqs and then ask for them
138                 emptySequence = Sequence();
139                 emptySequence.setName("no_match");
140                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
141                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
142                 
143                 
144                 string kmerDBName;
145                 if(method == "kmer")                    {       
146                         search = new KmerDB(fastaFileName, kmerSize);                   
147                         
148                         #ifdef USE_MPI
149                         #else
150                                 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
151                                 ifstream kmerFileTest(kmerDBName.c_str());
152                                 
153                                 if(kmerFileTest){       
154                                         bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
155                                         if (GoodFile) {  needToGenerate = false;        }
156                                 }
157                         #endif
158                 }
159                 else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
160                 else if(method == "blast")              {       search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "");       }
161                 else {
162                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
163                         m->mothurOutEndLine();
164                         search = new KmerDB(fastaFileName, 8);
165                 }
166                 
167                 if (!(m->control_pressed)) {
168                         if (needToGenerate) {
169                                 //add sequences to search 
170                                 for (int i = 0; i < templateSequences.size(); i++) {
171                                         search->addSequence(templateSequences[i]);
172                                         
173                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
174                                 }
175                                 
176                                 if (m->control_pressed) {  templateSequences.clear();  }
177                                 
178                                 search->generateDB();
179                                 
180                         }else if ((method == "kmer") && (!needToGenerate)) {
181                                 ifstream kmerFileTest(kmerDBName.c_str());
182                                 search->readKmerDB(kmerFileTest);       
183                         }
184                 
185                         search->setNumSeqs(numSeqs);
186                 }
187                 
188         }
189         catch(exception& e) {
190                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
191                 exit(1);
192         }
193 }
194 /**************************************************************************************************/
195 AlignmentDB::AlignmentDB(string s){              
196         try {                                                                                   
197                 m = MothurOut::getInstance();
198                 method = s;
199                 
200                 if(method == "suffix")          {       search = new SuffixDB();        }
201                 else if(method == "blast")      {       search = new BlastDB("");               }
202                 else                                            {       search = new KmerDB();          }
203
204                                 
205                 //in case you delete the seqs and then ask for them
206                 emptySequence = Sequence();
207                 emptySequence.setName("no_match");
208                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
209                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
210                 
211         }
212         catch(exception& e) {
213                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
214                 exit(1);
215         }
216 }
217 /**************************************************************************************************/
218 AlignmentDB::~AlignmentDB() {  delete search;   }
219 /**************************************************************************************************/
220 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
221         try{
222         
223                 vector<int> spot = search->findClosestSequences(seq, 1);
224         
225                 if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
226                 else                                    {               return emptySequence;                           }
227                 
228         }
229         catch(exception& e) {
230                 m->errorOut(e, "AlignmentDB", "findClosestSequence");
231                 exit(1);
232         }
233 }
234 /**************************************************************************************************/
235
236
237
238
239
240