]> git.donarmstrong.com Git - mothur.git/blob - alignmentdb.cpp
working on make.contigs. fixed trim.seqs filename issue
[mothur.git] / alignmentdb.cpp
1 /*
2  *  alignmentdb.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/4/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "alignmentdb.h"
11 #include "kmerdb.hpp"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
14 #include "referencedb.h"
15
16 /**************************************************************************************************/
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){           //      This assumes that the template database is in fasta format, may 
18         try {                                                                                   //      need to alter this in the future?
19                 m = MothurOut::getInstance();
20                 longest = 0;
21                 method = s;
22                 bool needToGenerate = true;
23                 ReferenceDB* rdb = ReferenceDB::getInstance();
24                 bool silent = false;
25                 threadID = tid;
26                 
27                 if (fastaFileName == "saved-silent") {
28                         fastaFileName = "saved"; silent = true;
29                 }
30                 
31                 if (fastaFileName == "saved") {
32                         int start = time(NULL);
33                         
34                         if (!silent) { m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
35
36                         for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
37                                 templateSequences.push_back(rdb->referenceSeqs[i]);
38                                 //save longest base
39                                 if (rdb->referenceSeqs[i].getUnaligned().length() >= longest)  { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
40                         }
41                         fastaFileName = rdb->getSavedReference();
42                         
43                         numSeqs = templateSequences.size();
44                         if (!silent) { m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  }
45             
46                 }else {
47                         int start = time(NULL);
48                         m->mothurOutEndLine();
49                         m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
50                         bool aligned = false;
51             int tempLength = 0;
52             
53                         #ifdef USE_MPI  
54                                 int pid, processors;
55                                 vector<unsigned long long> positions;
56                         
57                                 MPI_Status status; 
58                                 MPI_File inMPI;
59                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
60                                 MPI_Comm_size(MPI_COMM_WORLD, &processors);
61                                 int tag = 2001;
62                 
63                                 char inFileName[1024];
64                                 strcpy(inFileName, fastaFileName.c_str());
65                 
66                                 MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
67                                 
68                                 if (pid == 0) {
69                                         positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
70
71                                         //send file positions to all processes
72                                         for(int i = 1; i < processors; i++) { 
73                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
74                                                 MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
75                                         }
76                                 }else{
77                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
78                                         positions.resize(numSeqs+1);
79                                         MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
80                                 }
81                         
82                                 //read file 
83                                 for(int i=0;i<numSeqs;i++){
84                                         
85                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
86                                         
87                                         //read next sequence
88                                         int length = positions[i+1] - positions[i];
89                                         char* buf4 = new char[length];
90                                 
91                                         MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
92                         
93                                         string tempBuf = buf4;
94                                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
95                                         delete buf4;
96
97                                         istringstream iss (tempBuf,istringstream::in);
98                         
99                                         Sequence temp(iss);  
100                                         if (temp.getName() != "") {
101                                                 templateSequences.push_back(temp);
102                                                 
103                                                 if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
104                                                 
105                                                 //save longest base
106                                                 if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
107                         if (tempLength != 0) {
108                             if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
109                         }else { tempLength = temp.getAligned().length(); }
110                                         }
111                                 }
112                                 
113                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
114                                 
115                                 MPI_File_close(&inMPI);
116                         
117                 #else
118                         ifstream fastaFile;
119                         m->openInputFile(fastaFileName, fastaFile);
120
121                         while (!fastaFile.eof()) {
122                                 Sequence temp(fastaFile);  m->gobble(fastaFile);
123                                 
124                                 if (m->control_pressed) {  templateSequences.clear(); break;  }
125                                 
126                                 if (temp.getName() != "") {
127                                         templateSequences.push_back(temp);
128                                         
129                                         if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
130                                         
131                                         //save longest base
132                                         if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
133                     
134                     if (tempLength != 0) {
135                         if (tempLength != temp.getAligned().length()) { m->mothurOut("[ERROR]: template is not aligned, aborting.\n"); m->control_pressed=true; }
136                     }else { tempLength = temp.getAligned().length(); }
137                                 }
138                         }
139                         fastaFile.close();
140                 #endif
141                 
142                         numSeqs = templateSequences.size();
143                         //all of this is elsewhere already!
144                         
145                         m->mothurOut("DONE.");
146                         m->mothurOutEndLine();  cout.flush();
147                         m->mothurOut("It took " + toString(time(NULL) - start) + " to read  " + toString(templateSequences.size()) + " sequences."); m->mothurOutEndLine();  
148
149                 }
150                 
151                 
152                 //in case you delete the seqs and then ask for them
153                 emptySequence = Sequence();
154                 emptySequence.setName("no_match");
155                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
156                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
157                 
158                 
159                 string kmerDBName;
160                 if(method == "kmer")                    {       
161                         search = new KmerDB(fastaFileName, kmerSize);                   
162                         
163                         #ifdef USE_MPI
164                         #else
165                                 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
166                                 ifstream kmerFileTest(kmerDBName.c_str());
167                                 
168                                 if(kmerFileTest){       
169                                         bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
170                                         if (GoodFile) {  needToGenerate = false;        }
171                                 }
172                         #endif
173                 }
174                 else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
175                 else if(method == "blast")              {       search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID);     }
176                 else {
177                         method = "kmer";
178                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
179                         m->mothurOutEndLine();
180                         search = new KmerDB(fastaFileName, 8);
181                 }
182                 
183                 if (!(m->control_pressed)) {
184                         if (needToGenerate) {
185                                 //add sequences to search 
186                                 for (int i = 0; i < templateSequences.size(); i++) {
187                                         search->addSequence(templateSequences[i]);
188                                         
189                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
190                                 }
191                                 
192                                 if (m->control_pressed) {  templateSequences.clear();  }
193                                 
194                                 search->generateDB();
195                                 
196                         }else if ((method == "kmer") && (!needToGenerate)) {
197                                 ifstream kmerFileTest(kmerDBName.c_str());
198                                 search->readKmerDB(kmerFileTest);       
199                         }
200                 
201                         search->setNumSeqs(numSeqs);
202                 }
203                 
204         }
205         catch(exception& e) {
206                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
207                 exit(1);
208         }
209 }
210 /**************************************************************************************************/
211 AlignmentDB::AlignmentDB(string s){              
212         try {                                                                                   
213                 m = MothurOut::getInstance();
214                 method = s;
215                 
216                 if(method == "suffix")          {       search = new SuffixDB();        }
217                 else if(method == "blast")      {       search = new BlastDB("", 0);            }
218                 else                                            {       search = new KmerDB();          }
219
220                                 
221                 //in case you delete the seqs and then ask for them
222                 emptySequence = Sequence();
223                 emptySequence.setName("no_match");
224                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
225                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
226                 
227         }
228         catch(exception& e) {
229                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
230                 exit(1);
231         }
232 }
233 /**************************************************************************************************/
234 AlignmentDB::~AlignmentDB() {  delete search;   }
235 /**************************************************************************************************/
236 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
237         try{
238         
239                 vector<int> spot = search->findClosestSequences(seq, 1);
240         
241                 if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
242                 else                                    {               return emptySequence;                           }
243                 
244         }
245         catch(exception& e) {
246                 m->errorOut(e, "AlignmentDB", "findClosestSequence");
247                 exit(1);
248         }
249 }
250 /**************************************************************************************************/
251
252
253
254
255
256