]> git.donarmstrong.com Git - mothur.git/blob - alignmentdb.cpp
changed tags on temp blast db to include template name
[mothur.git] / alignmentdb.cpp
1 /*
2  *  alignmentdb.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/4/09.
6  *  Copyright 2009 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "alignmentdb.h"
11 #include "kmerdb.hpp"
12 #include "suffixdb.hpp"
13 #include "blastdb.hpp"
14
15
16 /**************************************************************************************************/
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){            //      This assumes that the template database is in fasta format, may 
18         try {                                                                                   //      need to alter this in the future?
19                 m = MothurOut::getInstance();
20                 longest = 0;
21                 method = s;
22                 bool needToGenerate = true;
23                 
24                 m->mothurOutEndLine();
25                 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
26                 
27                 #ifdef USE_MPI  
28                         int pid, processors;
29                         vector<unsigned long int> positions;
30                 
31                         MPI_Status status; 
32                         MPI_File inMPI;
33                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
34                         MPI_Comm_size(MPI_COMM_WORLD, &processors);
35                         int tag = 2001;
36         
37                         char inFileName[1024];
38                         strcpy(inFileName, fastaFileName.c_str());
39         
40                         MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
41                         
42                         if (pid == 0) {
43                                 positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
44
45                                 //send file positions to all processes
46                                 for(int i = 1; i < processors; i++) { 
47                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
48                                         MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
49                                 }
50                         }else{
51                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
52                                 positions.resize(numSeqs+1);
53                                 MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
54                         }
55                 
56                         //read file 
57                         for(int i=0;i<numSeqs;i++){
58                                 
59                                 if (m->control_pressed) {  templateSequences.clear(); break;  }
60                                 
61                                 //read next sequence
62                                 int length = positions[i+1] - positions[i];
63                                 char* buf4 = new char[length];
64                         
65                                 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
66                 
67                                 string tempBuf = buf4;
68                                 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
69                                 delete buf4;
70
71                                 istringstream iss (tempBuf,istringstream::in);
72                 
73                                 Sequence temp(iss);  
74                                 if (temp.getName() != "") {
75                                         templateSequences.push_back(temp);
76                                         //save longest base
77                                         if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
78                                 }
79                         }
80                         
81                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
82                         
83                         MPI_File_close(&inMPI);
84                 
85         #else
86                 ifstream fastaFile;
87                 m->openInputFile(fastaFileName, fastaFile);
88
89                 while (!fastaFile.eof()) {
90                         Sequence temp(fastaFile);  m->gobble(fastaFile);
91                         
92                         if (m->control_pressed) {  templateSequences.clear(); break;  }
93                         
94                         if (temp.getName() != "") {
95                                 templateSequences.push_back(temp);
96                                 //save longest base
97                                 if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
98                         }
99                 }
100                 fastaFile.close();
101         #endif
102         
103                 numSeqs = templateSequences.size();
104                 //all of this is elsewhere already!
105                 
106                 m->mothurOut("DONE.");
107                 m->mothurOutEndLine();  cout.flush();
108                 
109                 //in case you delete the seqs and then ask for them
110                 emptySequence = Sequence();
111                 emptySequence.setName("no_match");
112                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
113                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
114                 
115                 
116                 string kmerDBName;
117                 if(method == "kmer")                    {       
118                         search = new KmerDB(fastaFileName, kmerSize);                   
119                         
120                         #ifdef USE_MPI
121                         #else
122                                 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
123                                 ifstream kmerFileTest(kmerDBName.c_str());
124                                 
125                                 if(kmerFileTest){       
126                                         bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
127                                         if (GoodFile) {  needToGenerate = false;        }
128                                 }
129                         #endif
130                 }
131                 else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }
132                 else if(method == "blast")              {       search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch);   }
133                 else {
134                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
135                         m->mothurOutEndLine();
136                         search = new KmerDB(fastaFileName, 8);
137                 }
138                 
139                 if (!(m->control_pressed)) {
140                         if (needToGenerate) {
141                                 //add sequences to search 
142                                 for (int i = 0; i < templateSequences.size(); i++) {
143                                         search->addSequence(templateSequences[i]);
144                                         
145                                         if (m->control_pressed) {  templateSequences.clear(); break;  }
146                                 }
147                                 
148                                 if (m->control_pressed) {  templateSequences.clear();  }
149                                 
150                                 search->generateDB();
151                                 
152                         }else if ((method == "kmer") && (!needToGenerate)) {
153                                 ifstream kmerFileTest(kmerDBName.c_str());
154                                 search->readKmerDB(kmerFileTest);       
155                         }
156                 
157                         search->setNumSeqs(numSeqs);
158                 }
159         }
160         catch(exception& e) {
161                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
162                 exit(1);
163         }
164 }
165 /**************************************************************************************************/
166 AlignmentDB::AlignmentDB(string s){              
167         try {                                                                                   
168                 m = MothurOut::getInstance();
169                 method = s;
170                 
171                 if(method == "suffix")          {       search = new SuffixDB();        }
172                 else if(method == "blast")      {       search = new BlastDB();         }
173                 else                                            {       search = new KmerDB();          }
174
175                                 
176                 //in case you delete the seqs and then ask for them
177                 emptySequence = Sequence();
178                 emptySequence.setName("no_match");
179                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
180                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
181                 
182         }
183         catch(exception& e) {
184                 m->errorOut(e, "AlignmentDB", "AlignmentDB");
185                 exit(1);
186         }
187 }
188 /**************************************************************************************************/
189 AlignmentDB::~AlignmentDB() {  delete search;   }
190 /**************************************************************************************************/
191 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
192         try{
193         
194                 vector<int> spot = search->findClosestSequences(seq, 1);
195
196                 if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
197                 else                                    {               return emptySequence;                           }
198                 
199         }
200         catch(exception& e) {
201                 m->errorOut(e, "AlignmentDB", "findClosestSequence");
202                 exit(1);
203         }
204 }
205 /**************************************************************************************************/
206
207
208
209
210
211