]> git.donarmstrong.com Git - mothur.git/blob - alignmentdb.cpp
added cluster.split command
[mothur.git] / alignmentdb.cpp
1 /*\r
2  *  alignmentdb.cpp\r
3  *  Mothur\r
4  *\r
5  *  Created by westcott on 11/4/09.\r
6  *  Copyright 2009 Schloss Lab. All rights reserved.\r
7  *\r
8  */\r
9 \r
10 #include "alignmentdb.h"\r
11 #include "kmerdb.hpp"\r
12 #include "suffixdb.hpp"\r
13 #include "blastdb.hpp"\r
14 \r
15 \r
16 /**************************************************************************************************/\r
17 AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){            //      This assumes that the template database is in fasta format, may \r
18         try {                                                                                   //      need to alter this in the future?\r
19                 m = MothurOut::getInstance();\r
20                 longest = 0;\r
21                 method = s;\r
22                 bool needToGenerate = true;\r
23                 \r
24                 m->mothurOutEndLine();\r
25                 m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();\r
26                 \r
27                 #ifdef USE_MPI  \r
28                         int pid, processors;\r
29                         vector<long> positions;\r
30                 \r
31                         MPI_Status status; \r
32                         MPI_File inMPI;\r
33                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are\r
34                         MPI_Comm_size(MPI_COMM_WORLD, &processors);\r
35                         int tag = 2001;\r
36         \r
37                         char inFileName[1024];\r
38                         strcpy(inFileName, fastaFileName.c_str());\r
39         \r
40                         MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer\r
41                         \r
42                         if (pid == 0) {\r
43                                 positions = setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs\r
44 \r
45                                 //send file positions to all processes\r
46                                 for(int i = 1; i < processors; i++) { \r
47                                         MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);\r
48                                         MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);\r
49                                 }\r
50                         }else{\r
51                                 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);\r
52                                 positions.resize(numSeqs+1);\r
53                                 MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);\r
54                         }\r
55                 \r
56                         //read file \r
57                         for(int i=0;i<numSeqs;i++){\r
58                                 \r
59                                 if (m->control_pressed) {  templateSequences.clear(); break;  }\r
60                                 \r
61                                 //read next sequence\r
62                                 int length = positions[i+1] - positions[i];\r
63                                 char* buf4 = new char[length];\r
64                         \r
65                                 MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);\r
66                 \r
67                                 string tempBuf = buf4;\r
68                                 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }\r
69                                 delete buf4;\r
70 \r
71                                 istringstream iss (tempBuf,istringstream::in);\r
72                 \r
73                                 Sequence temp(iss);  \r
74                                 if (temp.getName() != "") {\r
75                                         templateSequences.push_back(temp);\r
76                                         //save longest base\r
77                                         if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
78                                 }\r
79                         }\r
80                         \r
81                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case\r
82                         \r
83                         MPI_File_close(&inMPI);\r
84                 \r
85         #else\r
86                 ifstream fastaFile;\r
87                 openInputFile(fastaFileName, fastaFile);\r
88 \r
89                 while (!fastaFile.eof()) {\r
90                         Sequence temp(fastaFile);  gobble(fastaFile);\r
91                         \r
92                         if (m->control_pressed) {  templateSequences.clear(); break;  }\r
93                         \r
94                         if (temp.getName() != "") {\r
95                                 templateSequences.push_back(temp);\r
96                                 //save longest base\r
97                                 if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }\r
98                         }\r
99                 }\r
100                 fastaFile.close();\r
101                 \r
102         #endif\r
103         \r
104                 numSeqs = templateSequences.size();\r
105                 //all of this is elsewhere already!\r
106                 \r
107                 m->mothurOut("DONE.");\r
108                 m->mothurOutEndLine();  cout.flush();\r
109                 \r
110                 //in case you delete the seqs and then ask for them\r
111                 emptySequence = Sequence();\r
112                 emptySequence.setName("no_match");\r
113                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
114                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
115                 \r
116                 \r
117                 string kmerDBName;\r
118                 if(method == "kmer")                    {       \r
119                         search = new KmerDB(fastaFileName, kmerSize);                   \r
120                         \r
121                         #ifdef USE_MPI\r
122                         #else\r
123                                 kmerDBName = fastaFileName.substr(0,fastaFileName.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";\r
124                                 ifstream kmerFileTest(kmerDBName.c_str());\r
125                         \r
126                                 if(kmerFileTest){       needToGenerate = false;         }\r
127                         #endif\r
128                 }\r
129                 else if(method == "suffix")             {       search = new SuffixDB(numSeqs);                                                         }\r
130                 else if(method == "blast")              {       search = new BlastDB(gapOpen, gapExtend, match, misMatch);      }\r
131                 else {\r
132                         m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");\r
133                         m->mothurOutEndLine();\r
134                         search = new KmerDB(fastaFileName, 8);\r
135                 }\r
136                 \r
137                 if (!(m->control_pressed)) {\r
138                         if (needToGenerate) {\r
139                                 //add sequences to search \r
140                                 for (int i = 0; i < templateSequences.size(); i++) {\r
141                                         search->addSequence(templateSequences[i]);\r
142                                         \r
143                                         if (m->control_pressed) {  templateSequences.clear(); break;  }\r
144                                 }\r
145                                 \r
146                                 if (m->control_pressed) {  templateSequences.clear();  }\r
147                                 \r
148                                 search->generateDB();\r
149                                 \r
150                         }else if ((method == "kmer") && (!needToGenerate)) {\r
151                                 ifstream kmerFileTest(kmerDBName.c_str());\r
152                                 search->readKmerDB(kmerFileTest);       \r
153                         }\r
154                         \r
155                         search->setNumSeqs(numSeqs);\r
156                 }\r
157         }\r
158         catch(exception& e) {\r
159                 m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
160                 exit(1);\r
161         }\r
162 }\r
163 /**************************************************************************************************/\r
164 AlignmentDB::AlignmentDB(string s){              \r
165         try {                                                                                   \r
166                 m = MothurOut::getInstance();\r
167                 method = s;\r
168                 \r
169                 if(method == "suffix")          {       search = new SuffixDB();        }\r
170                 else if(method == "blast")      {       search = new BlastDB();         }\r
171                 else                                            {       search = new KmerDB();          }\r
172 \r
173                                 \r
174                 //in case you delete the seqs and then ask for them\r
175                 emptySequence = Sequence();\r
176                 emptySequence.setName("no_match");\r
177                 emptySequence.setUnaligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
178                 emptySequence.setAligned("XXXXXXXXXXXXXXXXXXXXXXXXXXXXX");\r
179                 \r
180         }\r
181         catch(exception& e) {\r
182                 m->errorOut(e, "AlignmentDB", "AlignmentDB");\r
183                 exit(1);\r
184         }\r
185 }\r
186 /**************************************************************************************************/\r
187 AlignmentDB::~AlignmentDB() {  delete search;   }\r
188 /**************************************************************************************************/\r
189 Sequence AlignmentDB::findClosestSequence(Sequence* seq) {\r
190         try{\r
191         \r
192                 vector<int> spot = search->findClosestSequences(seq, 1);\r
193 \r
194                 if (spot.size() != 0)   {               return templateSequences[spot[0]];      }\r
195                 else                                    {               return emptySequence;                           }\r
196                 \r
197         }\r
198         catch(exception& e) {\r
199                 m->errorOut(e, "AlignmentDB", "findClosestSequence");\r
200                 exit(1);\r
201         }\r
202 }\r
203 /**************************************************************************************************/\r
204 \r
205 \r
206 \r
207 \r
208 \r
209 \r