]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.h
paralellized screen.seqs for windows.
[mothur.git] / screenseqscommand.h
1 #ifndef SCREENSEQSCOMMAND_H
2 #define SCREENSEQSCOMMAND_H
3
4 /*
5  *  screenseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/3/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12 #include "mothur.h"
13 #include "command.hpp"
14 #include "sequence.hpp"
15
16 class ScreenSeqsCommand : public Command {
17         
18 public:
19         ScreenSeqsCommand(string);
20         ScreenSeqsCommand();
21         ~ScreenSeqsCommand() {}
22         
23         vector<string> setParameters();
24         string getCommandName()                 { return "screen.seqs";                         }
25         string getCommandCategory()             { return "Sequence Processing";         }
26         string getHelpString(); 
27         string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; }
28         string getDescription()         { return "enables you to keep sequences that fulfill certain user defined criteria"; }
29
30         int execute(); 
31         void help() { m->mothurOut(getHelpString()); }  
32         
33         
34 private:
35
36         struct linePair {
37                 unsigned long long start;
38                 unsigned long long end;
39                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
40         };
41
42         vector<linePair> lines;
43
44         int screenNameGroupFile(set<string>);
45         int screenGroupFile(set<string>);
46         int screenAlignReport(set<string>);
47         int screenQual(set<string>);
48         int screenTaxonomy(set<string>);
49         
50         int driver(linePair, string, string, string, set<string>&);
51         int createProcesses(string, string, string, set<string>&);
52         
53         #ifdef USE_MPI
54         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, set<string>&);
55         #endif
56
57         bool abort;
58         string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy;
59         int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
60         vector<string> outputNames;
61         vector<string> optimize;
62         map<string, int> nameMap;
63         int readNames();
64         
65         int getSummary(vector<unsigned long long>&);
66         int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
67         int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);        
68 };
69
70 /**************************************************************************************************/
71 //custom data structure for threads to use.
72 // This is passed by void pointer so it can be any data type
73 // that can be passed using a single void pointer (LPVOID).
74 struct sumData {
75         vector<int> startPosition;
76         vector<int> endPosition;
77         vector<int> seqLength; 
78         vector<int> ambigBases; 
79         vector<int> longHomoPolymer; 
80         string filename, namefile; 
81         unsigned long long start;
82         unsigned long long end;
83         int count;
84         MothurOut* m;
85         map<string, int> nameMap;
86         
87         
88         sumData(){}
89         sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
90                 filename = f;
91         namefile = nf;
92                 m = mout;
93                 start = st;
94                 end = en;
95                 nameMap = nam;
96                 count = 0;
97         }
98 };
99 /**************************************************************************************************/
100 //custom data structure for threads to use.
101 // This is passed by void pointer so it can be any data type
102 // that can be passed using a single void pointer (LPVOID).
103 struct sumScreenData {
104     int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
105         unsigned long long start;
106         unsigned long long end;
107         int count;
108         MothurOut* m;
109         string goodFName, badAccnosFName, filename;
110     set<string>* badSeqNames;
111         
112         
113         sumScreenData(){}
114         sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf, set<string>* bn) {
115                 startPos = s;
116                 endPos = e;
117                 minLength = minl;
118         maxLength = maxl;
119                 maxAmbig = a;
120                 maxHomoP = h;
121                 filename = f;
122         goodFName = gf;
123         badAccnosFName = bf;
124                 m = mout;
125                 start = st;
126                 end = en;
127                 badSeqNames = bn;
128                 count = 0;
129         }
130 };
131
132
133 /**************************************************************************************************/
134 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
135 #else
136 static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){ 
137         sumData* pDataArray;
138         pDataArray = (sumData*)lpParam;
139         
140         try {
141                 ifstream in;
142                 pDataArray->m->openInputFile(pDataArray->filename, in);
143         
144                 //print header if you are process 0
145                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
146                         in.seekg(0);
147                 }else { //this accounts for the difference in line endings. 
148                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
149                 }
150                 
151                 pDataArray->count = pDataArray->end;
152                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
153                         
154                         if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
155                         
156                         Sequence current(in); pDataArray->m->gobble(in); 
157                         
158                         if (current.getName() != "") {
159                                 
160                                 int num = 1;
161                                 if (pDataArray->namefile != "") {
162                                         //make sure this sequence is in the namefile, else error 
163                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
164                                         
165                                         if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
166                                         else { num = it->second; }
167                                 }
168                                 
169                                 //for each sequence this sequence represents
170                                 for (int i = 0; i < num; i++) {
171                                         pDataArray->startPosition.push_back(current.getStartPos());
172                                         pDataArray->endPosition.push_back(current.getEndPos());
173                                         pDataArray->seqLength.push_back(current.getNumBases());
174                                         pDataArray->ambigBases.push_back(current.getAmbigBases());
175                                         pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
176                                 }
177             }
178                 }
179                 
180                 in.close();
181                 
182                 return 0;
183                 
184         }
185         catch(exception& e) {
186                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction");
187                 exit(1);
188         }
189
190
191 /**************************************************************************************************/
192
193 static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ 
194         sumScreenData* pDataArray;
195         pDataArray = (sumScreenData*)lpParam;
196         
197         try {
198         
199         ofstream goodFile;
200                 pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile);
201                 
202                 ofstream badAccnosFile;
203                 pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile);
204                 
205                 ifstream in;
206                 pDataArray->m->openInputFile(pDataArray->filename, in);
207         
208                 //print header if you are process 0
209                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
210                         in.seekg(0);
211                 }else { //this accounts for the difference in line endings. 
212                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
213                 }
214                 
215                 pDataArray->count = pDataArray->end;
216                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
217                         
218                         if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; }
219                         
220                         Sequence currSeq(in); pDataArray->m->gobble(in); 
221                         
222                         if (currSeq.getName() != "") {
223                                 bool goodSeq = 1;               //      innocent until proven guilty
224                                 if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
225                                 if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
226                                 if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
227                                 if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
228                                 if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
229                                 if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
230                                 
231                                 if(goodSeq == 1){
232                                         currSeq.printSequence(goodFile);        
233                                 }
234                                 else{
235                                         badAccnosFile << currSeq.getName() << endl;
236                                         pDataArray->badSeqNames->insert(currSeq.getName());
237                                 }
238     
239                         }               
240             //report progress
241                         if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
242                 }
243                 //report progress
244                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
245                 
246
247                 
248                 in.close();
249         goodFile.close();
250         badAccnosFile.close();
251                 
252                 return 0;
253                 
254         }
255         catch(exception& e) {
256                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction");
257                 exit(1);
258         }
259
260
261 #endif
262
263 /**************************************************************************************************/
264
265
266
267 #endif