]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.h
changed reading of name file to use buffered reads. note the splitAtWhiteSpace functi...
[mothur.git] / screenseqscommand.h
1 #ifndef SCREENSEQSCOMMAND_H
2 #define SCREENSEQSCOMMAND_H
3
4 /*
5  *  screenseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/3/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12 #include "mothur.h"
13 #include "command.hpp"
14 #include "sequence.hpp"
15
16 class ScreenSeqsCommand : public Command {
17         
18 public:
19         ScreenSeqsCommand(string);
20         ScreenSeqsCommand();
21         ~ScreenSeqsCommand() {}
22         
23         vector<string> setParameters();
24         string getCommandName()                 { return "screen.seqs";                         }
25         string getCommandCategory()             { return "Sequence Processing";         }
26         string getHelpString(); 
27         string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; }
28         string getDescription()         { return "enables you to keep sequences that fulfill certain user defined criteria"; }
29
30         int execute(); 
31         void help() { m->mothurOut(getHelpString()); }  
32         
33         
34 private:
35
36         struct linePair {
37                 unsigned long long start;
38                 unsigned long long end;
39                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
40         };
41
42         vector<linePair> lines;
43
44         int screenNameGroupFile(set<string>);
45         int screenGroupFile(set<string>);
46         int screenAlignReport(set<string>);
47         int screenQual(set<string>);
48         int screenTaxonomy(set<string>);
49         
50         int driver(linePair, string, string, string, set<string>&);
51         int createProcesses(string, string, string, set<string>&);
52         
53         #ifdef USE_MPI
54         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, set<string>&);
55         #endif
56
57         bool abort;
58         string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy;
59         int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
60         vector<string> outputNames;
61         vector<string> optimize;
62         map<string, int> nameMap;
63         
64         int getSummary(vector<unsigned long long>&);
65         int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
66         int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);        
67 };
68
69 /**************************************************************************************************/
70 //custom data structure for threads to use.
71 // This is passed by void pointer so it can be any data type
72 // that can be passed using a single void pointer (LPVOID).
73 struct sumData {
74         vector<int> startPosition;
75         vector<int> endPosition;
76         vector<int> seqLength; 
77         vector<int> ambigBases; 
78         vector<int> longHomoPolymer; 
79         string filename, namefile; 
80         unsigned long long start;
81         unsigned long long end;
82         int count;
83         MothurOut* m;
84         map<string, int> nameMap;
85         
86         
87         sumData(){}
88         sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
89                 filename = f;
90         namefile = nf;
91                 m = mout;
92                 start = st;
93                 end = en;
94                 nameMap = nam;
95                 count = 0;
96         }
97 };
98 /**************************************************************************************************/
99 //custom data structure for threads to use.
100 // This is passed by void pointer so it can be any data type
101 // that can be passed using a single void pointer (LPVOID).
102 struct sumScreenData {
103     int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
104         unsigned long long start;
105         unsigned long long end;
106         int count;
107         MothurOut* m;
108         string goodFName, badAccnosFName, filename;
109     set<string> badSeqNames;
110         
111         
112         sumScreenData(){}
113         sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
114                 startPos = s;
115                 endPos = e;
116                 minLength = minl;
117         maxLength = maxl;
118                 maxAmbig = a;
119                 maxHomoP = h;
120                 filename = f;
121         goodFName = gf;
122         badAccnosFName = bf;
123                 m = mout;
124                 start = st;
125                 end = en;
126                 count = 0;
127         }
128 };
129
130
131 /**************************************************************************************************/
132 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
133 #else
134 static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){ 
135         sumData* pDataArray;
136         pDataArray = (sumData*)lpParam;
137         
138         try {
139                 ifstream in;
140                 pDataArray->m->openInputFile(pDataArray->filename, in);
141         
142                 //print header if you are process 0
143                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
144                         in.seekg(0);
145                 }else { //this accounts for the difference in line endings. 
146                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
147                 }
148                 
149                 pDataArray->count = pDataArray->end;
150                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
151                         
152                         if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
153                         
154                         Sequence current(in); pDataArray->m->gobble(in); 
155                         
156                         if (current.getName() != "") {
157                                 
158                                 int num = 1;
159                                 if (pDataArray->namefile != "") {
160                                         //make sure this sequence is in the namefile, else error 
161                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
162                                         
163                                         if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
164                                         else { num = it->second; }
165                                 }
166                                 
167                                 //for each sequence this sequence represents
168                                 for (int i = 0; i < num; i++) {
169                                         pDataArray->startPosition.push_back(current.getStartPos());
170                                         pDataArray->endPosition.push_back(current.getEndPos());
171                                         pDataArray->seqLength.push_back(current.getNumBases());
172                                         pDataArray->ambigBases.push_back(current.getAmbigBases());
173                                         pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
174                                 }
175             }
176                 }
177                 
178                 in.close();
179                 
180                 return 0;
181                 
182         }
183         catch(exception& e) {
184                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction");
185                 exit(1);
186         }
187
188
189 /**************************************************************************************************/
190
191 static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ 
192         sumScreenData* pDataArray;
193         pDataArray = (sumScreenData*)lpParam;
194         
195         try {
196         
197         ofstream goodFile;
198                 pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile);
199                 
200                 ofstream badAccnosFile;
201                 pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile);
202                 
203                 ifstream in;
204                 pDataArray->m->openInputFile(pDataArray->filename, in);
205         
206                 //print header if you are process 0
207                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
208                         in.seekg(0);
209                 }else { //this accounts for the difference in line endings. 
210                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
211                 }
212                 
213                 pDataArray->count = pDataArray->end;
214                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
215                         
216                         if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; }
217                         
218                         Sequence currSeq(in); pDataArray->m->gobble(in); 
219                         
220                         if (currSeq.getName() != "") {
221                                 bool goodSeq = 1;               //      innocent until proven guilty
222                                 if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
223                                 if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
224                                 if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
225                                 if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
226                                 if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
227                                 if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
228                                 
229                                 if(goodSeq == 1){
230                                         currSeq.printSequence(goodFile);        
231                                 }
232                                 else{
233                                         badAccnosFile << currSeq.getName() << endl;
234                                         pDataArray->badSeqNames.insert(currSeq.getName());
235                                 }
236     
237                         }               
238             //report progress
239                         if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
240                 }
241                 //report progress
242                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
243                 
244
245                 
246                 in.close();
247         goodFile.close();
248         badAccnosFile.close();
249                 
250                 return 0;
251                 
252         }
253         catch(exception& e) {
254                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction");
255                 exit(1);
256         }
257
258
259 #endif
260
261 /**************************************************************************************************/
262
263
264
265 #endif