]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.h
Revert to previous commit
[mothur.git] / screenseqscommand.h
1 #ifndef SCREENSEQSCOMMAND_H
2 #define SCREENSEQSCOMMAND_H
3
4 /*
5  *  screenseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/3/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12 #include "mothur.h"
13 #include "command.hpp"
14 #include "sequence.hpp"
15
16 class ScreenSeqsCommand : public Command {
17         
18 public:
19         ScreenSeqsCommand(string);
20         ScreenSeqsCommand();
21         ~ScreenSeqsCommand() {}
22         
23         vector<string> setParameters();
24         string getCommandName()                 { return "screen.seqs";                         }
25         string getCommandCategory()             { return "Sequence Processing";         }
26         string getHelpString(); 
27         string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; }
28         string getDescription()         { return "enables you to keep sequences that fulfill certain user defined criteria"; }
29
30         int execute(); 
31         void help() { m->mothurOut(getHelpString()); }  
32         
33         
34 private:
35
36         struct linePair {
37                 unsigned long long start;
38                 unsigned long long end;
39                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
40         };
41
42         vector<linePair> lines;
43
44         int screenNameGroupFile(set<string>);
45         int screenGroupFile(set<string>);
46         int screenAlignReport(set<string>);
47         int screenQual(set<string>);
48         int screenTaxonomy(set<string>);
49         
50         int driver(linePair, string, string, string, set<string>&);
51         int createProcesses(string, string, string, set<string>&);
52         
53         #ifdef USE_MPI
54         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, set<string>&);
55         #endif
56
57         bool abort;
58         string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy;
59         int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
60         vector<string> outputNames;
61         vector<string> optimize;
62         map<string, int> nameMap;
63         int readNames();
64         
65         int getSummary(vector<unsigned long long>&);
66         int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
67         int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);        
68 };
69
70 /**************************************************************************************************/
71 //custom data structure for threads to use.
72 // This is passed by void pointer so it can be any data type
73 // that can be passed using a single void pointer (LPVOID).
74 struct sumData {
75         vector<int> startPosition;
76         vector<int> endPosition;
77         vector<int> seqLength; 
78         vector<int> ambigBases; 
79         vector<int> longHomoPolymer; 
80         string filename, namefile; 
81         unsigned long long start;
82         unsigned long long end;
83         int count;
84         MothurOut* m;
85         map<string, int> nameMap;
86         
87         
88         sumData(){}
89         sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
90                 filename = f;
91         namefile = nf;
92                 m = mout;
93                 start = st;
94                 end = en;
95                 nameMap = nam;
96                 count = 0;
97         }
98 };
99 /**************************************************************************************************/
100 //custom data structure for threads to use.
101 // This is passed by void pointer so it can be any data type
102 // that can be passed using a single void pointer (LPVOID).
103 struct sumScreenData {
104     int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
105         unsigned long long start;
106         unsigned long long end;
107         int count;
108         MothurOut* m;
109         string goodFName, badAccnosFName, filename;
110     set<string> badSeqNames;
111         
112         
113         sumScreenData(){}
114         sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
115                 startPos = s;
116                 endPos = e;
117                 minLength = minl;
118         maxLength = maxl;
119                 maxAmbig = a;
120                 maxHomoP = h;
121                 filename = f;
122         goodFName = gf;
123         badAccnosFName = bf;
124                 m = mout;
125                 start = st;
126                 end = en;
127                 count = 0;
128         }
129 };
130
131
132 /**************************************************************************************************/
133 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
134 #else
135 static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){ 
136         sumData* pDataArray;
137         pDataArray = (sumData*)lpParam;
138         
139         try {
140                 ifstream in;
141                 pDataArray->m->openInputFile(pDataArray->filename, in);
142         
143                 //print header if you are process 0
144                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
145                         in.seekg(0);
146                 }else { //this accounts for the difference in line endings. 
147                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
148                 }
149                 
150                 pDataArray->count = pDataArray->end;
151                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
152                         
153                         if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
154                         
155                         Sequence current(in); pDataArray->m->gobble(in); 
156                         
157                         if (current.getName() != "") {
158                                 
159                                 int num = 1;
160                                 if (pDataArray->namefile != "") {
161                                         //make sure this sequence is in the namefile, else error 
162                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
163                                         
164                                         if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
165                                         else { num = it->second; }
166                                 }
167                                 
168                                 //for each sequence this sequence represents
169                                 for (int i = 0; i < num; i++) {
170                                         pDataArray->startPosition.push_back(current.getStartPos());
171                                         pDataArray->endPosition.push_back(current.getEndPos());
172                                         pDataArray->seqLength.push_back(current.getNumBases());
173                                         pDataArray->ambigBases.push_back(current.getAmbigBases());
174                                         pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
175                                 }
176             }
177                 }
178                 
179                 in.close();
180                 
181                 return 0;
182                 
183         }
184         catch(exception& e) {
185                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction");
186                 exit(1);
187         }
188
189
190 /**************************************************************************************************/
191
192 static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ 
193         sumScreenData* pDataArray;
194         pDataArray = (sumScreenData*)lpParam;
195         
196         try {
197         
198         ofstream goodFile;
199                 pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile);
200                 
201                 ofstream badAccnosFile;
202                 pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile);
203                 
204                 ifstream in;
205                 pDataArray->m->openInputFile(pDataArray->filename, in);
206         
207                 //print header if you are process 0
208                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
209                         in.seekg(0);
210                 }else { //this accounts for the difference in line endings. 
211                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
212                 }
213                 
214                 pDataArray->count = pDataArray->end;
215                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
216                         
217                         if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; }
218                         
219                         Sequence currSeq(in); pDataArray->m->gobble(in); 
220                         
221                         if (currSeq.getName() != "") {
222                                 bool goodSeq = 1;               //      innocent until proven guilty
223                                 if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
224                                 if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
225                                 if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
226                                 if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
227                                 if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
228                                 if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
229                                 
230                                 if(goodSeq == 1){
231                                         currSeq.printSequence(goodFile);        
232                                 }
233                                 else{
234                                         badAccnosFile << currSeq.getName() << endl;
235                                         pDataArray->badSeqNames.insert(currSeq.getName());
236                                 }
237     
238                         }               
239             //report progress
240                         if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
241                 }
242                 //report progress
243                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
244                 
245
246                 
247                 in.close();
248         goodFile.close();
249         badAccnosFile.close();
250                 
251                 return 0;
252                 
253         }
254         catch(exception& e) {
255                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction");
256                 exit(1);
257         }
258
259
260 #endif
261
262 /**************************************************************************************************/
263
264
265
266 #endif