]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.h
Merge remote-tracking branch 'origin/master'
[mothur.git] / screenseqscommand.h
1 #ifndef SCREENSEQSCOMMAND_H
2 #define SCREENSEQSCOMMAND_H
3
4 /*
5  *  screenseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/3/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12 #include "mothur.h"
13 #include "command.hpp"
14 #include "sequence.hpp"
15
16 class ScreenSeqsCommand : public Command {
17         
18 public:
19         ScreenSeqsCommand(string);
20         ScreenSeqsCommand();
21         ~ScreenSeqsCommand() {}
22         
23         vector<string> setParameters();
24         string getCommandName()                 { return "screen.seqs";                         }
25         string getCommandCategory()             { return "Sequence Processing";         }
26         
27         string getHelpString(); 
28     string getOutputPattern(string);    
29         string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; }
30         string getDescription()         { return "enables you to keep sequences that fulfill certain user defined criteria"; }
31
32         int execute(); 
33         void help() { m->mothurOut(getHelpString()); }  
34         
35         
36 private:
37
38         struct linePair {
39                 unsigned long long start;
40                 unsigned long long end;
41                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
42         };
43
44         vector<linePair> lines;
45
46         int screenNameGroupFile(set<string>);
47         int screenGroupFile(set<string>);
48     int screenCountFile(set<string>);
49         int screenAlignReport(set<string>);
50         int screenQual(set<string>);
51         int screenTaxonomy(set<string>);
52         
53         int driver(linePair, string, string, string, set<string>&);
54         int createProcesses(string, string, string, set<string>&);
55         
56         #ifdef USE_MPI
57         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, set<string>&);
58         #endif
59
60         bool abort;
61         string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile;
62         int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
63         vector<string> outputNames;
64         vector<string> optimize;
65         map<string, int> nameMap;
66         
67         int getSummary(vector<unsigned long long>&);
68         int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
69         int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);        
70 };
71
72 /**************************************************************************************************/
73 //custom data structure for threads to use.
74 // This is passed by void pointer so it can be any data type
75 // that can be passed using a single void pointer (LPVOID).
76 struct sumData {
77         vector<int> startPosition;
78         vector<int> endPosition;
79         vector<int> seqLength; 
80         vector<int> ambigBases; 
81         vector<int> longHomoPolymer; 
82         string filename, namefile; 
83         unsigned long long start;
84         unsigned long long end;
85         int count;
86         MothurOut* m;
87         map<string, int> nameMap;
88         
89         
90         sumData(){}
91         sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
92                 filename = f;
93         namefile = nf;
94                 m = mout;
95                 start = st;
96                 end = en;
97                 nameMap = nam;
98                 count = 0;
99         }
100 };
101 /**************************************************************************************************/
102 //custom data structure for threads to use.
103 // This is passed by void pointer so it can be any data type
104 // that can be passed using a single void pointer (LPVOID).
105 struct sumScreenData {
106     int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
107         unsigned long long start;
108         unsigned long long end;
109         int count;
110         MothurOut* m;
111         string goodFName, badAccnosFName, filename;
112     set<string> badSeqNames;
113         
114         
115         sumScreenData(){}
116         sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
117                 startPos = s;
118                 endPos = e;
119                 minLength = minl;
120         maxLength = maxl;
121                 maxAmbig = a;
122                 maxHomoP = h;
123                 filename = f;
124         goodFName = gf;
125         badAccnosFName = bf;
126                 m = mout;
127                 start = st;
128                 end = en;
129                 count = 0;
130         }
131 };
132
133
134 /**************************************************************************************************/
135 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
136 #else
137 static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){ 
138         sumData* pDataArray;
139         pDataArray = (sumData*)lpParam;
140         
141         try {
142                 ifstream in;
143                 pDataArray->m->openInputFile(pDataArray->filename, in);
144         
145                 //print header if you are process 0
146                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
147                         in.seekg(0);
148                 }else { //this accounts for the difference in line endings. 
149                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
150                 }
151                 
152                 pDataArray->count = pDataArray->end;
153                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
154                         
155                         if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
156                         
157                         Sequence current(in); pDataArray->m->gobble(in); 
158                         
159                         if (current.getName() != "") {
160                                 
161                                 int num = 1;
162                                 if (pDataArray->namefile != "") {
163                                         //make sure this sequence is in the namefile, else error 
164                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
165                                         
166                                         if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
167                                         else { num = it->second; }
168                                 }
169                                 
170                                 //for each sequence this sequence represents
171                                 for (int i = 0; i < num; i++) {
172                                         pDataArray->startPosition.push_back(current.getStartPos());
173                                         pDataArray->endPosition.push_back(current.getEndPos());
174                                         pDataArray->seqLength.push_back(current.getNumBases());
175                                         pDataArray->ambigBases.push_back(current.getAmbigBases());
176                                         pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
177                                 }
178             }
179                 }
180                 
181                 in.close();
182                 
183                 return 0;
184                 
185         }
186         catch(exception& e) {
187                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction");
188                 exit(1);
189         }
190
191
192 /**************************************************************************************************/
193
194 static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ 
195         sumScreenData* pDataArray;
196         pDataArray = (sumScreenData*)lpParam;
197         
198         try {
199         
200         ofstream goodFile;
201                 pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile);
202                 
203                 ofstream badAccnosFile;
204                 pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile);
205                 
206                 ifstream in;
207                 pDataArray->m->openInputFile(pDataArray->filename, in);
208         
209                 //print header if you are process 0
210                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
211                         in.seekg(0);
212                 }else { //this accounts for the difference in line endings. 
213                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
214                 }
215                 
216                 pDataArray->count = pDataArray->end;
217                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
218                         
219                         if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; }
220                         
221                         Sequence currSeq(in); pDataArray->m->gobble(in); 
222                         
223                         if (currSeq.getName() != "") {
224                                 bool goodSeq = 1;               //      innocent until proven guilty
225                                 if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
226                                 if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
227                                 if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
228                                 if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
229                                 if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
230                                 if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
231                                 
232                                 if(goodSeq == 1){
233                                         currSeq.printSequence(goodFile);        
234                                 }
235                                 else{
236                                         badAccnosFile << currSeq.getName() << endl;
237                                         pDataArray->badSeqNames.insert(currSeq.getName());
238                                 }
239     
240                         }               
241             //report progress
242                         if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
243                 }
244                 //report progress
245                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
246                 
247
248                 
249                 in.close();
250         goodFile.close();
251         badAccnosFile.close();
252                 
253                 return 0;
254                 
255         }
256         catch(exception& e) {
257                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction");
258                 exit(1);
259         }
260
261
262 #endif
263
264 /**************************************************************************************************/
265
266
267
268 #endif