]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.h
Merge remote-tracking branch 'mothur/master'
[mothur.git] / screenseqscommand.h
1 #ifndef SCREENSEQSCOMMAND_H
2 #define SCREENSEQSCOMMAND_H
3
4 /*
5  *  screenseqscommand.h
6  *  Mothur
7  *
8  *  Created by Pat Schloss on 6/3/09.
9  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
10  *
11  */
12 #include "mothur.h"
13 #include "command.hpp"
14 #include "sequence.hpp"
15
16 class ScreenSeqsCommand : public Command {
17         
18 public:
19         ScreenSeqsCommand(string);
20         ScreenSeqsCommand();
21         ~ScreenSeqsCommand() {}
22         
23         vector<string> setParameters();
24         string getCommandName()                 { return "screen.seqs";                         }
25         string getCommandCategory()             { return "Sequence Processing";         }
26         string getOutputFileNameTag(string, string);
27         string getHelpString(); 
28         string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; }
29         string getDescription()         { return "enables you to keep sequences that fulfill certain user defined criteria"; }
30
31         int execute(); 
32         void help() { m->mothurOut(getHelpString()); }  
33         
34         
35 private:
36
37         struct linePair {
38                 unsigned long long start;
39                 unsigned long long end;
40                 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
41         };
42
43         vector<linePair> lines;
44
45         int screenNameGroupFile(set<string>);
46         int screenGroupFile(set<string>);
47     int screenCountFile(set<string>);
48         int screenAlignReport(set<string>);
49         int screenQual(set<string>);
50         int screenTaxonomy(set<string>);
51         
52         int driver(linePair, string, string, string, set<string>&);
53         int createProcesses(string, string, string, set<string>&);
54         
55         #ifdef USE_MPI
56         int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, set<string>&);
57         #endif
58
59         bool abort;
60         string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy, countfile;
61         int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
62         vector<string> outputNames;
63         vector<string> optimize;
64         map<string, int> nameMap;
65         
66         int getSummary(vector<unsigned long long>&);
67         int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
68         int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);        
69 };
70
71 /**************************************************************************************************/
72 //custom data structure for threads to use.
73 // This is passed by void pointer so it can be any data type
74 // that can be passed using a single void pointer (LPVOID).
75 struct sumData {
76         vector<int> startPosition;
77         vector<int> endPosition;
78         vector<int> seqLength; 
79         vector<int> ambigBases; 
80         vector<int> longHomoPolymer; 
81         string filename, namefile; 
82         unsigned long long start;
83         unsigned long long end;
84         int count;
85         MothurOut* m;
86         map<string, int> nameMap;
87         
88         
89         sumData(){}
90         sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
91                 filename = f;
92         namefile = nf;
93                 m = mout;
94                 start = st;
95                 end = en;
96                 nameMap = nam;
97                 count = 0;
98         }
99 };
100 /**************************************************************************************************/
101 //custom data structure for threads to use.
102 // This is passed by void pointer so it can be any data type
103 // that can be passed using a single void pointer (LPVOID).
104 struct sumScreenData {
105     int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
106         unsigned long long start;
107         unsigned long long end;
108         int count;
109         MothurOut* m;
110         string goodFName, badAccnosFName, filename;
111     set<string> badSeqNames;
112         
113         
114         sumScreenData(){}
115         sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
116                 startPos = s;
117                 endPos = e;
118                 minLength = minl;
119         maxLength = maxl;
120                 maxAmbig = a;
121                 maxHomoP = h;
122                 filename = f;
123         goodFName = gf;
124         badAccnosFName = bf;
125                 m = mout;
126                 start = st;
127                 end = en;
128                 count = 0;
129         }
130 };
131
132
133 /**************************************************************************************************/
134 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
135 #else
136 static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){ 
137         sumData* pDataArray;
138         pDataArray = (sumData*)lpParam;
139         
140         try {
141                 ifstream in;
142                 pDataArray->m->openInputFile(pDataArray->filename, in);
143         
144                 //print header if you are process 0
145                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
146                         in.seekg(0);
147                 }else { //this accounts for the difference in line endings. 
148                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
149                 }
150                 
151                 pDataArray->count = pDataArray->end;
152                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
153                         
154                         if (pDataArray->m->control_pressed) { in.close();  pDataArray->count = 1; return 1; }
155                         
156                         Sequence current(in); pDataArray->m->gobble(in); 
157                         
158                         if (current.getName() != "") {
159                                 
160                                 int num = 1;
161                                 if (pDataArray->namefile != "") {
162                                         //make sure this sequence is in the namefile, else error 
163                                         map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
164                                         
165                                         if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
166                                         else { num = it->second; }
167                                 }
168                                 
169                                 //for each sequence this sequence represents
170                                 for (int i = 0; i < num; i++) {
171                                         pDataArray->startPosition.push_back(current.getStartPos());
172                                         pDataArray->endPosition.push_back(current.getEndPos());
173                                         pDataArray->seqLength.push_back(current.getNumBases());
174                                         pDataArray->ambigBases.push_back(current.getAmbigBases());
175                                         pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
176                                 }
177             }
178                 }
179                 
180                 in.close();
181                 
182                 return 0;
183                 
184         }
185         catch(exception& e) {
186                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction");
187                 exit(1);
188         }
189
190
191 /**************************************************************************************************/
192
193 static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ 
194         sumScreenData* pDataArray;
195         pDataArray = (sumScreenData*)lpParam;
196         
197         try {
198         
199         ofstream goodFile;
200                 pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile);
201                 
202                 ofstream badAccnosFile;
203                 pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile);
204                 
205                 ifstream in;
206                 pDataArray->m->openInputFile(pDataArray->filename, in);
207         
208                 //print header if you are process 0
209                 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
210                         in.seekg(0);
211                 }else { //this accounts for the difference in line endings. 
212                         in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); 
213                 }
214                 
215                 pDataArray->count = pDataArray->end;
216                 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
217                         
218                         if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; }
219                         
220                         Sequence currSeq(in); pDataArray->m->gobble(in); 
221                         
222                         if (currSeq.getName() != "") {
223                                 bool goodSeq = 1;               //      innocent until proven guilty
224                                 if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
225                                 if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
226                                 if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
227                                 if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
228                                 if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
229                                 if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
230                                 
231                                 if(goodSeq == 1){
232                                         currSeq.printSequence(goodFile);        
233                                 }
234                                 else{
235                                         badAccnosFile << currSeq.getName() << endl;
236                                         pDataArray->badSeqNames.insert(currSeq.getName());
237                                 }
238     
239                         }               
240             //report progress
241                         if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
242                 }
243                 //report progress
244                 if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();             }
245                 
246
247                 
248                 in.close();
249         goodFile.close();
250         badAccnosFile.close();
251                 
252                 return 0;
253                 
254         }
255         catch(exception& e) {
256                 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction");
257                 exit(1);
258         }
259
260
261 #endif
262
263 /**************************************************************************************************/
264
265
266
267 #endif