1 #ifndef SCREENSEQSCOMMAND_H
2 #define SCREENSEQSCOMMAND_H
8 * Created by Pat Schloss on 6/3/09.
9 * Copyright 2009 Patrick D. Schloss. All rights reserved.
13 #include "command.hpp"
14 #include "sequence.hpp"
16 class ScreenSeqsCommand : public Command {
19 ScreenSeqsCommand(string);
21 ~ScreenSeqsCommand() {}
23 vector<string> setParameters();
24 string getCommandName() { return "screen.seqs"; }
25 string getCommandCategory() { return "Sequence Processing"; }
26 string getOutputFileNameTag(string, string);
27 string getHelpString();
28 string getCitation() { return "http://www.mothur.org/wiki/Screen.seqs"; }
29 string getDescription() { return "enables you to keep sequences that fulfill certain user defined criteria"; }
32 void help() { m->mothurOut(getHelpString()); }
38 unsigned long long start;
39 unsigned long long end;
40 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
43 vector<linePair> lines;
45 int screenNameGroupFile(set<string>);
46 int screenGroupFile(set<string>);
47 int screenAlignReport(set<string>);
48 int screenQual(set<string>);
49 int screenTaxonomy(set<string>);
51 int driver(linePair, string, string, string, set<string>&);
52 int createProcesses(string, string, string, set<string>&);
55 int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&, set<string>&);
59 string fastafile, namefile, groupfile, alignreport, outputDir, qualfile, taxonomy;
60 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, processors, criteria;
61 vector<string> outputNames;
62 vector<string> optimize;
63 map<string, int> nameMap;
65 int getSummary(vector<unsigned long long>&);
66 int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string);
67 int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, linePair);
70 /**************************************************************************************************/
71 //custom data structure for threads to use.
72 // This is passed by void pointer so it can be any data type
73 // that can be passed using a single void pointer (LPVOID).
75 vector<int> startPosition;
76 vector<int> endPosition;
77 vector<int> seqLength;
78 vector<int> ambigBases;
79 vector<int> longHomoPolymer;
80 string filename, namefile;
81 unsigned long long start;
82 unsigned long long end;
85 map<string, int> nameMap;
89 sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map<string, int> nam) {
99 /**************************************************************************************************/
100 //custom data structure for threads to use.
101 // This is passed by void pointer so it can be any data type
102 // that can be passed using a single void pointer (LPVOID).
103 struct sumScreenData {
104 int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength;
105 unsigned long long start;
106 unsigned long long end;
109 string goodFName, badAccnosFName, filename;
110 set<string> badSeqNames;
114 sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf) {
132 /**************************************************************************************************/
133 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
135 static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){
137 pDataArray = (sumData*)lpParam;
141 pDataArray->m->openInputFile(pDataArray->filename, in);
143 //print header if you are process 0
144 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
146 }else { //this accounts for the difference in line endings.
147 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
150 pDataArray->count = pDataArray->end;
151 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
153 if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; }
155 Sequence current(in); pDataArray->m->gobble(in);
157 if (current.getName() != "") {
160 if (pDataArray->namefile != "") {
161 //make sure this sequence is in the namefile, else error
162 map<string, int>::iterator it = pDataArray->nameMap.find(current.getName());
164 if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
165 else { num = it->second; }
168 //for each sequence this sequence represents
169 for (int i = 0; i < num; i++) {
170 pDataArray->startPosition.push_back(current.getStartPos());
171 pDataArray->endPosition.push_back(current.getEndPos());
172 pDataArray->seqLength.push_back(current.getNumBases());
173 pDataArray->ambigBases.push_back(current.getAmbigBases());
174 pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer());
184 catch(exception& e) {
185 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction");
190 /**************************************************************************************************/
192 static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){
193 sumScreenData* pDataArray;
194 pDataArray = (sumScreenData*)lpParam;
199 pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile);
201 ofstream badAccnosFile;
202 pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile);
205 pDataArray->m->openInputFile(pDataArray->filename, in);
207 //print header if you are process 0
208 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
210 }else { //this accounts for the difference in line endings.
211 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
214 pDataArray->count = pDataArray->end;
215 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
217 if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; }
219 Sequence currSeq(in); pDataArray->m->gobble(in);
221 if (currSeq.getName() != "") {
222 bool goodSeq = 1; // innocent until proven guilty
223 if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos()) { goodSeq = 0; }
224 if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos()) { goodSeq = 0; }
225 if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
226 if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
227 if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases()) { goodSeq = 0; }
228 if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases()) { goodSeq = 0; }
231 currSeq.printSequence(goodFile);
234 badAccnosFile << currSeq.getName() << endl;
235 pDataArray->badSeqNames.insert(currSeq.getName());
240 if((i+1) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine(); }
243 if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
249 badAccnosFile.close();
254 catch(exception& e) {
255 pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction");
262 /**************************************************************************************************/