1 #ifndef FILTERSEQSCOMMAND_H
\r
2 #define FILTERSEQSCOMMAND_H
\r
5 * filterseqscommand.h
\r
8 * Created by Thomas Ryabin on 5/4/09.
\r
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
\r
13 #include "command.hpp"
\r
14 #include "filters.h"
\r
// FilterSeqsCommand: Command subclass implementing the "filter.seqs" command.
// Per its own description string, it removes columns from alignments based on
// user-defined criteria.
// NOTE(review): this view of the header is missing lines -- no access
// specifiers, no struct wrapper around start/end/linePair, no preprocessor
// guards and no closing brace are visible. Confirm against the complete file
// before changing anything here.
17 class FilterSeqsCommand : public Command {
\r
// Constructor taking one string argument (presumably the command's option
// string -- TODO confirm in the implementation file).
20 FilterSeqsCommand(string);
\r
21 FilterSeqsCommand();
\r
22 ~FilterSeqsCommand() {};
\r
// Declared here, defined elsewhere.
24 vector<string> setParameters();
\r
// Inline accessors returning fixed metadata strings for this command.
25 string getCommandName() { return "filter.seqs"; }
\r
26 string getCommandCategory() { return "Sequence Processing"; }
\r
28 string getHelpString();
\r
29 string getOutputPattern(string);
\r
30 string getCitation() { return "http://www.mothur.org/wiki/Filter.seqs"; }
\r
31 string getDescription() { return "removes columns from alignments based on a criteria defined by the user"; }
\r
// Sends the help text to m->mothurOut(). `m` is not declared in the visible
// lines; presumably it is inherited from Command -- verify.
34 void help() { m->mothurOut(getHelpString()); }
\r
// The next three lines are the body of a `linePair` type: two unsigned 64-bit
// fields and a constructor storing i in start and j in end.
// NOTE(review): the declaration line that opens linePair is not visible here.
38 unsigned long long start;
\r
39 unsigned long long end;
\r
40 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
\r
// Holds raw linePair pointers; ownership/cleanup is not visible in this header.
43 vector<linePair*> lines;
\r
44 vector<int> processIDS;
\r
45 map<int, vector<unsigned long long> > savedPositions;
\r
// Command settings and working state kept as members.
47 string vertical, filter, fasta, hard, outputDir, filterFileName;
\r
48 vector<string> fastafileNames;
\r
49 int alignmentLength, processors;
\r
50 vector<int> bufferSizes;
\r
51 vector<string> outputNames;
\r
// Helper declarations; all are defined outside this header.
58 string createFilter();
\r
59 int filterSequences();
\r
60 int createProcessesCreateFilter(Filters&, string);
\r
61 int createProcessesRunFilter(string, string, string);
\r
// The driver* helpers take a linePair* describing the slice to work on
// (presumably a start/end range within the input file -- TODO confirm).
62 int driverRunFilter(string, string, string, linePair*);
\r
63 int driverCreateFilter(Filters& F, string filename, linePair* line);
\r
// These two signatures use MPI_File, so MPI declarations must be in scope.
// NOTE(review): no MPI include or guard is visible in this view -- confirm
// how these are conditionally compiled.
65 int driverMPIRun(int, int, MPI_File&, MPI_File&, vector<unsigned long long>&);
\r
66 int MPICreateFilter(int, int, Filters&, MPI_File&, vector<unsigned long long>&);
\r
72 /**************************************************************************************************/
\r
73 //custom data structure for threads to use.
\r
74 // This is passed by void pointer so it can be any data type
\r
75 // that can be passed using a single void pointer (LPVOID).
\r
// Members of the filterData argument bundle handed to
// MyCreateFilterThreadFunction through its LPVOID parameter.
// NOTE(review): the line opening this struct and several member declarations
// (the thread function also reads soft, trump, F and m) are not visible in
// this view.
// count is written by the thread function (sequences processed, or 1 on abort).
78 int count, tid, alignmentLength;
\r
// Per the thread function's own comment, `end` is the number of sequences to
// process, not an end offset; `start` is used there as a seek position.
79 unsigned long long start, end;
\r
81 string filename, vertical, hard;
\r
// Constructor: takes every setting by value. Only the alignmentLength
// assignment is visible below; the other assignments are on lines not shown.
86 filterData(string fn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, char tr, string vert, float so, string ha, int t) {
\r
93 alignmentLength = aLength;
\r
100 /**************************************************************************************************/
\r
101 //custom data structure for threads to use.
\r
102 // This is passed by void pointer so it can be any data type
\r
103 // that can be passed using a single void pointer (LPVOID).
\r
// filterRunData: argument bundle handed to MyRunFilterThreadFunction through
// its LPVOID parameter.
// NOTE(review): some member declarations (the thread function also reads
// filename and m) and the closing brace are not visible in this view.
104 struct filterRunData {
\r
// count is written by the thread function (sequences written, or 1 on abort).
105 int count, tid, alignmentLength;
\r
// Per the thread function's own comment, `end` is the number of sequences to
// process, not an end offset; `start` is used there as a seek position.
106 unsigned long long start, end;
\r
// filter is read by the thread function one character per alignment column;
// a '1' means the column is kept.
109 string filter, outputFilename;
\r
// Constructor: only the two assignments below are visible; the remaining
// parameters are presumably stored on lines not shown.
112 filterRunData(string f, string fn, string ofn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, int t) {
\r
114 outputFilename = ofn;
\r
120 alignmentLength = aLength;
\r
125 /**************************************************************************************************/
\r
// Platform guard for the thread entry points below.
// NOTE(review): `defined` applies only to (__APPLE__); the other operands are
// evaluated as macro values, so a macro defined as 0 or as empty behaves
// differently from a defined() test. The functions that follow use Windows
// types (DWORD, WINAPI, LPVOID), so an #else presumably sits on a line that is
// not visible in this view -- confirm.
126 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
\r
// Thread entry point that accumulates filter statistics for one slice of an
// alignment file. lpParam must point to a filterData; results are left in
// pDataArray->F and pDataArray->count.
// Returns 1 when control_pressed is observed; other return paths are on lines
// not visible here.
128 static DWORD WINAPI MyCreateFilterThreadFunction(LPVOID lpParam){
\r
129 filterData* pDataArray;
\r
130 pDataArray = (filterData*)lpParam;
\r
// Configure the Filters object only for the options that were set:
// soft == 0 and trump == '*' are treated as "not set".
134 if (pDataArray->soft != 0) { pDataArray->F.setSoft(pDataArray->soft); }
\r
135 if (pDataArray->trump != '*') { pDataArray->F.setTrump(pDataArray->trump); }
\r
137 pDataArray->F.setLength(pDataArray->alignmentLength);
\r
// initialize() is called only when trump, vertical or soft filtering is active.
139 if(pDataArray->trump != '*' || pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0){
\r
140 pDataArray->F.initialize();
\r
// A non-empty hard string is passed to doHard(); otherwise the filter starts
// as alignmentLength '1' characters.
143 if(pDataArray->hard.compare("") != 0) { pDataArray->F.doHard(pDataArray->hard); }
\r
144 else { pDataArray->F.setFilter(string(pDataArray->alignmentLength, '1')); }
\r
// `in` is declared on a line not visible in this view.
147 pDataArray->m->openInputFile(pDataArray->filename, in);
\r
149 //print header if you are process 0
\r
// start values 0 and 1 take the first branch (its body is not visible here);
// any other start seeks to one byte before it and then calls gobble().
150 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
\r
152 }else { //this accounts for the difference in line endings.
\r
153 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
\r
156 pDataArray->count = 0;
\r
// NOTE(review): `i` is int while `end` is unsigned long long, so the
// comparison converts i to unsigned and i would overflow if end exceeded
// INT_MAX -- confirm callers keep end small.
157 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
\r
// On abort, count is set to 1 (not the number processed) before returning 1.
159 if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; }
\r
161 Sequence current(in); pDataArray->m->gobble(in);
\r
// Records with an empty name are skipped.
163 if (current.getName() != "") {
\r
// A length mismatch is reported and flagged through control_pressed, which the
// check at the top of the loop acts on. NOTE(review): length() is unsigned
// while alignmentLength is int.
164 if (current.getAligned().length() != pDataArray->alignmentLength) { pDataArray->m->mothurOut("Sequences are not all the same length, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
\r
166 if(pDataArray->trump != '*') { pDataArray->F.doTrump(current); }
\r
167 if(pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0) { pDataArray->F.getFreqs(current); }
\r
169 pDataArray->count++;
\r
// Progress output to the screen every 100 loop iterations.
171 if((i) % 100 == 0){ pDataArray->m->mothurOutJustToScreen(toString(i)+"\n"); }
\r
// Print the count when it is not a multiple of 100 (presumably the final
// total after the loop -- the enclosing braces are not visible here).
174 if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n"); }
\r
// The matching `try` is on a line not visible in this view.
181 catch(exception& e) {
\r
182 pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyCreateFilterThreadFunction");
\r
186 /**************************************************************************************************/
\r
// Thread entry point that applies an already-built column filter to one slice
// of an alignment file and writes the filtered sequences in FASTA form.
// lpParam must point to a filterRunData; the number of sequences written is
// left in pDataArray->count.
// Returns 1 when control_pressed is observed; other return paths are on lines
// not visible here.
187 static DWORD WINAPI MyRunFilterThreadFunction(LPVOID lpParam){
\r
188 filterRunData* pDataArray;
\r
189 pDataArray = (filterRunData*)lpParam;
\r
// `out` and `in` are declared on lines not visible in this view.
194 pDataArray->m->openOutputFile(pDataArray->outputFilename, out);
\r
197 pDataArray->m->openInputFile(pDataArray->filename, in);
\r
199 //print header if you are process 0
\r
// start values 0 and 1 take the first branch (its body is not visible here);
// any other start seeks to one byte before it and then calls gobble().
200 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
\r
202 }else { //this accounts for the difference in line endings.
\r
203 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
\r
206 pDataArray->count = 0;
\r
// NOTE(review): `i` is int while `end` is unsigned long long, so the
// comparison converts i to unsigned and i would overflow if end exceeded
// INT_MAX -- confirm callers keep end small.
207 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
\r
// On abort both streams are closed and count is set to 1 before returning 1.
209 if (pDataArray->m->control_pressed) { in.close(); out.close(); pDataArray->count = 1; return 1; }
\r
211 Sequence seq(in); pDataArray->m->gobble(in);
\r
// Records with an empty name are skipped.
212 if (seq.getName() != "") {
\r
213 string align = seq.getAligned();
\r
214 string filterSeq = "";
\r
// Keep column j of the aligned sequence only where filter[j] is '1'.
// NOTE(review): filter[j] and align[j] are indexed up to alignmentLength with
// no visible check that either string is that long -- confirm callers
// guarantee it.
216 for(int j=0;j<pDataArray->alignmentLength;j++){
\r
217 if(pDataArray->filter[j] == '1'){
\r
218 filterSeq += align[j];
\r
// Emit one FASTA record: '>' + name on one line, filtered sequence on the next.
222 out << '>' << seq.getName() << endl << filterSeq << endl;
\r
224 pDataArray->count++;
\r
// Progress output to the screen every 100 loop iterations.
226 if((i) % 100 == 0){ pDataArray->m->mothurOutJustToScreen(toString(i)+"\n"); }
\r
// Print the count when it is not a multiple of 100 (presumably the final
// total after the loop -- the enclosing braces are not visible here).
229 if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n"); }
\r
// The matching `try` is on a line not visible in this view.
237 catch(exception& e) {
\r
238 pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyRunFilterThreadFunction");
\r
242 /**************************************************************************************************/
\r