1 #ifndef FILTERSEQSCOMMAND_H
\r
2 #define FILTERSEQSCOMMAND_H
\r
5 * filterseqscommand.h
\r
8 * Created by Thomas Ryabin on 5/4/09.
\r
9 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
\r
13 #include "command.hpp"
\r
14 #include "filters.h"
\r
// Command subclass that reports its name as "filter.seqs" in the
// "Sequence Processing" category. Per getDescription(), it removes columns
// from alignments based on criteria supplied by the user.
17 class FilterSeqsCommand : public Command {
\r
// Two constructors: one taking a single string, and a default one.
// NOTE(review): presumably the string is the user's option string - confirm in the .cpp.
20 FilterSeqsCommand(string);
\r
21 FilterSeqsCommand();
\r
// Destructor has an empty body.
22 ~FilterSeqsCommand() {};
\r
// Declared here, defined elsewhere; returns a vector of strings.
24 vector<string> setParameters();
\r
// Fixed identification strings returned inline.
25 string getCommandName() { return "filter.seqs"; }
\r
26 string getCommandCategory() { return "Sequence Processing"; }
\r
// Declared here, defined elsewhere; its result is what help() prints.
27 string getHelpString();
\r
28 string getCitation() { return "http://www.mothur.org/wiki/Filter.seqs"; }
\r
29 string getDescription() { return "removes columns from alignments based on a criteria defined by the user"; }
\r
// Writes the help string through the m->mothurOut() logger.
32 void help() { m->mothurOut(getHelpString()); }
\r
// Pair of unsigned long long values named start and end.
// NOTE(review): other declarations in this header pass it around as linePair*
// (the `lines` vector and the driver* functions); presumably it marks the part
// of an input file one worker handles - confirm against the .cpp.
36 unsigned long long start;
\r
37 unsigned long long end;
\r
// Constructor: i initialises start, j initialises end.
38 linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
\r
// Data members of the command.
// Holds raw linePair pointers.
// NOTE(review): who allocates and frees them is not visible in this header.
41 vector<linePair*> lines;
\r
// NOTE(review): presumably ids of spawned worker processes - confirm in the .cpp.
42 vector<int> processIDS;
\r
// Maps an int key to a list of unsigned long long values.
// NOTE(review): presumably per-file saved file positions - confirm.
43 map<int, vector<unsigned long long> > savedPositions;
\r
// String-valued settings and file names.
// NOTE(review): presumably filled from user parameters - confirm.
45 string vertical, filter, fasta, hard, outputDir, filterFileName;
\r
46 vector<string> fastafileNames;
\r
47 int alignmentLength, processors;
\r
48 vector<int> bufferSizes;
\r
49 vector<string> outputNames;
\r
// Helper declarations; all are defined elsewhere.
// createFilter returns a string, filterSequences returns an int.
56 string createFilter();
\r
57 int filterSequences();
\r
// createProcesses* / driver* pairs, one pair that takes a Filters& and one
// that takes three strings. The driver* forms also take a linePair*.
// NOTE(review): presumably createProcesses* fans the work out and driver*
// handles one linePair's portion - confirm in the .cpp.
58 int createProcessesCreateFilter(Filters&, string);
\r
59 int createProcessesRunFilter(string, string, string);
\r
60 int driverRunFilter(string, string, string, linePair*);
\r
61 int driverCreateFilter(Filters& F, string filename, linePair* line);
\r
// Variants that work on MPI_File handles plus a vector of unsigned long long.
// NOTE(review): any preprocessor guard around these two is not visible here.
63 int driverMPIRun(int, int, MPI_File&, MPI_File&, vector<unsigned long long>&);
\r
64 int MPICreateFilter(int, int, Filters&, MPI_File&, vector<unsigned long long>&);
\r
70 /**************************************************************************************************/
\r
71 //custom data structure for threads to use.
\r
72 // This is passed by void pointer so it can be any data type
\r
73 // that can be passed using a single void pointer (LPVOID).
\r
// Fields read and written by MyCreateFilterThreadFunction:
//   count           - set to `end` when processing begins, overwritten with 1 on cancellation
//   alignmentLength - length every aligned sequence is compared against
//   tid             - not referenced by the thread-function lines visible in this header
76 int count, tid, alignmentLength;
\r
//   start - position in the input; the thread seeks to start-1 unless start is 0 or 1
//   end   - number of sequences the thread processes (it is the loop bound, not an offset)
77 unsigned long long start, end;
\r
//   filename - input file the thread opens
//   vertical - flag string evaluated with m->isTrue()
//   hard     - passed to F.doHard() when non-empty
79 string filename, vertical, hard;
\r
// Constructor. The only assignment visible here stores aLength in alignmentLength.
// NOTE(review): the other parameters presumably initialise the matching fields - confirm.
84 filterData(string fn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, char tr, string vert, float so, string ha, int t) {
\r
91 alignmentLength = aLength;
\r
98 /**************************************************************************************************/
\r
99 //custom data structure for threads to use.
\r
100 // This is passed by void pointer so it can be any data type
\r
101 // that can be passed using a single void pointer (LPVOID).
\r
// Argument bundle for MyRunFilterThreadFunction, which receives it through a
// void pointer (LPVOID) and casts it back to filterRunData*.
102 struct filterRunData {
\r
//   count           - set to `end` when processing begins, overwritten with 1 on cancellation
//   alignmentLength - number of columns examined in each sequence
//   tid             - not referenced by the thread-function lines visible in this header
103 int count, tid, alignmentLength;
\r
//   start - position in the input; the thread seeks to start-1 unless start is 0 or 1
//   end   - number of sequences the thread processes (it is the loop bound, not an offset)
104 unsigned long long start, end;
\r
//   filter         - per-column mask; a column is kept when its character is '1'
//   outputFilename - file the filtered sequences are written to
107 string filter, outputFilename;
\r
// Constructor. The assignments visible here store ofn in outputFilename and
// aLength in alignmentLength.
// NOTE(review): the other parameters presumably initialise the matching fields - confirm.
110 filterRunData(string f, string fn, string ofn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, int t) {
\r
112 outputFilename = ofn;
\r
118 alignmentLength = aLength;
\r
123 /**************************************************************************************************/
\r
124 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
\r
// Thread entry point that accumulates filter information from one portion of
// an alignment file.
// lpParam must point to a filterData; results are left in that object
// (pDataArray->F and pDataArray->count).
// Returns 1 when cancelled through control_pressed.
// NOTE(review): the normal-path return value is not visible in this view.
126 static DWORD WINAPI MyCreateFilterThreadFunction(LPVOID lpParam){
\r
127 filterData* pDataArray;
\r
128 pDataArray = (filterData*)lpParam;
\r
// Configure the Filters object: soft and trump are applied only when they
// differ from their "unset" values (0 and '*').
132 if (pDataArray->soft != 0) { pDataArray->F.setSoft(pDataArray->soft); }
\r
133 if (pDataArray->trump != '*') { pDataArray->F.setTrump(pDataArray->trump); }
\r
135 pDataArray->F.setLength(pDataArray->alignmentLength);
\r
// initialize() is called only when at least one of trump / vertical / soft is in use.
137 if(pDataArray->trump != '*' || pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0){
\r
138 pDataArray->F.initialize();
\r
// A non-empty hard setting is handed to doHard(); otherwise the filter starts
// as alignmentLength copies of '1'.
141 if(pDataArray->hard.compare("") != 0) { pDataArray->F.doHard(pDataArray->hard); }
\r
142 else { pDataArray->F.setFilter(string(pDataArray->alignmentLength, '1')); }
\r
145 pDataArray->m->openInputFile(pDataArray->filename, in);
\r
147 //print header if you are process 0
\r
// A start of 0 or 1 takes the first branch; any other start seeks to start-1
// and then calls gobble().
148 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
\r
150 }else { //this accounts for the difference in line endings.
\r
151 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
\r
154 pDataArray->count = pDataArray->end;
\r
// NOTE(review): i is int while end is unsigned long long, so this comparison mixes signedness.
155 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
\r
// Cancellation: close the input, overwrite count with 1 and return 1.
157 if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; }
\r
159 Sequence current(in); pDataArray->m->gobble(in);
\r
// Sequences with an empty name are skipped.
161 if (current.getName() != "") {
\r
// A length mismatch is reported and sets control_pressed, which the
// cancellation check above acts on.
// NOTE(review): length() is unsigned and alignmentLength is int.
162 if (current.getAligned().length() != pDataArray->alignmentLength) { pDataArray->m->mothurOut("Sequences are not all the same length, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
\r
164 if(pDataArray->trump != '*') { pDataArray->F.doTrump(current); }
\r
165 if(pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0) { pDataArray->F.getFreqs(current); }
\r
// Progress report: prints i whenever it is a multiple of 100 (including 0).
169 if((i) % 100 == 0){ pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine(); }
\r
// Prints count when it is not a multiple of 100.
172 if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
\r
// Exceptions are reported through errorOut() with this function's name.
179 catch(exception& e) {
\r
180 pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyCreateFilterThreadFunction");
\r
184 /**************************************************************************************************/
\r
// Thread entry point that applies an existing column filter to one portion of
// an alignment file and writes the filtered sequences to outputFilename.
// lpParam must point to a filterRunData.
// Returns 1 when cancelled through control_pressed.
// NOTE(review): the normal-path return value is not visible in this view.
185 static DWORD WINAPI MyRunFilterThreadFunction(LPVOID lpParam){
\r
186 filterRunData* pDataArray;
\r
187 pDataArray = (filterRunData*)lpParam;
\r
192 pDataArray->m->openOutputFile(pDataArray->outputFilename, out);
\r
195 pDataArray->m->openInputFile(pDataArray->filename, in);
\r
197 //print header if you are process 0
\r
// A start of 0 or 1 takes the first branch; any other start seeks to start-1
// and then calls gobble().
198 if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
\r
200 }else { //this accounts for the difference in line endings.
\r
201 in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
\r
204 pDataArray->count = pDataArray->end;
\r
// NOTE(review): i is int while end is unsigned long long, so this comparison mixes signedness.
205 for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
\r
// Cancellation: close both streams, overwrite count with 1 and return 1.
207 if (pDataArray->m->control_pressed) { in.close(); out.close(); pDataArray->count = 1; return 1; }
\r
209 Sequence seq(in); pDataArray->m->gobble(in);
\r
// Sequences with an empty name are skipped.
210 if (seq.getName() != "") {
\r
211 string align = seq.getAligned();
\r
212 string filterSeq = "";
\r
// Keep column j only when the mask has '1' at that position.
// NOTE(review): the visible lines do not check that align and filter hold at
// least alignmentLength characters before indexing.
214 for(int j=0;j<pDataArray->alignmentLength;j++){
\r
215 if(pDataArray->filter[j] == '1'){
\r
216 filterSeq += align[j];
\r
// Output record: '>' followed by the name, then the filtered string on its own line.
220 out << '>' << seq.getName() << endl << filterSeq << endl;
\r
// Progress report: prints i whenever it is a multiple of 100 (including 0).
224 if((i) % 100 == 0){ pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine(); }
\r
// Prints count when it is not a multiple of 100.
227 if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); }
\r
// Exceptions are reported through errorOut() with this function's name.
235 catch(exception& e) {
\r
236 pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyRunFilterThreadFunction");
\r
240 /**************************************************************************************************/
\r