X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=filterseqscommand.h;h=ed7cfd1f98ddb6f44c8c7f690447d8dea1d4f6f0;hp=cb02732d1ede7c7a70ec5a518426bca18a0019bf;hb=c48d91112209b841444923670dca5454da0e2a4d;hpb=284fd95c611ccc3b1a7875c4dacfca06d1f50ed6

diff --git a/filterseqscommand.h b/filterseqscommand.h
index cb02732..ed7cfd1 100644
--- a/filterseqscommand.h
+++ b/filterseqscommand.h
@@ -18,19 +18,31 @@ class FilterSeqsCommand : public Command {
 
 public:
 	FilterSeqsCommand(string);
+	FilterSeqsCommand();
 	~FilterSeqsCommand() {};
-	int execute();
-	void help();
+
+	vector<string> setParameters();
+	string getCommandName() { return "filter.seqs"; }
+	string getCommandCategory() { return "Sequence Processing"; }
+
+	string getHelpString();
+	string getOutputPattern(string);
+	string getCitation() { return "http://www.mothur.org/wiki/Filter.seqs"; }
+	string getDescription() { return "removes columns from alignments based on a criteria defined by the user"; }
+
+	int execute();
+	void help() { m->mothurOut(getHelpString()); }
 
 private:
 	struct linePair {
-		unsigned long int start;
-		unsigned long int end;
-		linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
+		unsigned long long start;
+		unsigned long long end;
+		linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
 	};
 
 	vector<linePair*> lines;
 	vector<int> processIDS;
+	map<int, vector<unsigned long long> > savedPositions;
 
 	string vertical, filter, fasta, hard, outputDir, filterFileName;
 	vector<string> fastafileNames;
@@ -46,14 +58,191 @@ private:
 	string createFilter();
 	int filterSequences();
 	int createProcessesCreateFilter(Filters&, string);
-	int createProcessesRunFilter(string, string);
+	int createProcessesRunFilter(string, string, string);
 	int driverRunFilter(string, string, string, linePair*);
 	int driverCreateFilter(Filters& F, string filename, linePair* line);
 	#ifdef USE_MPI
-	int driverMPIRun(int, int, MPI_File&, MPI_File&, vector<unsigned long int>&);
-	int MPICreateFilter(int, int, Filters&, MPI_File&, vector<unsigned long int>&);
+	int driverMPIRun(int, int, MPI_File&, MPI_File&, vector<unsigned long long>&);
+	int MPICreateFilter(int, int, Filters&, MPI_File&, vector<unsigned long long>&);
 	#endif
 
 };
 
+
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct filterData {
+	Filters F;				//filter state this worker accumulates into
+	int count, tid, alignmentLength;	//count = sequences handled so far; alignmentLength = expected aligned length
+	unsigned long long start, end;		//offset to seek to in filename, number of sequences to read from there
+	MothurOut* m;				//used for file opening, screen output and the control_pressed flag
+	string filename, vertical, hard;	//input file, vertical option text, hard mask file ("" means none)
+	char trump;				//trump character, '*' means trump filtering is off
+	float soft;				//soft threshold, 0 means soft filtering is off
+
+	//zero the scalars so a default-built record never exposes indeterminate values ('*' and 0 switch trump/soft off)
+	filterData() : count(0), tid(0), alignmentLength(0), start(0), end(0), m(NULL), trump('*'), soft(0) {}
+	//bundles everything one create-filter worker needs; count always starts at 0
+	filterData(string fn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, char tr, string vert, float so, string ha, int t) {
+		filename = fn;
+		m = mout;
+		start = st;
+		end = en;
+		tid = t;
+		trump = tr;
+		alignmentLength = aLength;
+		vertical = vert;
+		soft = so;
+		hard = ha;
+		count = 0;
+	}
+};
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct filterRunData {
+	int count, tid, alignmentLength;	//count = sequences handled so far; alignmentLength = number of alignment columns
+	unsigned long long start, end;		//offset to seek to in filename, number of sequences to read from there
+	MothurOut* m;
+	string filename;
+	string filter, outputFilename;		//column mask ('1' keeps a column), file the filtered sequences are written to
+	//zero the scalars so a default-built record never exposes indeterminate values
+	filterRunData() : count(0), tid(0), alignmentLength(0), start(0), end(0), m(NULL) {}
+	filterRunData(string f, string fn, string ofn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, int t) {
+		filter = f;
+		outputFilename = ofn;
+		filename = fn;
+		m = mout;
+		start = st;
+		end = en;
+		tid = t;
+		alignmentLength = aLength;
+		count = 0;
+	}
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+//Windows thread entry point that accumulates one worker's share of the column filter.
+//lpParam must point to a filterData. The worker seeks to start, reads end sequences and passes each
+//named sequence to F as the trump/vertical/soft options require. Returns 0 when done, 1 if control_pressed was seen.
+static DWORD WINAPI MyCreateFilterThreadFunction(LPVOID lpParam){
+	filterData* pDataArray = static_cast<filterData*>(lpParam);
+	try {
+		if (pDataArray->soft != 0) { pDataArray->F.setSoft(pDataArray->soft); }
+		if (pDataArray->trump != '*') { pDataArray->F.setTrump(pDataArray->trump); }
+
+		pDataArray->F.setLength(pDataArray->alignmentLength);
+
+		if(pDataArray->trump != '*' || pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0){
+			pDataArray->F.initialize();
+		}
+
+		if(pDataArray->hard.compare("") != 0) { pDataArray->F.doHard(pDataArray->hard); }
+		else { pDataArray->F.setFilter(string(pDataArray->alignmentLength, '1')); }
+
+		ifstream in;
+		pDataArray->m->openInputFile(pDataArray->filename, in);
+
+		//a start of 0 or 1 means this worker begins at the top of the file
+		if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+			in.seekg(0);
+		}else { //this accounts for the difference in line endings.
+			in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
+		}
+
+		pDataArray->count = 0;
+		//end is an unsigned long long, so the loop counter is too: an int counter would overflow on a large chunk
+		for(unsigned long long i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+			if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; }
+
+			Sequence current(in); pDataArray->m->gobble(in);
+			if (current.getName() != "") {
+				if (current.getAligned().length() != (string::size_type)pDataArray->alignmentLength) { pDataArray->m->mothurOut("Sequences are not all the same length, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; }
+
+				if(pDataArray->trump != '*') { pDataArray->F.doTrump(current); }
+				if(pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0) { pDataArray->F.getFreqs(current); }
+			}
+			pDataArray->count++;
+			//report progress
+			if((i) % 100 == 0){ pDataArray->m->mothurOutJustToScreen(toString(i)+"\n"); }
+		}
+
+		if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n"); }
+
+		in.close();
+
+		return 0;
+	}
+	catch(exception& e) {
+		pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyCreateFilterThreadFunction");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+//Windows thread entry point that writes one worker's share of the filtered alignment.
+//lpParam must point to a filterRunData. For every named sequence, the columns whose filter character is '1'
+//are written to outputFilename in fasta form. Returns 0 when done, 1 if control_pressed was seen.
+static DWORD WINAPI MyRunFilterThreadFunction(LPVOID lpParam){
+	filterRunData* pDataArray = static_cast<filterRunData*>(lpParam);
+	try {
+		ofstream out;
+		pDataArray->m->openOutputFile(pDataArray->outputFilename, out);
+
+		ifstream in;
+		pDataArray->m->openInputFile(pDataArray->filename, in);
+
+		//a start of 0 or 1 means this worker begins at the top of the file
+		if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+			in.seekg(0);
+		}else { //this accounts for the difference in line endings.
+			in.seekg(pDataArray->start-1); pDataArray->m->gobble(in);
+		}
+
+		//never index past the end of the mask, nor (per sequence, below) past the end of the sequence
+		string::size_type maskLength = (pDataArray->alignmentLength > 0) ? (string::size_type)pDataArray->alignmentLength : 0;
+		if (pDataArray->filter.length() < maskLength) { maskLength = pDataArray->filter.length(); }
+
+		pDataArray->count = 0;
+		for(unsigned long long i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+			if (pDataArray->m->control_pressed) { in.close(); out.close(); pDataArray->count = 1; return 1; }
+
+			Sequence seq(in); pDataArray->m->gobble(in);
+			if (seq.getName() != "") {
+				string align = seq.getAligned();
+				string filterSeq = "";
+				string::size_type numColumns = (align.length() < maskLength) ? align.length() : maskLength;
+				for(string::size_type j=0;j<numColumns;j++){
+					if(pDataArray->filter[j] == '1'){
+						filterSeq += align[j];
+					}
+				}
+
+				out << '>' << seq.getName() << endl << filterSeq << endl;
+			}
+			pDataArray->count++;
+			//report progress
+			if((i) % 100 == 0){ pDataArray->m->mothurOutJustToScreen(toString(i)+"\n"); }
+		}
+
+		if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n"); }
+
+		in.close();
+		out.close();
+
+		return 0;
+	}
+	catch(exception& e) {
+		pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyRunFilterThreadFunction");
+		exit(1);
+	}
+}
+/**************************************************************************************************/
+#endif
+
+
 #endif