X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=filterseqscommand.h;h=ed7cfd1f98ddb6f44c8c7f690447d8dea1d4f6f0;hp=af4c03a950a0f42ad4fd9d7453ad698423381bd0;hb=c48d91112209b841444923670dca5454da0e2a4d;hpb=5b7ac70116137b52dd7884b76c5bca660a5fea38 diff --git a/filterseqscommand.h b/filterseqscommand.h index af4c03a..ed7cfd1 100644 --- a/filterseqscommand.h +++ b/filterseqscommand.h @@ -1,47 +1,246 @@ -#ifndef FILTERSEQSCOMMAND_H -#define FILTERSEQSCOMMAND_H - -/* - * filterseqscommand.h - * Mothur - * - * Created by Thomas Ryabin on 5/4/09. - * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. - * - */ - -#include "command.hpp" -#include "mothur.h" -#include "globaldata.hpp" -#include "readfasta.h" -#include "readnexus.h" -#include "readclustal.h" -#include "readseqsphylip.h" - -using namespace std; - -class FilterSeqsCommand : public Command { - -public: - FilterSeqsCommand(); - ~FilterSeqsCommand() {}; - int execute(); - -private: - void doTrump(); - void doSoft(); - void doHard(); - void doVertical(); - - int alignmentLength; - int numSeqs; - - GlobalData* globaldata; - ReadSeqs* readSeqs; - SequenceDB* db; - - string filter; - -}; - -#endif +#ifndef FILTERSEQSCOMMAND_H +#define FILTERSEQSCOMMAND_H + +/* + * filterseqscommand.h + * Mothur + * + * Created by Thomas Ryabin on 5/4/09. + * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved. + * + */ + +#include "command.hpp" +#include "filters.h" + +class Sequence; +class FilterSeqsCommand : public Command { + +public: + FilterSeqsCommand(string); + FilterSeqsCommand(); + ~FilterSeqsCommand() {}; + + vector setParameters(); + string getCommandName() { return "filter.seqs"; } + string getCommandCategory() { return "Sequence Processing"; } + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "http://www.mothur.org/wiki/Filter.seqs"; } + string getDescription() { return "removes columns from alignments based on a criteria defined by the user"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + +private: + struct linePair { + unsigned long long start; + unsigned long long end; + linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} + }; + + vector lines; + vector processIDS; + map > savedPositions; + + string vertical, filter, fasta, hard, outputDir, filterFileName; + vector fastafileNames; + int alignmentLength, processors; + vector bufferSizes; + vector outputNames; + + char trump; + bool abort; + float soft; + int numSeqs; + + string createFilter(); + int filterSequences(); + int createProcessesCreateFilter(Filters&, string); + int createProcessesRunFilter(string, string, string); + int driverRunFilter(string, string, string, linePair*); + int driverCreateFilter(Filters& F, string filename, linePair* line); + #ifdef USE_MPI + int driverMPIRun(int, int, MPI_File&, MPI_File&, vector&); + int MPICreateFilter(int, int, Filters&, MPI_File&, vector&); + #endif + +}; + + +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct filterData { + Filters F; + int count, tid, alignmentLength; + unsigned long long start, end; + MothurOut* m; + string filename, vertical, hard; + char trump; + float soft; + + filterData(){} + filterData(string fn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, char tr, string vert, float so, string ha, int t) { + filename = fn; + m = mout; + start = st; + end = en; + tid = t; + trump = tr; + alignmentLength = aLength; + vertical = vert; + soft = so; + hard = ha; + count = 0; + } +}; +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct filterRunData { + int count, tid, alignmentLength; + unsigned long long start, end; + MothurOut* m; + string filename; + string filter, outputFilename; + + filterRunData(){} + filterRunData(string f, string fn, string ofn, MothurOut* mout, unsigned long long st, unsigned long long en, int aLength, int t) { + filter = f; + outputFilename = ofn; + filename = fn; + m = mout; + start = st; + end = en; + tid = t; + alignmentLength = aLength; + count = 0; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyCreateFilterThreadFunction(LPVOID lpParam){ + filterData* pDataArray; + pDataArray = (filterData*)lpParam; + + try { + + if (pDataArray->soft != 0) { pDataArray->F.setSoft(pDataArray->soft); } + if (pDataArray->trump != '*') { pDataArray->F.setTrump(pDataArray->trump); } + + pDataArray->F.setLength(pDataArray->alignmentLength); + + if(pDataArray->trump != '*' || pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0){ + pDataArray->F.initialize(); + } + + if(pDataArray->hard.compare("") != 0) { pDataArray->F.doHard(pDataArray->hard); } + else { pDataArray->F.setFilter(string(pDataArray->alignmentLength, '1')); } + + ifstream in; + pDataArray->m->openInputFile(pDataArray->filename, in); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + pDataArray->count = 0; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; } + + Sequence current(in); pDataArray->m->gobble(in); + + if (current.getName() != "") { + if (current.getAligned().length() != pDataArray->alignmentLength) { pDataArray->m->mothurOut("Sequences are not all the same length, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } + + if(pDataArray->trump != '*') { pDataArray->F.doTrump(current); } + if(pDataArray->m->isTrue(pDataArray->vertical) || pDataArray->soft != 0) { pDataArray->F.getFreqs(current); } + } + pDataArray->count++; + //report progress + if((i) % 100 == 0){ pDataArray->m->mothurOutJustToScreen(toString(i)+"\n"); } + } + + if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n"); } + + in.close(); + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyCreateFilterThreadFunction"); + exit(1); + } +} +/**************************************************************************************************/ +static DWORD WINAPI MyRunFilterThreadFunction(LPVOID lpParam){ + filterRunData* pDataArray; + pDataArray = (filterRunData*)lpParam; + + try { + + ofstream out; + pDataArray->m->openOutputFile(pDataArray->outputFilename, out); + + ifstream in; + pDataArray->m->openInputFile(pDataArray->filename, in); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + pDataArray->count = 0; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { in.close(); out.close(); pDataArray->count = 1; return 1; } + + Sequence seq(in); pDataArray->m->gobble(in); + if (seq.getName() != "") { + string align = seq.getAligned(); + string filterSeq = ""; + + for(int j=0;jalignmentLength;j++){ + if(pDataArray->filter[j] == '1'){ + filterSeq += align[j]; + } + } + + out << '>' << seq.getName() << endl << filterSeq << endl; + } + pDataArray->count++; + //report progress + if((i) % 100 == 0){ pDataArray->m->mothurOutJustToScreen(toString(i)+"\n"); } + } + + if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n"); } + + in.close(); + out.close(); + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "FilterSeqsCommand", "MyRunFilterThreadFunction"); + exit(1); + } +} +/**************************************************************************************************/ +#endif + + +#endif