X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=screenseqscommand.h;h=007b6d6302d4062c09c62d9017fa663f3a902072;hb=5d00ef3e809832f08efdd691a9eace8ac20feb07;hp=49d992ac193c7aa42ad4492c9190bda40671fc0b;hpb=755185afe1c287b8c6eddf9eedd293a38fc9f998;p=mothur.git diff --git a/screenseqscommand.h b/screenseqscommand.h index 49d992a..007b6d6 100644 --- a/screenseqscommand.h +++ b/screenseqscommand.h @@ -11,6 +11,7 @@ */ #include "mothur.h" #include "command.hpp" +#include "sequence.hpp" class ScreenSeqsCommand : public Command { @@ -38,8 +39,7 @@ private: linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} }; - vector processIDS; //processid - vector lines; + vector lines; int screenNameGroupFile(set); int screenGroupFile(set); @@ -47,7 +47,7 @@ private: int screenQual(set); int screenTaxonomy(set); - int driver(linePair*, string, string, string, set&); + int driver(linePair, string, string, string, set&); int createProcesses(string, string, string, set&); #ifdef USE_MPI @@ -64,7 +64,204 @@ private: int getSummary(vector&); int createProcessesCreateSummary(vector&, vector&, vector&, vector&, vector&, string); - int driverCreateSummary(vector&, vector&, vector&, vector&, vector&, string, linePair*); + int driverCreateSummary(vector&, vector&, vector&, vector&, vector&, string, linePair); }; +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct sumData { + vector startPosition; + vector endPosition; + vector seqLength; + vector ambigBases; + vector longHomoPolymer; + string filename, namefile; + unsigned long long start; + unsigned long long end; + int count; + MothurOut* m; + map nameMap; + + + sumData(){} + sumData(string f, MothurOut* mout, unsigned long long st, unsigned long long en, string nf, map nam) { + filename = f; + namefile = nf; + m = mout; + start = st; + end = en; + nameMap = nam; + count = 0; + } +}; +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct sumScreenData { + int startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength; + unsigned long long start; + unsigned long long end; + int count; + MothurOut* m; + string goodFName, badAccnosFName, filename; + set* badSeqNames; + + + sumScreenData(){} + sumScreenData(int s, int e, int a, int h, int minl, int maxl, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string gf, string bf, set* bn) { + startPos = s; + endPos = e; + minLength = minl; + maxLength = maxl; + maxAmbig = a; + maxHomoP = h; + filename = f; + goodFName = gf; + badAccnosFName = bf; + m = mout; + start = st; + end = en; + badSeqNames = bn; + count = 0; + } +}; + + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MySumThreadFunction(LPVOID lpParam){ + sumData* pDataArray; + pDataArray = (sumData*)lpParam; + + try { + ifstream in; + pDataArray->m->openInputFile(pDataArray->filename, in); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + pDataArray->count = pDataArray->end; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; } + + Sequence current(in); pDataArray->m->gobble(in); + + if (current.getName() != "") { + + int num = 1; + if (pDataArray->namefile != "") { + //make sure this sequence is in the namefile, else error + map::iterator it = pDataArray->nameMap.find(current.getName()); + + if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } + else { num = it->second; } + } + + //for each sequence this sequence represents + for (int i = 0; i < num; i++) { + pDataArray->startPosition.push_back(current.getStartPos()); + pDataArray->endPosition.push_back(current.getEndPos()); + pDataArray->seqLength.push_back(current.getNumBases()); + pDataArray->ambigBases.push_back(current.getAmbigBases()); + pDataArray->longHomoPolymer.push_back(current.getLongHomoPolymer()); + } + } + } + + in.close(); + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumThreadFunction"); + exit(1); + } +} + +/**************************************************************************************************/ + +static DWORD WINAPI MySumScreenThreadFunction(LPVOID lpParam){ + sumScreenData* pDataArray; + pDataArray = (sumScreenData*)lpParam; + + try { + + ofstream goodFile; + pDataArray->m->openOutputFile(pDataArray->goodFName, goodFile); + + ofstream badAccnosFile; + pDataArray->m->openOutputFile(pDataArray->badAccnosFName, badAccnosFile); + + ifstream in; + pDataArray->m->openInputFile(pDataArray->filename, in); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + pDataArray->count = pDataArray->end; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { in.close(); badAccnosFile.close(); goodFile.close(); pDataArray->count = 1; return 1; } + + Sequence currSeq(in); pDataArray->m->gobble(in); + + if (currSeq.getName() != "") { + bool goodSeq = 1; // innocent until proven guilty + if(goodSeq == 1 && pDataArray->startPos != -1 && pDataArray->startPos < currSeq.getStartPos()) { goodSeq = 0; } + if(goodSeq == 1 && pDataArray->endPos != -1 && pDataArray->endPos > currSeq.getEndPos()) { goodSeq = 0; } + if(goodSeq == 1 && pDataArray->maxAmbig != -1 && pDataArray->maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; } + if(goodSeq == 1 && pDataArray->maxHomoP != -1 && pDataArray->maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; } + if(goodSeq == 1 && pDataArray->minLength != -1 && pDataArray->minLength > currSeq.getNumBases()) { goodSeq = 0; } + if(goodSeq == 1 && pDataArray->maxLength != -1 && pDataArray->maxLength < currSeq.getNumBases()) { goodSeq = 0; } + + if(goodSeq == 1){ + currSeq.printSequence(goodFile); + } + else{ + badAccnosFile << currSeq.getName() << endl; + pDataArray->badSeqNames->insert(currSeq.getName()); + } + + } + //report progress + if((i+1) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine(); } + } + //report progress + if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } + + + + in.close(); + goodFile.close(); + badAccnosFile.close(); + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "ScreenSeqsCommand", "MySumScreenThreadFunction"); + exit(1); + } +} + +#endif + +/**************************************************************************************************/ + + + #endif