X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimflowscommand.h;h=f5493eb7517db72c20048da57080a587ae675f7e;hp=215b1a2f69bf717e11e2a8f25ce1761e7c0951f9;hb=df7e3ff9f68ef157b0328a2d353c3258c5d45d89;hpb=d635b39347cd81943ea50de7b813a0a5d743b0c0 diff --git a/trimflowscommand.h b/trimflowscommand.h index 215b1a2..f5493eb 100644 --- a/trimflowscommand.h +++ b/trimflowscommand.h @@ -15,61 +15,65 @@ #include "sequence.hpp" #include "flowdata.h" #include "groupmap.h" +#include "trimoligos.h" class TrimFlowsCommand : public Command { public: TrimFlowsCommand(string); TrimFlowsCommand(); - ~TrimFlowsCommand(); - vector getRequiredParameters(); - vector getValidParameters(); - vector getRequiredFiles(); - map > getOutputFiles() { return outputTypes; } - int execute(); - void help(); + ~TrimFlowsCommand() {} + + vector setParameters(); + string getCommandName() { return "trim.flows"; } + string getCommandCategory() { return "Sequence Processing"; } + + string getHelpString(); + string getOutputPattern(string); + string getCitation() { return "http://www.mothur.org/wiki/Trim.flows"; } + string getDescription() { return "trim.flows"; } + + + int execute(); + void help() { m->mothurOut(getHelpString()); } private: bool abort; -// GroupMap* groupMap; - struct linePair { - unsigned long int start; - unsigned long int end; - linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {} + unsigned long long start; + unsigned long long end; + linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} }; int comboStarts; vector processIDS; //processid vector lines; - vector qLines; - map > outputTypes; + + vector getFlowFileBreaks(); + int createProcessesCreateTrim(string, string, string, string, vector >); + int driverCreateTrim(string, string, string, string, vector >, linePair*); + string reverseOligo(string); + vector outputNames; set filesToRemove; - - void getOligos(vector >&); //a rewrite of what is in trimseqscommand.h - int stripBarcode(Sequence&, int&); //largely redundant with trimseqscommand.h - int stripForward(Sequence&, int&); //largely redundant with trimseqscommand.h - bool stripReverse(Sequence&); //largely redundant with trimseqscommand.h - bool compareDNASeq(string, string); //largely redundant with trimseqscommand.h - int countDiffs(string, string); //largely redundant with trimseqscommand.h - bool allFiles; -// int processors; + int processors; int numFPrimers, numRPrimers; - int totalFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs; + int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers; + int numFlows; float signal, noise; bool fasta; - + string flowOrder; string flowFileName, oligoFileName, outputDir; - map barcodes; map primers; vector revPrimer; + vector linker; + vector spacer; vector primerNameVector; //needed here? vector barcodeNameVector; //needed here? @@ -77,13 +81,185 @@ private: map combos; //needed here? map groupToIndex; //needed here? +}; + +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct trimFlowData { + string flowFileName; + string trimFlowFileName; + string scrapFlowFileName; + string fastaFileName; + string flowOrder; + vector > barcodePrimerComboFileNames; + map barcodes; + map primers; + vector revPrimer; + bool fasta, allFiles; + unsigned long long start; + unsigned long long end; + MothurOut* m; + float signal, noise; + int numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, threadID, count; - int driverCreateTrim(string, string, string, string); - -// int createProcessesCreateTrim(string, string, string, string, string, string, string, vector, vector){}; - int setLines(string, string, vector&, vector&){}; - + trimFlowData(){} + trimFlowData(string ff, string tf, string sf, string f, string fo, vector > bfn, map bar, map pri, vector rev, bool fa, bool al, unsigned long long st, unsigned long long en, MothurOut* mout, float sig, float n, int numF, int maxF, int minF, int maxH, int td, int bd, int pd, int tid) { + flowFileName = ff; + trimFlowFileName = tf; + scrapFlowFileName = sf; + fastaFileName = f; + flowOrder = fo; + barcodePrimerComboFileNames = bfn; + barcodes = bar; + primers = pri; + revPrimer = rev; + fasta = fa; + allFiles = al; + start = st; + end = en; + m = mout; + signal = sig; + noise = n; + numFlows = numF; + maxFlows = maxF; + minFlows = minF; + maxHomoP = maxH; + tdiffs = td; + bdiffs = bd; + pdiffs = pd; + threadID = tid; + } }; +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) +#else +static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ + trimFlowData* pDataArray; + pDataArray = (trimFlowData*)lpParam; + + try { + ofstream trimFlowFile; + pDataArray->m->openOutputFile(pDataArray->trimFlowFileName, trimFlowFile); + trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); + + ofstream scrapFlowFile; + pDataArray->m->openOutputFile(pDataArray->scrapFlowFileName, scrapFlowFile); + scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint); + + ofstream fastaFile; + if(pDataArray->fasta){ pDataArray->m->openOutputFile(pDataArray->fastaFileName, fastaFile); } + + ifstream flowFile; + pDataArray->m->openInputFile(pDataArray->flowFileName, flowFile); + + flowFile.seekg(pDataArray->start); + + if(pDataArray->start == 0){ + flowFile >> pDataArray->numFlows; pDataArray->m->gobble(flowFile); + scrapFlowFile << pDataArray->maxFlows << endl; + trimFlowFile << pDataArray->maxFlows << endl; + if(pDataArray->allFiles){ + for(int i=0;ibarcodePrimerComboFileNames.size();i++){ + for(int j=0;jbarcodePrimerComboFileNames[0].size();j++){ + ofstream temp; + pDataArray->m->openOutputFile(pDataArray->barcodePrimerComboFileNames[i][j], temp); + temp << pDataArray->maxFlows << endl; + temp.close(); + } + } + } + } + + FlowData flowData(pDataArray->numFlows, pDataArray->signal, pDataArray->noise, pDataArray->maxHomoP, pDataArray->flowOrder); + cout << " thread flowdata address " << &flowData << '\t' << &flowFile << endl; + TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer); + + pDataArray->count = pDataArray->end; + cout << pDataArray->threadID << '\t' << pDataArray->count << endl; + int count = 0; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { break; } + cout << pDataArray->threadID << '\t' << count << endl; + int success = 1; + int currentSeqDiffs = 0; + string trashCode = ""; + + flowData.getNext(flowFile); + cout << "thread good bit " << flowFile.good() << endl; + flowData.capFlows(pDataArray->maxFlows); + + Sequence currSeq = flowData.getSequence(); + if(!flowData.hasMinFlows(pDataArray->minFlows)){ //screen to see if sequence is of a minimum number of flows + success = 0; + trashCode += 'l'; + } + + int primerIndex = 0; + int barcodeIndex = 0; + + if(pDataArray->barcodes.size() != 0){ + success = trimOligos.stripBarcode(currSeq, barcodeIndex); + if(success > pDataArray->bdiffs) { trashCode += 'b'; } + else{ currentSeqDiffs += success; } + } + + if(pDataArray->primers.size() != 0){ + success = trimOligos.stripForward(currSeq, primerIndex); + if(success > pDataArray->pdiffs) { trashCode += 'f'; } + else{ currentSeqDiffs += success; } + } + + if (currentSeqDiffs > pDataArray->tdiffs) { trashCode += 't'; } + + if(pDataArray->revPrimer.size() != 0){ + success = trimOligos.stripReverse(currSeq); + if(!success) { trashCode += 'r'; } + } + + if(trashCode.length() == 0){ + + flowData.printFlows(trimFlowFile); + + if(pDataArray->fasta) { currSeq.setAligned(currSeq.getUnaligned()); currSeq.printSequence(fastaFile); } + + if(pDataArray->allFiles){ + ofstream output; + pDataArray->m->openOutputFileAppend(pDataArray->barcodePrimerComboFileNames[barcodeIndex][primerIndex], output); + output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint); + + flowData.printFlows(output); + output.close(); + } + } + else{ + flowData.printFlows(scrapFlowFile, trashCode); + } + + count++; + cout << pDataArray->threadID << '\t' << currSeq.getName() << endl; + //report progress + if((count) % 10000 == 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); } + + } + //report progress + if((count) % 10000 != 0){ pDataArray->m->mothurOut(toString(count)); pDataArray->m->mothurOutEndLine(); } + + trimFlowFile.close(); + scrapFlowFile.close(); + flowFile.close(); + if(pDataArray->fasta){ fastaFile.close(); } + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "TrimFlowsCommand", "MyTrimFlowsThreadFunction"); + exit(1); + } +} +#endif + #endif