X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimseqscommand.h;h=2448669816c6936f55bff8b3ad256ce15f1de474;hp=2ab7de3e7fc62835429581757fda19f25abad030;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hpb=14cddf859d97118481161bf3e85a647976020758 diff --git a/trimseqscommand.h b/trimseqscommand.h index 2ab7de3..2448669 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -16,6 +16,7 @@ #include "qualityscores.h" #include "trimoligos.h" #include "counttable.h" +#include "oligos.h" class TrimSeqsCommand : public Command { @@ -44,32 +45,22 @@ private: linePair() {} }; - bool getOligos(vector >&, vector >&, vector >&); + Oligos oligos; + bool getOligos(vector >&, vector >&, vector >&, map&); bool keepFirstTrim(Sequence&, QualityScores&); bool removeLastTrim(Sequence&, QualityScores&); bool cullLength(Sequence&); bool cullHomoP(Sequence&); bool cullAmbigs(Sequence&); - string reverseOligo(string); - bool abort, createGroup; string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir; - bool flip, allFiles, qtrim, keepforward; - int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts; + bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform; + int numBarcodes, numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts; int qWindowSize, qWindowStep, keepFirst, removeLast; double qRollAverage, qThreshold, qWindowAverage, qAverage; - vector revPrimer, outputNames; set filesToRemove; - map barcodes; - vector groupVector; - map primers; - vector linker; - vector spacer; - map combos; - map groupToIndex; - vector primerNameVector; //needed here? - vector barcodeNameVector; //needed here? + vector groupVector,outputNames; map groupCounts; map nameMap; map nameCount; //for countfile name -> repCount @@ -91,34 +82,25 @@ private: struct trimData { unsigned long long start, end; MothurOut* m; - string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile; + string filename, qFileName, oligosfile, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile; vector > fastaFileNames; vector > qualFileNames; vector > nameFileNames; unsigned long long lineStart, lineEnd, qlineStart, qlineEnd; - bool flip, allFiles, qtrim, keepforward, createGroup; - int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs; + bool flip, allFiles, qtrim, keepforward, createGroup, reorient, logtransform; + int maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs; int qWindowSize, qWindowStep, keepFirst, removeLast, count; double qRollAverage, qThreshold, qWindowAverage, qAverage; - vector revPrimer; - map barcodes; - map primers; map nameCount; - vector linker; - vector spacer; - map combos; - vector primerNameVector; - vector barcodeNameVector; map groupCounts; map nameMap; map groupMap; trimData(){} trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector > ffn, vector > qfn, vector > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout, - int pd, int bd, int ld, int sd, int td, map pri, map bar, vector revP, vector li, vector spa, - vector priNameVector, vector barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL, - int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, - int minL, int maxA, int maxH, int maxL, bool fli, map nm, map ncount) { + int pd, int bd, int ld, int sd, int td, string o, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL, + int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt, + int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map nm, map ncount) { filename = fn; qFileName = qn; nameFile = nf; @@ -141,19 +123,13 @@ struct trimData { qlineEnd = qend; m = mout; nameCount = ncount; + oligosfile = o; pdiffs = pd; bdiffs = bd; ldiffs = ld; sdiffs = sd; tdiffs = td; - barcodes = bar; - primers = pri; numFPrimers = primers.size(); - revPrimer = revP; numRPrimers = revPrimer.size(); - linker = li; numLinkers = linker.size(); - spacer = spa; numSpacers = spacer.size(); - primerNameVector = priNameVector; - barcodeNameVector = barNameVector; createGroup = cGroup; allFiles = aFiles; @@ -167,11 +143,13 @@ struct trimData { qThreshold = Threshold; qAverage = Average; qRollAverage = RollAverage; + logtransform = lt; minLength = minL; maxAmbig = maxA; maxHomoP = maxH; maxLength = maxL; flip = fli; + reorient = reo; nameMap = nm; count = 0; } @@ -251,13 +229,37 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } } - - TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer); + int numBarcodes, numLinkers, numSpacers, numRPrimers, numFPrimers; + bool pairedOligos; + Oligos oligos(pDataArray->oligosfile); + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); + } + + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + + TrimOligos* trimOligos = NULL; + if (pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); } + else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); } + + TrimOligos* rtrimOligos = NULL; + if (pDataArray->reorient) { + rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); + } pDataArray->count = 0; for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process - if (pDataArray->m->control_pressed) { + if (pDataArray->m->control_pressed) { + delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; } inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); if ((pDataArray->createGroup) && (pDataArray->countfile == "")) { outGroupsFile.close(); } if(pDataArray->qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); } @@ -273,11 +275,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int currentSeqsDiffs = 0; Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA); + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); - QualityScores currQual; + QualityScores currQual; QualityScores savedQual; if(pDataArray->qFileName != ""){ currQual = QualityScores(qFile); pDataArray->m->gobble(qFile); + savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores()); } + string origSeq = currSeq.getUnaligned(); if (origSeq != "") { @@ -286,38 +291,76 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int barcodeIndex = 0; int primerIndex = 0; - if(pDataArray->numLinkers != 0){ - success = trimOligos.stripLinker(currSeq, currQual); + if(numLinkers != 0){ + success = trimOligos->stripLinker(currSeq, currQual); if(success > pDataArray->ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } } - if(pDataArray->barcodes.size() != 0){ - success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex); + if(numBarcodes != 0){ + success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex); if(success > pDataArray->bdiffs) { trashCode += 'b'; } else{ currentSeqsDiffs += success; } } - if(pDataArray->numSpacers != 0){ - success = trimOligos.stripSpacer(currSeq, currQual); + if(numSpacers != 0){ + success = trimOligos->stripSpacer(currSeq, currQual); if(success > pDataArray->sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } - if(pDataArray->numFPrimers != 0){ - success = trimOligos.stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward); + if(numFPrimers != 0){ + success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward); if(success > pDataArray->pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } } if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; } - if(pDataArray->numRPrimers != 0){ - success = trimOligos.stripReverse(currSeq, currQual); + if(numRPrimers != 0){ + success = trimOligos->stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } + if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + + if(numBarcodes != 0){ + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); + if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if(numFPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward); + if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > pDataArray->tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcodeIndex = thisBarcodeIndex; + primerIndex = thisPrimerIndex; + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + if(pDataArray->qFileName != ""){ + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); + } + }else { trashCode += "(" + thisTrashCode + ")"; } + } + + if(pDataArray->keepFirst != 0){ //success = keepFirstTrim(currSeq, currQual); success = 1; @@ -349,9 +392,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int origLength = currSeq.getNumBases(); if(pDataArray->qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold); } - else if(pDataArray->qAverage != 0) { success = currQual.cullQualAverage(currSeq, pDataArray->qAverage); } - else if(pDataArray->qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage); } - else if(pDataArray->qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage); } + else if(pDataArray->qAverage != 0) { success = currQual.cullQualAverage(currSeq, pDataArray->qAverage, pDataArray->logtransform); } + else if(pDataArray->qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage, pDataArray->logtransform); } + else if(pDataArray->qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage, pDataArray->logtransform); } else { success = 1; } //you don't want to trim, if it fails above then scrap it @@ -397,20 +440,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ if(trashCode.length() == 0){ string thisGroup = ""; - if (pDataArray->createGroup) { - if(pDataArray->barcodes.size() != 0){ - string thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; - if (pDataArray->primers.size() != 0) { - if (pDataArray->primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + pDataArray->primerNameVector[primerIndex]; - }else { - thisGroup = pDataArray->primerNameVector[primerIndex]; - } - } - } - } - } + if (pDataArray->createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); } int pos = thisGroup.find("ignore"); if (pos == string::npos) { @@ -438,7 +468,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } if (pDataArray->createGroup) { - if(pDataArray->barcodes.size() != 0){ + if(numBarcodes != 0){ if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; } else { pDataArray->groupMap[currSeq.getName()] = thisGroup; } @@ -509,13 +539,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } //report progress - if((i) % 1000 == 0){ pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine(); } + if((pDataArray->count) % 1000 == 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } } //report progress if((pDataArray->count) % 1000 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } - + if (pDataArray->reorient) { delete rtrimOligos; } + delete trimOligos; inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();