X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=trimseqscommand.h;h=2448669816c6936f55bff8b3ad256ce15f1de474;hp=4d0a6112cb6e288600ae944e9340563f5973da47;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hpb=7439a3c94e96c9e0e78ca53f0356415a879f8b80 diff --git a/trimseqscommand.h b/trimseqscommand.h index 4d0a611..2448669 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -16,6 +16,7 @@ #include "qualityscores.h" #include "trimoligos.h" #include "counttable.h" +#include "oligos.h" class TrimSeqsCommand : public Command { @@ -44,34 +45,22 @@ private: linePair() {} }; - bool getOligos(vector >&, vector >&, vector >&); + Oligos oligos; + bool getOligos(vector >&, vector >&, vector >&, map&); bool keepFirstTrim(Sequence&, QualityScores&); bool removeLastTrim(Sequence&, QualityScores&); bool cullLength(Sequence&); bool cullHomoP(Sequence&); bool cullAmbigs(Sequence&); - string reverseOligo(string); - bool abort, createGroup; string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir; - bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient; - int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts; + bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform; + int numBarcodes, numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts; int qWindowSize, qWindowStep, keepFirst, removeLast; double qRollAverage, qThreshold, qWindowAverage, qAverage; - vector revPrimer, outputNames; set filesToRemove; - map pairedBarcodes; - map pairedPrimers; - map barcodes; - vector groupVector; - map primers; - vector linker; - vector spacer; - map combos; - map groupToIndex; - vector primerNameVector; //needed here? - vector barcodeNameVector; //needed here? + vector groupVector,outputNames; map groupCounts; map nameMap; map nameCount; //for countfile name -> repCount @@ -93,35 +82,24 @@ private: struct trimData { unsigned long long start, end; MothurOut* m; - string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile; + string filename, qFileName, oligosfile, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile; vector > fastaFileNames; vector > qualFileNames; vector > nameFileNames; unsigned long long lineStart, lineEnd, qlineStart, qlineEnd; - bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient; - int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs; + bool flip, allFiles, qtrim, keepforward, createGroup, reorient, logtransform; + int maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs; int qWindowSize, qWindowStep, keepFirst, removeLast, count; double qRollAverage, qThreshold, qWindowAverage, qAverage; - vector revPrimer; - map barcodes; - map primers; map nameCount; - vector linker; - vector spacer; - map combos; - vector primerNameVector; - vector barcodeNameVector; map groupCounts; map nameMap; map groupMap; - map pairedBarcodes; - map pairedPrimers; trimData(){} trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector > ffn, vector > qfn, vector > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend, MothurOut* mout, - int pd, int bd, int ld, int sd, int td, map pri, map bar, vector revP, vector li, vector spa, map pbr, map ppr, bool po, - vector priNameVector, vector barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL, - int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, + int pd, int bd, int ld, int sd, int td, string o, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL, + int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt, int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map nm, map ncount) { filename = fn; qFileName = qn; @@ -145,22 +123,13 @@ struct trimData { qlineEnd = qend; m = mout; nameCount = ncount; + oligosfile = o; pdiffs = pd; bdiffs = bd; ldiffs = ld; sdiffs = sd; tdiffs = td; - barcodes = bar; - pairedPrimers = ppr; - pairedBarcodes = pbr; - pairedOligos = po; - primers = pri; numFPrimers = primers.size(); - revPrimer = revP; numRPrimers = revPrimer.size(); - linker = li; numLinkers = linker.size(); - spacer = spa; numSpacers = spacer.size(); - primerNameVector = priNameVector; - barcodeNameVector = barNameVector; createGroup = cGroup; allFiles = aFiles; @@ -174,6 +143,7 @@ struct trimData { qThreshold = Threshold; qAverage = Average; qRollAverage = RollAverage; + logtransform = lt; minLength = minL; maxAmbig = maxA; maxHomoP = maxH; @@ -259,31 +229,37 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } } + int numBarcodes, numLinkers, numSpacers, numRPrimers, numFPrimers; + bool pairedOligos; + Oligos oligos(pDataArray->oligosfile); + if (oligos.hasPairedBarcodes()) { + pairedOligos = true; + numFPrimers = oligos.getPairedPrimers().size(); + numBarcodes = oligos.getPairedBarcodes().size(); + }else { + pairedOligos = false; + numFPrimers = oligos.getPrimers().size(); + numBarcodes = oligos.getBarcodes().size(); + } + + numLinkers = oligos.getLinkers().size(); + numSpacers = oligos.getSpacers().size(); + numRPrimers = oligos.getReversePrimers().size(); + TrimOligos* trimOligos = NULL; - int numBarcodes = pDataArray->barcodes.size(); - if (pDataArray->pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes); numBarcodes = pDataArray->pairedBarcodes.size(); } - else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer); } + if (pairedOligos) { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); } + else { trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers()); } TrimOligos* rtrimOligos = NULL; if (pDataArray->reorient) { - //create reoriented primer and barcode pairs - map rpairedPrimers, rpairedBarcodes; - for (map::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) { - oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer - rpairedPrimers[it->first] = tempPair; - } - for (map::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) { - oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode - rpairedBarcodes[it->first] = tempPair; - } - rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); + rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); } pDataArray->count = 0; for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process if (pDataArray->m->control_pressed) { - delete trimOligos; + delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; } inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close(); if ((pDataArray->createGroup) && (pDataArray->countfile == "")) { outGroupsFile.close(); } if(pDataArray->qFileName != "") { qFile.close(); scrapQualFile.close(); trimQualFile.close(); } @@ -299,11 +275,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int currentSeqsDiffs = 0; Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA); + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); - QualityScores currQual; + QualityScores currQual; QualityScores savedQual; if(pDataArray->qFileName != ""){ currQual = QualityScores(qFile); pDataArray->m->gobble(qFile); + savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores()); } + string origSeq = currSeq.getUnaligned(); if (origSeq != "") { @@ -312,7 +291,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int barcodeIndex = 0; int primerIndex = 0; - if(pDataArray->numLinkers != 0){ + if(numLinkers != 0){ success = trimOligos->stripLinker(currSeq, currQual); if(success > pDataArray->ldiffs) { trashCode += 'k'; } else{ currentSeqsDiffs += success; } @@ -324,14 +303,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ else{ currentSeqsDiffs += success; } } - if(pDataArray->numSpacers != 0){ + if(numSpacers != 0){ success = trimOligos->stripSpacer(currSeq, currQual); if(success > pDataArray->sdiffs) { trashCode += 's'; } else{ currentSeqsDiffs += success; } } - if(pDataArray->numFPrimers != 0){ + if(numFPrimers != 0){ success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward); if(success > pDataArray->pdiffs) { trashCode += 'f'; } else{ currentSeqsDiffs += success; } @@ -339,7 +318,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; } - if(pDataArray->numRPrimers != 0){ + if(numRPrimers != 0){ success = trimOligos->stripReverse(currSeq, currQual); if(!success) { trashCode += 'r'; } } @@ -353,13 +332,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int thisPrimerIndex = 0; if(numBarcodes != 0){ - thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex); + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } - if(pDataArray->numFPrimers != 0){ - thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward); + if(numFPrimers != 0){ + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward); if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } @@ -372,7 +351,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ currentSeqsDiffs = thisCurrentSeqsDiffs; barcodeIndex = thisBarcodeIndex; primerIndex = thisPrimerIndex; - } + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); + if(pDataArray->qFileName != ""){ + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); + } + }else { trashCode += "(" + thisTrashCode + ")"; } } @@ -407,9 +392,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int origLength = currSeq.getNumBases(); if(pDataArray->qThreshold != 0) { success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold); } - else if(pDataArray->qAverage != 0) { success = currQual.cullQualAverage(currSeq, pDataArray->qAverage); } - else if(pDataArray->qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage); } - else if(pDataArray->qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage); } + else if(pDataArray->qAverage != 0) { success = currQual.cullQualAverage(currSeq, pDataArray->qAverage, pDataArray->logtransform); } + else if(pDataArray->qRollAverage != 0) { success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage, pDataArray->logtransform); } + else if(pDataArray->qWindowAverage != 0){ success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage, pDataArray->logtransform); } else { success = 1; } //you don't want to trim, if it fails above then scrap it @@ -455,20 +440,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ if(trashCode.length() == 0){ string thisGroup = ""; - if (pDataArray->createGroup) { - if(numBarcodes != 0){ - thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; - if (pDataArray->numFPrimers != 0) { - if (pDataArray->primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + pDataArray->primerNameVector[primerIndex]; - }else { - thisGroup = pDataArray->primerNameVector[primerIndex]; - } - } - } - } - } + if (pDataArray->createGroup) { thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); } int pos = thisGroup.find("ignore"); if (pos == string::npos) { @@ -567,12 +539,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ } //report progress - if((i) % 1000 == 0){ pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine(); } + if((pDataArray->count) % 1000 == 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } } //report progress if((pDataArray->count) % 1000 != 0){ pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } + if (pDataArray->reorient) { delete rtrimOligos; } delete trimOligos; inFASTA.close(); trimFASTAFile.close();