X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=makecontigscommand.h;h=67e86bc4ad124a55da7271ca7685cbea3f3946a5;hp=62de6d4f23a8bfc1d602f27418fc878f0e166004;hb=fefd5ee1517abd3bc38b469cb2dffc85a1571c7e;hpb=196c22d0f93ba48e8ec54ab76608b6e3ba5e68cc diff --git a/makecontigscommand.h b/makecontigscommand.h index 62de6d4..67e86bc 100644 --- a/makecontigscommand.h +++ b/makecontigscommand.h @@ -18,6 +18,7 @@ #include "blastalign.hpp" #include "noalign.hpp" #include "trimoligos.h" +#include "oligos.h" struct fastqRead { vector scores; @@ -32,9 +33,12 @@ struct fastqRead { struct pairFastqRead { fastqRead forward; fastqRead reverse; + fastqRead findex; + fastqRead rindex; pairFastqRead() {}; pairFastqRead(fastqRead f, fastqRead r) : forward(f), reverse(r){}; + pairFastqRead(fastqRead f, fastqRead r, fastqRead fi, fastqRead ri) : forward(f), reverse(r), findex(fi), rindex(ri) {}; ~pairFastqRead() {}; }; /**************************************************************************************************/ @@ -59,18 +63,12 @@ public: void help() { m->mothurOut(getHelpString()); } private: - bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup; - string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format; + bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk, reorient; + string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format, inputDir; float match, misMatch, gapOpen, gapExtend; - int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq; + int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers; vector outputNames; - - map barcodes; - map primers; - vector linker; - vector spacer; - vector primerNameVector; - vector barcodeNameVector; + Oligos* oligos; vector convertTable; map groupCounts; @@ -81,14 +79,14 @@ private: fastqRead readFastq(ifstream&, bool&); vector< vector< vector > > preProcessData(unsigned long int&); vector< vector > readFileNames(string); - vector< vector > readFastqFiles(unsigned long int&, string, string); + vector< vector > readFastqFiles(unsigned long int&, string, string, string, string); vector< vector > readFastaFiles(unsigned long int&, string, string); //bool checkReads(fastqRead&, fastqRead&, string, string); int createProcesses(vector< vector >, string, string, string, vector >, int); int driver(vector, string, string, string, vector >, int, string); - bool getOligos(vector >&, string); - string reverseOligo(string); - vector getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map& uniques); + bool getOligos(vector >&, string, map&); + vector getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map& uniques, bool); + vector mergeReads(vector frReads, vector friReads, map& pairUniques); }; /**************************************************************************************************/ @@ -101,22 +99,19 @@ struct contigsData { string outputFasta; string outputScrapFasta; string outputMisMatches; - string align, group; + string align, group, oligosfile; vector files; vector > fastaFileNames; MothurOut* m; float match, misMatch, gapOpen, gapExtend; int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq; - bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap; + bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap, reorient; map groupCounts; map groupMap; - vector primerNameVector; - vector barcodeNameVector; - map barcodes; - map primers; + contigsData(){} - contigsData(string g, vector f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map br, map pr, vector > ffn, vectorbnv, vector pnv, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) { + contigsData(string g, vector f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, vector > ffn, string olig, bool ro, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) { files = f; outputFasta = of; outputMisMatches = om; @@ -131,10 +126,7 @@ struct contigsData { count = 0; outputScrapFasta = osf; fastaFileNames = ffn; - barcodes = br; - primers = pr; - barcodeNameVector = bnv; - primerNameVector = pnv; + oligosfile = olig; pdiffs = pdf; bdiffs = bdf; tdiffs = tdf; @@ -144,6 +136,7 @@ struct contigsData { createFileGroup = cfg; threadID = tid; deltaq = delt; + reorient = ro; done=false; } }; @@ -166,8 +159,10 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ string thisfqualfile = pDataArray->files[1]; string thisrfastafile = pDataArray->files[2]; string thisrqualfile = pDataArray->files[3]; + string thisfindexfile = pDataArray->files[4]; + string thisrindexfile = pDataArray->files[5]; - if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); } + if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); } if(pDataArray->allFiles){ for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file @@ -180,7 +175,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ } } - ifstream inFFasta, inRFasta, inFQual, inRQual; + ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex; ofstream outFasta, outMisMatch, outScrapFasta; pDataArray->m->openInputFile(thisffastafile, inFFasta); pDataArray->m->openInputFile(thisrfastafile, inRFasta); @@ -188,13 +183,27 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ pDataArray->m->openInputFile(thisfqualfile, inFQual); pDataArray->m->openInputFile(thisrqualfile, inRQual); } + + if (thisfindexfile != "") { pDataArray->m->openInputFile(thisfindexfile, inFIndex); } + if (thisrindexfile != "") { pDataArray->m->openInputFile(thisrindexfile, inRIndex); } + pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta); pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch); pDataArray->m->openOutputFile(pDataArray->outputScrapFasta, outScrapFasta); outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; - TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes); + Oligos oligos; + if (pDataArray->oligosfile != "") { oligos.read(pDataArray->oligosfile); } + int numFPrimers = oligos.getPairedPrimers().size(); + int numBarcodes = oligos.getPairedBarcodes().size(); + + + TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); + TrimOligos* rtrimOligos = NULL; + if (pDataArray->reorient) { + rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size(); + } while ((!inFFasta.eof()) && (!inRFasta.eof())) { @@ -213,12 +222,34 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ rQual = new QualityScores(inRQual); pDataArray->m->gobble(inRQual); } + Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE"); + if (thisfindexfile != "") { + Sequence temp(inFIndex); pDataArray->m->gobble(inFIndex); + findexBarcode.setAligned(temp.getAligned()); + } + + if (thisrindexfile != "") { + Sequence temp(inRIndex); pDataArray->m->gobble(inRIndex); + rindexBarcode.setAligned(temp.getAligned()); + } + int barcodeIndex = 0; int primerIndex = 0; + Sequence savedFSeq(fSeq.getName(), fSeq.getAligned()); Sequence savedRSeq(rSeq.getName(), rSeq.getAligned()); + Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned()); + QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL; + if (thisfqualfile != "") { + savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores()); + savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores()); + } - if(pDataArray->barcodes.size() != 0){ + if(numBarcodes != 0){ if (thisfqualfile != "") { - success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex); + if ((thisfindexfile != "") || (thisrindexfile != "")) { + success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex); + }else { + success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex); + } }else { success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex); } @@ -226,7 +257,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ else{ currentSeqsDiffs += success; } } - if(pDataArray->primers.size() != 0){ + if(numFPrimers != 0){ if (thisfqualfile != "") { success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex); }else { @@ -238,6 +269,57 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ if (currentSeqsDiffs > pDataArray->tdiffs) { trashCode += 't'; } + if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse + int thisSuccess = 0; + string thisTrashCode = ""; + int thisCurrentSeqsDiffs = 0; + + int thisBarcodeIndex = 0; + int thisPrimerIndex = 0; + + if(numBarcodes != 0){ + if (thisfqualfile != "") { + if ((thisfindexfile != "") || (thisrindexfile != "")) { + thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex); + }else { + thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex); + } + }else { + thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex); + } + if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if(numFPrimers != 0){ + if (thisfqualfile != "") { + thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex); + }else { + thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex); + } + if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; } + else{ thisCurrentSeqsDiffs += thisSuccess; } + } + + if (thisCurrentSeqsDiffs > pDataArray->tdiffs) { thisTrashCode += 't'; } + + if (thisTrashCode == "") { + trashCode = thisTrashCode; + success = thisSuccess; + currentSeqsDiffs = thisCurrentSeqsDiffs; + barcodeIndex = thisBarcodeIndex; + primerIndex = thisPrimerIndex; + savedFSeq.reverseComplement(); + savedRSeq.reverseComplement(); + fSeq.setAligned(savedFSeq.getAligned()); + rSeq.setAligned(savedRSeq.getAligned()); + if(thisfqualfile != ""){ + savedFQual->flipQScores(); savedRQual->flipQScores(); + fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores()); + } + }else { trashCode += "(" + thisTrashCode + ")"; } + } + //flip the reverse reads rSeq.reverseComplement(); if (thisfqualfile != "") { rQual->flipQScores(); } @@ -259,7 +341,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ if (thisfqualfile != "") { scores1 = fQual->getQualityScores(); scores2 = rQual->getQualityScores(); - delete fQual; delete rQual; + delete fQual; delete rQual; delete savedFQual; delete savedRQual; } int overlapStart = fSeq.getStartPos(); @@ -319,29 +401,17 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ if(trashCode.length() == 0){ bool ignore = false; if (pDataArray->createOligosGroup) { - if(pDataArray->barcodes.size() != 0){ - string thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; - if (pDataArray->primers.size() != 0) { - if (pDataArray->primerNameVector[primerIndex] != "") { - if(thisGroup != "") { - thisGroup += "." + pDataArray->primerNameVector[primerIndex]; - }else { - thisGroup = pDataArray->primerNameVector[primerIndex]; - } - } - } - - if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); } - - int pos = thisGroup.find("ignore"); - if (pos == string::npos) { - pDataArray->groupMap[fSeq.getName()] = thisGroup; + string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex); + if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); } + + int pos = thisGroup.find("ignore"); + if (pos == string::npos) { + pDataArray->groupMap[fSeq.getName()] = thisGroup; - map::iterator it = pDataArray->groupCounts.find(thisGroup); - if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; } - else { pDataArray->groupCounts[it->first] ++; } - }else { ignore = true; } - } + map::iterator it = pDataArray->groupCounts.find(thisGroup); + if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; } + else { pDataArray->groupCounts[it->first] ++; } + }else { ignore = true; } }else if (pDataArray->createFileGroup) { int pos = pDataArray->group.find("ignore"); if (pos == string::npos) { @@ -389,6 +459,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ inRQual.close(); } delete alignment; + if (pDataArray->reorient) { delete rtrimOligos; } pDataArray->done = true; if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputFasta); pDataArray->m->mothurRemove(pDataArray->outputMisMatches); pDataArray->m->mothurRemove(pDataArray->outputScrapFasta); }