X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=makecontigscommand.h;h=1ea38358c7ec4c56e2d1adb926f7767301daef9d;hp=a23d397d202ab1fd71ecb250c7c7246bcb82db3c;hb=d1c97b8c04bb75faca1e76ffad60b37a4d789d3d;hpb=eb71e28b7b7afd82540f4a8f0bac9429c5b9d713 diff --git a/makecontigscommand.h b/makecontigscommand.h index a23d397..1ea3835 100644 --- a/makecontigscommand.h +++ b/makecontigscommand.h @@ -32,9 +32,12 @@ struct fastqRead { struct pairFastqRead { fastqRead forward; fastqRead reverse; + fastqRead findex; + fastqRead rindex; pairFastqRead() {}; pairFastqRead(fastqRead f, fastqRead r) : forward(f), reverse(r){}; + pairFastqRead(fastqRead f, fastqRead r, fastqRead fi, fastqRead ri) : forward(f), reverse(r), findex(fi), rindex(ri) {}; ~pairFastqRead() {}; }; /**************************************************************************************************/ @@ -59,8 +62,8 @@ public: void help() { m->mothurOut(getHelpString()); } private: - bool abort, allFiles, createGroup; - string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, file, format; + bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk; + string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format; float match, misMatch, gapOpen, gapExtend; int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq; vector outputNames; @@ -75,19 +78,21 @@ private: map groupCounts; map groupMap; + map file2Group; vector convertQual(string); fastqRead readFastq(ifstream&, bool&); vector< vector< vector > > preProcessData(unsigned long int&); vector< vector > readFileNames(string); - vector< vector > readFastqFiles(unsigned long int&, string, string); + vector< vector > readFastqFiles(unsigned long int&, string, string, string, string); vector< vector > readFastaFiles(unsigned long int&, string, string); //bool checkReads(fastqRead&, fastqRead&, string, string); - int createProcesses(vector< vector >, string, string, string, vector >); - int driver(vector, string, string, string, vector >, int); + int createProcesses(vector< vector >, string, string, string, vector >, int); + int driver(vector, string, string, string, vector >, int, string); bool getOligos(vector >&, string); string reverseOligo(string); - vector getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map& uniques); + vector getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map& uniques, bool); + vector mergeReads(vector frReads, vector friReads, map& pairUniques); }; /**************************************************************************************************/ @@ -100,13 +105,13 @@ struct contigsData { string outputFasta; string outputScrapFasta; string outputMisMatches; - string align; + string align, group; vector files; vector > fastaFileNames; MothurOut* m; float match, misMatch, gapOpen, gapExtend; int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq; - bool allFiles, createGroup, done; + bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap; map groupCounts; map groupMap; vector primerNameVector; @@ -115,7 +120,7 @@ struct contigsData { map primers; contigsData(){} - contigsData(vector f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map br, map pr, vector > ffn, vectorbnv, vector pnv, int pdf, int bdf, int tdf, bool cg, bool all, int tid) { + contigsData(string g, vector f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map br, map pr, vector > ffn, vectorbnv, vector pnv, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) { files = f; outputFasta = of; outputMisMatches = om; @@ -126,6 +131,7 @@ struct contigsData { gapExtend = gapE; insert = thr; align = al; + group = g; count = 0; outputScrapFasta = osf; fastaFileNames = ffn; @@ -137,7 +143,9 @@ struct contigsData { bdiffs = bdf; tdiffs = tdf; allFiles = all; - createGroup = cg; + trimOverlap = to; + createOligosGroup = cg; + createFileGroup = cfg; threadID = tid; deltaq = delt; done=false; @@ -162,8 +170,10 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ string thisfqualfile = pDataArray->files[1]; string thisrfastafile = pDataArray->files[2]; string thisrqualfile = pDataArray->files[3]; + string thisfindexfile = pDataArray->files[4]; + string thisrindexfile = pDataArray->files[5]; - if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); } + if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n[DEBUG]: findex = " + thisfindexfile + ".\n[DEBUG]: rindex = " + thisrindexfile + ".\n"); } if(pDataArray->allFiles){ for (int i = 0; i < pDataArray->fastaFileNames.size(); i++) { //clears old file @@ -176,7 +186,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ } } - ifstream inFFasta, inRFasta, inFQual, inRQual; + ifstream inFFasta, inRFasta, inFQual, inRQual, inFIndex, inRIndex; ofstream outFasta, outMisMatch, outScrapFasta; pDataArray->m->openInputFile(thisffastafile, inFFasta); pDataArray->m->openInputFile(thisrfastafile, inRFasta); @@ -184,11 +194,15 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ pDataArray->m->openInputFile(thisfqualfile, inFQual); pDataArray->m->openInputFile(thisrqualfile, inRQual); } + + if (thisfindexfile != "") { pDataArray->m->openInputFile(thisfindexfile, inFIndex); } + if (thisrindexfile != "") { pDataArray->m->openInputFile(thisrindexfile, inRIndex); } + pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta); pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch); pDataArray->m->openOutputFile(pDataArray->outputScrapFasta, outScrapFasta); - if (pDataArray->threadID == 0) { outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; } + outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n"; TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes); @@ -209,12 +223,27 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ rQual = new QualityScores(inRQual); pDataArray->m->gobble(inRQual); } + Sequence findexBarcode("findex", "NONE"); Sequence rindexBarcode("rindex", "NONE"); + if (thisfindexfile != "") { + Sequence temp(inFIndex); pDataArray->m->gobble(inFIndex); + findexBarcode.setAligned(temp.getAligned()); + } + + if (thisrindexfile != "") { + Sequence temp(inRIndex); pDataArray->m->gobble(inRIndex); + rindexBarcode.setAligned(temp.getAligned()); + } + int barcodeIndex = 0; int primerIndex = 0; if(pDataArray->barcodes.size() != 0){ if (thisfqualfile != "") { - success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex); + if ((thisfindexfile != "") || (thisrindexfile != "")) { + success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex); + }else { + success = trimOligos.stripBarcode(fSeq, rSeq, *fQual, *rQual, barcodeIndex); + } }else { success = trimOligos.stripBarcode(fSeq, rSeq, barcodeIndex); } @@ -310,9 +339,11 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ for (int i = overlapEnd; i < length; i++) { contig += seq1[i]; } } + if (pDataArray->trimOverlap) { contig = contig.substr(overlapStart-1, oend-oStart); if (contig.length() == 0) { trashCode += "l"; } } + if(trashCode.length() == 0){ bool ignore = false; - if (pDataArray->createGroup) { + if (pDataArray->createOligosGroup) { if(pDataArray->barcodes.size() != 0){ string thisGroup = pDataArray->barcodeNameVector[barcodeIndex]; if (pDataArray->primers.size() != 0) { @@ -336,7 +367,17 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ else { pDataArray->groupCounts[it->first] ++; } }else { ignore = true; } } + }else if (pDataArray->createFileGroup) { + int pos = pDataArray->group.find("ignore"); + if (pos == string::npos) { + pDataArray->groupMap[fSeq.getName()] = pDataArray->group; + + map::iterator it = pDataArray->groupCounts.find(pDataArray->group); + if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[pDataArray->group] = 1; } + else { pDataArray->groupCounts[it->first]++; } + }else { ignore = true; } } + if(pDataArray->allFiles && !ignore){ ofstream output;