X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=pcrseqscommand.h;h=ba9062437d78df8aa337bc5948b603b1acf8e351;hp=9fc40419411ff2535f01eb150699b30227225a3b;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=1a5c2356c1b955c6ec024b2baf9f46377ee7c72e diff --git a/pcrseqscommand.h b/pcrseqscommand.h index 9fc4041..ba90624 100644 --- a/pcrseqscommand.h +++ b/pcrseqscommand.h @@ -46,12 +46,12 @@ private: vector lines; bool getOligos(vector >&, vector >&, vector >&); - bool abort, keepprimer, keepdots; + bool abort, keepprimer, keepdots, fileAligned; string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch; - int start, end, processors, length; + int start, end, processors, length, pdiffs; vector revPrimer, outputNames; - vector primers; + map primers; int writeAccnos(set); int readName(set&); @@ -60,13 +60,12 @@ private: int readCount(set); bool readOligos(); bool readEcoli(); - int driverPcr(string, string, string, set&, linePair); + int driverPcr(string, string, string, string, set&, linePair, int&, bool&); int createProcesses(string, string, string, set&); - bool findForward(Sequence&, int&, int&); - bool findReverse(Sequence&, int&, int&); bool isAligned(string, map&); - bool compareDNASeq(string, string); string reverseOligo(string); + int adjustDots(string, string, int, int); + }; /**************************************************************************************************/ @@ -75,19 +74,19 @@ private: // that can be passed using a single void pointer (LPVOID). struct pcrData { string filename; - string goodFasta, badFasta, oligosfile, ecolifile, nomatch; + string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName; unsigned long long fstart; unsigned long long fend; - int count, start, end, length; + int count, start, end, length, pdiffs, pstart, pend; MothurOut* m; - vector primers; + map primers; vector revPrimer; set badSeqNames; - bool keepprimer, keepdots; + bool keepprimer, keepdots, fileAligned, adjustNeeded; pcrData(){} - pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector pr, vector rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) { + pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, map pr, vector rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) { filename = f; goodFasta = gf; badFasta = bfn; @@ -99,12 +98,18 @@ struct pcrData { nomatch = nm; keepprimer = kp; keepdots = kd; + end = en; start = st; - end = en; length = l; fstart = fst; fend = fen; + pdiffs = pd; + locationsName = loc; count = 0; + fileAligned = true; + adjustNeeded = false; + pstart = -1; + pend = -1; } }; /**************************************************************************************************/ @@ -120,6 +125,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ ofstream badFile; pDataArray->m->openOutputFile(pDataArray->badFasta, badFile); + + ofstream locationsFile; + pDataArray->m->openOutputFile(pDataArray->locationsName, locationsFile); ifstream inFASTA; pDataArray->m->openInputFile(pDataArray->filename, inFASTA); @@ -132,14 +140,27 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } set lengths; - pDataArray->count = pDataArray->fend; + //pdiffs, bdiffs, primers, barcodes, revPrimers + map faked; + set locations; //locations = beginning locations + + TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer); + for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process - + pDataArray->count++; if (pDataArray->m->control_pressed) { break; } Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA); - + + if (pDataArray->fileAligned) { //assume aligned until proven otherwise + lengths.insert(currSeq.getAligned().length()); + if (lengths.size() > 1) { pDataArray->fileAligned = false; } + } + string trashCode = ""; + string locationsString = ""; + int thisPStart = -1; + int thisPEnd = -1; if (currSeq.getName() != "") { bool goodSeq = true; @@ -160,75 +181,41 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ //process primers if (pDataArray->primers.size() != 0) { int primerStart = 0; int primerEnd = 0; - //bool good = findForward(currSeq, primerStart, primerEnd); - /////////////////////////////////////////////////////////////// - bool good = false; - string rawSequence = currSeq.getUnaligned(); - - for(int j=0;jprimers.size();j++){ - string oligo = pDataArray->primers[j]; - - if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; } - - if(rawSequence.length() < oligo.length()) { break; } - - //search for primer - int olength = oligo.length(); - for (int l = 0; l < rawSequence.length()-olength; l++){ - if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; } - string rawChunk = rawSequence.substr(l, olength); - //compareDNASeq(oligo, rawChunk) - //////////////////////////////////////////////////////// - bool success = 1; - for(int k=0;knomatch == "reject") { goodSeq = false; } trashCode += "f"; } else{ //are you aligned if (aligned) { if (!pDataArray->keepprimer) { - if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); } - else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); } + if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd-1]+1); } + else { + currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1)); + if (pDataArray->fileAligned) { + thisPStart = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n"; + } +} } else { if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); } - else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); } + else { + currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); + if (pDataArray->fileAligned) { + thisPStart = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n"; + } + } } + /////////////////////////////////////////////////////////////// + mapAligned.clear(); + string seq = currSeq.getAligned(); + int countBases = 0; + for (int k = 0; k < seq.length(); k++) { + if (!isalpha(seq[k])) { ; } + else { mapAligned[countBases] = k; countBases++; } + } + /////////////////////////////////////////////////////////////// }else { if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } else { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } @@ -239,71 +226,33 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ //process reverse primers if (pDataArray->revPrimer.size() != 0) { int primerStart = 0; int primerEnd = 0; - bool good = false; - //findReverse(currSeq, primerStart, primerEnd); - /////////////////////////////////////////////////////////////// - string rawSequence = currSeq.getUnaligned(); - - for(int j=0;jrevPrimer.size();j++){ - string oligo = pDataArray->revPrimer[j]; - if (pDataArray->m->control_pressed) { primerStart = 0; primerEnd = 0; good = false; break; } - if(rawSequence.length() < oligo.length()) { break; } - - //search for primer - int olength = oligo.length(); - for (int l = rawSequence.length()-olength; l >= 0; l--){ - - string rawChunk = rawSequence.substr(l, olength); - //compareDNASeq(oligo, rawChunk) - //////////////////////////////////////////////////////// - bool success = 1; - for(int k=0;knomatch == "reject") { goodSeq = false; } trashCode += "r"; } else{ //are you aligned if (aligned) { if (!pDataArray->keepprimer) { if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); } - else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); } + else { + currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); + if (pDataArray->fileAligned) { + thisPEnd = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n"; + } + + } } else { - if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); } - else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); } + if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); } + else { + currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1)); + if (pDataArray->fileAligned) { + thisPEnd = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n"; + } + + } } } else { if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); } @@ -313,8 +262,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } }else if (pDataArray->ecolifile != "") { //make sure the seqs are aligned - lengths.insert(currSeq.getAligned().length()); - if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } + if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } else if (currSeq.getAligned().length() != pDataArray->length) { pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; }else { @@ -329,8 +277,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } }else{ //using start and end to trim //make sure the seqs are aligned - lengths.insert(currSeq.getAligned().length()); - if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } + if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } else { if (pDataArray->end != -1) { if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; } @@ -353,7 +300,11 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } } - if(goodSeq == 1) { currSeq.printSequence(goodFile); } + if(goodSeq == 1) { + currSeq.printSequence(goodFile); + if (locationsString != "") { locationsFile << locationsString; } + if (thisPStart != -1) { locations.insert(thisPStart); } + } else { pDataArray->badSeqNames.insert(currSeq.getName()); currSeq.setName(currSeq.getName() + '|' + trashCode); @@ -362,17 +313,24 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } //report progress - if((i+1) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine(); } + if((i+1) % 100 == 0){ pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(i+1)+"\n"); } } //report progress - if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOut("Thread Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine(); } + if((pDataArray->count) % 100 != 0){ pDataArray->m->mothurOutJustToScreen("Thread Processing sequence: " + toString(pDataArray->count)+"\n"); } goodFile.close(); inFASTA.close(); badFile.close(); + locationsFile.close(); + + if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); } + + if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value + if (locations.size() > 1) { pDataArray->adjustNeeded = true; } + if (pDataArray->primers.size() != 0) { set::iterator it = locations.begin(); pDataArray->pstart = *it; } + } return 0; - } catch(exception& e) { pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");