From 1a968f34ae2d2680eaf189a197d1a21b8dfd6c03 Mon Sep 17 00:00:00 2001 From: SarahsWork Date: Fri, 26 Apr 2013 09:25:37 -0400 Subject: [PATCH] added adjustDots function to pcr.seqs, keeps sequences aligned in case where primers are found at different locations. --- bayesian.cpp | 2 +- needlemanoverlap.cpp | 2 +- pcrseqscommand.h | 95 +++++++++++++++---- prcseqscommand.cpp | 212 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 267 insertions(+), 44 deletions(-) diff --git a/bayesian.cpp b/bayesian.cpp index 6eaab6f..6f65965 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -298,7 +298,7 @@ string Bayesian::getTaxonomy(Sequence* seq) { } } - if (queryKmers.size() == 0) { m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); simpleTax = "unknown;"; return "unknown;"; } + if (queryKmers.size() == 0) { m->mothurOut(seq->getName() + " is bad. It has no kmers of length " + toString(kmerSize) + "."); m->mothurOutEndLine(); simpleTax = "unknown;"; return "unknown;"; } int index = getMostProbableTaxonomy(queryKmers); diff --git a/needlemanoverlap.cpp b/needlemanoverlap.cpp index 9334a7f..1c2ef22 100644 --- a/needlemanoverlap.cpp +++ b/needlemanoverlap.cpp @@ -154,7 +154,7 @@ void NeedlemanOverlap::alignPrimer(string A, string B){ } catch(exception& e) { - m->errorOut(e, "NeedlemanOverlap", "align"); + m->errorOut(e, "NeedlemanOverlap", "alignPrimer"); exit(1); } diff --git a/pcrseqscommand.h b/pcrseqscommand.h index 581675e..f4481ab 100644 --- a/pcrseqscommand.h +++ b/pcrseqscommand.h @@ -46,7 +46,7 @@ private: vector lines; bool getOligos(vector >&, vector >&, vector >&); - bool abort, keepprimer, keepdots; + bool abort, keepprimer, keepdots, fileAligned; string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch; int start, end, processors, length, pdiffs; @@ -60,10 +60,12 @@ private: int readCount(set); bool readOligos(); bool readEcoli(); - int driverPcr(string, string, string, set&, linePair); + int driverPcr(string, string, string, string, set&, linePair, int&, int&, bool&); int createProcesses(string, string, string, set&); bool isAligned(string, map&); string reverseOligo(string); + int adjustDots(string, string, int, int); + }; /**************************************************************************************************/ @@ -72,19 +74,19 @@ private: // that can be passed using a single void pointer (LPVOID). struct pcrData { string filename; - string goodFasta, badFasta, oligosfile, ecolifile, nomatch; + string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName; unsigned long long fstart; unsigned long long fend; - int count, start, end, length, pdiffs; + int count, start, end, length, pdiffs, pstart, pend; MothurOut* m; map primers; vector revPrimer; set badSeqNames; - bool keepprimer, keepdots; + bool keepprimer, keepdots, fileAligned, adjustNeeded; pcrData(){} - pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, map pr, vector rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) { + pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, map pr, vector rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) { filename = f; goodFasta = gf; badFasta = bfn; @@ -102,7 +104,12 @@ struct pcrData { fstart = fst; fend = fen; pdiffs = pd; + locationsName = loc; count = 0; + fileAligned = true; + adjustNeeded = false; + pstart = -1; + pend = -1; } }; /**************************************************************************************************/ @@ -118,6 +125,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ ofstream badFile; pDataArray->m->openOutputFile(pDataArray->badFasta, badFile); + + ofstream locationsFile; + pDataArray->m->openOutputFile(pDataArray->locationsName, locationsFile); ifstream inFASTA; pDataArray->m->openInputFile(pDataArray->filename, inFASTA); @@ -132,6 +142,8 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ set lengths; //pdiffs, bdiffs, primers, barcodes, revPrimers map faked; + vector< set > locations; //locations[0] = beginning locations, locations[1] = ending locations + locations.resize(2); TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer); for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process @@ -139,8 +151,16 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ if (pDataArray->m->control_pressed) { break; } Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA); - + + if (pDataArray->fileAligned) { //assume aligned until proven otherwise + lengths.insert(currSeq.getAligned().length()); + if (lengths.size() > 1) { pDataArray->fileAligned = false; } + } + string trashCode = ""; + string locationsString = ""; + int thisPStart = -1; + int thisPEnd = -1; if (currSeq.getName() != "") { bool goodSeq = true; @@ -168,12 +188,24 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ //are you aligned if (aligned) { if (!pDataArray->keepprimer) { - if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); } - else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); } + if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerEnd-1]+1); } + else { + currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1)); + if (pDataArray->fileAligned) { + thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n"; + } +} } else { if (pDataArray->keepdots) { currSeq.filterToPos(mapAligned[primerStart]); } - else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); } + else { + currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); + if (pDataArray->fileAligned) { + thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n"; + } + } } /////////////////////////////////////////////////////////////// mapAligned.clear(); @@ -202,11 +234,25 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ if (aligned) { if (!pDataArray->keepprimer) { if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); } - else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); } + else { + currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); + if (pDataArray->fileAligned) { + thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n"; + } + + } } else { - if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); } - else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); } + if (pDataArray->keepdots) { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); } + else { + currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1)); + if (pDataArray->fileAligned) { + thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n"; + } + + } } } else { if (!pDataArray->keepprimer) { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart)); } @@ -216,8 +262,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } }else if (pDataArray->ecolifile != "") { //make sure the seqs are aligned - lengths.insert(currSeq.getAligned().length()); - if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } + if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } else if (currSeq.getAligned().length() != pDataArray->length) { pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; }else { @@ -232,8 +277,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } }else{ //using start and end to trim //make sure the seqs are aligned - lengths.insert(currSeq.getAligned().length()); - if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } + if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; } else { if (pDataArray->end != -1) { if (pDataArray->end > currSeq.getAligned().length()) { pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; } @@ -256,7 +300,12 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ } } - if(goodSeq == 1) { currSeq.printSequence(goodFile); } + if(goodSeq == 1) { + currSeq.printSequence(goodFile); + if (locationsString != "") { locationsFile << locationsString; } + if (thisPStart != -1) { locations[0].insert(thisPStart); } + if (thisPEnd != -1) { locations[1].insert(thisPEnd); } + } else { pDataArray->badSeqNames.insert(currSeq.getName()); currSeq.setName(currSeq.getName() + '|' + trashCode); @@ -273,9 +322,17 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ goodFile.close(); inFASTA.close(); badFile.close(); + locationsFile.close(); + + if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); } + + if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value + if ((locations[0].size() > 1) || (locations[1].size() > 1)) { pDataArray->adjustNeeded = true; } + if (pDataArray->primers.size() != 0) { set::iterator it = locations[0].begin(); pDataArray->pstart = *it; } + if (pDataArray->revPrimer.size() != 0) { set::reverse_iterator it2 = locations[1].rbegin(); pDataArray->pend = *it2; } + } return 0; - } catch(exception& e) { pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction"); diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index f2ca9db..d31d687 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -312,6 +312,7 @@ int PcrSeqsCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } int start = time(NULL); + fileAligned = true; string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); } @@ -352,8 +353,7 @@ int PcrSeqsCommand::execute(){ if (m->control_pressed) { return 0; } set badNames; - if(processors == 1) { numFastaSeqs = driverPcr(fastafile, trimSeqFile, badSeqFile, badNames, lines[0]); } - else { numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames); } + numFastaSeqs = createProcesses(fastafile, trimSeqFile, badSeqFile, badNames); if (m->control_pressed) { return 0; } @@ -429,6 +429,8 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string vector processIDS; int process = 1; int num = 0; + int pstart = -1; int pend = -1; + bool adjustNeeded = false; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) @@ -440,12 +442,14 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later process++; }else if (pid == 0){ - num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badSeqNames, lines[process]); + string locationsFile = toString(getpid()) + ".temp"; + num = driverPcr(filename, goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", locationsFile, badSeqNames, lines[process], pstart, pend, adjustNeeded); //pass numSeqs to parent ofstream out; string tempFile = filename + toString(getpid()) + ".num.temp"; m->openOutputFile(tempFile, out); + out << pstart << '\t' << pend << '\t' << adjustNeeded << endl; out << num << '\t' << badSeqNames.size() << endl; for (set::iterator it = badSeqNames.begin(); it != badSeqNames.end(); it++) { out << (*it) << endl; @@ -460,7 +464,8 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string } } - num = driverPcr(filename, goodFileName, badFileName, badSeqNames, lines[0]); + string locationsFile = toString(getpid()) + ".temp"; + num = driverPcr(filename, goodFileName, badFileName, locationsFile, badSeqNames, lines[0], pstart, pend, adjustNeeded); //force parent to wait until all the processes are done for (int i=0;iopenInputFile(tempFile, in); int numBadNames = 0; string name = ""; - if (!in.eof()) { int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); } + int tpstart = -1; int tpend = -1; bool tempAdjust = false; + + if (!in.eof()) { + in >> tpstart >> tpend >> tempAdjust; m->gobble(in); + + if (tempAdjust) { adjustNeeded = true; } + if (tpstart != -1) { + if (tpstart != pstart) { adjustNeeded = true; } + if (tpstart < pstart) { pstart = tpstart; } //smallest start + } + if (tpend != -1) { + if (tpend != pend) { adjustNeeded = true; } + if (tpend > pend) { pend = tpend; } //largest end + } + int tempNum = 0; in >> tempNum >> numBadNames; num += tempNum; m->gobble(in); + } for (int j = 0; j < numBadNames; j++) { in >> name; m->gobble(in); badSeqNames.insert(name); @@ -485,6 +505,9 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName); m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp")); + + m->appendFiles((toString(processIDS[i]) + ".temp"), locationsFile); + m->mothurRemove((toString(processIDS[i]) + ".temp")); } #else @@ -498,6 +521,11 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string DWORD dwThreadIdArray[processors-1]; HANDLE hThreadArray[processors-1]; + string locationsFile = "locationsFile.txt"; + m->mothurRemove(locationsFile); + m->mothurRemove(goodFileName); + m->mothurRemove(badFileName); + //Create processor worker threads. for( int i=0; icount != pDataArray[i]->fend) { m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->fend) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; } + if (pDataArray[i]->adjustNeeded) { adjustNeeded = true; } + if (pDataArray[i]->pstart != -1) { + if (pDataArray[i]->pstart != pstart) { adjustNeeded = true; } + if (pDataArray[i]->pstart < pstart) { pstart = pDataArray[i]->pstart; } + } //smallest start + if (pDataArray[i]->pend != -1) { + if (pDataArray[i]->pend != pend) { adjustNeeded = true; } + if (pDataArray[i]->pend > pend) { pend = pDataArray[i]->pend; } + } //largest end + for (set::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); } CloseHandle(hThreadArray[i]); delete pDataArray[i]; @@ -536,9 +574,13 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string m->appendFiles((badFileName + toString(processIDS[i]) + ".temp"), badFileName); m->mothurRemove((badFileName + toString(processIDS[i]) + ".temp")); + + m->appendFiles((locationsFile+toString(processIDS[i]) + ".temp"), locationsFile); + m->mothurRemove((locationsFile+toString(processIDS[i]) + ".temp")); } #endif + if (fileAligned && adjustNeeded) { adjustDots(goodFileName, locationsFile, pstart, pend); } return num; @@ -550,13 +592,16 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string } //********************************************************************************************************************** -int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, set& badSeqNames, linePair filePos){ +int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta, string locationsName, set& badSeqNames, linePair filePos, int& pstart, int& pend, bool& adjustNeeded){ try { ofstream goodFile; m->openOutputFile(goodFasta, goodFile); ofstream badFile; m->openOutputFile(badFasta, badFile); + + ofstream locationsFile; + m->openOutputFile(locationsName, locationsFile); ifstream inFASTA; m->openInputFile(filename, inFASTA); @@ -566,6 +611,8 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta bool done = false; int count = 0; set lengths; + vector< set > locations; //locations[0] = beginning locations, locations[1] = ending locations + locations.resize(2); //pdiffs, bdiffs, primers, barcodes, revPrimers map faked; @@ -577,7 +624,15 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta Sequence currSeq(inFASTA); m->gobble(inFASTA); + if (fileAligned) { //assume aligned until proven otherwise + lengths.insert(currSeq.getAligned().length()); + if (lengths.size() > 1) { fileAligned = false; } + } + string trashCode = ""; + string locationsString = ""; + int thisPStart = -1; + int thisPEnd = -1; if (currSeq.getName() != "") { if (m->debug) { m->mothurOut("[DEBUG]: seq name = " + currSeq.getName() + ".\n"); } @@ -596,13 +651,25 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta else{ //are you aligned if (aligned) { - if (!keepprimer) { - if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd]); } - else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd])); } + if (!keepprimer) { + if (keepdots) { currSeq.filterToPos(mapAligned[primerEnd-1]+1); } //mapAligned[primerEnd-1] is the location of the last base in the primer. we want to trim to the space just after that. The -1 & +1 ensures if the primer is followed by gaps they are not trimmed causing an aligned sequence dataset to become unaligned. + else { + currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1)); + if (fileAligned) { + thisPStart = mapAligned[primerEnd-1]+1; //locations[0].insert(mapAligned[primerEnd-1]+1); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n"; + } + } } else { if (keepdots) { currSeq.filterToPos(mapAligned[primerStart]); } - else { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); } + else { + currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart])); + if (fileAligned) { + thisPStart = mapAligned[primerStart]; //locations[0].insert(mapAligned[primerStart]); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n"; + } + } } isAligned(currSeq.getAligned(), mapAligned); }else { @@ -617,16 +684,28 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta int primerStart = 0; int primerEnd = 0; bool good = trim.findReverse(currSeq, primerStart, primerEnd); if(!good){ if (nomatch == "reject") { goodSeq = false; } trashCode += "r"; } - else{ - //are you aligned + else{ + //are you aligned if (aligned) { if (!keepprimer) { if (keepdots) { currSeq.filterFromPos(mapAligned[primerStart]); } - else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); } + else { + currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart])); + if (fileAligned) { + thisPEnd = mapAligned[primerStart]; //locations[1].insert(mapAligned[primerStart]); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n"; + } + } } else { - if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd]); } - else { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd])); } + if (keepdots) { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); } + else { + currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1)); + if (fileAligned) { + thisPEnd = mapAligned[primerEnd-1]+1; //locations[1].insert(mapAligned[primerEnd-1]+1); + locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n"; + } + } } } else { @@ -637,8 +716,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta } }else if (ecolifile != "") { //make sure the seqs are aligned - lengths.insert(currSeq.getAligned().length()); - if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; } + if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; } else if (currSeq.getAligned().length() != length) { m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); m->control_pressed = true; break; }else { @@ -653,8 +731,7 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta } }else{ //using start and end to trim //make sure the seqs are aligned - lengths.insert(currSeq.getAligned().length()); - if (lengths.size() > 1) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; } + if (!fileAligned) { m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); m->control_pressed = true; break; } else { if (end != -1) { if (end > currSeq.getAligned().length()) { m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); m->control_pressed = true; break; } @@ -679,7 +756,13 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta //trimming removed all bases if (currSeq.getUnaligned() == "") { goodSeq = false; } - if(goodSeq == 1) { currSeq.printSequence(goodFile); } + if(goodSeq == 1) { + currSeq.printSequence(goodFile); + if (m->debug) { m->mothurOut("[DEBUG]: " + locationsString + "\n"); } + if (locationsString != "") { locationsFile << locationsString; } + if (thisPStart != -1) { locations[0].insert(thisPStart); } + if (thisPEnd != -1) { locations[1].insert(thisPEnd); } + } else { badSeqNames.insert(currSeq.getName()); currSeq.setName(currSeq.getName() + '|' + trashCode); @@ -704,7 +787,16 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta badFile.close(); goodFile.close(); inFASTA.close(); - + locationsFile.close(); + + if (m->debug) { m->mothurOut("[DEBUG]: fileAligned = " + toString(fileAligned) +'\n'); } + + if (fileAligned && !keepdots) { //print out smallest start value and largest end value + if ((locations[0].size() > 1) || (locations[1].size() > 1)) { adjustNeeded = true; } + if (primers.size() != 0) { set::iterator it = locations[0].begin(); pstart = *it; } + if (revPrimer.size() != 0) { set::reverse_iterator it2 = locations[1].rbegin(); pend = *it2; } + } + return count; } catch(exception& e) { @@ -731,6 +823,80 @@ bool PcrSeqsCommand::isAligned(string seq, map& aligned){ exit(1); } } +//********************************************************************************************************************** +int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, int pend){ + try { + ifstream inFasta; + m->openInputFile(goodFasta, inFasta); + + ifstream inLocations; + m->openInputFile(locations, inLocations); + + ofstream out; + m->openOutputFile(goodFasta+".temp", out); + + set lengths; + //cout << pstart << '\t' << pend << endl; + + while(!inFasta.eof()) { + if(m->control_pressed) { break; } + + Sequence seq(inFasta); m->gobble(inFasta); + + string name = ""; + int thisStart = -1; int thisEnd = -1; + if (primers.size() != 0) { inLocations >> name >> thisStart; m->gobble(inLocations); } + if (revPrimer.size() != 0) { inLocations >> name >> thisEnd; m->gobble(inLocations); } + //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl; + //cout << seq.getName() << '\t' << pstart << '\t' << pend << endl; + + if (name != seq.getName()) { m->mothurOut("[ERROR]: name mismatch in pcr.seqs.\n"); } + else { + if (pstart != -1) { + if (thisStart != -1) { + if (thisStart != pstart) { + string dots = ""; + for (int i = pstart; i < thisStart; i++) { dots += "."; } + thisEnd += dots.length(); + dots += seq.getAligned(); + seq.setAligned(dots); + } + } + } + + if (pend != -1) { + if (thisEnd != -1) { + if (thisEnd != pend) { + string dots = seq.getAligned(); + for (int i = thisEnd; i < pend; i++) { dots += "."; } + seq.setAligned(dots); + } + } + } + lengths.insert(seq.getAligned().length()); + } + + seq.printSequence(out); + } + inFasta.close(); + inLocations.close(); + out.close(); + m->mothurRemove(locations); + m->mothurRemove(goodFasta); + m->renameFile(goodFasta+".temp", goodFasta); + + //cout << "final lengths = \n"; + //for (set::iterator it = lengths.begin(); it != lengths.end(); it++) { + // cout << *it << endl; + // } + + return 0; + } + catch(exception& e) { + m->errorOut(e, "PcrSeqsCommand", "adjustDots"); + exit(1); + } +} //********************************************************************/ string PcrSeqsCommand::reverseOligo(string oligo){ try { -- 2.39.2