From a9b365d93c0cc3a515cac9a9736ea0d607315236 Mon Sep 17 00:00:00 2001 From: SarahsWork Date: Wed, 6 Mar 2013 14:45:11 -0500 Subject: [PATCH] added pacbio parameter to fastq.info. fixed issue with paired primers in trim.seqs --- parsefastaqcommand.cpp | 31 ++++++++++++++++++++++++------- parsefastaqcommand.h | 2 +- qualityscores.h | 2 +- trimseqscommand.cpp | 36 +++++++++++++++++++++++++----------- trimseqscommand.h | 15 ++++++++++----- 5 files changed, 61 insertions(+), 25 deletions(-) diff --git a/parsefastaqcommand.cpp b/parsefastaqcommand.cpp index 704e752..74e3e2b 100644 --- a/parsefastaqcommand.cpp +++ b/parsefastaqcommand.cpp @@ -16,6 +16,7 @@ vector ParseFastaQCommand::setParameters(){ CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq); CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta); CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual); + CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio); CommandParameter pformat("format", "Multiple", "sanger-illumina-solexa-illumina1.8+", "sanger", "", "", "","",false,false,true); parameters.push_back(pformat); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -39,6 +40,7 @@ string ParseFastaQCommand::getHelpString(){ helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n"; helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n"; helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n"; + helpString += "The pacbio parameter allows you to indicate .... When set to true, quality scores of 0 will results in a corresponding base of N. Default=F.\n"; helpString += "Example fastq.info(fastaq=test.fastaq).\n"; helpString += "Note: No spaces between parameter labels (i.e. fastq), '=' and yourFastQFile.\n"; return helpString; @@ -132,7 +134,11 @@ ParseFastaQCommand::ParseFastaQCommand(string option){ fasta = m->isTrue(temp); temp = validParameter.validFile(parameters, "qfile", false); if(temp == "not found"){ temp = "T"; } - qual = m->isTrue(temp); + qual = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "pacbio", false); if(temp == "not found"){ temp = "F"; } + pacbio = m->isTrue(temp); + format = validParameter.validFile(parameters, "format", false); if (format == "not found"){ format = "sanger"; } @@ -209,22 +215,33 @@ int ParseFastaQCommand::execute(){ if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } } if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; } - //print sequence info to files - if (fasta) { outFasta << ">" << name << endl << sequence << endl; } - - if (qual) { - vector qualScores = convertQual(quality); + vector qualScores; + if (qual) { + qualScores = convertQual(quality); outQual << ">" << name << endl; for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; } outQual << endl; } + + if (m->control_pressed) { break; } + + if (pacbio) { + if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already + for (int i = 0; i < qualScores.size(); i++) { + if (qualScores[i] == 0){ sequence[i] = 'N'; } + } + } + + //print sequence info to files + if (fasta) { outFasta << ">" << name << endl << sequence << endl; } + } in.close(); if (fasta) { outFasta.close(); } if (qual) { outQual.close(); } - if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; } + if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; } //set fasta file as new current fastafile string current = ""; diff --git a/parsefastaqcommand.h b/parsefastaqcommand.h index cb86bd6..5ae6b9c 100644 --- a/parsefastaqcommand.h +++ b/parsefastaqcommand.h @@ -36,7 +36,7 @@ private: vector outputNames; string outputDir, fastaQFile, format; - bool abort, fasta, qual; + bool abort, fasta, qual, pacbio; vector convertQual(string); vector convertTable; diff --git a/qualityscores.h b/qualityscores.h index 8769973..500d3e9 100644 --- a/qualityscores.h +++ b/qualityscores.h @@ -38,7 +38,7 @@ public: void updateReverseMap(vector >&, int, int, int); void setName(string n); void setScores(vector qs) { qScores = qs; seqLength = qScores.size(); } - + vector getScores() { return qScores; } private: diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 4b90cda..935b9a9 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -686,10 +686,12 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string for (map::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) { oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer rpairedPrimers[it->first] = tempPair; + //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl; } for (map::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) { oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode rpairedBarcodes[it->first] = tempPair; + //cout << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl; } rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); } @@ -712,13 +714,15 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string Sequence currSeq(inFASTA); m->gobble(inFASTA); //cout << currSeq.getName() << '\t' << currSeq.getUnaligned().length() << endl; + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); - QualityScores currQual; + QualityScores currQual; QualityScores savedQual; if(qFileName != ""){ currQual = QualityScores(qFile); m->gobble(qFile); + savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores()); //cout << currQual.getName() << endl; } - + string origSeq = currSeq.getUnaligned(); if (origSeq != "") { @@ -747,7 +751,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if(numFPrimers != 0){ success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward); - if(success > pdiffs) { trashCode += 'f'; } + if(success > pdiffs) { + //if (pairedOligos) { trashCode += trimOligos->getTrashCode(); } + //else { trashCode += 'f'; } + trashCode += 'f'; + } else{ currentSeqsDiffs += success; } } @@ -767,17 +775,21 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string int thisPrimerIndex = 0; if(numBarcodes != 0){ - thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex); + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); if(thisSuccess > bdiffs) { thisTrashCode += 'b'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if(numFPrimers != 0){ - thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward); - if(thisSuccess > pdiffs) { thisTrashCode += 'f'; } + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, keepforward); + if(thisSuccess > pdiffs) { + //if (pairedOligos) { thisTrashCode += rtrimOligos->getTrashCode(); } + //else { thisTrashCode += 'f'; } + thisTrashCode += 'f'; + } else{ thisCurrentSeqsDiffs += thisSuccess; } } - + if (thisCurrentSeqsDiffs > tdiffs) { thisTrashCode += 't'; } if (thisTrashCode == "") { @@ -786,9 +798,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string currentSeqsDiffs = thisCurrentSeqsDiffs; barcodeIndex = thisBarcodeIndex; primerIndex = thisPrimerIndex; - currSeq.reverseComplement(); + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); if(qFileName != ""){ - currQual.flipQScores(); + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); } } } @@ -1551,7 +1565,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< // get rest of line in case there is a primer name while (!inOligos.eof()) { char c = inOligos.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { group += c; } } @@ -1586,7 +1600,7 @@ bool TrimSeqsCommand::getOligos(vector >& fastaFileNames, vector< string temp = ""; while (!inOligos.eof()) { char c = inOligos.get(); - if (c == 10 || c == 13){ break; } + if (c == 10 || c == 13 || c == -1){ break; } else if (c == 32 || c == 9){;} //space or tab else { temp += c; } } diff --git a/trimseqscommand.h b/trimseqscommand.h index 882f716..de3d839 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -299,11 +299,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int currentSeqsDiffs = 0; Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA); + Sequence savedSeq(currSeq.getName(), currSeq.getAligned()); - QualityScores currQual; + QualityScores currQual; QualityScores savedQual; if(pDataArray->qFileName != ""){ currQual = QualityScores(qFile); pDataArray->m->gobble(qFile); + savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores()); } + string origSeq = currSeq.getUnaligned(); if (origSeq != "") { @@ -353,13 +356,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ int thisPrimerIndex = 0; if(numBarcodes != 0){ - thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex); + thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex); if(thisSuccess > pDataArray->bdiffs) { thisTrashCode += 'b'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } if(pDataArray->numFPrimers != 0){ - thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward); + thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward); if(thisSuccess > pDataArray->pdiffs) { thisTrashCode += 'f'; } else{ thisCurrentSeqsDiffs += thisSuccess; } } @@ -372,9 +375,11 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){ currentSeqsDiffs = thisCurrentSeqsDiffs; barcodeIndex = thisBarcodeIndex; primerIndex = thisPrimerIndex; - currSeq.reverseComplement(); + savedSeq.reverseComplement(); + currSeq.setAligned(savedSeq.getAligned()); if(pDataArray->qFileName != ""){ - currQual.flipQScores(); + savedQual.flipQScores(); + currQual.setScores(savedQual.getScores()); } } } -- 2.39.2