From 21784ce4eb9b292db6f6191ed504f5e20754c810 Mon Sep 17 00:00:00 2001 From: pschloss Date: Mon, 8 Jun 2009 14:34:08 +0000 Subject: [PATCH] added some features to trim.seqs --- Mothur.xcodeproj/project.pbxproj | 8 +-- trimseqscommand.cpp | 87 +++++++++++++++++++++++++++----- trimseqscommand.h | 9 ++-- validparameter.cpp | 2 +- 4 files changed, 83 insertions(+), 23 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 6ea76d7..f6eeacb 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -463,10 +463,10 @@ 37E5F3E20F29FD4200F8D827 /* treenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = treenode.cpp; sourceTree = SOURCE_ROOT; }; 37E5F4900F2A3DA800F8D827 /* readtreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = readtreecommand.h; sourceTree = SOURCE_ROOT; }; 37E5F4910F2A3DA800F8D827 /* readtreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = readtreecommand.cpp; sourceTree = SOURCE_ROOT; }; - 7E09C5120FDA79C5002ECAE5 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reversecommand.h; sourceTree = ""; }; - 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reversecommand.cpp; sourceTree = ""; }; - 7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trimseqscommand.h; sourceTree = ""; }; - 7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trimseqscommand.cpp; sourceTree = ""; }; + 7E09C5120FDA79C5002ECAE5 /* reversecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = reversecommand.h; sourceTree = SOURCE_ROOT; }; + 7E09C5130FDA79C5002ECAE5 /* reversecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = reversecommand.cpp; sourceTree = SOURCE_ROOT; }; + 7E09C5340FDA7F65002ECAE5 /* trimseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = trimseqscommand.h; sourceTree = SOURCE_ROOT; }; + 7E09C5350FDA7F65002ECAE5 /* trimseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trimseqscommand.cpp; sourceTree = SOURCE_ROOT; }; 7E412F420F8D213C00381DD0 /* libshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = libshuff.h; sourceTree = SOURCE_ROOT; }; 7E412F470F8D21B600381DD0 /* slibshuff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = slibshuff.h; sourceTree = SOURCE_ROOT; }; 7E412F480F8D21B600381DD0 /* slibshuff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = slibshuff.cpp; sourceTree = SOURCE_ROOT; }; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 95a85ac..259adc1 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -14,18 +14,30 @@ TrimSeqsCommand::TrimSeqsCommand(){ try { - oligos = 0; - totalBarcodeCount = 0; - matchBarcodeCount = 0; - globaldata = GlobalData::getInstance(); + + oligos = 0; + if(globaldata->getFastaFile() == ""){ cout << "you need to at least enter a fasta file name" << endl; } - if(isTrue(globaldata->getFlip())) { flip = 1; } - if(globaldata->getOligosFile() != "") { oligos = 1; } - if(!flip && !oligos) { cout << "huh?" << endl; } + if(isTrue(globaldata->getFlip())) { flip = 1; } + if(globaldata->getOligosFile() != "") { oligos = 1; } + + if(globaldata->getMaxAmbig() != "-1") { maxAmbig = atoi(globaldata->getMaxAmbig().c_str()); } + else { maxAmbig = -1; } + + if(globaldata->getMaxHomoPolymer() != "-1") { maxHomoP = atoi(globaldata->getMaxHomoPolymer().c_str()); } + else { maxHomoP = 0; } + + if(globaldata->getMinLength() != "-1") { minLength = atoi(globaldata->getMinLength().c_str()); } + else { minLength = 0; } + + if(globaldata->getMaxLength() != "-1") { maxLength = atoi(globaldata->getMaxLength().c_str()); } + else { maxLength = 0; } + + if(!flip && !oligos && !maxLength && !minLength && (maxAmbig==-1) && !maxHomoP ){ cout << "huh?" << endl; } } catch(exception& e) { @@ -83,6 +95,19 @@ int TrimSeqsCommand::execute(){ success = stripReverse(currSeq); if(!success){ trashCode += 'r'; } } + if(minLength > 0 || maxLength > 0){ + success = cullLength(currSeq); + if(!success){ trashCode += 'l'; } + } + if(maxHomoP > 0){ + success = cullHomoP(currSeq); + if(!success){ trashCode += 'h'; } + } + if(maxAmbig != -1){ + success = cullAmbigs(currSeq); + if(!success){ trashCode += 'n'; } + } + if(flip){ currSeq.reverseComplement(); } // should go last if(trashCode.length() == 0){ @@ -168,12 +193,10 @@ bool TrimSeqsCommand::stripBarcode(Sequence& seq, string& group){ if (rawSequence.compare(0,oligo.length(),oligo) == 0){ group = it->second; seq.setUnaligned(rawSequence.substr(oligo.length())); - matchBarcodeCount++; success = 1; break; } } - totalBarcodeCount++; return success; } @@ -195,13 +218,11 @@ bool TrimSeqsCommand::stripForward(Sequence& seq){ if (rawSequence.compare(0,oligo.length(),oligo) == 0){ seq.setUnaligned(rawSequence.substr(oligo.length())); - matchFPrimerCount++; success = 1; break; } } - totalFPrimerCount++; return success; } @@ -223,13 +244,53 @@ bool TrimSeqsCommand::stripReverse(Sequence& seq){ if(rawSequence.compare(rawSequence.length()-oligo.length(),oligo.length(),oligo) == 0){ seq.setUnaligned(rawSequence.substr(rawSequence.length()-oligo.length())); - matchRPrimerCount++; success = 1; break; } } + return success; + +} + +//*************************************************************************************************************** + +bool TrimSeqsCommand::cullLength(Sequence& seq){ + + int length = seq.getNumBases(); + bool success = 0; //guilty until proven innocent + + if(length >= minLength && maxLength == 0) { success = 1; } + else if(length >= minLength && length <= maxLength) { success = 1; } + else { success = 0; } + + return success; + +} + +//*************************************************************************************************************** + +bool TrimSeqsCommand::cullHomoP(Sequence& seq){ + + int longHomoP = seq.getLongHomoPolymer(); + bool success = 0; //guilty until proven innocent + + if(longHomoP <= maxHomoP){ success = 1; } + else { success = 0; } + + return success; + +} + +//*************************************************************************************************************** + +bool TrimSeqsCommand::cullAmbigs(Sequence& seq){ + + int numNs = seq.getAmbigBases(); + bool success = 0; //guilty until proven innocent + + if(numNs <= maxAmbig){ success = 1; } + else { success = 0; } - totalRPrimerCount++; return success; } diff --git a/trimseqscommand.h b/trimseqscommand.h index 565d528..800ec3d 100644 --- a/trimseqscommand.h +++ b/trimseqscommand.h @@ -26,15 +26,14 @@ private: bool stripBarcode(Sequence&, string&); bool stripForward(Sequence&); bool stripReverse(Sequence&); + bool cullLength(Sequence&); + bool cullHomoP(Sequence&); + bool cullAmbigs(Sequence&); GlobalData* globaldata; - int totalBarcodeCount, matchBarcodeCount; // to be removed - int totalFPrimerCount, matchFPrimerCount; // to be removed - int totalRPrimerCount, matchRPrimerCount; // to be removed - bool oligos, flip; - int numFPrimers, numRPrimers; + int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength; vector forPrimer, revPrimer; map barcodes; }; diff --git a/validparameter.cpp b/validparameter.cpp index 4845425..ae5fc3f 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -285,7 +285,7 @@ void ValidParameters::initCommandParameters() { string reverseseqsArray[] = {"fasta"}; commandParameters["reverse.seqs"] = addParameters(reverseseqsArray, sizeof(reverseseqsArray)/sizeof(string)); - string trimseqsArray[] = {"fasta", "flip", "oligos"}; + string trimseqsArray[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength"}; commandParameters["trim.seqs"] = addParameters(trimseqsArray, sizeof(trimseqsArray)/sizeof(string)); string vennArray[] = {"groups","line","label","calc"}; -- 2.39.2