From e1df26dfc188e331d236b08b66b7dfc68e35e5aa Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 28 Jul 2009 08:02:15 +0000 Subject: [PATCH] added quality threshold to read_fastq git-svn-id: http://biopieces.googlecode.com/svn/trunk@603 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/read_fastq | 39 +++++++++++++++++++++++++++++++++++++-- code_perl/Maasha/Fastq.pm | 2 +- 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/bp_bin/read_fastq b/bp_bin/read_fastq index ce1db6d..8aeedf9 100755 --- a/bp_bin/read_fastq +++ b/bp_bin/read_fastq @@ -41,7 +41,8 @@ my ( $options, $in, $out, $record, $data_in, $num, $entry ); $options = Maasha::Biopieces::parse_options( [ { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, + { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, + { long => 'quality', short => 'q', type => 'uint', mandatory => 'no', default => 20, allowed => undef, disallowed => undef }, ] ); @@ -60,7 +61,10 @@ if ( $options->{ 'data_in' } ) while ( $entry = Maasha::Fastq::get_entry( $data_in ) ) { - if ( $record = Maasha::Fastq::fastq2biopiece( $entry ) ) { + if ( $record = Maasha::Fastq::fastq2biopiece( $entry ) ) + { + lowercase_low_scores( $record, $options->{ 'quality' } ); + Maasha::Biopieces::put_record( $record, $out ); } @@ -79,6 +83,37 @@ Maasha::Biopieces::close_stream( $out ); # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +sub lowercase_low_scores +{ + # Martin A. Hansen, July 2009. + + # Lowercase residues in sequence that are below a given score threshold. + + # This one would be lovely to have written in C. + + my ( $record, # Biopiece record + $quality, # quality threshold + ) = @_; + + # Returns nothing. + + my ( @seqs, @scores, $i ); + + @seqs = split //, $record->{ 'SEQ' }; + @scores = split /;/, $record->{ 'SCORES' }; + + for ( $i = 0; $i < @scores; $i++ ) { + $seqs[ $i ] = lc $seqs[ $i ] if $scores[ $i ] < $quality; + } + + $record->{ 'SEQ' } = join "", @seqs; + $record->{ 'SCORES' } = join ";", @scores; +} + + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + BEGIN { Maasha::Biopieces::status_set(); diff --git a/code_perl/Maasha/Fastq.pm b/code_perl/Maasha/Fastq.pm index 5149166..be3c87c 100644 --- a/code_perl/Maasha/Fastq.pm +++ b/code_perl/Maasha/Fastq.pm @@ -107,7 +107,7 @@ sub fastq2biopiece # Converts a FASTQ entry to a Biopiece record, where # the FASTQ quality scores are converted to numerics. - my ( $entry, # FASTQ entry, + my ( $entry, # FASTQ entry, ) = @_; # Returns a hash. -- 2.39.5