From: martinahansen Date: Tue, 4 Aug 2009 15:20:57 +0000 (+0000) Subject: added a skip_scores switch to read_solexa and read_fastq X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=e6d927577f0e4b46ccd4f978f6c5dcbf334c25aa;p=biopieces.git added a skip_scores switch to read_solexa and read_fastq git-svn-id: http://biopieces.googlecode.com/svn/trunk@624 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/read_fastq b/bp_bin/read_fastq index 088f8d6..df2ad6e 100755 --- a/bp_bin/read_fastq +++ b/bp_bin/read_fastq @@ -40,9 +40,10 @@ my ( $options, $in, $out, $record, $data_in, $num, $entry ); $options = Maasha::Biopieces::parse_options( [ - { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, - { long => 'quality', short => 'q', type => 'uint', mandatory => 'no', default => 20, allowed => undef, disallowed => undef }, + { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => 0 }, + { long => 'quality', short => 'q', type => 'uint', mandatory => 'no', default => 20, allowed => undef, disallowed => undef }, + { long => 'skip_scores', short => 's', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); @@ -63,10 +64,13 @@ if ( $options->{ 'data_in' } ) { if ( $record = Maasha::Fastq::fastq2biopiece( $entry ) ) { - Maasha::Fastq::lowercase_low_scores( $record->{ 'SEQ' }, $record->{ 'SCORES' }, $options->{ 'quality' } ); + if ( not $options->{ 'skip_scores' } ) + { + Maasha::Fastq::lowercase_low_scores( $record->{ 'SEQ' }, $record->{ 'SCORES' }, $options->{ 'quality' } ); - $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml - $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) ); + $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml + $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) ); + } Maasha::Biopieces::put_record( $record, $out ); } diff --git a/bp_bin/read_solexa b/bp_bin/read_solexa index 9bb251a..b1a84a4 100755 --- a/bp_bin/read_solexa +++ b/bp_bin/read_solexa @@ -42,10 +42,11 @@ my ( $options, $in, $out, $record, $data_in, $num, $entry, @seqs, @scores, $i ); $options = Maasha::Biopieces::parse_options( [ - { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, - { long => 'format', short => 'f', type => 'string', mandatory => 'no', default => 'octal', allowed => 'octal,decimal', disallowed => undef }, - { long => 'quality', short => 'q', type => 'uint', mandatory => 'no', default => 20, allowed => undef, disallowed => undef }, + { long => 'data_in', short => 'i', type => 'files!', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, + { long => 'num', short => 'n', type => 'uint', mandatory => 'no', default => undef, allowed => undef, disallowed => '0' }, + { long => 'format', short => 'f', type => 'string', mandatory => 'no', default => 'octal', allowed => 'octal,decimal', disallowed => undef }, + { long => 'quality', short => 'q', type => 'uint', mandatory => 'no', default => 20, allowed => undef, disallowed => undef }, + { long => 'skip_scores', short => 's', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, ] ); @@ -66,12 +67,15 @@ if ( $options->{ 'data_in' } ) { if ( $record = Maasha::Fastq::fastq2biopiece( $entry ) ) { - $record->{ 'SCORES' } =~ s/(\d+) ?/Maasha::Fastq::score2phred( $1 )/ge if $options->{ 'format' } eq 'decimal'; - Maasha::Fastq::solexa2phred( $record->{ 'SCORES' } ); - Maasha::Fastq::lowercase_low_scores( $record->{ 'SEQ' }, $record->{ 'SCORES' }, $options->{ 'quality' } ); - - $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml - $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) ); + if ( not $options->{ 'skip_scores' } ) + { + $record->{ 'SCORES' } =~ s/(\d+) ?/Maasha::Fastq::score2phred( $1 )/ge if $options->{ 'format' } eq 'decimal'; + Maasha::Fastq::solexa2phred( $record->{ 'SCORES' } ); + Maasha::Fastq::lowercase_low_scores( $record->{ 'SEQ' }, $record->{ 'SCORES' }, $options->{ 'quality' } ); + + $record->{ 'SCORES' } =~ s/(.)/ord( $1 ) - 33 . ";"/ge; # http://maq.sourceforge.net/fastq.shtml + $record->{ 'SCORE_MEAN' } = sprintf( "%.2f", Maasha::Calc::mean( [ split /;/, $record->{ 'SCORES' } ] ) ); + } Maasha::Biopieces::put_record( $record, $out ); } diff --git a/code_perl/Maasha/Fastq.pm b/code_perl/Maasha/Fastq.pm index 4782486..38ce471 100644 --- a/code_perl/Maasha/Fastq.pm +++ b/code_perl/Maasha/Fastq.pm @@ -220,6 +220,7 @@ sub fastq2biopiece $record->{ 'SEQ' } = $entry->[ SEQ ]; $record->{ 'SEQ_NAME' } = $entry->[ SEQ_NAME ]; $record->{ 'SCORES' } = $entry->[ SCORES ]; + $record->{ 'SEQ_LEN' } = length $entry->[ SEQ ]; return wantarray ? %{ $record } : $record; }