-#!/usr/bin/env perl
+#!/usr/bin/env ruby
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# Calculate the mean or local mean of quality SCORES in the stream.
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# This program is part of the Biopieces framework (www.biopieces.org).
-use warnings;
-use strict;
-use Maasha::Biopieces;
-use Maasha::Fastq;
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# Calculate the mean or local mean of quality SCORES in the stream.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-my ( $options, $in, $out, $record, $mean, $pos );
-
-$options = Maasha::Biopieces::parse_options(
- [
- { long => 'local', short => 'l', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'window_size', short => 'w', type => 'uint', mandatory => 'no', default => 5, allowed => undef, disallowed => 0 },
- { long => 'min', short => 'm', type => 'float', mandatory => 'no', default => 15, allowed => undef, disallowed => undef },
- ]
-);
-
-$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
-$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
-
-while ( $record = Maasha::Biopieces::get_record( $in ) )
-{
- if ( $record->{ 'SCORES' } )
- {
- if ( $options->{ "local" } )
+require 'maasha/biopieces'
+require 'maasha/seq'
+require 'inline'
+
+# Opening class Seq to add scores_mean_local(_C) methods.
+class Seq
+ # Method to run a sliding window of a specified size across a Phred type
+ # scores string and calculate for each window the mean score and return
+ # the minimum mean score.
+ def scores_mean_local(window_size)
+ scores_mean_local_C(self.qual, self.length, Seq::SCORE_BASE, window_size)
+ end
+
+ inline do |builder|
+ builder.c %{
+ VALUE scores_mean_local_C(
+ VALUE _qual,
+ VALUE _qual_len,
+ VALUE _score_base,
+ VALUE _window_size
+ )
+ {
+ unsigned char *qual = (unsigned char *) StringValuePtr(_qual);
+ unsigned int qual_len = FIX2UINT(_qual_len);
+ unsigned int score_base = FIX2UINT(_score_base);
+ unsigned int window_size = FIX2UINT(_window_size);
+ unsigned int sum = 0;
+ unsigned int i = 0;
+ float mean = 0.0;
+
+ // fill window
+ for (i = 0; i < window_size; i++)
+ sum += qual[i] - score_base;
+
+ mean = sum / window_size;
+
+ // run window across the rest of the scores
+ while (i < qual_len)
{
- ( $mean, $pos ) = Maasha::Fastq::solexa_str_mean_window( $record->{ 'SCORES' }, $options->{ 'window_size' }, $options->{ 'min' } );
- $record->{ 'SCORES_LOCAL_POS' } = $pos;
- $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", $mean);
- }
- else
- {
- $record->{ 'SCORES_MEAN' } = sprintf( "%.2f", Maasha::Fastq::solexa_str_mean( $record->{ 'SCORES' } ) );
- }
- }
-
- Maasha::Biopieces::put_record( $record, $out );
-}
-
-Maasha::Biopieces::close_stream( $in );
-Maasha::Biopieces::close_stream( $out );
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-BEGIN
-{
- Maasha::Biopieces::status_set();
-}
+ sum += qual[i] - score_base;
+ sum -= qual[i - window_size] - score_base;
+ mean = ( mean < sum / window_size ) ? mean : sum / window_size;
+
+ i++;
+ }
-END
-{
- Maasha::Biopieces::status_log();
-}
+ return rb_float_new(mean);
+ }
+ }
+ end
+end
+
+casts = []
+casts << {:long=>'local', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'window_size', :short=>'w', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'min', :short=>'m', :type=>'float', :mandatory=>false, :default=>15, :allowed=>nil, :disallowed=>nil}
+
+options = Biopieces.options_parse(ARGV, casts)
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+ input.each_record do |record|
+ if record[:SEQ] and record[:SCORES] and record[:SEQ].length > 0
+ entry = Seq.new_bp(record)
+
+ if options[:local]
+ record[:SCORES_MEAN_LOCAL] = entry.scores_mean_local(options[:window_size]).round(2)
+ else
+ record[:SCORES_MEAN] = entry.scores_mean.round(2)
+ end
+ end
+
+ output.puts record
+ end
+end
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<