X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fmean_scores;h=ed9ab8c409e82e24d1e30c5e053afebe4674a651;hb=af282a65d141826c15944437b07a0353dd14e79c;hp=bd47e5b8223faa76793c026f6982a50a048c16b6;hpb=32c672cb89e2046c08a0f65b7df1fe19f2f9bcd2;p=biopieces.git diff --git a/bp_bin/mean_scores b/bp_bin/mean_scores index bd47e5b..ed9ab8c 100755 --- a/bp_bin/mean_scores +++ b/bp_bin/mean_scores @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2012 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,72 +18,91 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Calculate the mean or local mean of quality SCORES in the stream. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use warnings; -use strict; -use Maasha::Biopieces; -use Maasha::Fastq; +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Calculate the mean or local mean of quality SCORES in the stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -my ( $options, $in, $out, $record, $mean, $pos ); - -$options = Maasha::Biopieces::parse_options( - [ - { long => 'local', short => 'l', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'window_size', short => 'w', type => 'uint', mandatory => 'no', default => 5, allowed => undef, disallowed => 0 }, - { long => 'min', short => 'm', type => 'float', mandatory => 'no', default => 15, allowed => undef, disallowed => undef }, - ] -); - -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); - -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - if ( $record->{ 'SCORES' } ) - { - if ( $options->{ "local" } ) +require 'maasha/biopieces' +require 'maasha/seq' +require 'inline' + +# Opening class Seq to add scores_mean_local(_C) methods. +class Seq + # Method to run a sliding window of a specified size across a Phred type + # scores string and calculate for each window the mean score and return + # the minimum mean score. + def scores_mean_local(window_size) + scores_mean_local_C(self.qual, self.length, Seq::SCORE_BASE, window_size) + end + + inline do |builder| + builder.c %{ + VALUE scores_mean_local_C( + VALUE _qual, + VALUE _qual_len, + VALUE _score_base, + VALUE _window_size + ) + { + unsigned char *qual = (unsigned char *) StringValuePtr(_qual); + unsigned int qual_len = FIX2UINT(_qual_len); + unsigned int score_base = FIX2UINT(_score_base); + unsigned int window_size = FIX2UINT(_window_size); + unsigned int sum = 0; + unsigned int i = 0; + float mean = 0.0; + + // fill window + for (i = 0; i < window_size; i++) + sum += qual[i] - score_base; + + mean = sum / window_size; + + // run window across the rest of the scores + while (i < qual_len) { - ( $mean, $pos ) = Maasha::Fastq::solexa_str_mean_window( $record->{ 'SCORES' }, $options->{ 'window_size' }, $options->{ 'min' } ); - $record->{ 'SCORES_LOCAL_POS' } = $pos; - $record->{ 'SCORES_LOCAL_MEAN' } = sprintf( "%.2f", $mean); - } - else - { - $record->{ 'SCORES_MEAN' } = sprintf( "%.2f", Maasha::Fastq::solexa_str_mean( $record->{ 'SCORES' } ) ); - } - } - - Maasha::Biopieces::put_record( $record, $out ); -} - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -BEGIN -{ - Maasha::Biopieces::status_set(); -} + sum += qual[i] - score_base; + sum -= qual[i - window_size] - score_base; + mean = ( mean < sum / window_size ) ? mean : sum / window_size; + + i++; + } -END -{ - Maasha::Biopieces::status_log(); -} + return rb_float_new(mean); + } + } + end +end + +casts = [] +casts << {:long=>'local', :short=>'l', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'window_size', :short=>'w', :type=>'uint', :mandatory=>false, :default=>5, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'min', :short=>'m', :type=>'float', :mandatory=>false, :default=>15, :allowed=>nil, :disallowed=>nil} + +options = Biopieces.options_parse(ARGV, casts) + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record[:SEQ] and record[:SCORES] and record[:SEQ].length > 0 + entry = Seq.new_bp(record) + + if options[:local] + record[:SCORES_MEAN_LOCAL] = entry.scores_mean_local(options[:window_size]).round(2) + else + record[:SCORES_MEAN] = entry.scores_mean.round(2) + end + end + + output.puts record + end +end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<