-#!/usr/bin/env perl
+#!/usr/bin/env ruby
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# http://www.gnu.org/copyleft/gpl.html
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-# Create a lineplot from quality scores in the stream.
-
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-use warnings;
-use strict;
-use Maasha::Biopieces;
-use Maasha::Fastq;
-use Maasha::Plot;
-use Maasha::Calc;
-
+# Plot the mean quality score at each sequence position, optionally with the relative count per position.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-my ( $options, $in, $out, $default, $terminals, $formats, $record, %count_hash, %sum_hash, $i, @scores, @data_list, $result, $fh, $tmp_dir );
-
-$default = "Quality Scores";
-$terminals = "dumb,x11,aqua,post,svg";
-$formats = "solexa,phred,decimal";
-
-$options = Maasha::Biopieces::parse_options(
- [
- { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'format' , short => 'f', type => 'string', mandatory => 'no', default => "solexa", allowed => $formats, disallowed => undef },
- { long => 'terminal', short => 't', type => 'string', mandatory => 'no', default => 'dumb', allowed => $terminals, disallowed => undef },
- { long => 'title', short => 'T', type => 'string', mandatory => 'no', default => $default, allowed => undef, disallowed => undef },
- { long => 'xlabel', short => 'X', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- { long => 'ylabel', short => 'Y', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
- ]
-);
-
-$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
-$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
-
-while ( $record = Maasha::Biopieces::get_record( $in ) )
-{
- if ( $record->{ 'SCORES' } )
- {
- if ( $options->{ 'format' } eq "solexa" )
- {
- for ( $i = 0; $i < length $record->{ 'SCORES' }; $i++ )
- {
- $count_hash{ $i }++;
- $sum_hash{ $i } += Maasha::Fastq::solexa2dec( substr $record->{ "SCORES" }, $i, 1 );
- }
- }
- elsif ( $options->{ 'format' } eq "phred" )
- {
- for ( $i = 0; $i < length $record->{ 'SCORES' }; $i++ )
- {
- $count_hash{ $i }++;
- $sum_hash{ $i } += Maasha::Fastq::phred2dec( substr $record->{ "SCORES" }, $i, 1 );
- }
- }
- else
- {
- @scores = split ";", $record->{ 'SCORES' };
-
- for ( $i = 0; $i < @scores; $i++ )
- {
- $count_hash{ $i }++;
- $sum_hash{ $i } += $scores[ $i ];
- }
- }
- }
-
- Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
-}
-
-for ( $i = 0; $i < keys %count_hash; $i++ ) {
- push @data_list, [ $sum_hash{ $i } / $count_hash{ $i } ];
-}
-
-$tmp_dir = Maasha::Biopieces::get_tmpdir();
-
-$result = Maasha::Plot::lineplot_simple( \@data_list, $options, $tmp_dir );
-
-$fh = Maasha::Biopieces::write_stream( $options->{ "data_out" } );
-
-print $fh "$_\n" foreach @{ $result };
-
-close $fh;
-
-Maasha::Biopieces::close_stream( $in );
-Maasha::Biopieces::close_stream( $out );
-
-
-# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-
-BEGIN
-{
- Maasha::Biopieces::status_set();
-}
-
-
-END
-{
- Maasha::Biopieces::status_log();
-}
+require 'maasha/biopieces'
+require 'maasha/seq'
+require 'gnuplot'
+require 'narray'
+
+# Defaults for the plot options below.
+terminals = "dumb,x11,aqua,post,pdf,png,svg"
+title     = "Mean Quality Scores"
+xlabel    = "Sequence position"
+ylabel    = "Mean score"
+
+# Option table: one row per command line option, in the order
+# long name, short name, type, default value, allowed values.
+# All options are optional and none has a disallowed list.
+casts = [
+  ['no_stream', 'x', 'flag',   nil,    nil],
+  ['count',     'c', 'flag',   nil,    nil],
+  ['data_out',  'o', 'file',   nil,    nil],
+  ['terminal',  't', 'string', 'dumb', terminals],
+  ['title',     'T', 'string', title,  nil],
+  ['xlabel',    'X', 'string', xlabel, nil],
+  ['ylabel',    'Y', 'string', ylabel, nil]
+].map do |long, short, type, default, allowed|
+  {:long=>long, :short=>short, :type=>type, :mandatory=>false, :default=>default, :allowed=>allowed, :disallowed=>nil}
+end
+
+options = Biopieces.options_parse(ARGV, casts)
+
+# Longest SCORES string that can be accumulated; longer ones raise.
+SCORES_MAX = 100_000
+
+# Per-position running totals: summed scores and number of contributing records.
+scores_vec = NArray.int(SCORES_MAX)
+count_vec  = NArray.int(SCORES_MAX)
+max        = 0   # length of the longest SCORES string seen so far
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  input.each_record do |record|
+    scores = record[:SCORES]
+    len    = scores ? scores.length : 0
+
+    if len > 0
+      raise BiopiecesError, "score string too long: #{scores.length} > #{SCORES_MAX}" if len > SCORES_MAX
+
+      span = 0 ... len
+
+      # Convert the score characters to byte values, subtract Seq::SCORE_BASE
+      # and add the result to the totals for the positions this record covers.
+      scores_vec[span] += NArray.to_na(scores, "byte") - Seq::SCORE_BASE
+      count_vec[span]  += 1
+
+      max = [max, len].max
+    end
+
+    # Every record is passed on, with or without SCORES, unless streaming is disabled.
+    output.puts record unless options[:no_stream]
+  end
+end
+
+if max.zero?
+  # No record carried a non-empty SCORES string, so there is nothing to plot.
+  # Without this guard x would be empty and the xrange computation below
+  # would fail with NoMethodError on nil.
+  warn "no SCORES found in stream - skipping plot"
+else
+  # Mean score per position: summed scores divided by the number of records
+  # that reach that position. Every position below max has a count of at
+  # least one, so the division is safe.
+  mean_vec = scores_vec[0 ... max].to_f / count_vec[0 ... max]
+
+  # Rescale the counts so the largest count equals Seq::SCORE_MAX, which lets
+  # the count curve share the y axis with the mean scores.
+  rel_count_vec  = count_vec[0 ... max].to_f
+  rel_count_vec *= (Seq::SCORE_MAX / rel_count_vec.max(0).to_f)
+
+  x  = (1 .. max).to_a      # 1-based sequence positions
+  y1 = mean_vec.to_a
+  y2 = rel_count_vec.to_a
+
+  Gnuplot.open do |gp|
+    Gnuplot::Plot.new(gp) do |plot|
+      plot.terminal options[:terminal]
+      plot.title    options[:title]
+      plot.xlabel   options[:xlabel]
+      plot.ylabel   options[:ylabel]
+      # Without --data_out the plot is written to stderr.
+      plot.output   options[:data_out] || "/dev/stderr"
+      # Leave one position of padding on either side of the boxes.
+      plot.xrange   "[#{x.min - 1}:#{x.max + 1}]"
+      plot.yrange   "[#{Seq::SCORE_MIN}:#{Seq::SCORE_MAX}]"
+      plot.style    "fill solid 0.5 border"
+      plot.xtics    "out"
+      plot.ytics    "out"
+
+      plot.data << Gnuplot::DataSet.new([x, y1]) do |ds|
+        ds.with  = "boxes"
+        ds.title = "mean score"
+      end
+
+      # Optional overlay: relative number of records covering each position.
+      if options[:count]
+        plot.data << Gnuplot::DataSet.new([x, y2]) do |ds|
+          ds.with  = "lines lt rgb \"black\""
+          ds.title = "relative count"
+        end
+      end
+    end
+  end
+end
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<