From: martinahansen Date: Mon, 2 May 2011 13:41:55 +0000 (+0000) Subject: ported plot_scores to ruby X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=c99b48ef600e53e67be86bd408a9efbd3204859b;p=biopieces.git ported plot_scores to ruby git-svn-id: http://biopieces.googlecode.com/svn/trunk@1352 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/bp_bin/plot_scores b/bp_bin/plot_scores index 6b1360e..af37cc2 100755 --- a/bp_bin/plot_scores +++ b/bp_bin/plot_scores @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2010 Martin A. Hansen. +# Copyright (C) 2007-2011 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,13 +18,9 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - -# Create a lineplot from quality scores in the stream. - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). use warnings; use strict; @@ -32,74 +28,70 @@ use Maasha::Biopieces; use Maasha::Fastq; use Maasha::Plot; +# Plot a histogram of mean sequence quality scores. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -my ( $options, $in, $out, $default, $terminals, $record, %count_hash, %sum_hash, $i, @scores, @data_list, $result, $fh, $tmp_dir ); - -$default = "Quality Scores"; -$terminals = "dumb,x11,aqua,post,svg"; - -$options = Maasha::Biopieces::parse_options( - [ - { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'data_out', short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'terminal', short => 't', type => 'string', mandatory => 'no', default => 'dumb', allowed => $terminals, disallowed => undef }, - { long => 'title', short => 'T', type => 'string', mandatory => 'no', default => $default, allowed => undef, disallowed => undef }, - { long => 'xlabel', short => 'X', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - { long => 'ylabel', short => 'Y', type => 'string', mandatory => 'no', default => undef, allowed => undef, disallowed => undef }, - ] -); - -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); - -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - if ( $record->{ 'SCORES' } ) - { - for ( $i = 0; $i < length $record->{ 'SCORES' }; $i++ ) - { - $count_hash{ $i }++; - $sum_hash{ $i } += Maasha::Fastq::solexa2dec( substr $record->{ "SCORES" }, $i, 1 ); - } - } - - Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" }; -} - -for ( $i = 0; $i < keys %count_hash; $i++ ) { - push @data_list, [ $sum_hash{ $i } / $count_hash{ $i } ]; -} - -$tmp_dir = Maasha::Biopieces::get_tmpdir(); - -$result = Maasha::Plot::lineplot_simple( \@data_list, $options, $tmp_dir ); - -$fh = Maasha::Biopieces::write_stream( $options->{ "data_out" } ); - -print $fh "$_\n" foreach @{ $result }; - -close $fh; - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -BEGIN -{ - Maasha::Biopieces::status_set(); -} - - -END -{ - Maasha::Biopieces::status_log(); -} +require 'biopieces' +require 'gnuplot' +require 'pp' + +terminals = "dumb,x11,aqua,post,pdf,png,svg" + +casts = [] +casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'terminal', :short=>'t', :type=>'string', :mandatory=>false, :default=>'dumb', :allowed=>terminals, :disallowed=>nil} + +bp = Biopieces.new + +options = bp.parse(ARGV, casts) + +BASE_SOLEXA = 64 + +sum_hash = Hash.new(0) +count_hash = Hash.new(0) + +bp.each_record do |record| + if record[:SCORES] + scores = record[:SCORES] + (0 ... scores.length).each do |i| + sum_hash[i] += (scores[i].ord - BASE_SOLEXA) + count_hash[i] += 1 + end + end + + bp.puts record unless options[:no_stream] +end + +x = [] +y = [] + +(0 ... sum_hash.size).each do |i| + x << i + y << sum_hash[i].to_f / count_hash[i].to_f +end + +Gnuplot.open do |gp| + Gnuplot::Plot.new(gp) do |plot| + plot.terminal options[:terminal] + plot.title "Mean Quality Scores" + plot.ylabel "Mean score" + plot.xlabel "Sequence position" + plot.output options[:data_out] if options[:data_out] + plot.xrange "[#{x.min - 1}:#{x.max + 1}]" + plot.yrange "[0:40]" + plot.style "fill solid 0.5 border" + plot.xtics "out" + plot.ytics "out" + + plot.data << Gnuplot::DataSet.new([x, y]) do |ds| + ds.with = "boxes" + ds.notitle + end + end +end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<