#!/usr/bin/env ruby
-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
-require 'biopieces'
+require 'maasha/biopieces'
+require 'maasha/seq'
require 'gnuplot'
-require 'pp'
+require 'narray'
# Comma-separated terminal names; used as the :allowed value of the 'terminal' option below.
terminals = "dumb,x11,aqua,post,pdf,png,svg"
# Default value for the 'title' option.
title = "Mean Quality Scores"
# Option specifications that are handed to the Biopieces option parser.
casts = []
casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
# 'count' flag (added by this change): when set, a second data set is plotted further down.
+casts << {:long=>'count', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'terminal', :short=>'t', :type=>'string', :mandatory=>false, :default=>'dumb', :allowed=>terminals, :disallowed=>nil}
casts << {:long=>'title', :short=>'T', :type=>'string', :mandatory=>false, :default=>title, :allowed=>nil, :disallowed=>nil}
# NOTE(review): xlabel and ylabel are not assigned in the visible lines -- presumably
# they are set in context that this diff omits; confirm.
casts << {:long=>'xlabel', :short=>'X', :type=>'string', :mandatory=>false, :default=>xlabel, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'ylabel', :short=>'Y', :type=>'string', :mandatory=>false, :default=>ylabel, :allowed=>nil, :disallowed=>nil}
# Removed: instance-based API (Biopieces.new, followed by bp.parse further down).
-bp = Biopieces.new
# Added: options are parsed through Biopieces.options_parse.
+options = Biopieces.options_parse(ARGV, casts)
+
# Length of the accumulator vectors and the longest score string that is accepted.
+SCORES_MAX = 100_000
# Removed: option parsing through the Biopieces instance.
-options = bp.parse(ARGV, casts)
# scores_vec: per-position running sum of scores.
# count_vec:  per-position number of records that contributed a score.
# max:        length of the longest score string seen so far.
# NOTE(review): NArray.int is presumably a 32-bit integer vector -- confirm the sums
# cannot overflow on very large inputs.
+scores_vec = NArray.int(SCORES_MAX)
+count_vec = NArray.int(SCORES_MAX)
+max = 0
# Removed: hard-coded score offset; the added code subtracts Seq::SCORE_BASE instead.
-BASE_SOLEXA = 64
# Added: records are read from `input`; `output` receives them again unless no_stream is set.
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+ input.each_record do |record|
+ if record[:SCORES]
+ scores = record[:SCORES]
# Removed: hash-based sum/count accumulators.
-sum_hash = Hash.new(0)
-count_hash = Hash.new(0)
# Added: empty score strings contribute nothing; strings longer than SCORES_MAX raise BiopiecesError.
+ if scores.length > 0
+ raise BiopiecesError, "score string too long: #{scores.length} > #{SCORES_MAX}" if scores.length > SCORES_MAX
# Removed: per-character accumulation loop over the score string.
-bp.each_record do |record|
- if record[:SCORES]
- scores = record[:SCORES]
- (0 ... scores.length).each do |i|
- sum_hash[i] += (scores[i].ord - BASE_SOLEXA)
- count_hash[i] += 1
# Added: vectorised accumulation -- the score string's bytes minus Seq::SCORE_BASE are added
# to the first scores.length positions, and the counts for those positions are incremented.
+ scores_vec[0 ... scores.length] += NArray.to_na(scores, "byte") - Seq::SCORE_BASE
+ count_vec[0 ... scores.length] += 1
+
+ max = scores.length if scores.length > max
+ end
end
- end
# The record is passed on unless the no_stream option is set (old call removed, new call added).
- bp.puts record unless options[:no_stream]
+ output.puts record unless options[:no_stream]
+ end
end
# Removed: plain arrays that were filled by the loop below.
-x = []
-y = []
# NOTE(review): this allocation is overwritten by the assignment on the next line.
+mean_vec = NArray.sfloat(max)
# Added: per-position mean = summed scores / number of contributing records,
# restricted to the first `max` positions.
+mean_vec = scores_vec[0 ... max].to_f / count_vec[0 ... max]
# Added: counts are converted to float and multiplied by Seq::SCORE_MAX / count_vec.max(0),
# presumably so the largest count lines up with the top of the score axis -- confirm.
+count_vec = count_vec[0 ... max].to_f
+count_vec *= (Seq::SCORE_MAX / count_vec.max(0).to_f)
# Removed: element-wise computation of 1-based positions and mean scores.
-(0 ... sum_hash.size).each do |i|
- x << i + 1
- y << sum_hash[i].to_f / count_hash[i].to_f
-end
# Added: x holds the 1-based positions; y1 the means; y2 the rescaled counts.
# NOTE(review): if no record carried SCORES, max stays 0 and x is empty, so x.min / x.max
# in the plotting code return nil -- confirm whether empty input needs handling.
+x = (1 .. max).to_a
+y1 = mean_vec.to_a
+y2 = count_vec.to_a
# Plot the per-position values with gnuplot, using the title and axis labels from the options.
Gnuplot.open do |gp|
Gnuplot::Plot.new(gp) do |plot|
plot.title options[:title]
plot.xlabel options[:xlabel]
plot.ylabel options[:ylabel]
# Removed: output was only set when data_out was given.
# Added: output falls back to "/dev/stderr" when data_out is not given.
- plot.output options[:data_out] if options[:data_out]
+ plot.output options[:data_out] || "/dev/stderr"
# The x range is padded by one position on each side.
plot.xrange "[#{x.min - 1}:#{x.max + 1}]"
# Removed: fixed y range; added: y range taken from Seq::SCORE_MIN and Seq::SCORE_MAX.
- plot.yrange "[0:40]"
+ plot.yrange "[#{Seq::SCORE_MIN}:#{Seq::SCORE_MAX}]"
plot.style "fill solid 0.5 border"
plot.xtics "out"
plot.ytics "out"
# Removed: single untitled data set drawn with boxes.
- plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
- ds.with = "boxes"
- ds.notitle
# Added: mean scores drawn with boxes and titled "mean score".
+ plot.data << Gnuplot::DataSet.new([x, y1]) do |ds|
+ ds.with = "boxes"
+ ds.title = "mean score"
+ end
+
# Added: with the 'count' option, the rescaled counts are drawn as a black line on the same axes.
+ if options[:count]
+ plot.data << Gnuplot::DataSet.new([x, y2]) do |ds|
+ ds.with = "lines lt rgb \"black\""
+ ds.title = "relative count"
+ end
end
end
end