3 # Copyright (C) 2007-2012 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # Plot the nucleotide distribution in percent of sequences in the stream.
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 require 'maasha/biopieces'
35 terminals = "dumb,x11,aqua,post,pdf,png,svg"
36 title = "Nucleotide Distribution"
37 xlabel = "Sequence position"
38 ylabel = "Nucleotide distribution (%)"
41 casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
42 casts << {:long=>'count', :short=>'c', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
43 casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
44 casts << {:long=>'terminal', :short=>'t', :type=>'string', :mandatory=>false, :default=>'dumb', :allowed=>terminals, :disallowed=>nil}
45 casts << {:long=>'title', :short=>'T', :type=>'string', :mandatory=>false, :default=>title, :allowed=>nil, :disallowed=>nil}
46 casts << {:long=>'xlabel', :short=>'X', :type=>'string', :mandatory=>false, :default=>xlabel, :allowed=>nil, :disallowed=>nil}
47 casts << {:long=>'ylabel', :short=>'Y', :type=>'string', :mandatory=>false, :default=>ylabel, :allowed=>nil, :disallowed=>nil}
49 options = Biopieces.options_parse(ARGV, casts)
53 vec_a = NArray.int(VEC_MAX)
54 vec_t = NArray.int(VEC_MAX)
55 vec_c = NArray.int(VEC_MAX)
56 vec_g = NArray.int(VEC_MAX)
57 vec_n = NArray.int(VEC_MAX)
58 vec_tot = NArray.float(VEC_MAX)
60 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
61 input.each_record do |record|
63 seq = record[:SEQ].upcase
66 raise BiopiecesError, "sequence too long: #{seq.length} > #{VEC_MAX}" if seq.length > VEC_MAX
68 vec_seq = NArray.to_na(seq, "byte")
70 vec_a[0 ... seq.length] += vec_seq.eq('A'.ord)
71 vec_t[0 ... seq.length] += vec_seq.eq('T'.ord)
72 vec_c[0 ... seq.length] += vec_seq.eq('C'.ord)
73 vec_g[0 ... seq.length] += vec_seq.eq('G'.ord)
74 vec_n[0 ... seq.length] += vec_seq.eq('N'.ord)
75 vec_tot[0 ... seq.length] += 1
77 max_len = seq.length if seq.length > max_len
81 output.puts record unless options[:no_stream]
86 x = (1 .. max_len).to_a
87 a = ((vec_a / vec_tot) * 100)[0 ... max_len].to_a
88 t = ((vec_t / vec_tot) * 100)[0 ... max_len].to_a
89 c = ((vec_c / vec_tot) * 100)[0 ... max_len].to_a
90 g = ((vec_g / vec_tot) * 100)[0 ... max_len].to_a
91 n = ((vec_n / vec_tot) * 100)[0 ... max_len].to_a
93 vec_tot *= (Y_MAX / vec_tot.max(0).to_f)
103 Gnuplot::Plot.new(gp) do |plot|
104 plot.terminal options[:terminal]
105 plot.title options[:title]
106 plot.xlabel options[:xlabel]
107 plot.ylabel options[:ylabel]
108 plot.output options[:data_out] || "/dev/stderr"
111 plot.yrange "[0:#{Y_MAX}]"
112 plot.xrange "[0:#{max_len}]"
115 plot.key "outside right top vertical Left reverse enhanced autotitles columnhead nobox"
116 plot.key "invert samplen 4 spacing 1 width 0 height 0"
117 plot.style "fill solid 0.5 border"
118 plot.style "histogram rowstacked"
119 plot.style "data histograms"
120 plot.boxwidth "0.75 absolute"
122 plot.data << Gnuplot::DataSet.new(n) do |ds|
124 ds.with = "histogram lt rgb \"black\""
128 plot.data << Gnuplot::DataSet.new(g) do |ds|
130 ds.with = "histogram lt rgb \"yellow\""
134 plot.data << Gnuplot::DataSet.new(c) do |ds|
136 ds.with = "histogram lt rgb \"blue\""
140 plot.data << Gnuplot::DataSet.new(t) do |ds|
142 ds.with = "histogram lt rgb \"green\""
146 plot.data << Gnuplot::DataSet.new(a) do |ds|
148 ds.with = "histogram lt rgb \"red\""
153 plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
154 ds.with = "lines lt rgb \"black\""
162 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<