3 # Copyright (C) 2007-2012 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # Plot the nucleotide distribution in percent of sequences in the stream.
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 require 'maasha/biopieces'
34 terminals = "dumb,x11,aqua,post,pdf,png,svg"
35 title = "Nucleotide Distribution"
36 xlabel = "Sequence position"
37 ylabel = "Nucleotide distribution (%)"
40 casts << {:long=>'no_stream', :short=>'x', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
41 casts << {:long=>'data_out', :short=>'o', :type=>'file', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
42 casts << {:long=>'terminal', :short=>'t', :type=>'string', :mandatory=>false, :default=>'dumb', :allowed=>terminals, :disallowed=>nil}
43 casts << {:long=>'title', :short=>'T', :type=>'string', :mandatory=>false, :default=>title, :allowed=>nil, :disallowed=>nil}
44 casts << {:long=>'xlabel', :short=>'X', :type=>'string', :mandatory=>false, :default=>xlabel, :allowed=>nil, :disallowed=>nil}
45 casts << {:long=>'ylabel', :short=>'Y', :type=>'string', :mandatory=>false, :default=>ylabel, :allowed=>nil, :disallowed=>nil}
47 options = Biopieces.options_parse(ARGV, casts)
51 vec_a = NArray.int(VEC_MAX)
52 vec_t = NArray.int(VEC_MAX)
53 vec_c = NArray.int(VEC_MAX)
54 vec_g = NArray.int(VEC_MAX)
55 vec_n = NArray.int(VEC_MAX)
56 vec_tot = NArray.float(VEC_MAX)
58 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
59 input.each_record do |record|
61 seq = record[:SEQ].upcase
63 raise BiopiecesError, "sequence too long: #{seq.length} > #{VEC_MAX}" if seq.length > VEC_MAX
65 vec_seq = NArray.to_na(seq, "byte")
67 vec_a[0 ... seq.length] += vec_seq.eq('A'.ord)
68 vec_t[0 ... seq.length] += vec_seq.eq('T'.ord)
69 vec_c[0 ... seq.length] += vec_seq.eq('C'.ord)
70 vec_g[0 ... seq.length] += vec_seq.eq('G'.ord)
71 vec_n[0 ... seq.length] += vec_seq.eq('N'.ord)
72 vec_tot[0 ... seq.length] += 1
74 max_len = seq.length if seq.length > max_len
77 output.puts record unless options[:no_stream]
81 x = (1 .. max_len).to_a
82 a = ((vec_a / vec_tot) * 100)[0 ... max_len].to_a
83 t = ((vec_t / vec_tot) * 100)[0 ... max_len].to_a
84 c = ((vec_c / vec_tot) * 100)[0 ... max_len].to_a
85 g = ((vec_g / vec_tot) * 100)[0 ... max_len].to_a
86 n = ((vec_n / vec_tot) * 100)[0 ... max_len].to_a
95 Gnuplot::Plot.new(gp) do |plot|
96 plot.terminal options[:terminal]
97 plot.title options[:title]
98 plot.xlabel options[:xlabel]
99 plot.ylabel options[:ylabel]
100 plot.output options[:data_out] if options[:data_out]
103 plot.yrange "[0:100]"
104 plot.xrange "[0:#{max_len}]"
107 plot.key "outside right top vertical Left reverse enhanced autotitles columnhead nobox"
108 plot.key "invert samplen 4 spacing 1 width 0 height 0"
109 plot.style "fill solid 0.5 border"
110 plot.style "histogram rowstacked"
111 plot.style "data histograms"
112 plot.boxwidth "0.75 absolute"
114 plot.data << Gnuplot::DataSet.new(n) do |ds|
116 ds.with = "histogram lt rgb \"black\""
120 plot.data << Gnuplot::DataSet.new(g) do |ds|
122 ds.with = "histogram lt rgb \"yellow\""
126 plot.data << Gnuplot::DataSet.new(c) do |ds|
128 ds.with = "histogram lt rgb \"blue\""
132 plot.data << Gnuplot::DataSet.new(t) do |ds|
134 ds.with = "histogram lt rgb \"green\""
138 plot.data << Gnuplot::DataSet.new(a) do |ds|
140 ds.with = "histogram lt rgb \"red\""
147 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<