X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fplot_nucleotide_distribution;h=d8d276f6d796c9217d682042fbf4acd3ea3f940b;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=ea9b0db2513c9ca662788728ebbe60acbe6e9efa;hpb=a669616e5e94a3722bea4e50c7dab64b84ef85f9;p=biopieces.git diff --git a/bp_bin/plot_nucleotide_distribution b/bp_bin/plot_nucleotide_distribution index ea9b0db..d8d276f 100755 --- a/bp_bin/plot_nucleotide_distribution +++ b/bp_bin/plot_nucleotide_distribution @@ -62,100 +62,103 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| if record[:SEQ] seq = record[:SEQ].upcase - raise BiopiecesError, "sequence too long: #{seq.length} > #{VEC_MAX}" if seq.length > VEC_MAX + unless seq.empty? + raise BiopiecesError, "sequence too long: #{seq.length} > #{VEC_MAX}" if seq.length > VEC_MAX - vec_seq = NArray.to_na(seq, "byte") + vec_seq = NArray.to_na(seq, "byte") - vec_a[0 ... seq.length] += vec_seq.eq('A'.ord) - vec_t[0 ... seq.length] += vec_seq.eq('T'.ord) - vec_c[0 ... seq.length] += vec_seq.eq('C'.ord) - vec_g[0 ... seq.length] += vec_seq.eq('G'.ord) - vec_n[0 ... seq.length] += vec_seq.eq('N'.ord) - vec_tot[0 ... seq.length] += 1 + vec_a[0 ... seq.length] += vec_seq.eq('A'.ord) + vec_t[0 ... seq.length] += vec_seq.eq('T'.ord) + vec_c[0 ... seq.length] += vec_seq.eq('C'.ord) + vec_g[0 ... seq.length] += vec_seq.eq('G'.ord) + vec_n[0 ... seq.length] += vec_seq.eq('N'.ord) + vec_tot[0 ... seq.length] += 1 - max_len = seq.length if seq.length > max_len + max_len = seq.length if seq.length > max_len + end end output.puts record unless options[:no_stream] end end -x = (1 .. max_len).to_a -a = ((vec_a / vec_tot) * 100)[0 ... max_len].to_a -t = ((vec_t / vec_tot) * 100)[0 ... max_len].to_a -c = ((vec_c / vec_tot) * 100)[0 ... max_len].to_a -g = ((vec_g / vec_tot) * 100)[0 ... max_len].to_a -n = ((vec_n / vec_tot) * 100)[0 ... max_len].to_a - -vec_tot *= (Y_MAX / vec_tot.max(0).to_f) -y = vec_tot.to_a - -a.unshift 0.0 -t.unshift 0.0 -c.unshift 0.0 -g.unshift 0.0 -n.unshift 0.0 - -Gnuplot.open do |gp| - Gnuplot::Plot.new(gp) do |plot| - plot.terminal options[:terminal] - plot.title options[:title] - plot.xlabel options[:xlabel] - plot.ylabel options[:ylabel] - plot.output options[:data_out] if options[:data_out] - plot.ytics "out" - plot.xtics "out" - plot.yrange "[0:#{Y_MAX}]" - plot.xrange "[0:#{max_len}]" - plot.auto "fix" - plot.offsets "1" - plot.key "outside right top vertical Left reverse enhanced autotitles columnhead nobox" - plot.key "invert samplen 4 spacing 1 width 0 height 0" - plot.style "fill solid 0.5 border" - plot.style "histogram rowstacked" - plot.style "data histograms" - plot.boxwidth "0.75 absolute" - - plot.data << Gnuplot::DataSet.new(n) do |ds| - ds.using = "1" - ds.with = "histogram lt rgb \"black\"" - ds.title = "N" - end +if max_len > 0 + x = (1 .. max_len).to_a + a = ((vec_a / vec_tot) * 100)[0 ... max_len].to_a + t = ((vec_t / vec_tot) * 100)[0 ... max_len].to_a + c = ((vec_c / vec_tot) * 100)[0 ... max_len].to_a + g = ((vec_g / vec_tot) * 100)[0 ... max_len].to_a + n = ((vec_n / vec_tot) * 100)[0 ... max_len].to_a + + vec_tot *= (Y_MAX / vec_tot.max(0).to_f) + y = vec_tot.to_a + + a.unshift 0.0 + t.unshift 0.0 + c.unshift 0.0 + g.unshift 0.0 + n.unshift 0.0 + + Gnuplot.open do |gp| + Gnuplot::Plot.new(gp) do |plot| + plot.terminal options[:terminal] + plot.title options[:title] + plot.xlabel options[:xlabel] + plot.ylabel options[:ylabel] + plot.output options[:data_out] || "/dev/stderr" + plot.ytics "out" + plot.xtics "out" + plot.yrange "[0:#{Y_MAX}]" + plot.xrange "[0:#{max_len}]" + plot.auto "fix" + plot.offsets "1" + plot.key "outside right top vertical Left reverse enhanced autotitles columnhead nobox" + plot.key "invert samplen 4 spacing 1 width 0 height 0" + plot.style "fill solid 0.5 border" + plot.style "histogram rowstacked" + plot.style "data histograms" + plot.boxwidth "0.75 absolute" + + plot.data << Gnuplot::DataSet.new(n) do |ds| + ds.using = "1" + ds.with = "histogram lt rgb \"black\"" + ds.title = "N" + end - plot.data << Gnuplot::DataSet.new(g) do |ds| - ds.using = "1" - ds.with = "histogram lt rgb \"yellow\"" - ds.title = "G" - end + plot.data << Gnuplot::DataSet.new(g) do |ds| + ds.using = "1" + ds.with = "histogram lt rgb \"yellow\"" + ds.title = "G" + end - plot.data << Gnuplot::DataSet.new(c) do |ds| - ds.using = "1" - ds.with = "histogram lt rgb \"blue\"" - ds.title = "C" - end + plot.data << Gnuplot::DataSet.new(c) do |ds| + ds.using = "1" + ds.with = "histogram lt rgb \"blue\"" + ds.title = "C" + end - plot.data << Gnuplot::DataSet.new(t) do |ds| - ds.using = "1" - ds.with = "histogram lt rgb \"green\"" - ds.title = "T" - end + plot.data << Gnuplot::DataSet.new(t) do |ds| + ds.using = "1" + ds.with = "histogram lt rgb \"green\"" + ds.title = "T" + end - plot.data << Gnuplot::DataSet.new(a) do |ds| - ds.using = "1" - ds.with = "histogram lt rgb \"red\"" - ds.title = "A" - end + plot.data << Gnuplot::DataSet.new(a) do |ds| + ds.using = "1" + ds.with = "histogram lt rgb \"red\"" + ds.title = "A" + end - if options[:count] - plot.data << Gnuplot::DataSet.new([x, y]) do |ds| - ds.with = "lines lt rgb \"black\"" - ds.title = "relative count" + if options[:count] + plot.data << Gnuplot::DataSet.new([x, y]) do |ds| + ds.with = "lines lt rgb \"black\"" + ds.title = "count" + end end end end end - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<