git.donarmstrong.com Git - biopieces.git/commitdiff
added plot_distribution biopiece
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 25 May 2011 09:31:29 +0000 (09:31 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 25 May 2011 09:31:29 +0000 (09:31 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1431 74ccb610-7750-0410-82ae-013aeee3265d

bp_bin/plot_distribution [new file with mode: 0755]
bp_test/in/plot_distribution.in [new file with mode: 0644]
bp_test/out/plot_distribution.out.1 [new file with mode: 0644]
bp_test/test/test_plot_distribution [new file with mode: 0755]

diff --git a/bp_bin/plot_distribution b/bp_bin/plot_distribution
new file mode 100755 (executable)
index 0000000..d2d4d70
--- /dev/null
@@ -0,0 +1,99 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2011 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Plot the distribution of numerical values for a specified key.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'maasha/biopieces'
+require 'gnuplot'
+require 'pp'
+
+terminals = "dumb,x11,aqua,post,pdf,png,svg"
+title     = "Distribution"
+ylabel    = "n"
+
+casts = []
+casts << {:long=>'key',        :short=>'k', :type=>'string', :mandatory=>true,  :default=>nil,    :allowed=>nil,       :disallowed=>nil}
+casts << {:long=>'no_stream',  :short=>'x', :type=>'flag',   :mandatory=>false, :default=>nil,    :allowed=>nil,       :disallowed=>nil}
+casts << {:long=>'data_out',   :short=>'o', :type=>'file',   :mandatory=>false, :default=>nil,    :allowed=>nil,       :disallowed=>nil}
+casts << {:long=>'terminal',   :short=>'t', :type=>'string', :mandatory=>false, :default=>'dumb', :allowed=>terminals, :disallowed=>nil}
+casts << {:long=>'title',      :short=>'T', :type=>'string', :mandatory=>false, :default=>title,  :allowed=>nil,       :disallowed=>nil}
+casts << {:long=>'xlabel',     :short=>'X', :type=>'string', :mandatory=>false, :default=>nil,    :allowed=>nil,       :disallowed=>nil}
+casts << {:long=>'ylabel',     :short=>'Y', :type=>'string', :mandatory=>false, :default=>ylabel, :allowed=>nil,       :disallowed=>nil}
+casts << {:long=>'logscale_y', :short=>'L', :type=>'flag',   :mandatory=>false, :default=>nil,    :allowed=>nil,       :disallowed=>nil}
+
+options = Biopieces.options_parse(ARGV, casts)
+
+key = options[:key]
+
+options[:xlabel] = key unless options[:xlabel]
+options[:ylabel] = "log10(#{options[:ylabel]})" if options[:logscale_y]
+
+count_hash = Hash.new(0)
+
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  input.each_record do |record|
+    if record.has_key? key
+      count_hash[record[key].to_i] += 1
+    end
+
+    output.puts record unless options[:no_stream]
+  end
+end
+
+x_max = count_hash.keys.max
+
+x = []
+y = []
+
+(0 .. x_max).each do |i|
+  x << i
+  y << count_hash[i]
+end
+
+Gnuplot.open do |gp|
+  Gnuplot::Plot.new(gp) do |plot|
+    plot.terminal options[:terminal]
+    plot.title    options[:title]
+    plot.xlabel   options[:xlabel]
+    plot.ylabel   options[:ylabel]
+    plot.output   options[:data_out] if options[:data_out]
+    plot.logscale "y"                if options[:logscale_y]
+    plot.xrange   "[#{x.min - 1}:#{x.max + 1}]"
+    plot.style    "fill solid 0.5 border"
+    plot.xtics    "out"
+    plot.ytics    "out"
+    
+    plot.data << Gnuplot::DataSet.new([x, y]) do |ds|
+      ds.with = "boxes"
+      ds.notitle
+    end
+  end
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
diff --git a/bp_test/in/plot_distribution.in b/bp_test/in/plot_distribution.in
new file mode 100644 (file)
index 0000000..36fbe1c
--- /dev/null
@@ -0,0 +1,400 @@
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1040:5263#TTAGGC/1
+SEQ: TTCGGCATCGGCGGCGACGTTGGCGGCGG
+SEQ_LEN: 29
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1041:14486#TTAGGC/1
+SEQ: CATGGCGTATGCCAGACGGCCAGAACGATGGCCGCCGGGCTTCA
+SEQ_LEN: 44
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1043:19446#TTAGGC/1
+SEQ: CGGTACTGATCGAGTGTCAGGCTGTTGATCGCCGCGGGCGG
+SEQ_LEN: 41
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1044:7943#TTAGGC/1
+SEQ: CTGATGCATGAAGATAGTCGGATGCACAATATACACGGCTAACGCNNAGG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1045:16499#TTAGGC/1
+SEQ: CTTGGTGCCCGTCACGCGCACTGCGTCGCCCTGAATGCTCGCCTGNNCCT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:4566#TTAGGC/1
+SEQ: CACTGAAGGGTTCGTCCATCAGGATGATGCGAGGATTCAGCGCCANNGCT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:18609#TTAGGC/1
+SEQ: ATCTGGCCAAGGTCAGACAGAGCTTCCCGTAACTTCGGCACAGCGNNATC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1047:20035#TTAGGC/1
+SEQ: CGTCACGTGGCGTGAATCCAATCTCGGCTATGCGCTGATCGGCGCNNC
+SEQ_LEN: 48
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1048:14107#TTAGGC/1
+SEQ: GAAAGTGCAATCTACCGTTGTCGATATGCTGTCCGGCCTTGGCTACNGTG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1049:17891#TTAGGC/1
+SEQ: CATCACCCGCTTCGAACTCCCGCACAAAGGCCGGCGTTGGCGAACGATTT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1052:16071#TTAGGC/1
+SEQ: AGCATTCGATCCTGTCGCTGGCGCGCGCCGACATGCCCGGCCTCGCCGCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1054:4943#TTAGGC/1
+SEQ: ATGGTAATTCATATGGATTCCATACCATTACGACGCAAATTAACCAAAAC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1054:16130#TTAGGC/1
+SEQ: GCCGAGCGCATCGGCCATGCCGTCGATATCGACGGGATTGACGATCAGCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1057:20743#TTAGGC/1
+SEQ: AGAACGGAATCCACGGGTACTTTCCCCTGACACGCCTCAGTGAACGCATT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1058:14364#TTAGGC/1
+SEQ: TTTGCCTTTGCGCTTCGTGCATTGCCAGTCGGTGTGTTTGCCTTTGCGCT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1060:7058#TTAGGC/1
+SEQ: ACGTTGAGCCCGGCGCAACTCGGGGATGAGCAACTGATACGCCAGTATCT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1061:4896#TTAGGC/1
+SEQ: CAGCATGCATTCGAGTTTGTCCTGACGTCGATGTGTTGTCAACCAGTTTT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1062:11414#TTAGGC/1
+SEQ: ATATCGGCAATCAGCCAGAGCGCCGTCCATGTCTGATGCAGCCGGAAGCA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1064:11144#TTAGGC/1
+SEQ: GCGATCGAACTCGCGAACAGCATCGACGCGCATCCGCACGATCTCGCCGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1064:11650#TTAGGC/1
+SEQ: TCACGTGGTCGACGCCGCATCCCGAGATGGCGGGCAAGCTGGTGAAGGAG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1065:1762#TTAGGC/1
+SEQ: GCTCGCATGTGGCGCGCTATTGACGCGAGAAGGCGGTGTCGCGCATGGGG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1066:12167#TTAGGC/1
+SEQ: TGGATCGAGCGCTTCGTGAACTCGGGGCCGGCCGGATTGGTCGCGACATT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1066:8766#TTAGGC/1
+SEQ: CCAGTCGCCGTCGCGCACCACAGCGAGCGGCCCGCCCGCACGCGCTTCGG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1067:6098#TTAGGC/1
+SEQ: GAAGTTGTCGTGGCTGGCGGG
+SEQ_LEN: 21
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1068:4009#TTAGGC/1
+SEQ: AACATCGACAGCTTCATGCCGTCGCGCAACAGAAGCCGCTTGCCGTAG
+SEQ_LEN: 48
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1068:9940#TTAGGC/1
+SEQ: CCGAGATCGAAACGAAAGCCGTCGATATTGAACGCGGTCGACCAGTAGCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1070:7315#TTAGGC/1
+SEQ: AATTGGACTCTCATCAGGGAGAGCACATTGTCATCGAGATCTATCATCCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1070:1529#TTAGGC/1
+SEQ: CACTGCGTTCAGGCACTCGCGGGGATGGGACTCGAAGACGTCCTGAAAGA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1071:6053#TTAGGC/1
+SEQ: CCAGGTGATCGCCGACAATCTCGACATCTCGTACGGCGAAGACGAGTGCC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1074:7751#TTAGGC/1
+SEQ: CGTGATTCAGCGCATCGAGCGCGCCGATCGCCTCCGCGCACAGCAACAC
+SEQ_LEN: 49
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1078:12910#TTAGGC/1
+SEQ: CCTGACCGTGATTGCCTGTCGATACGGTCGCGACACCGCGACGTCGAACG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1079:12829#TTAGGC/1
+SEQ: ATGAGTTTCTTTACTGTTGATGCCGGATGGGCGTATCGACCGCACACCCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1079:14497#TTAGGC/1
+SEQ: ACATCAGGCCCGCGCGATAATGGCAGCGCGTCGTGCGGCGTCCAAGCGGA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1081:3124#TTAGGC/1
+SEQ: GATCTCGACGACGCCCGAGCGCTCCGGGCTCAGATAGTTGCTCGCGTTG
+SEQ_LEN: 49
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1081:16684#TTAGGC/1
+SEQ: AGTCGCGTTGGTACATACCCGAAGGTCCGAAAAACTGCGTCGGCGCGGCC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1082:10836#TTAGGC/1
+SEQ: CCGGGAACCGACTGACCCGACGAGAACGCGAGATACGCCTGCGTCGACGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1083:13749#TTAGGC/1
+SEQ: ATCTGCTCGCGCAACGGCCCGCCGGAAAACCGTACGAAGGCTATTGGGAA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1083:6966#TTAGGC/1
+SEQ: ACGACGTAGAAGACCAGCATCAGGCCGAGCAGGCCGAACCACACGTAGGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1083:18591#TTAGGC/1
+SEQ: ACGAGTTCATAAGGCTCGCTACGCGAAGCAAGATGCAGCGGAAGGATGCA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1084:15972#TTAGGC/1
+SEQ: GAGCGACATGATCACGCGCGGCAAGGCGCTGTACTGGGGCACGTCCGAAT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1084:17452#TTAGGC/1
+SEQ: TCGATCTGTCGGTCTACCTGCTCGGATTTACGACCGAAGTGCATGCGCCT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1089:2172#TTAGGC/1
+SEQ: CCAGCGCGCGGCAACGAAACCCAATACGGTGACGAGAAGAAAGAATGCGA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1091:19756#TTAGGC/1
+SEQ: TCATGAAGCCTTCGCTCGACTCGATCTTCATGTGCGCTTCCGGGCCCGAG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1092:19968#TTAGGC/1
+SEQ: TGCGCGACTGTACTCGCGCATCTGATTGAACTGGACTCATCGAATGAACA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1093:13872#TTAGGC/1
+SEQ: ACTACCCGTACCTTGCAGATTGGCCGAGTTGTTGAAGTTGTCGACCAG
+SEQ_LEN: 48
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1094:10801#TTAGGC/1
+SEQ: CCGCCGTCACTATGGCGACGGACTCGACGACGAAGCACTCGTGCGTC
+SEQ_LEN: 47
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1094:5031#TTAGGC/1
+SEQ: CGAACCAGGCGGTCGCCCGCAAACTGCACGGCCGTGACGATCGCGATCAG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1095:20114#TTAGGC/1
+SEQ: AGTCACGGGCGGCGTGAGCTACGTGCAGGACATGCAGTTGCCGGGCATGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1096:11317#TTAGGC/1
+SEQ: TTACGGATCGAACGTGCGGTGGAGGGTGTGTTCCATCGCGGCTTTCTTGG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1097:10042#TTAGGC/1
+SEQ: CGTGTCCGAGCAACTCGCGCGCGTCCCGGCGATGTTCAAGGTCATCCGCA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1097:15029#TTAGGC/1
+SEQ: ATAGGAACTGAAGTGGCCGACGTCGGCAAACCGCGTGATGGTACCGG
+SEQ_LEN: 47
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1097:3441#TTAGGC/1
+SEQ: CGACCACCGTGCCTTGCGTATTCAGCCAAGGTTTGAAGCAGTCGTACCCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1098:19843#TTAGGC/1
+SEQ: CGAGACCTAGCCAGTCGTATGGATTCAGCCCGTTGAAGTAGCGCATCTGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1098:4101#TTAGGC/1
+SEQ: CTTTCTCCACTTCCGGGCGTCCAAGCAAACCTGATGGACGAAACATCAGG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1101:7948#TTAGGC/1
+SEQ: CTGATGCATGAAGATAGTCGGATGCACAATATACACGGATAACGCGCAGG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1102:15999#TTAGGC/1
+SEQ: TAGACAGGGCTTTAGGGGAGTCCCGAAAGGACCCGGAAATTCGTCGCCAT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1103:20454#TTAGGC/1
+SEQ: AGTGAACTGAGTACGATGATGACGCCTGAGGCACGCGTCCAGCATGTGCT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1104:11744#TTAGGC/1
+SEQ: CCGTCGTTCGATCCGATGTTCATGCAGTCGAACTACGGCATCGCCACGAA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1105:17297#TTAGGC/1
+SEQ: GCGACCTTCGTGCAGGTGGTCGAGGCGGGAAGATTTGCGCTGGCG
+SEQ_LEN: 45
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1107:1551#TTAGGC/1
+SEQ: CTTCGTCTTGTTGGCGCTGAGGCTCGCGCGAAGCGGCTCCATCAGGTCGA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1108:19284#TTAGGC/1
+SEQ: TTGTGCGGCTGTTGCACGGTGATCGTCAACGGCGAGAGCGTGTCGGGGTG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1109:4660#TTAGGC/1
+SEQ: TGACCCGTCAGCTTCAGATACGGATATGACTTGTCGTCGCGAAACAGG
+SEQ_LEN: 48
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1110:10184#TTAGGC/1
+SEQ: GACGGATCGAGCTTGCTGTATTGCACGATCGAGGCCACCGAATCGTTGAT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1110:15460#TTAGGC/1
+SEQ: AGTGGACAGCGGCGCGCGCAACGTCGACCACATTCTCACGCAATCGGTCC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1112:8480#TTAGGC/1
+SEQ: CAAACGGCGCTCTGCCCAACCAGGTTCATCGGGATTTGCGAAAACTGTTG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1112:18963#TTAGGC/1
+SEQ: ATGCCAGCGCAAAACACATGCAAGCCAGACAGCTTGCGCGGCAAACACCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1113:19218#TTAGGC/1
+SEQ: ACGACGATGTGTTCGATATCGCGGATGCTGCCCGTCTTGTTGTTCGCGGG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1114:17685#TTAGGC/1
+SEQ: TGCCGCAGTAAAGATTGTTTTAGTACAGGGTATTGTGGGGACCACCTCCC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1115:3487#TTAGGC/1
+SEQ: CAGCAGAGTCGTGAGCAGGATTTGCCACCACAGCAGCGAAAGCAGGCCGT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1116:20290#TTAGGC/1
+SEQ: AAGCGGTGGCGAGGAAGGCACCTGCGAAGCGGACGCCGCAGAACCAGGCA
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1116:14355#TTAGGC/1
+SEQ: CGGCCGCAGAAGAAGAAAATCATCTCGCGTACACGCGCCTACCATGGCGT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1117:17429#TTAGGC/1
+SEQ: CGGAAGAACAGGAAAAAAGCTGGCGGAATGCGTCATCGGGCATACGCGCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1117:18986#TTAGGC/1
+SEQ: ATGAGCCTCAGATCCTGTATTCGACCACGGTCACCGTCACGGGCGGCG
+SEQ_LEN: 48
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1119:2120#TTAGGC/1
+SEQ: CTTCGAGCTGCAGTTTGCTTGCCTGCCTGACTAGCGTGGCGAATGAATCT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1119:20399#TTAGGC/1
+SEQ: GCGGCGCAACGATCAGCGGATCGATGAAACCCGCACGCTGTGCGTCGCGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1119:20845#TTAGGC/1
+SEQ: CACCTGCGCGGTGGGATCGACGGCCCACGGAATGATGCGGATCGTCGACG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1120:1523#TTAGGC/1
+SEQ: TCGAAAAGCTCACGCATCTGTTCATCGCGACGGAAGACGGCCTGCGCGAC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1121:5939#TTAGGC/1
+SEQ: ACGCGTCACGCAAGAAGTCGACGAAGCCCGCAAGCAGATCGGCACGCTC
+SEQ_LEN: 49
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1122:20522#TTAGGC/1
+SEQ: TGCGGGTCGGTCTGGTCGTTGATTTGAGTGGACCAAGCGTGCTCAGGCGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1123:18898#TTAGGC/1
+SEQ: GACATCGTCCCACCGGCCAGCAAACCGAGCAGCATGATCGAGAATATCTC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1124:3469#TTAGGC/1
+SEQ: GACATTGGCCGCGCGGTTTCCGTGGCTCAACAC
+SEQ_LEN: 33
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1125:12642#TTAGGC/1
+SEQ: CAATGCGCGCATGCTGTTGCGCGACGCGCCAGTGCTCGATCCGCACACGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1128:8823#TTAGGC/1
+SEQ: ATAGTCGCCGTTGCGCGCGGCTTCCGTAGGCATCGCCACGAGCAGAGTGT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1130:18433#TTAGGC/1
+SEQ: CTCATGATCGCGTCGGTGTCCGACGAGTTCAACCCCTGCTCGTAACGATG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1130:17193#TTAGGC/1
+SEQ: CACCTTCGCATGCGCTGATGAAAAAGAAACGCCTCACGCTTGGCGATCTG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1131:6653#TTAGGC/1
+SEQ: TACGTGATGGTCCCGGAATGTCAACTGCGTGGCATAATTGACCGGACACG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1132:15841#TTAGGC/1
+SEQ: CAAAGCAAGGAGACGCTGGGAGCCGCCGCCGCGTATCTGACCATGCCGCC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1133:4812#TTAGGC/1
+SEQ: GTAATACGCCTTGCACAGCGCCACCTTGACCATCCGGCCGATCGCGCCGC
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1134:16540#TTAGGC/1
+SEQ: GCGCGTAGGGCGTCGCCTCTGAGAACTGCGGATCGGTGGCAGACAGCGAG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1138:5468#TTAGGC/1
+SEQ: GGCTTCTCGAACTCAGGCTTCCCGGCGGCGCGGTCCGCGGCGGCCGCGTG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1139:2772#TTAGGC/1
+SEQ: ATCGCGCGCGACTGGCCGATCAGACGCGGCAGGCGGATTGTGCTGCC
+SEQ_LEN: 47
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1139:4047#TTAGGC/1
+SEQ: GTCGAATCCGCCCGGCGGCGAACTCGACTGATTCGCCTCATCGCATGAA
+SEQ_LEN: 49
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1141:2098#TTAGGC/1
+SEQ: ATGCGTAGCTGATGGGAAGCAGGTCAATATTCCTGCACCAGTGTGAAATG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1141:15959#TTAGGC/1
+SEQ: CTTCCGAGCAGTTCGGCGCGCTGGTGCCCATCTACGGCCCGATTGCGACG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1141:6113#TTAGGC/1
+SEQ: ATTATCAGACGCTCGCGTTCGAGTATTACCGGACGCTGCGCGAAATGGCG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1142:7568#TTAGGC/1
+SEQ: TAGTTCAGCGGCGCGGGAATCGTGCCCTGCACGTTCGTCATGTATTCGTG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1143:7454#TTAGGC/1
+SEQ: ACGCGCCCCTCGCGCGGCGGCGGTGGCGCGGATGCGTCGGGCGCG
+SEQ_LEN: 45
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1144:9741#TTAGGC/1
+SEQ: CCACCCGGCCGTCAGTGGTGAAATGTGGCCGGATGTAGCGCTCGTAGTGT
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1144:3624#TTAGGC/1
+SEQ: CGCGAACGTGTCGAAGTGCCGGAATACCGACACGCTCTGATGCTCGAACG
+SEQ_LEN: 50
+---
+SEQ_NAME: ILLUMINA-52179E_0004:2:1:1145:11219#TTAGGC/1
+SEQ: CGCCTGGCTTTTATGCGGCGCTGCAGGCTATTGAAAGACTCGATCTACGC
+SEQ_LEN: 50
+---
diff --git a/bp_test/out/plot_distribution.out.1 b/bp_test/out/plot_distribution.out.1
new file mode 100644 (file)
index 0000000..ebe6291
--- /dev/null
@@ -0,0 +1,24 @@
+\f
+                                  Distribution
+       +             +            +            +            +             +
+  90 +++-------------+------------+------------+------------+-------------+++
+      |                                                                    |
+  80 ++                                                                  **++
+      |                                                                  **|
+  70 ++                                                                  **++
+  60 ++                                                                  **++
+      |                                                                  **|
+  50 ++                                                                  **++
+      |                                                                  **|
+  40 ++                                                                  **++
+      |                                                                  **|
+  30 ++                                                                  **++
+  20 ++                                                                  **++
+      |                                                                  **|
+  10 ++                                                                  **++
+      |                                                              ******|
+   0 +++-------------+------------+**--------**+--***-------+**--**********++
+       +             +            +            +            +             +
+       0             10           20           30           40            50
+                                     SEQ_LEN
+
diff --git a/bp_test/test/test_plot_distribution b/bp_test/test/test_plot_distribution
new file mode 100755 (executable)
index 0000000..768fab8
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"  # NOTE(review): presumably provides run, assert_no_diff, clean and $bp, $in, $tmp, $out - confirm
+
+run "$bp -k SEQ_LEN -I $in -o $tmp -x"  # plot key SEQ_LEN with the default 'dumb' terminal into $tmp; -x is no_stream; -I presumably names the input file
+assert_no_diff $tmp $out.1  # the generated plot is compared with the stored expected output
+clean