3 # Copyright (C) 2007-2013 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
21 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
23 # This program is part of the Biopieces framework (www.biopieces.org).
25 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
27 # Determine basic stats for records in the stream.
29 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
31 require 'maasha/biopieces'
# Option specifications handed to the Biopieces option parser. Both options
# are optional lists: 'keys' (short 'k') and 'no_keys' (short 'K').
casts << {
  long: 'keys', short: 'k', type: 'list',
  mandatory: false, default: nil, allowed: nil, disallowed: nil
}
casts << {
  long: 'no_keys', short: 'K', type: 'list',
  mandatory: false, default: nil, allowed: nil, disallowed: nil
}

options = Biopieces.options_parse(ARGV, casts)

# Per-key accumulator: an empty Hash is created and stored the first time a
# key is looked up, so callers can write stats[key][:sum] without set-up.
stats = Hash.new { |hash, key| hash[key] = {} }
# NOTE(review): fragment only. The enclosing `def` line and the matching
# rescue/end lines are not visible in this listing. Presumably this is the body
# of the to_number helper that the stream loop calls; confirm against the full
# file before changing it.
# First attempt: strict integer conversion (Kernel#Integer raises on bad input).
42 begin num = Integer(value)
# Second attempt: strict float conversion. Presumably only reached when the
# Integer conversion above raised; the rescue clause is not visible here.
45 begin num = Float(value)
# Return the smaller of two values for a running minimum.
#
# value1 - the minimum recorded so far, or nil/false when nothing has been
#          recorded yet (the stats hash starts without a :min entry).
# value2 - the new candidate value.
#
# Returns value2 when there is no previous minimum, otherwise the smaller of
# the two.
def min(value1, value2)
  # No previous minimum: the candidate becomes the minimum.
  return value2 unless value1

  [value1, value2].min
end
# Return the larger of two values for a running maximum.
#
# value1 - the maximum recorded so far, or nil/false when nothing has been
#          recorded yet (the stats hash starts without a :max entry).
# value2 - the new candidate value.
#
# Returns value2 when there is no previous maximum, otherwise the larger of
# the two.
def max(value1, value2)
  # No previous maximum: the candidate becomes the maximum.
  return value2 unless value1

  [value1, value2].max
end
# Keys to analyse, converted to symbols; stays nil when options[:keys] is unset.
keys = options[:keys].map(&:to_sym) if options[:keys]

# Lookup table (symbol => true) of keys to leave out; stays nil when
# options[:no_keys] is unset.
no_keys = Hash[options[:no_keys].map { |name| [name.to_sym, true] }] if options[:no_keys]
# Main pass: accumulate per-key statistics over every record of the input
# stream, then write one summary record per key to the output stream.
# NOTE(review): this listing is missing lines inside the block (the assignment
# of `value`, the `else` separating the two branches below, the opening of the
# stat_record hash literal, and every block terminator). Confirm against the
# full file before editing.
66 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
67 input.each do |record|
# Use the explicitly requested keys when given, otherwise all keys of this record.
69 get_keys = keys || record.keys
# Remove excluded keys. reject! works in place, so when `keys` is set it is the
# shared `keys` array itself that gets filtered (harmless to repeat per record).
70 get_keys.reject! { |k| no_keys[k] } if no_keys
73 get_keys.each do |key|
# Start the running totals at zero the first time a key is seen.
77 stats[key][:sum] ||= 0
78 stats[key][:count] ||= 0
# Numeric branch: to_number returned a truthy number, so min/max/sum track the
# numeric value itself.
# NOTE(review): `value` is assigned on a line not visible here, presumably from
# record[key]; verify.
80 if num = to_number(value)
81 stats[key][:min] = min(stats[key][:min], num)
82 stats[key][:max] = max(stats[key][:max], num)
83 stats[key][:type] = :numeric
84 stats[key][:sum] += num
# Presumably the non-numeric branch (the `else` line is not visible): min/max/sum
# track the length of the value instead of the value.
86 stats[key][:min] = min(stats[key][:min], value.length)
87 stats[key][:max] = max(stats[key][:max], value.length)
# :type is overwritten on every value, so it reflects the last value seen for
# the key. NOTE(review): a key with mixed numeric/non-numeric values would mix
# numbers and lengths in :min/:max/:sum; confirm whether that is intended.
88 stats[key][:type] = :alphabetic
89 stats[key][:sum] += value.length
92 stats[key][:count] += 1
# Emit the summary; here `value` is the per-key stats hash, not a record value.
96 stats.each do |key, value|
# Type symbol rendered as a capitalized string (e.g. :numeric -> "Numeric").
99 :TYPE => value[:type].to_s.capitalize,
100 :COUNT => value[:count],
# Float results are formatted with two decimals; other values pass through unchanged.
101 :MIN => value[:min].is_a?(Float) ? "%0.2f" % value[:min] : value[:min],
102 :MAX => value[:max].is_a?(Float) ? "%0.2f" % value[:max] : value[:max],
103 :SUM => value[:sum].is_a?(Float) ? "%0.2f" % value[:sum] : value[:sum],
# Mean is always computed in floating point and formatted with two decimals.
104 :MEAN => "%0.2f" % (value[:sum] / value[:count].to_f)
107 output.puts stat_record
112 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<