X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fanalyze_vals;h=40dd0e0239737e08b16eb75d815f6445a8ed7a72;hb=5de6112b70b59420b245ce636a8b2e3c90acbe00;hp=aa9d12ae536b280e196598fd54a03cbc03caad2b;hpb=4c574917dc12688ccd8899c48f0635a82e6617d1;p=biopieces.git diff --git a/bp_bin/analyze_vals b/bp_bin/analyze_vals index aa9d12a..40dd0e0 100755 --- a/bp_bin/analyze_vals +++ b/bp_bin/analyze_vals @@ -31,8 +31,10 @@ require 'maasha/biopieces' casts = [] -casts << {long: 'keys', short: 'k', type: 'list', mandatory: false, default: nil, allowed: nil, disallowed: nil} -casts << {long: 'no_keys', short: 'K', type: 'list', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'keys', short: 'k', type: 'list', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'no_keys', short: 'K', type: 'list', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'no_stream', short: 'x', type: 'flag', mandatory: false, default: nil, allowed: nil, disallowed: nil} +casts << {long: 'data_out', short: 'o', type: 'file', mandatory: false, default: nil, allowed: nil, disallowed: nil} options = Biopieces.options_parse(ARGV, casts) @@ -74,23 +76,31 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| key = key.to_sym value = record[key] - stats[key][:sum] ||= 0 - stats[key][:count] ||= 0 - - if num = to_number(value) - stats[key][:min] = min(stats[key][:min], num) - stats[key][:max] = max(stats[key][:max], num) - stats[key][:type] = :numeric - stats[key][:sum] += num - else - stats[key][:min] = min(stats[key][:min], value.length) - stats[key][:max] = max(stats[key][:max], value.length) - stats[key][:type] = :alphabetic - stats[key][:sum] += value.length + if value + stats[key][:sum] ||= 0 + stats[key][:count] ||= 0 + + if num = to_number(value) + stats[key][:min] = min(stats[key][:min], num) + stats[key][:max] = max(stats[key][:max], num) + stats[key][:type] = :numeric + stats[key][:sum] += num + else + stats[key][:min] = min(stats[key][:min], value.length) + stats[key][:max] = max(stats[key][:max], value.length) + stats[key][:type] = :alphabetic + stats[key][:sum] += value.length + end + + stats[key][:count] += 1 end - - stats[key][:count] += 1 end + + output.puts record unless options[:no_stream] + end + + if options[:data_out] + data_out = File.open(options[:data_out], 'w') end stats.each do |key, value| @@ -98,13 +108,17 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| :KEY => key, :TYPE => value[:type].to_s.capitalize, :COUNT => value[:count], - :MIN => value[:min], - :MAX => value[:max], - :SUM => value[:sum], - :MEAN => (value[:sum] / value[:count].to_f).round(1) + :MIN => value[:min].is_a?(Float) ? "%0.2f" % value[:min] : value[:min], + :MAX => value[:max].is_a?(Float) ? "%0.2f" % value[:max] : value[:max], + :SUM => value[:sum].is_a?(Float) ? "%0.2f" % value[:sum] : value[:sum], + :MEAN => "%0.2f" % (value[:sum] / value[:count].to_f) } - output.puts stat_record + if options[:data_out] + data_out.puts stat_record + else + output.puts stat_record + end end end