#!/usr/bin/env perl

# Copyright (C) 2007-2009 Martin A. Hansen.

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Determine type, count, min, max, sum and mean for values in stream.
#
# Values accepted by Maasha::Calc::is_a_number are analyzed by value, all
# other values are analyzed by their string length. A key is only reported
# as "num" if every value seen for that key is a number; otherwise it is
# reported as "alph" with length statistics covering all of its values.

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


use warnings;
use strict;
use Maasha::Biopieces;
use Maasha::Calc;
use Data::Dumper;


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


my ( $options, $in, $out, $data_out, $record, %analysis, %skip_hash, %key_hash );

$options = Maasha::Biopieces::parse_options(
    [
        { long => 'no_stream', short => 'x', type => 'flag', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
        { long => 'data_out',  short => 'o', type => 'file', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
        { long => 'keys',      short => 'k', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
        { long => 'no_keys',   short => 'K', type => 'list', mandatory => 'no', default => undef, allowed => undef, disallowed => undef },
    ]
);

$in  = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );

# The analysis table goes to the file given with --data_out, or to STDOUT
# if write_stream() does not hand back a file handle.
$data_out   = Maasha::Biopieces::write_stream( $options->{ "data_out" }, $options->{ "compress" } );
$data_out ||= \*STDOUT;

# Both key lists are optional, so fall back to an empty list when undefined.
$skip_hash{ $_ } = 1 foreach @{ $options->{ "no_keys" } || [] };
$key_hash{ $_ }  = 1 foreach @{ $options->{ "keys" }    || [] };

while ( $record = Maasha::Biopieces::get_record( $in ) )
{
    foreach my $key ( keys %{ $record } )
    {
        # A non-empty --keys list restricts the analysis to those keys.
        next if %key_hash and not exists $key_hash{ $key };
        next if $skip_hash{ $key };

        add_value( $analysis{ $key } ||= {}, $record->{ $key } );
    }

    Maasha::Biopieces::put_record( $record, $out ) if not $options->{ "no_stream" };
}

finalize_stats( $_ ) foreach values %analysis;

# One output row per statistic and one 15 character wide column per key.
# The row labels are padded to the same width so that the columns line up.
foreach my $field ( qw( KEY TYPE COUNT MIN MAX SUM MEAN ) )
{
    my $row = sprintf "%-5s", $field;

    foreach my $key ( sort keys %analysis )
    {
        $row .= sprintf "% 15s", $field eq "KEY" ? $key : $analysis{ $key }->{ $field };
    }

    print $data_out "$row\n";
}

# Buffered write errors only surface when the handle is closed.
close $data_out or die qq(ERROR: could not close data output: $!\n);

Maasha::Biopieces::close_stream( $in );
Maasha::Biopieces::close_stream( $out );


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


sub add_value
{
    # Adds a single value to the running statistics of one key.
    #
    # Length statistics are collected for every value. Value statistics are
    # collected as long as all values seen so far are numbers, and are
    # abandoned the moment a non-number shows up, so the two kinds of
    # measurement never end up in the same MIN, MAX or SUM.

    my ( $stats,   # hashref with the running statistics for the key
         $value,   # value to add
       ) = @_;

    # Returns nothing.

    my ( $len );

    $len = length $value;

    if ( not exists $stats->{ "COUNT" } )
    {
        $stats->{ "COUNT" }   = 0;
        $stats->{ "NUMERIC" } = 1;
        $stats->{ "LEN_MIN" } = $len;
        $stats->{ "LEN_MAX" } = $len;
        $stats->{ "LEN_SUM" } = 0;
    }

    $stats->{ "LEN_MIN" }  = Maasha::Calc::min( $stats->{ "LEN_MIN" }, $len );
    $stats->{ "LEN_MAX" }  = Maasha::Calc::max( $stats->{ "LEN_MAX" }, $len );
    $stats->{ "LEN_SUM" } += $len;

    if ( $stats->{ "NUMERIC" } )
    {
        if ( Maasha::Calc::is_a_number( $value ) )
        {
            if ( not exists $stats->{ "NUM_MIN" } )
            {
                $stats->{ "NUM_MIN" } = $value;
                $stats->{ "NUM_MAX" } = $value;
            }

            $stats->{ "NUM_MIN" }  = Maasha::Calc::min( $stats->{ "NUM_MIN" }, $value );
            $stats->{ "NUM_MAX" }  = Maasha::Calc::max( $stats->{ "NUM_MAX" }, $value );
            $stats->{ "NUM_SUM" } += $value;
        }
        else
        {
            $stats->{ "NUMERIC" } = 0;
        }
    }

    $stats->{ "COUNT" }++;
}


sub finalize_stats
{
    # Derives the reported TYPE, MIN, MAX, SUM and MEAN fields for one key
    # from the statistics collected by add_value(). SUM and MEAN are
    # formatted with two decimals.

    my ( $stats,   # hashref with the running statistics for the key
       ) = @_;

    # Returns nothing.

    my ( $source );

    # add_value() has been called at least once for every key in the
    # analysis, so COUNT is never 0 here.
    $source = $stats->{ "NUMERIC" } ? "NUM" : "LEN";

    $stats->{ "TYPE" } = $stats->{ "NUMERIC" } ? "num" : "alph";
    $stats->{ "MIN" }  = $stats->{ $source . "_MIN" };
    $stats->{ "MAX" }  = $stats->{ $source . "_MAX" };
    $stats->{ "MEAN" } = sprintf "%.2f", $stats->{ $source . "_SUM" } / $stats->{ "COUNT" };
    $stats->{ "SUM" }  = sprintf "%.2f", $stats->{ $source . "_SUM" };
}


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


BEGIN
{
    Maasha::Biopieces::status_set();
}


END
{
    Maasha::Biopieces::status_log();
}


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


__END__