From 9ea055e04d4639375c29e058b6d35d89913f173e Mon Sep 17 00:00:00 2001 From: martinahansen Date: Sun, 24 May 2009 15:48:16 +0000 Subject: [PATCH] migrated analyze biopieces git-svn-id: http://biopieces.googlecode.com/svn/trunk@396 74ccb610-7750-0410-82ae-013aeee3265d --- code_perl/Maasha/BioRun.pm | 201 ------------------------------------- code_perl/Maasha/UCSC.pm | 4 +- 2 files changed, 2 insertions(+), 203 deletions(-) diff --git a/code_perl/Maasha/BioRun.pm b/code_perl/Maasha/BioRun.pm index e5845d3..53c09c7 100644 --- a/code_perl/Maasha/BioRun.pm +++ b/code_perl/Maasha/BioRun.pm @@ -133,8 +133,6 @@ sub run_script elsif ( $script eq "assemble_tag_contigs" ) { script_assemble_tag_contigs( $in, $out, $options ) } elsif ( $script eq "length_seq" ) { script_length_seq( $in, $out, $options ) } elsif ( $script eq "uppercase_seq" ) { script_uppercase_seq( $in, $out, $options ) } - elsif ( $script eq "analyze_seq" ) { script_analyze_seq( $in, $out, $options ) } - elsif ( $script eq "analyze_tags" ) { script_analyze_tags( $in, $out, $options ) } elsif ( $script eq "complexity_seq" ) { script_complexity_seq( $in, $out, $options ) } elsif ( $script eq "oligo_freq" ) { script_oligo_freq( $in, $out, $options ) } elsif ( $script eq "create_weight_matrix" ) { script_create_weight_matrix( $in, $out, $options ) } @@ -184,8 +182,6 @@ sub run_script elsif ( $script eq "plot_matches" ) { script_plot_matches( $in, $out, $options ) } elsif ( $script eq "plot_seqlogo" ) { script_plot_seqlogo( $in, $out, $options ) } elsif ( $script eq "plot_phastcons_profiles" ) { script_plot_phastcons_profiles( $in, $out, $options ) } - elsif ( $script eq "analyze_bed" ) { script_analyze_bed( $in, $out, $options ) } - elsif ( $script eq "analyze_vals" ) { script_analyze_vals( $in, $out, $options ) } elsif ( $script eq "length_vals" ) { script_length_vals( $in, $out, $options ) } elsif ( $script eq "sum_vals" ) { script_sum_vals( $in, $out, $options ) } elsif ( $script eq "mean_vals" ) {
script_mean_vals( $in, $out, $options ) } @@ -561,13 +557,6 @@ sub get_options ylabel|Y=s ); } - elsif ( $script eq "analyze_vals" ) - { - @options = qw( - no_stream|x - keys|k=s - ); - } elsif ( $script eq "head_records" ) { @options = qw( @@ -1824,83 +1813,6 @@ sub script_length_seq } -sub script_analyze_seq -{ - # Martin A. Hansen, August 2007. - - # Analyze sequence composition of sequences in stream. - - my ( $in, # handle to in stream - $out, # handle to out stream - ) = @_; - - # Returns nothing. - - my ( $record, $analysis ); - - while ( $record = Maasha::Biopieces::get_record( $in ) ) - { - if ( $record->{ "SEQ" } ) - { - $analysis = Maasha::Seq::seq_analyze( $record->{ "SEQ" } ); - - map { $record->{ $_ } = $analysis->{ $_ } } keys %{ $analysis }; - } - - Maasha::Biopieces::put_record( $record, $out ); - } -} - - -sub script_analyze_tags -{ - # Martin A. Hansen, August 2008. - - # Analyze sequence tags in stream. - - my ( $in, # handle to in stream - $out, # handle to out stream - ) = @_; - - # Returns nothing. - - my ( $record, $analysis, %len_hash, %clone_hash, $clones, $key, $tag_record ); - - while ( $record = Maasha::Biopieces::get_record( $in ) ) - { - if ( $record->{ "SEQ_NAME" } and $record->{ "SEQ" } ) - { - if ( $record->{ "SEQ_NAME" } =~ /_(\d+)$/ ) - { - $clones = $1; - - $len_hash{ length( $record->{ "SEQ" } ) }++; - $clone_hash{ length( $record->{ "SEQ" } ) } += $clones; - } - } - elsif ( $record->{ "Q_ID" } and $record->{ "BED_LEN" } ) - { - if ( $record->{ "Q_ID" } =~ /_(\d+)$/ ) - { - $clones = $1; - - $len_hash{ $record->{ "BED_LEN" } }++; - $clone_hash{ $record->{ "BED_LEN" } } += $clones; - } - } - } - - foreach $key ( sort { $a <=> $b } keys %len_hash ) - { - $tag_record->{ "TAG_LEN" } = $key; - $tag_record->{ "TAG_COUNT" } = $len_hash{ $key }; - $tag_record->{ "TAG_CLONES" } = $clone_hash{ $key }; - - Maasha::Biopieces::put_record( $tag_record, $out ); - } -} - - sub script_complexity_seq { # Martin A. Hansen, May 2008. 
@@ -3294,119 +3206,6 @@ sub script_plot_phastcons_profiles } -sub script_analyze_bed -{ - # Martin A. Hansen, March 2008. - - # Analyze BED entries in stream. - - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $record ); - - while ( $record = Maasha::Biopieces::get_record( $in ) ) - { - $record = Maasha::UCSC::bed_analyze( $record ) if $record->{ "REC_TYPE" } eq "BED"; - - Maasha::Biopieces::put_record( $record, $out ); - } -} - - -sub script_analyze_vals -{ - # Martin A. Hansen, August 2007. - - # Analyze values for given keys in stream. - - my ( $in, # handle to in stream - $out, # handle to out stream - $options, # options hash - ) = @_; - - # Returns nothing. - - my ( $record, $key, @keys, %key_hash, $analysis, $len ); - - map { $key_hash{ $_ } = 1 } @{ $options->{ "keys" } }; - - while ( $record = Maasha::Biopieces::get_record( $in ) ) - { - foreach $key ( keys %{ $record } ) - { - next if $options->{ "keys" } and not exists $key_hash{ $key }; - - $analysis->{ $key }->{ "COUNT" }++; - - if ( Maasha::Calc::is_a_number( $record->{ $key } ) ) - { - $analysis->{ $key }->{ "TYPE" } = "num"; - $analysis->{ $key }->{ "SUM" } += $record->{ $key }; - $analysis->{ $key }->{ "MAX" } = $record->{ $key } if $record->{ $key } > $analysis->{ $key }->{ "MAX" } or not $analysis->{ $key }->{ "MAX" }; - $analysis->{ $key }->{ "MIN" } = $record->{ $key } if $record->{ $key } < $analysis->{ $key }->{ "MIN" } or not $analysis->{ $key }->{ "MIN" }; - } - else - { - $len = length $record->{ $key }; - - $analysis->{ $key }->{ "TYPE" } = "alph"; - $analysis->{ $key }->{ "SUM" } += $len; - $analysis->{ $key }->{ "MAX" } = $len if $len > $analysis->{ $key }->{ "MAX" } or not $analysis->{ $key }->{ "MAX" }; - $analysis->{ $key }->{ "MIN" } = $len if $len < $analysis->{ $key }->{ "MIM" } or not $analysis->{ $key }->{ "MIN" }; - } - } - - Maasha::Biopieces::put_record( $record, $out ) if not $options->{ 
"no_stream" }; - } - - foreach $key ( keys %{ $analysis } ) - { - $analysis->{ $key }->{ "MEAN" } = sprintf "%.2f", $analysis->{ $key }->{ "SUM" } / $analysis->{ $key }->{ "COUNT" }; - $analysis->{ $key }->{ "SUM" } = sprintf "%.2f", $analysis->{ $key }->{ "SUM" }; - } - - my ( $keys, $types, $counts, $mins, $maxs, $sums, $means ); - - $keys = "KEY "; - $types = "TYPE "; - $counts = "COUNT"; - $mins = "MIN "; - $maxs = "MAX "; - $sums = "SUM "; - $means = "MEAN "; - - if ( $options->{ "keys" } ) { - @keys = @{ $options->{ "keys" } }; - } else { - @keys = keys %{ $analysis }; - } - - foreach $key ( @keys ) - { - $keys .= sprintf "% 15s", $key; - $types .= sprintf "% 15s", $analysis->{ $key }->{ "TYPE" }; - $counts .= sprintf "% 15s", $analysis->{ $key }->{ "COUNT" }; - $mins .= sprintf "% 15s", $analysis->{ $key }->{ "MIN" }; - $maxs .= sprintf "% 15s", $analysis->{ $key }->{ "MAX" }; - $sums .= sprintf "% 15s", $analysis->{ $key }->{ "SUM" }; - $means .= sprintf "% 15s", $analysis->{ $key }->{ "MEAN" }; - } - - print $out "$keys\n"; - print $out "$types\n"; - print $out "$counts\n"; - print $out "$mins\n"; - print $out "$maxs\n"; - print $out "$sums\n"; - print $out "$means\n"; -} - - sub script_head_records { # Martin A. Hansen, August 2007. diff --git a/code_perl/Maasha/UCSC.pm b/code_perl/Maasha/UCSC.pm index 179b8f1..e4c275a 100644 --- a/code_perl/Maasha/UCSC.pm +++ b/code_perl/Maasha/UCSC.pm @@ -378,7 +378,7 @@ sub bed_analyze for ( $i = 0; $i < $entry->{ "BLOCK_COUNT" }; $i++ ) { - $exon_len = @lens[ $i ]; + $exon_len = $lens[ $i ]; $entry->{ "EXON_LEN_$i" } = $exon_len; @@ -400,7 +400,7 @@ sub bed_analyze { for ( $i = 1; $i < $entry->{ "BLOCK_COUNT" }; $i++ ) { - $intron_len = @begs[ $i ] - ( @begs[ $i - 1 ] + @lens[ $i - 1 ] ); + $intron_len = $begs[ $i ] - ( $begs[ $i - 1 ] + $lens[ $i - 1 ] ); $entry->{ "INTRON_LEN_" . ( $i - 1 ) } = $intron_len; -- 2.39.5