-#!/usr/bin/env perl -w
+#!/usr/bin/env perl
# Copyright (C) 2007-2009 Martin A. Hansen.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# Analyze BED entries in the stream.
+# Analyze sequences in the stream.
+
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+use warnings;
use strict;
use Maasha::Biopieces;
+use Maasha::Common;
use Maasha::Seq;
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-my ( $run_time_beg, $run_time_end, $options, $in, $out, $record, $analysis );
+# Main script: read records from the stream, annotate those that carry a
+# sequence, and pass every record on.
+#
+# $in and $out are never assigned in this script and are handed to the
+# Maasha::Biopieces stream helpers as they are.
+# NOTE(review): presumably the helpers fall back to default streams for an
+# undefined handle -- confirm in Maasha::Biopieces.
+my ( $options, $in, $out, $record );
$options = Maasha::Biopieces::parse_options();
while ( $record = Maasha::Biopieces::get_record( $in ) )
{
- if ( $record->{ "SEQ" } )
+ # Only records with a true SEQ value are analyzed; all records are emitted.
+ seq_analyze( $record ) if $record->{ "SEQ" };
+
+ Maasha::Biopieces::put_record( $record, $out );
+}
+
+Maasha::Biopieces::close_stream( $in );
+Maasha::Biopieces::close_stream( $out );
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+sub seq_analyze
+{
+    # Martin A. Hansen, July 2009.
+
+    # Analyzes the composition of the string in the record and appends
+    # the analysis to the record: SEQ_TYPE, SEQ_LEN, one RES[<char>] count
+    # per alphabet character and per indel character, and RES_SUM. For
+    # records whose SEQ_TYPE matches DNA or RNA it also adds MIX_INDEX,
+    # GC%, SOFT_MASK%, HARD_MASK% and MELT_TEMP.
+    #
+    # Counts of upper and lower case letters are merged in the RES[<char>]
+    # values. Ratios are taken over the sequence length minus indels and
+    # are reported as 0.00 when that denominator is zero.
+
+    my ( $record,   # Biopiece record with a SEQ entry.
+       ) = @_;
+
+    # Returns nothing.
+
+    my ( %char_hash, @indels, @alph, $char, $count, $gc, $at, $lc, $max, $indels, $hard, $residues );
+
+    %char_hash = Maasha::Common::str_analyze( $record->{ 'SEQ' } );
+
+    $record->{ 'SEQ_TYPE' } = Maasha::Seq::seq_guess_type( $record->{ 'SEQ' } );
+    $record->{ 'SEQ_LEN' }  = length $record->{ 'SEQ' };
+
+    @alph   = Maasha::Seq::seq_alph( $record->{ 'SEQ_TYPE' } . "_AMBI" );
+    @indels = qw( - ~ . _ );
+
+    # Hard masked residues are taken from the raw counts, before the loop
+    # below folds lower case into upper case; otherwise 'n' would be
+    # counted twice whenever 'N' is part of the alphabet.
+    $hard = ( $char_hash{ "N" } || 0 ) + ( $char_hash{ "n" } || 0 );
+
+    $max = 0;
+
+    # Start from zero so a RES_SUM already present in the record is
+    # replaced rather than added to.
+    $record->{ "RES_SUM" } = 0;
+
+    foreach $char ( @alph )
{
- $analysis = Maasha::Seq::seq_analyze( $record->{ "SEQ" } );
+        # Fold the lower case count into the upper case one.
+        $char_hash{ $char } += $char_hash{ lc $char } || 0;
+
+        $record->{ "RES[$char]" } = $char_hash{ $char };
+
+        # Track the count of the most frequent residue for MIX_INDEX.
+        $max = $char_hash{ $char } if $char_hash{ $char } > $max;
- map { $record->{ $_ } = $analysis->{ $_ } } keys %{ $analysis };
+        $record->{ "RES_SUM" } += $char_hash{ $char };
}
- Maasha::Biopieces::put_record( $record, $out );
+    $indels = 0;
+
+    foreach $char ( @indels )
+    {
+        # Whether str_analyze returns a key for characters absent from SEQ
+        # is not visible here, so a missing count is treated as 0, like the
+        # lower case lookups above.
+        $count = $char_hash{ $char } || 0;
+
+        $record->{ "RES[$char]" } = $count;
+
+        $indels += $count;
+    }
+
+    if ( $record->{ "SEQ_TYPE" } =~ /DNA|RNA/i )
+    {
+        $gc = ( $char_hash{ "G" } || 0 ) + ( $char_hash{ "C" } || 0 );
+        $at = ( $char_hash{ "A" } || 0 ) + ( $char_hash{ "T" } || 0 ) + ( $char_hash{ "U" } || 0 );
+
+        $lc = 0;
+
+        foreach $char ( @alph )
+        {
+            $lc += $char_hash{ lc $char } || 0;
+        }
+
+        # A sequence consisting of indels only leaves nothing to divide by.
+        $residues = $record->{ "SEQ_LEN" } - $indels;
+
+        $record->{ "MIX_INDEX" }  = sprintf( "%.2f", $residues > 0 ? $max / $residues : 0 );
+        $record->{ "GC%" }        = sprintf( "%.2f", $residues > 0 ? 100 * $gc / $residues : 0 );
+        $record->{ "SOFT_MASK%" } = sprintf( "%.2f", $residues > 0 ? 100 * $lc / $residues : 0 );
+        $record->{ "HARD_MASK%" } = sprintf( "%.2f", $residues > 0 ? 100 * $hard / $residues : 0 );
+        $record->{ "MELT_TEMP" }  = sprintf( "%.2f", 4 * $gc + 2 * $at );
+    }
}
BEGIN
{
- $run_time_beg = Maasha::Biopieces::run_time();
-
- Maasha::Biopieces::log_biopiece();
+ Maasha::Biopieces::status_set();   # BEGIN runs at compile time, before the record loop. NOTE(review): status_set() lives in Maasha::Biopieces and is not visible here; presumably it records start-up state for status_log() in END -- confirm.
}
END
{
- Maasha::Biopieces::close_stream( $in );
- Maasha::Biopieces::close_stream( $out );
-
- $run_time_end = Maasha::Biopieces::run_time();
-
- Maasha::Biopieces::run_time_print( $run_time_beg, $run_time_end, $options );
+ Maasha::Biopieces::status_log();   # END runs as the interpreter exits. NOTE(review): status_log() lives in Maasha::Biopieces and is not visible here; presumably it writes the status recorded by status_set() -- confirm.
}
__END__
-