3 # Copyright (C) 2007-2009 Martin A. Hansen.
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 # http://www.gnu.org/copyleft/gpl.html
22 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
25 # Analyze sequences in the stream.
28 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
33 use Maasha::Biopieces;
38 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
41 my ( $options, $in, $out, $record, $analysis );
43 $options = Maasha::Biopieces::parse_options();
45 $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } );
46 $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } );
48 while ( $record = Maasha::Biopieces::get_record( $in ) )
50 seq_analyze( $record ) if $record->{ "SEQ" };
52 Maasha::Biopieces::put_record( $record, $out );
55 Maasha::Biopieces::close_stream( $in );
56 Maasha::Biopieces::close_stream( $out );
59 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
64 # Martin A. Hansen, July 2009.
66 # Analyzes the composition of the string in the record and appends
67 # the analysis to the record.
69 my ( $record, # Biopiece record with a SEQ entry.
74 my ( %char_hash, @indels, @alph, $char, $gc, $at, $lc, $max, $indels );
76 %char_hash = Maasha::Common::str_analyze( $record->{ 'SEQ' } );
78 $record->{ 'SEQ_TYPE' } = Maasha::Seq::seq_guess_type( $record->{ 'SEQ' } );
79 $record->{ 'SEQ_LEN' } = length $record->{ 'SEQ' };
81 @alph = Maasha::Seq::seq_alph( $record->{ 'SEQ_TYPE' } . "_AMBI" );
82 @indels = qw( - ~ . _ );
86 foreach $char ( @alph )
88 $char_hash{ $char } += $char_hash{ lc $char } || 0;
90 $record->{ "RES[$char]" } = $char_hash{ $char };
92 $max = $char_hash{ $char } if $char_hash{ $char } > $max;
94 $record->{ "RES_SUM" } += $char_hash{ $char };
99 map { $record->{ "RES[$_]" } = $char_hash{ $_ }; $indels += $char_hash{ $_ } } @indels;
101 if ( $record->{ "SEQ_TYPE" } =~ /DNA|RNA/i )
103 $gc = $char_hash{ "G" } + $char_hash{ "C" };
104 $at = $char_hash{ "A" } + $char_hash{ "T" } + $char_hash{ "U" };
108 map { $lc += $char_hash{ lc $_ } || 0 } @alph;
110 $record->{ "MIX_INDEX" } = sprintf( "%.2f", $max / ( $record->{ "SEQ_LEN" } - $indels ) );
111 $record->{ "GC%" } = sprintf( "%.2f", 100 * $gc / ( $record->{ "SEQ_LEN" } - $indels ) );
112 $record->{ "SOFT_MASK%" } = sprintf( "%.2f", 100 * $lc / ( $record->{ "SEQ_LEN" } - $indels ) );
113 $record->{ "HARD_MASK%" } = sprintf( "%.2f", 100 * ( $char_hash{ "n" } + $char_hash{ "N" } ) / ( $record->{ "SEQ_LEN" } - $indels ) );
114 $record->{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at );
119 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
124 Maasha::Biopieces::status_set();
130 Maasha::Biopieces::status_log();
134 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<