#!/usr/bin/env perl # Copyright (C) 2007-2009 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # http://www.gnu.org/copyleft/gpl.html # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< # Analyze sequences in the stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< use warnings; use strict; use Maasha::Biopieces; use Maasha::Common; use Maasha::Seq; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< my ( $options, $in, $out, $record, $analysis ); $options = Maasha::Biopieces::parse_options(); $in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); $out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); while ( $record = Maasha::Biopieces::get_record( $in ) ) { seq_analyze( $record ) if $record->{ "SEQ" }; Maasha::Biopieces::put_record( $record, $out ); } Maasha::Biopieces::close_stream( $in ); Maasha::Biopieces::close_stream( $out ); # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< sub seq_analyze { # Martin A. Hansen, July 2009. # Analyzes the composition of the string in the record and appends # the analysis to the record. my ( $record, # Biopiece record with a SEQ entry. ) = @_; # Returns nothing. my ( %char_hash, @indels, @alph, $char, $gc, $at, $lc, $max, $indels ); %char_hash = Maasha::Common::str_analyze( $record->{ 'SEQ' } ); $record->{ 'SEQ_TYPE' } = Maasha::Seq::seq_guess_type( $record->{ 'SEQ' } ); $record->{ 'SEQ_LEN' } = length $record->{ 'SEQ' }; @alph = Maasha::Seq::seq_alph( $record->{ 'SEQ_TYPE' } . "_AMBI" ); @indels = qw( - ~ . _ ); $max = 0; foreach $char ( @alph ) { $char_hash{ $char } += $char_hash{ lc $char } || 0; $record->{ "RES[$char]" } = $char_hash{ $char }; $max = $char_hash{ $char } if $char_hash{ $char } > $max; $record->{ "RES_SUM" } += $char_hash{ $char }; } $indels = 0; map { $record->{ "RES[$_]" } = $char_hash{ $_ }; $indels += $char_hash{ $_ } } @indels; if ( $record->{ "SEQ_TYPE" } =~ /DNA|RNA/i ) { $gc = $char_hash{ "G" } + $char_hash{ "C" }; $at = $char_hash{ "A" } + $char_hash{ "T" } + $char_hash{ "U" }; $lc = 0; map { $lc += $char_hash{ lc $_ } || 0 } @alph; $record->{ "MIX_INDEX" } = sprintf( "%.2f", $max / ( $record->{ "SEQ_LEN" } - $indels ) ); $record->{ "GC%" } = sprintf( "%.2f", 100 * $gc / ( $record->{ "SEQ_LEN" } - $indels ) ); $record->{ "SOFT_MASK%" } = sprintf( "%.2f", 100 * $lc / ( $record->{ "SEQ_LEN" } - $indels ) ); $record->{ "HARD_MASK%" } = sprintf( "%.2f", 100 * ( $char_hash{ "n" } + $char_hash{ "N" } ) / ( $record->{ "SEQ_LEN" } - $indels ) ); $record->{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at ); } } # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< BEGIN { Maasha::Biopieces::status_set(); } END { Maasha::Biopieces::status_log(); } # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< __END__