X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fanalyze_seq;h=f54b9054ea8d96862d77298fdd11b5770c3755b8;hb=d3524a295189338df71482298e67d11587375420;hp=14188894cdbd07a88da5282a3b12b108518b923a;hpb=ad46292d0fd8f1d757ae876d5c9771964bce27e7;p=biopieces.git diff --git a/bp_bin/analyze_seq b/bp_bin/analyze_seq index 1418889..f54b905 100755 --- a/bp_bin/analyze_seq +++ b/bp_bin/analyze_seq @@ -1,6 +1,6 @@ -#!/usr/bin/env perl +#!/usr/bin/env ruby -# Copyright (C) 2007-2009 Martin A. Hansen. +# Copyright (C) 2007-2010 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -18,117 +18,42 @@ # http://www.gnu.org/copyleft/gpl.html - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -# Analyze sequences in the stream. - - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# This program is part of the Biopieces framework (www.biopieces.org). -use warnings; -use strict; -use Maasha::Biopieces; -use Maasha::Common; -use Maasha::Seq; - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - - -my ( $options, $in, $out, $record, $analysis ); - -$options = Maasha::Biopieces::parse_options(); - -$in = Maasha::Biopieces::read_stream( $options->{ "stream_in" } ); -$out = Maasha::Biopieces::write_stream( $options->{ "stream_out" } ); - -while ( $record = Maasha::Biopieces::get_record( $in ) ) -{ - seq_analyze( $record ) if $record->{ "SEQ" }; - - Maasha::Biopieces::put_record( $record, $out ); -} - -Maasha::Biopieces::close_stream( $in ); -Maasha::Biopieces::close_stream( $out ); +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +# Analyze sequences in the stream. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -sub seq_analyze -{ - # Martin A. Hansen, July 2009. +require 'maasha/biopieces' +require 'maasha/seq' - # Analyzes the composition of the string in the record and appends - # the analysis to the record. +casts = [] - my ( $record, # Biopiece record with a SEQ entry. - ) = @_; - - # Returns nothing. - - my ( %char_hash, @indels, @alph, $char, $gc, $at, $lc, $max, $indels ); - - %char_hash = Maasha::Common::str_analyze( $record->{ 'SEQ' } ); - - $record->{ 'SEQ_TYPE' } = Maasha::Seq::seq_guess_type( $record->{ 'SEQ' } ); - $record->{ 'SEQ_LEN' } = length $record->{ 'SEQ' }; - - @alph = Maasha::Seq::seq_alph( $record->{ 'SEQ_TYPE' } . "_AMBI" ); - @indels = qw( - ~ . _ ); - - $max = 0; - - foreach $char ( @alph ) - { - $char_hash{ $char } += $char_hash{ lc $char } || 0; - - $record->{ "RES[$char]" } = $char_hash{ $char }; - - $max = $char_hash{ $char } if $char_hash{ $char } > $max; - - $record->{ "RES_SUM" } += $char_hash{ $char }; - } - - $indels = 0; - - map { $record->{ "RES[$_]" } = $char_hash{ $_ }; $indels += $char_hash{ $_ } } @indels; - - if ( $record->{ "SEQ_TYPE" } =~ /DNA|RNA/i ) - { - $gc = $char_hash{ "G" } + $char_hash{ "C" }; - $at = $char_hash{ "A" } + $char_hash{ "T" } + $char_hash{ "U" }; - - $lc = 0; - - map { $lc += $char_hash{ lc $_ } || 0 } @alph; - - $record->{ "MIX_INDEX" } = sprintf( "%.2f", $max / ( $record->{ "SEQ_LEN" } - $indels ) ); - $record->{ "GC%" } = sprintf( "%.2f", 100 * $gc / ( $record->{ "SEQ_LEN" } - $indels ) ); - $record->{ "SOFT_MASK%" } = sprintf( "%.2f", 100 * $lc / ( $record->{ "SEQ_LEN" } - $indels ) ); - $record->{ "HARD_MASK%" } = sprintf( "%.2f", 100 * ( $char_hash{ "n" } + $char_hash{ "N" } ) / ( $record->{ "SEQ_LEN" } - $indels ) ); - $record->{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at ); - } -} - - -# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +options = Biopieces.options_parse(ARGV, casts) +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + seq = Seq.new(record[:SEQ_NAME], record[:SEQ], record[:SEQ_TYPE], record[:SCORE]) + comp = seq.composition -BEGIN -{ - Maasha::Biopieces::status_set(); -} + comp.each_pair do |key,val| + record["RES[#{key}]"] = val + end + record["SOFT_MASK%"] = seq.soft_mask + record["HARD_MASK%"] = (comp["N"].to_f / (seq.len - seq.indels).to_f * 100.0).round(2) + record["GC%"] = ((comp["G"] + comp["C"]).to_f / (seq.len - seq.indels).to_f * 100.0).round(2) + end -END -{ - Maasha::Biopieces::status_log(); -} + output.puts record + end +end # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<