X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=code_perl%2FMaasha%2FSeq.pm;h=4d6c7997665eb2ff19d4e2e44b6c035de841dbd4;hb=2f10201d21af91034ac31a45177de90d4ccef43b;hp=12839653b5fbb4dec3b992401c22eda77edce6b9;hpb=288c4e7fc6fedad07c7297dfb14be17c6a7fa364;p=biopieces.git diff --git a/code_perl/Maasha/Seq.pm b/code_perl/Maasha/Seq.pm index 1283965..4d6c799 100644 --- a/code_perl/Maasha/Seq.pm +++ b/code_perl/Maasha/Seq.pm @@ -28,6 +28,7 @@ package Maasha::Seq; # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +use warnings; use strict; use POSIX qw( islower ); use Maasha::Common; @@ -64,11 +65,11 @@ sub seq_guess_type } if ( $count = $check_seq =~ tr/FLPQIEflpqie// and $count > 0 ) { - return "protein"; + return "PROTEIN"; } elsif ( $count = $check_seq =~ tr/Uu// and $count > 0 ) { - return "rna"; + return "RNA"; } else { - return "dna"; + return "DNA"; } } @@ -779,6 +780,56 @@ sub seq_generate } +sub seq_insert +{ + # Martin A. Hansen, June 2009. + + # Randomly duplicates a given number of residues in a given sequence. + + my ( $seq, # sequence to mutate + $insertions, # number of residues to insert + ) = @_; + + # Returns a string. + + my ( $i, $pos ); + + for ( $i = 0; $i < $insertions; $i++ ) + { + $pos = int( rand( length $seq ) ); + + substr $seq, $pos, 0, substr $seq , $pos, 1; + } + + return $seq; +} + + +sub seq_delete +{ + # Martin A. Hansen, June 2009. + + # Randomly deletes a given number of residues from a given sequence. + + my ( $seq, # sequence to mutate + $deletions, # number of residues to delete + ) = @_; + + # Returns a string. + + my ( $i, $pos ); + + for ( $i = 0; $i < $deletions; $i++ ) + { + $pos = int( rand( length $seq ) ); + + substr $seq, $pos, 1, ''; + } + + return $seq; +} + + sub seq_mutate { # Martin A. Hansen, June 2009. @@ -863,26 +914,30 @@ sub seq_alph { # Martin A. Hansen, May 2007. - # returns a requested alphabet + # Returns a requested sequence alphabet. my ( $type, # alphabet type ) = @_; # returns list - my ( @alph ); + my ( %alph_hash, $alph ); + + %alph_hash = ( + DNA => [ qw(A T C G) ], + DNA_AMBI => [ qw(A G C U T R Y W S M K H D V B N) ], + RNA => [ qw(A U C G) ], + RNA_AMBI => [ qw(A G C U T R Y W S M K H D V B N) ], + PROTEIN => [ qw(F L S Y C W P H Q R I M T N K V A D E G) ], + ); - if ( $type =~ /^dna$/i ) { - @alph = qw( A T C G ); - } elsif ( $type =~ /^rna$/i ) { - @alph = qw( A U C G ); - } elsif ( $type =~ /^prot/i ) { - @alph = qw( F L S Y C W P H Q R I M T N K V A D E G ); + if ( exists $alph_hash{ uc $type } ) { + $alph = $alph_hash{ uc $type }; } else { die qq(ERROR: Unknown alphabet type: "$type"\n); } - return wantarray ? @alph : \@alph; + return wantarray ? @{ $alph } : $alph; } @@ -899,7 +954,7 @@ sub seq_analyze my ( %analysis, @chars, @chars_lc, $char, %char_hash, $gc, $at, $lc, $max, $res_sum, @indels, %indel_hash ); - $analysis{ "SEQ_TYPE" } = uc Maasha::Seq::seq_guess_type( $seq ); + $analysis{ "SEQ_TYPE" } = Maasha::Seq::seq_guess_type( $seq ); $analysis{ "SEQ_LEN" } = length $seq; @indels = qw( - ~ . _ ); @@ -954,7 +1009,7 @@ sub seq_analyze map { $analysis{ "RES[$_]" } = $indel_hash{ $_ } } @indels; $analysis{ "MIX_INDEX" } = sprintf( "%.2f", $max / $analysis{ "SEQ_LEN" } ); - $analysis{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at ); + #$analysis{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at ); return wantarray ? %analysis : \%analysis; }