added missing files

[biopieces.git] / code_perl / Maasha / Seq.pm
diff --git a/code_perl/Maasha/Seq.pm b/code_perl/Maasha/Seq.pm

index 12839653b5fbb4dec3b992401c22eda77edce6b9..4d6c7997665eb2ff19d4e2e44b6c035de841dbd4 100644 (file)
--- a/code_perl/Maasha/Seq.pm
+++ b/code_perl/Maasha/Seq.pm
@@ -28,6 +28,7 @@ package Maasha::Seq;
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  
  
+use warnings;
  use strict;
  use POSIX qw( islower );
  use Maasha::Common;
@@ -64,11 +65,11 @@ sub seq_guess_type
      }
  
      if ( $count = $check_seq =~ tr/FLPQIEflpqie// and $count > 0 ) {
-        return "protein";
+        return "PROTEIN";
      } elsif ( $count = $check_seq =~ tr/Uu// and $count > 0 ) {
-        return "rna";
+        return "RNA";
      } else {
-        return "dna";
+        return "DNA";
      }
  }
  
@@ -779,6 +780,56 @@ sub seq_generate
  }
  
  
+sub seq_insert
+{
+    # Martin A. Hansen, June 2009.
+
+    # Inserts residues at random positions; each insert is a copy of the residue already at that position.
+
+    my ( $seq,          # sequence to mutate (local copy; the caller's string is not modified)
+         $insertions,   # number of residues to insert
+       ) = @_;
+
+    # Returns the mutated sequence as a string.
+
+    my ( $i, $pos );
+
+    for ( $i = 0; $i < $insertions; $i++ )
+    {
+        $pos = int( rand( length $seq ) );   # length is re-evaluated each pass, so it tracks the growing sequence
+
+        substr $seq, $pos, 0, substr $seq , $pos, 1;   # zero-length replacement = insert the residue at $pos before itself
+    }
+
+    return $seq;
+}
+
+
+sub seq_delete
+{
+    # Martin A. Hansen, June 2009.
+
+    # Deletes single residues at randomly chosen positions, one per loop iteration.
+
+    my ( $seq,         # sequence to mutate (local copy; the caller's string is not modified)
+         $deletions,   # number of residues to delete
+       ) = @_;
+
+    # Returns the mutated sequence as a string.
+
+    my ( $i, $pos );
+
+    for ( $i = 0; $i < $deletions; $i++ )   # NOTE(review): $deletions is not checked against length $seq - confirm callers bound it
+    {
+        $pos = int( rand( length $seq ) );   # length is re-evaluated each pass, so it tracks the shrinking sequence
+
+        substr $seq, $pos, 1, '';   # replace one residue at $pos with the empty string
+    }
+
+    return $seq;
+}
+
+
  sub seq_mutate
  {
      # Martin A. Hansen, June 2009.
@@ -863,26 +914,30 @@ sub seq_alph
  {
      # Martin A. Hansen, May 2007.
  
-    # returns a requested alphabet
+    # Returns a requested sequence alphabet.
  
      my ( $type,   # alphabet type
         ) = @_;
  
      # returns list
  
-    my ( @alph );
+    my ( %alph_hash, $alph );
+
+    %alph_hash = (
+        DNA      => [ qw(A T C G) ],
+        DNA_AMBI => [ qw(A G C U T R Y W S M K H D V B N) ],
+        RNA      => [ qw(A U C G) ],
+        RNA_AMBI => [ qw(A G C U T R Y W S M K H D V B N) ],
+        PROTEIN  => [ qw(F L S Y C W P H Q R I M T N K V A D E G) ],
+    );
  
-    if ( $type =~ /^dna$/i ) {
-        @alph = qw( A T C G );
-    } elsif ( $type =~ /^rna$/i ) {
-        @alph = qw( A U C G );
-    } elsif ( $type =~ /^prot/i ) {
-        @alph = qw( F L S Y C W P H Q R I M T N K V A D E G );
+    if ( exists $alph_hash{ uc $type } ) {
+        $alph = $alph_hash{ uc $type };
      } else {
          die qq(ERROR: Unknown alphabet type: "$type"\n);
      }
  
-    return wantarray ? @alph : \@alph;
+    return wantarray ? @{ $alph } : $alph;
  }
  
  
@@ -899,7 +954,7 @@ sub seq_analyze
  
      my ( %analysis, @chars, @chars_lc, $char, %char_hash, $gc, $at, $lc, $max, $res_sum, @indels, %indel_hash );
  
-    $analysis{ "SEQ_TYPE" } = uc Maasha::Seq::seq_guess_type( $seq );
+    $analysis{ "SEQ_TYPE" } = Maasha::Seq::seq_guess_type( $seq );
      $analysis{ "SEQ_LEN" }  = length $seq;
  
      @indels = qw( - ~ . _ );
@@ -954,7 +1009,7 @@ sub seq_analyze
      map { $analysis{ "RES[$_]" } = $indel_hash{ $_ } } @indels;
  
      $analysis{ "MIX_INDEX" } = sprintf( "%.2f", $max / $analysis{ "SEQ_LEN" } );
-    $analysis{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at );
+    #$analysis{ "MELT_TEMP" } = sprintf( "%.2f", 4 * $gc + 2 * $at );
  
      return wantarray ? %analysis : \%analysis;
  }