adding bzip2 support in ruby

[biopieces.git] / code_perl / Maasha / AlignTwoSeq.pm
diff --git a/code_perl/Maasha/AlignTwoSeq.pm b/code_perl/Maasha/AlignTwoSeq.pm

index 54234edea33a9ac9f2055628f3bee7e23de3232e..cebd6edca2b8b94d0bb8d4a520b85b8ae606dd23 100644 (file)
--- a/code_perl/Maasha/AlignTwoSeq.pm
+++ b/code_perl/Maasha/AlignTwoSeq.pm
@@ -37,6 +37,14 @@ use vars qw ( @ISA @EXPORT );
  
  @ISA = qw( Exporter );
  
+use constant {
+    SCORE_FACTOR_LEN    => 3,
+    SCORE_FACTOR_CORNER => 2,
+    SCORE_FACTOR_DIAG   => 1,
+    SCORE_FACTOR_NARROW => 0.5,
+    VERBOSE             => 0,
+};
+
  
  # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  
@@ -48,10 +56,10 @@ sub align_two_seq
      # Generates an alignment by chaining matches, which are subsequences
      # shared between two sequences. The routine functions by considering 
      # only matches within a given search space. If no matches are given
-    # these will be generated, if long matches are found these will be
-    # included in the alignment, otherwise matches will be included depending
-    # on a calculated score. New search spaces spanning the spaces between
-    # matches and the search space boundaries will be cast and recursed into.
+    # these will be located and matches will be included depending on a
+    # calculated score. New search spaces spanning the spaces between
+    # matches and the search space boundaries will be cast and recursed
+    # into.
      
      # Usage: align_two_seq( { Q_SEQ => \$q_seq, S_SEQ => \$s_seq }, [] );
  
@@ -417,7 +425,7 @@ sub match_redundant
  {
      # Martin A. Hansen, August 2009
      
-    my ( $new_match,       # match
+    my ( $new_match,   # match
           $redundant,   # hashref
         ) = @_;
  
@@ -512,12 +520,11 @@ sub match_score
      $score_len    = match_score_len( $match );
      $score_corner = match_score_corner( $match, $space );
      $score_diag   = match_score_diag( $match, $space );
-    $score_narrow = 0; #match_score_narrow( $match, $space );
+    $score_narrow = match_score_narrow( $match, $space );
  
      $score_total  = $score_len - $score_corner - $score_diag - $score_narrow;
  
-    # print STDERR "LEN: $score_len   CORNER: $score_corner   DIAG: $score_diag   NARROW: $score_narrow: TOTAL: $score_total\n" if $score_total > 0;
-    print STDERR "LEN: $score_len   CORNER: $score_corner   DIAG: $score_diag   NARROW: $score_narrow: TOTAL: $score_total\n";
+    printf STDERR "LEN: %d   CORNER: %d   DIAG: %d   NARROW: %d   TOTAL: %d\n", $score_len, $score_corner, $score_diag, $score_narrow, $score_total if VERBOSE;
  
      $match->{ 'SCORE' } = $score_total;
  }
@@ -534,7 +541,7 @@ sub match_score_len
  
      # Returns integer.
  
-    return $match->{ 'LEN' } * 2;
+    return $match->{ 'LEN' } * SCORE_FACTOR_LEN;
  }
  
  
@@ -557,7 +564,7 @@ sub match_score_corner
  
      $score_corner = Maasha::Calc::min( $left_dist, $right_dist );
  
-    return int $score_corner * 1.5;
+    return $score_corner * SCORE_FACTOR_CORNER;
  }
  
  
@@ -615,7 +622,7 @@ sub match_score_diag
  
      $score_diag = $min_diag_dist;
  
-    return int $score_diag;
+    return $score_diag * SCORE_FACTOR_DIAG;
  }
  
  
@@ -655,9 +662,7 @@ sub match_score_narrow
  
      $score_narrow = $max_narrow_dist;
  
-    return abs( $beg_narrow_dist - $end_narrow_dist );
-
-    return $score_narrow;
+    return $score_narrow * SCORE_FACTOR_NARROW;
  }
  
  
@@ -708,47 +713,47 @@ sub insert_indels
  
      if ( @{ $matches } > 0 )
      {
-    @{ $matches } = sort { $a->{ 'Q_BEG' } <=> $b->{ 'Q_BEG' } } @{ $matches };  # FIXME is this necessary?
-
-    # >>>>>>>>>>>>>> FIRST MATCH <<<<<<<<<<<<<<
-
-    $diff = $matches->[ 0 ]->{ 'Q_BEG' } - $matches->[ 0 ]->{ 'S_BEG' };
+        @{ $matches } = sort { $a->{ 'Q_BEG' } <=> $b->{ 'Q_BEG' } } @{ $matches };  # FIXME is this necessary?
  
-    if ( $diff > 0 )
-    {
-        substr ${ $s_seq }, 0, 0, '-' x $diff;
+        # >>>>>>>>>>>>>> FIRST MATCH <<<<<<<<<<<<<<
  
-        $s_indels += $diff;
-    }
-    elsif ( $diff < 0 )
-    {
-        substr ${ $q_seq }, 0, 0, '-' x abs $diff;
-
-        $q_indels += abs $diff;
-    }
-
-    # >>>>>>>>>>>>>> MIDDLE MATCHES <<<<<<<<<<<<<<
-
-    for ( $i = 0; $i < scalar @{ $matches } - 1; $i++ )
-    {
-        $q_dist = $matches->[ $i + 1 ]->{ 'Q_BEG' } - $matches->[ $i ]->{ 'Q_END' };
-        $s_dist = $matches->[ $i + 1 ]->{ 'S_BEG' } - $matches->[ $i ]->{ 'S_END' };
-
-        $diff   = $q_dist - $s_dist;
+        $diff = $matches->[ 0 ]->{ 'Q_BEG' } - $matches->[ 0 ]->{ 'S_BEG' };
  
          if ( $diff > 0 )
          {
-            substr ${ $s_seq }, $s_indels + $matches->[ $i ]->{ 'S_END' } + 1, 0, '-' x $diff;
+            substr ${ $s_seq }, 0, 0, '-' x $diff;
  
              $s_indels += $diff;
          }
          elsif ( $diff < 0 )
          {
-            substr ${ $q_seq }, $q_indels + $matches->[ $i ]->{ 'Q_END' } + 1, 0, '-' x abs $diff;
+            substr ${ $q_seq }, 0, 0, '-' x abs $diff;
  
              $q_indels += abs $diff;
          }
-    }
+
+        # >>>>>>>>>>>>>> MIDDLE MATCHES <<<<<<<<<<<<<<
+
+        for ( $i = 0; $i < scalar @{ $matches } - 1; $i++ )
+        {
+            $q_dist = $matches->[ $i + 1 ]->{ 'Q_BEG' } - $matches->[ $i ]->{ 'Q_END' };
+            $s_dist = $matches->[ $i + 1 ]->{ 'S_BEG' } - $matches->[ $i ]->{ 'S_END' };
+
+            $diff   = $q_dist - $s_dist;
+
+            if ( $diff > 0 )
+            {
+                substr ${ $s_seq }, $s_indels + $matches->[ $i ]->{ 'S_END' } + 1, 0, '-' x $diff;
+
+                $s_indels += $diff;
+            }
+            elsif ( $diff < 0 )
+            {
+                substr ${ $q_seq }, $q_indels + $matches->[ $i ]->{ 'Q_END' } + 1, 0, '-' x abs $diff;
+
+                $q_indels += abs $diff;
+            }
+        }
      }
  
      # >>>>>>>>>>>>>> LAST MATCH <<<<<<<<<<<<<<
@@ -769,6 +774,7 @@ sub insert_indels
      }
  }
  
+1;
  
  __END__