polished read_sam

[biopieces.git] / code_perl / Maasha / RestrictEnz.pm
diff --git a/code_perl/Maasha/RestrictEnz.pm b/code_perl/Maasha/RestrictEnz.pm

index 9b1dd27a5b52ccea6fbea5d8f9d0a44f0849764e..836835b08c76f3f4ed3fc9e597ae511d370c260f 100644 (file)
--- a/code_perl/Maasha/RestrictEnz.pm
+++ b/code_perl/Maasha/RestrictEnz.pm
@@ -65,6 +65,7 @@ use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "BP_TMP" } );
  # N1111100000000000
  */
  
+/* 2-dimensional array for fast lookup of nucleotide match. */
  
  char ambi_match[16][16] = {
      "1000011100011101",
@@ -88,6 +89,11 @@ char ambi_match[16][16] = {
  
  int hash( char c )
  {
+    /* Martin A. Hansen, August 2009. */
+
+    /* Given a nucleotide, returns the position of this */
+    /* on the edge of the symmetrical ambi_match lookup table. */
+
      switch ( toupper( c ) )
      {
          case 'A': return 0;
@@ -105,7 +111,7 @@ int hash( char c )
          case 'H': return 12;
          case 'D': return 13;
          case 'B': return 14;
-        case 'N': return 0;
+        case 'N': return 15;
          default: return -1;
      }
  }
@@ -113,12 +119,17 @@ int hash( char c )
  
  void scan( char *seq, char *pat, int seq_len, int pat_len )
  {
+    /* Martin A. Hansen, August 2009. */
+
+    /* Scans a sequence for a subsequence allowing for ambiguity */
+    /* codes a la IUPAC. */
+
      int i;
  
      Inline_Stack_Vars;
      Inline_Stack_Reset;
  
-    for ( i = 0; i < seq_len - pat_len; i++ )
+    for ( i = 0; i < seq_len - pat_len + 1; i++ )
      {
          if ( match( &seq[ i ], pat, pat_len ) ) {
            Inline_Stack_Push( sv_2mortal( newSViv( i ) ) );
@@ -131,6 +142,11 @@ void scan( char *seq, char *pat, int seq_len, int pat_len )
  
  int match( char *seq1, char *seq2, int len )
  {
+    /* Martin A. Hansen, August 2009. */
+
+    /* Checks if two sequences are identical allowing for */
+    /* IUPAC ambiguity codes over a given length. */
+
      int  i = 0;
      char c1;
      char c2;
@@ -158,10 +174,17 @@ END_C
  
  sub re_scan
  {
+    # Martin A. Hansen, August 2009.
+
+    # Calls C function to scan a given sequence for a given
+    # restriction site.
+
      my ( $seq,   # sequence to scan
           $re,    # hashref with RE info
         ) = @_; 
  
+    # Returns a list of integers.
+
      my ( @matches );
  
      @matches = scan( $seq, $re->{ "pattern" }, length $seq, $re->{ "len" } );
@@ -172,6 +195,12 @@ sub re_scan
  
  sub parse_re_data
  {
+    # Martin A. Hansen, August 2009.
+    
+    # Parses restriction enzyme data from __DATA__ section in this module.
+
+    # Returns a list of hashrefs.
+
      my ( @lines, $line, @fields, @re_data );
  
      @lines = <DATA>;