polished RestrictEnz.pm

author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>

Fri, 14 Aug 2009 16:07:25 +0000 (16:07 +0000)

committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>

Fri, 14 Aug 2009 16:07:25 +0000 (16:07 +0000)
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 14 Aug 2009 16:07:25 +0000 (16:07 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Fri, 14 Aug 2009 16:07:25 +0000 (16:07 +0000)
diff --git a/code_perl/Maasha/RestrictEnz.pm b/code_perl/Maasha/RestrictEnz.pm

index 9b1dd27a5b52ccea6fbea5d8f9d0a44f0849764e..19744394cd0c4a55e0323ebf288150e7f8948eda 100644 (file)
--- a/code_perl/Maasha/RestrictEnz.pm
+++ b/code_perl/Maasha/RestrictEnz.pm
@@ -65,6 +65,7 @@ use Inline ( C => <<'END_C', DIRECTORY => $ENV{ "BP_TMP" } );
  # N1111100000000000
  */
  
+/* 2-dimensional array for fast lookup of nucleotide match. */
  
  char ambi_match[16][16] = {
      "1000011100011101",
@@ -88,6 +89,11 @@ char ambi_match[16][16] = {
  
  int hash( char c )
  {
+    /* Martin A. Hansen, August 2009. */
+
+    /* Given a nucleotide, returns the position of this */
+    /* on the edge of the symmetrical ambi_match lookup table. */
+
      switch ( toupper( c ) )
      {
          case 'A': return 0;
@@ -105,7 +111,7 @@ int hash( char c )
          case 'H': return 12;
          case 'D': return 13;
          case 'B': return 14;
-        case 'N': return 0;
+        case 'N': return 15;
          default: return -1;
      }
  }
@@ -113,6 +119,11 @@ int hash( char c )
  
  void scan( char *seq, char *pat, int seq_len, int pat_len )
  {
+    /* Martin A. Hansen, August 2009. */
+
+    /* Scans a sequence for a subsequence allowing for ambiguity */
+    /* codes a la IUPAC. */
+
      int i;
  
      Inline_Stack_Vars;
@@ -131,6 +142,11 @@ void scan( char *seq, char *pat, int seq_len, int pat_len )
  
  int match( char *seq1, char *seq2, int len )
  {
+    /* Martin A. Hansen, August 2009. */
+
+    /* Checks if two sequences are identical allowing for */
+    /* IUPAC ambiguity codes over a given length. */
+
      int  i = 0;
      char c1;
      char c2;
@@ -158,10 +174,17 @@ END_C
  
  sub re_scan
  {
+    # Martin A. Hansen, August 2009.
+
+    # Calls C function to scan a given sequence for a given
+    # restriction site.
+
      my ( $seq,   # sequence to scan
           $re,    # hashref with RE info
         ) = @_; 
  
+    # Returns a list of integers.
+
      my ( @matches );
  
      @matches = scan( $seq, $re->{ "pattern" }, length $seq, $re->{ "len" } );
@@ -172,6 +195,12 @@ sub re_scan
  
  sub parse_re_data
  {
+    # Martin A. Hansen, August 2009.
+    
+    # Parses restriction enzyme data from __DATA__ section in this module.
+
+    # Returns a list of hashrefs.
+
      my ( @lines, $line, @fields, @re_data );
  
      @lines = <DATA>;
author	martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
	Fri, 14 Aug 2009 16:07:25 +0000 (16:07 +0000)
committer	martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
	Fri, 14 Aug 2009 16:07:25 +0000 (16:07 +0000)