]> git.donarmstrong.com Git - biopieces.git/commitdiff
cleanup of pair stuff in align.rb
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 28 Jun 2012 11:29:22 +0000 (11:29 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 28 Jun 2012 11:29:22 +0000 (11:29 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1849 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/align.rb

index bb83d9e22e1dfb4371d0c0dee30c377f24988e4f..a82872185fb9967f4dea5d1bbe0841f4217283a1 100755 (executable)
@@ -143,7 +143,7 @@ class Align
 
     ap = AlignPair.new(q_entry.seq, s_entry.seq)
 
-    ap.align(q_space_beg, q_space_end, s_space_beg, s_space_end, 256)
+    ap.align(q_space_beg, q_space_end, s_space_beg, s_space_end, 15, [])
 
     ap.upcase_match
     ap.insert_gaps
@@ -248,19 +248,26 @@ class Align
     # within the specified search space. Finally, the longest match is used
     # to define a left and a right search space which are recursed into
     # until no more matches are found.
-    def align(q_space_beg, q_space_end, s_space_beg, s_space_end, length)
+    def align(q_space_beg, q_space_end, s_space_beg, s_space_end, kmer_size, matches)
 #      puts "search space: #{q_space_beg}-#{q_space_end} x #{s_space_beg}-#{s_space_end}"
       new_matches = []
 
-      while new_matches.empty? and length > 0
-        if @q_seq.length >= length and @s_seq.length >= length
-          q_index = index_seq(q_space_beg, q_space_end, @q_seq, length, length)
-          s_index = index_seq(s_space_beg, s_space_end, @s_seq, length, 1)
-          new_matches = find_matches(q_index, s_index, length)
+      matches.each do |match|
+        new_matches << match if match.q_beg >= q_space_beg and
+                                match.q_end <= q_space_end and
+                                match.s_beg >= s_space_beg and
+                                match.s_end <= s_space_end
+      end
+
+      while new_matches.empty? and kmer_size > 0
+        if @q_seq.length >= kmer_size and @s_seq.length >= kmer_size
+          q_index = index_seq(q_space_beg, q_space_end, @q_seq, kmer_size, kmer_size)
+          s_index = index_seq(s_space_beg, s_space_end, @s_seq, kmer_size, 1)
+          new_matches = find_matches(q_index, s_index, kmer_size)
         end
 
         unless new_matches.empty?
-          longest_match = new_matches.sort_by { |match| match.length }.last
+          longest_match = new_matches.sort_by { |match| match.length }.pop
           longest_match.expand(@q_seq, @s_seq, q_space_beg, q_space_end, s_space_beg, s_space_end)
 #          pp longest_match
 
@@ -277,15 +284,15 @@ class Align
           s_space_end_right = (s_space_end == @s_seq.length) ? @s_seq.length : s_space_end - 1
 
           if q_space_end_left - q_space_beg_left > 0 and s_space_end_left - s_space_beg_left > 0
-            align(q_space_beg_left,  q_space_end_left,  s_space_beg_left,  s_space_end_left,  length)
+            align(q_space_beg_left,  q_space_end_left,  s_space_beg_left,  s_space_end_left,  kmer_size, new_matches)
           end
 
           if q_space_end_right - q_space_beg_right > 0 and s_space_end_right - s_space_beg_right > 0
-            align(q_space_beg_right, q_space_end_right, s_space_beg_right, s_space_end_right, length)
+            align(q_space_beg_right, q_space_end_right, s_space_beg_right, s_space_end_right, kmer_size, new_matches)
           end
         end
 
-        length /= 2
+        kmer_size /= 2
       end
     end
 
@@ -351,17 +358,18 @@ class Align
     # and step size. All oligo positions are saved in a lookup hash
     # where the oligo is the key and the position is the value. Non-
     # unique oligos are removed and the index returned. 
-    def index_seq(space_beg, space_end, seq, oligo_size, step_size)
+    def index_seq(space_beg, space_end, seq, kmer_size, step_size)
       index       = Hash.new
       index_count = Hash.new(0)
 
-      (space_beg ... space_end).step(step_size).each do |pos|
-        oligo = seq[pos ... pos + oligo_size]
+      i = space_beg
 
-        if oligo.length == oligo_size
-          index[oligo]        = pos
-          index_count[oligo] += 1
-        end
+      while i < space_end - kmer_size + 1
+        oligo = seq[i ... i + kmer_size]
+
+        index[oligo]        = i
+        index_count[oligo] += 1
+        i += step_size
       end
 
       index_count.each do |oligo, count|
@@ -375,21 +383,20 @@ class Align
     # using non-overlapping unique oligos and their positions and one is
     # using overlapping unique oligos and positions. Thus iterating over the
     # non-overlapping oligos and checking last match, this can be extended.
-    def find_matches(q_index, s_index, length)
+    def find_matches(q_index, s_index, kmer_size)
       matches = []
 
       q_index.each do |oligo, q_pos|
-        if s_index[oligo]
-          s_pos = s_index[oligo]
+        if s_pos = s_index[oligo]
 
           last_match = matches.last
-          new_match  = Match.new(q_pos, s_pos, length)
+          new_match  = Match.new(q_pos, s_pos, kmer_size)
 
           if last_match and
              last_match.q_beg + last_match.length == new_match.q_beg and
              last_match.s_beg + last_match.length == new_match.s_beg
 
-             last_match.length += length
+             last_match.length += kmer_size
           else
             matches << new_match
           end