From 268f4b42e12087be9134b35887ffe6138c3ef3df Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 20 Dec 2012 21:25:23 +0000 Subject: [PATCH] cleanup of pair align code git-svn-id: http://biopieces.googlecode.com/svn/trunk@2047 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/align/match.rb | 13 +-- code_ruby/lib/maasha/align/pair.rb | 128 +--------------------------- 2 files changed, 10 insertions(+), 131 deletions(-) diff --git a/code_ruby/lib/maasha/align/match.rb b/code_ruby/lib/maasha/align/match.rb index b137711..721bf13 100644 --- a/code_ruby/lib/maasha/align/match.rb +++ b/code_ruby/lib/maasha/align/match.rb @@ -22,11 +22,13 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Class containing methods for located all Maximally Extended Matches (MEMs) between -# two strings. +require 'profile' + +# Class containing methods for locating all non-redundant Maximally Extended Matches (MEMs) +# between two strings. class Matches - # Class method to located all MEMs bewteen two sequences within a given search - # space and seed with kmers of a given size. + # Class method to locate all non-redundant MEMs between two sequences within a given + # search space and seeded with kmers of a given size. def self.find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) m = self.new m.matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) @@ -47,6 +49,7 @@ class Matches s_index[q_oligo].each do |s_pos| match = Match.new(q_pos, s_pos, kmer) + unless match_redundant?(redundant, match) match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) matches << match @@ -82,7 +85,7 @@ class Matches # Method to check if a match is redundant. def match_redundant?(redundant, match) redundant[match.q_beg].each do |s_interval| - if s_interval.include? match.s_beg and s_interval.include? match.s_end + if s_interval.include? match.s_beg and s_interval.include? match.s_end # TODO test if include?
is slow return true end end diff --git a/code_ruby/lib/maasha/align/pair.rb b/code_ruby/lib/maasha/align/pair.rb index 0969fe7..2c8c523 100644 --- a/code_ruby/lib/maasha/align/pair.rb +++ b/code_ruby/lib/maasha/align/pair.rb @@ -27,6 +27,7 @@ require 'maasha/math_aux' FACTOR_SCORE_LENGTH = 1.0 FACTOR_SCORE_DIAG = -1.41 +KMER = 32 # Module with stuff to create a pairwise aligment. module PairAlign @@ -47,9 +48,8 @@ module PairAlign @s_entry.seq.downcase! space = Space.new(0, 0, @q_entry.length - 1, @s_entry.length - 1) - kmer = 32 - align_recurse(@q_entry.seq, @s_entry.seq, space, kmer) + align_recurse(@q_entry.seq, @s_entry.seq, space, KMER) matches_upcase gaps_insert end @@ -160,104 +160,6 @@ module PairAlign dist_min * FACTOR_SCORE_DIAG end - # Method that finds all maximally expanded non-redundant matches shared - # between two sequences inside a given search space. - def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) - matches = [] - redundant = Hash.new { |h, k| h[k] = [] } - - s_index = index_seq(s_seq, s_min, s_max, kmer) - - q_pos = q_min - - while q_pos <= q_max - kmer + 1 - q_oligo = q_seq[q_pos ... q_pos + kmer] - - s_index[q_oligo].each do |s_pos| - match = Match.new(q_pos, s_pos, kmer) - - unless match_redundant?(redundant, match) - match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - matches << match - - match_redundant_add(redundant, match) - end - end - - q_pos += 1 - end - - matches - end - - # Method that indexes a sequence within a given interval such that the - # index contains all oligos of a given kmer size and the positions where - # this oligo was located. - def index_seq(seq, min, max, kmer) - index_hash = Hash.new { |h, k| h[k] = [] } - - pos = min - - while pos <= max - kmer + 1 - oligo = seq[pos ... pos + kmer] - index_hash[oligo] << pos - - pos += 1 - end - - index_hash - end - - # Method to check if a match is redundant. 
- def match_redundant?(redundant, match) - redundant[match.q_beg].each do |s_interval| - if s_interval.include? match.s_beg and s_interval.include? match.s_end - return true - end - end - - false - end - - # Method that adds a match to the redundancy index. - def match_redundant_add(redundant, match) - (match.q_beg .. match.q_end).each do |q| - redundant[q] << (match.s_beg .. match.s_end) - end - end - - # Method that expands a match as far as possible to the left and right. - def match_expand(match, q_seq, s_seq, q_min, s_min, q_max, s_max) - match_expand_left(match, q_seq, s_seq, q_min, s_min) - match_expand_right(match, q_seq, s_seq, q_max, s_max) - - match - end - - # Method that expands a match as far as possible to the left. - def match_expand_left(match, q_seq, s_seq, q_min, s_min) - while match.q_beg > q_min and - match.s_beg > s_min and - q_seq[match.q_beg - 1] == s_seq[match.s_beg - 1] - match.q_beg -= 1 - match.s_beg -= 1 - match.length += 1 - end - - match - end - - # Method that expands a match as far as possible to the right. - def match_expand_right(match, q_seq, s_seq, q_max, s_max) - while match.q_end < q_max and - match.s_end < s_max and - q_seq[match.q_end + 1] == s_seq[match.s_end + 1] - match.length += 1 - end - - match - end - # Method for debugging purposes that upcase matching sequence while non-matches # sequence is kept in lower case. def matches_upcase @@ -335,31 +237,5 @@ module PairAlign true end end - - # Class for containing a match between two sequences q and s. - class Match - attr_accessor :q_beg, :s_beg, :length, :score - - def initialize(q_beg, s_beg, length, score = 0.0) - @q_beg = q_beg - @s_beg = s_beg - @length = length - @score = score - end - - def q_end - @q_beg + @length - 1 - end - - def s_end - @s_beg + @length - 1 - end - - def to_s(seq = nil) - s = "q: #{@q_beg} #{q_end} s: #{@s_beg} #{s_end} l: #{@length} s: #{@score}" - s << " seq: #{seq[@q_beg .. q_end]}" if seq - s - end - end end -- 2.39.2