From 3ddd17e3d21f6b33ebe66fcf19d30bd79784032c Mon Sep 17 00:00:00 2001 From: martinahansen Date: Fri, 13 Jul 2012 14:43:11 +0000 Subject: [PATCH] fixed another bug in find orfs git-svn-id: http://biopieces.googlecode.com/svn/trunk@1872 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/align/pair.rb | 88 +++++++++++------------------- code_ruby/lib/maasha/seq.rb | 2 +- 2 files changed, 34 insertions(+), 56 deletions(-) diff --git a/code_ruby/lib/maasha/align/pair.rb b/code_ruby/lib/maasha/align/pair.rb index 45e9298..307197a 100644 --- a/code_ruby/lib/maasha/align/pair.rb +++ b/code_ruby/lib/maasha/align/pair.rb @@ -22,23 +22,6 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Extending Math module with a couple of useful methods. -module Math - # Method for calculating the distance between a point and a line. - def self.dist_point2line(px, py, x1, y1, x2, y2) - a = (y2.to_f - y1) / (x2.to_f - x1) - - b = y1 - a * x1 - - (a * px + b - py ).abs / Math.sqrt(a ** 2 + 1 ) - end - - # Method for calculating the distance between two points. - def self.dist_point2point(x1, y1, x2, y2) - Math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) - end -end - # Module with stuff to create a pairwise aligment. module PairAlign # Class for creating a pairwise alignment. @@ -79,7 +62,13 @@ module PairAlign kmer /= 2 end - matches = matches_select_by_score(matches, q_min, s_min, q_max, s_max) + matches_score(matches, q_min, s_min, q_max, s_max) + +# matches.each { |m| puts m.to_s(q_seq) } + + unless @matches.empty? + matches = matches.select { |match| match.score > 0 } + end if best_match = matches.pop @matches << best_match @@ -116,48 +105,29 @@ module PairAlign new_matches end - # Method to select the best scoring matches and return these sorted - # according to score. - def matches_select_by_score(matches, q_min, s_min, q_max, s_max) - new_matches = [] - + def matches_score(matches, q_min, s_min, q_max, s_max) matches.each do |match| - score_length = match_score_by_length(match) - score_diag = match_score_by_diagonal_dist(match, q_min, s_min, q_max, s_max) - - match.score = score_length - score_diag - - new_matches << match if match.score > 0 - end + score_length = match.length - new_matches.sort_by! { |match| match.score } + min = (q_min - s_min).abs + max = ((q_max - q_min) - (s_max - s_min)).abs + beg = ((match.q_beg - q_min) - (match.s_beg - s_min)).abs - new_matches - end - - # Method to calculate score based on match length. - def match_score_by_length(match) - match.length.to_f - end + if beg > (max - min) / 2 + score_diag = max - min + else + score_diag = beg + end - # Method to calculate score based on the distance to the closest - # diagonal. The smaller the distance the better the score. - def match_score_by_diagonal_dist(match, q_min, s_min, q_max, s_max) - q_dim = q_max - q_min + 1 - s_dim = s_max - s_min + 1 +# puts "score_length: #{score_length} score_diag: #{score_diag} score: #{score_length - score_diag}" - if q_dim >= s_dim # s_dim is the narrow end - beg_dist = Math.dist_point2line(match.q_beg, match.s_beg, q_min, s_min, q_min + s_dim, s_min + s_dim) - end_dist = Math.dist_point2line(match.q_beg, match.s_beg, q_max - s_dim, s_max - s_dim, q_max, s_max) - else - beg_dist = Math.dist_point2line(match.q_beg, match.s_beg, q_min, s_min, q_min + q_dim, s_min + q_dim) - end_dist = Math.dist_point2line(match.q_beg, match.s_beg, q_max - q_dim, s_max - q_dim, q_max, s_max) + match.score = score_length - score_diag end - min_dist = (beg_dist < end_dist) ? beg_dist : end_dist - min_dist.to_f + matches.sort_by! { |match| match.score } end + # Method that finds all maximally expanded non-redundant matches shared # between two sequences inside a given search space. def matches_find(q_seq, s_seq, q_min, s_min, q_max, s_max, kmer) @@ -267,17 +237,25 @@ module PairAlign # Method that insert gaps in sequences based on a list of matches and thus # creating an alignment. - # TODO check boundaries! def gaps_insert @matches.sort_by! { |m| m.q_beg } q_gaps = 0 s_gaps = 0 - @matches.each do |match| - diff = (q_gaps + match.q_beg) - (s_gaps + match.s_beg) + match = @matches.first + diff = (q_gaps + match.q_beg) - (s_gaps + match.s_beg) - #pp "q_gaps #{q_gaps} s_gaps #{s_gaps} diff #{diff}" + if diff < 0 + @q_entry.seq.insert(0, "-" * diff.abs) + q_gaps += diff.abs + elsif diff > 0 + @s_entry.seq.insert(0, "-" * diff.abs) + s_gaps += diff.abs + end + + @matches[1 .. -1].each do |match| + diff = (q_gaps + match.q_beg) - (s_gaps + match.s_beg) if diff < 0 @q_entry.seq.insert(match.q_beg + q_gaps, "-" * diff.abs) diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index 62170b6..92d3398 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -536,7 +536,7 @@ class Seq if pick_longest orf_hash = {} - orfs.each { |orf| orf_hash[orf.last] = orf } + orfs.each { |orf| orf_hash[orf.last] = orf unless orf_hash[orf.last] } orfs = orf_hash.values end -- 2.39.2