From: martinahansen Date: Thu, 19 Apr 2012 09:29:24 +0000 (+0000) Subject: added precise alignment option to findsim.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=511f4a27ac48209be77e188754cc323a0c715b98;p=biopieces.git added precise alignment option to findsim.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1806 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/findsim.rb b/code_ruby/lib/maasha/findsim.rb index f7a811b..25f135c 100644 --- a/code_ruby/lib/maasha/findsim.rb +++ b/code_ruby/lib/maasha/findsim.rb @@ -24,6 +24,7 @@ require 'inline' require 'maasha/fasta' +require 'maasha/align' BYTES_IN_INT = 4 BYTES_IN_FLOAT = 4 @@ -58,6 +59,8 @@ class FindSim @result_count = 0 @q_ids = [] @s_ids = [] + @q_entries = [] + @s_entries = [] end # Method to load sequences from a query file in FASTA format @@ -71,7 +74,8 @@ class FindSim Fasta.open(file, 'r') do |ios| ios.each do |entry| - @q_ids << entry.seq_name if @opt_hash[:query_ids] + @q_ids << entry.seq_name if @opt_hash[:query_ids] + @q_entries << entry if @opt_hash[:precise] oligos = str_to_oligo_rb_ary_c(entry.seq, @opt_hash[:kmer], 1).uniq.sort @@ -103,7 +107,8 @@ class FindSim Fasta.open(file, 'r') do |ios| ios.each_with_index do |entry, s_index| - @s_ids << entry.seq_name if @opt_hash[:subject_ids] + @s_ids << entry.seq_name if @opt_hash[:subject_ids] + @s_entries << entry if @opt_hash[:precise] zero_ary_c(oligo_ary, (4 ** @opt_hash[:kmer]) * BYTES_IN_INT) zero_ary_c(shared_ary, @q_size * BYTES_IN_INT) @@ -149,6 +154,11 @@ class FindSim q_id = @opt_hash[:query_ids] ? @q_ids[q_index] : q_index s_id = @opt_hash[:subject_ids] ? @s_ids[s_index] : s_index + if @opt_hash[:precise] + new_score = Align.muscle([@q_entries[q_index], @s_entries[s_index]]).identity + score = new_score if new_score > score + end + yield Hit.new(q_id, s_id, score) end