]> git.donarmstrong.com Git - biopieces.git/commitdiff
added precise alignment option to findsim.rb
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 19 Apr 2012 09:29:24 +0000 (09:29 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Thu, 19 Apr 2012 09:29:24 +0000 (09:29 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1806 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/findsim.rb

index f7a811b17240a3b38b7c8d5a5ef0e458ba49792a..25f135c99cae8f63248e5bb58bb7c8ca77429732 100644 (file)
@@ -24,6 +24,7 @@
 
 require 'inline'
 require 'maasha/fasta'
+require 'maasha/align'
 
 BYTES_IN_INT   = 4
 BYTES_IN_FLOAT = 4
@@ -58,6 +59,8 @@ class FindSim
     @result_count = 0
     @q_ids        = []
     @s_ids        = []
+    @q_entries    = []
+    @s_entries    = []
   end
 
   # Method to load sequences from a query file in FASTA format
@@ -71,7 +74,8 @@ class FindSim
 
     Fasta.open(file, 'r') do |ios|
       ios.each do |entry|
-        @q_ids << entry.seq_name if @opt_hash[:query_ids]
+        @q_ids     << entry.seq_name if @opt_hash[:query_ids]
+        @q_entries << entry          if @opt_hash[:precise]
 
         oligos = str_to_oligo_rb_ary_c(entry.seq, @opt_hash[:kmer], 1).uniq.sort
 
@@ -103,7 +107,8 @@ class FindSim
 
     Fasta.open(file, 'r') do |ios|
       ios.each_with_index do |entry, s_index|
-        @s_ids << entry.seq_name if @opt_hash[:subject_ids]
+        @s_ids     << entry.seq_name if @opt_hash[:subject_ids]
+        @s_entries << entry          if @opt_hash[:precise]
 
         zero_ary_c(oligo_ary,  (4 ** @opt_hash[:kmer]) * BYTES_IN_INT)
         zero_ary_c(shared_ary, @q_size                 * BYTES_IN_INT)
@@ -149,6 +154,11 @@ class FindSim
         q_id = @opt_hash[:query_ids]   ? @q_ids[q_index] : q_index
         s_id = @opt_hash[:subject_ids] ? @s_ids[s_index] : s_index
 
+        if @opt_hash[:precise]
+          new_score = Align.muscle([@q_entries[q_index], @s_entries[s_index]]).identity
+          score     = new_score if new_score > score
+        end
+
         yield Hit.new(q_id, s_id, score)
       end