From: martinahansen Date: Mon, 25 Jun 2012 09:01:26 +0000 (+0000) Subject: added max_diversity switch to findsim.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=8c0484313a1a8f17938f6bec0be6cb3b7e202a4d;p=biopieces.git added max_diversity switch to findsim.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1844 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/findsim.rb b/code_ruby/lib/maasha/findsim.rb index 6b1ad23..d33229d 100644 --- a/code_ruby/lib/maasha/findsim.rb +++ b/code_ruby/lib/maasha/findsim.rb @@ -28,7 +28,8 @@ require 'maasha/align' BYTES_IN_INT = 4 BYTES_IN_FLOAT = 4 -BYTES_IN_HIT = 12 +BYTES_IN_HIT = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT # i.e. 12 +NUC_ALPH_SIZE = 4 # Alphabet size of nucleotides. RESULT_ARY_MAX = 50_000_000 # Maximum size for the result_ary. HIT_ARY_MAX = 100_000 # Maximum size for the hit_ary. @@ -100,9 +101,9 @@ class FindSim # locating for each sequence all shared oligos with the query index. def search_db(file) time = Time.now - oligo_ary = "\0" * (4 ** @opt_hash[:kmer]) * BYTES_IN_INT - shared_ary = "\0" * @q_size * BYTES_IN_INT - result_ary = "\0" * RESULT_ARY_MAX * BYTES_IN_HIT + oligo_ary = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT + shared_ary = "\0" * @q_size * BYTES_IN_INT + result_ary = "\0" * RESULT_ARY_MAX * BYTES_IN_HIT result_count = 0 Fasta.open(file, 'r') do |ios| @@ -110,8 +111,8 @@ class FindSim @s_ids << entry.seq_name if @opt_hash[:subject_ids] @s_entries << entry if @opt_hash[:realign] - zero_ary_c(oligo_ary, (4 ** @opt_hash[:kmer]) * BYTES_IN_INT) - zero_ary_c(shared_ary, @q_size * BYTES_IN_INT) + zero_ary_c(oligo_ary, (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT) + zero_ary_c(shared_ary, @q_size * BYTES_IN_INT) oligo_ary_size = str_to_oligo_ary_c(entry.seq, entry.len, oligo_ary, @opt_hash[:kmer], @opt_hash[:step]) @@ -159,6 +160,11 @@ class FindSim score = new_score if new_score > score end + if @opt_hash[:max_diversity] + best_score = 
score if i == 0 + break if best_score - score > @opt_hash[:max_diversity] + end + yield Hit.new(q_id, s_id, score) end @@ -187,8 +193,8 @@ class FindSim @q_ary = "" beg = 0 - oligo_begs = Array.new(4 ** @opt_hash[:kmer], 0) - oligo_ends = Array.new(4 ** @opt_hash[:kmer], 0) + oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0) + oligo_ends = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0) oligo_hash.each do |oligo, list| @q_ary << list.pack("I*")