BYTES_IN_INT = 4 # Bytes reserved per integer slot when sizing the "\0"-filled string buffers.
BYTES_IN_FLOAT = 4 # Bytes reserved per float field; contributes to BYTES_IN_HIT.
-BYTES_IN_HIT = 12
+BYTES_IN_HIT = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT # i.e. 12
+NUC_ALPH_SIZE = 4 # Alphabet size of nucleotides.
RESULT_ARY_MAX = 50_000_000 # Maximum size for the result_ary.
HIT_ARY_MAX = 100_000 # Maximum size for the hit_ary.
# locating for each sequence all shared oligos with the query index.
def search_db(file)
time = Time.now
- oligo_ary = "\0" * (4 ** @opt_hash[:kmer]) * BYTES_IN_INT
- shared_ary = "\0" * @q_size * BYTES_IN_INT
- result_ary = "\0" * RESULT_ARY_MAX * BYTES_IN_HIT
+ oligo_ary = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
+ shared_ary = "\0" * @q_size * BYTES_IN_INT
+ result_ary = "\0" * RESULT_ARY_MAX * BYTES_IN_HIT
result_count = 0
Fasta.open(file, 'r') do |ios|
@s_ids << entry.seq_name if @opt_hash[:subject_ids]
@s_entries << entry if @opt_hash[:realign]
- zero_ary_c(oligo_ary, (4 ** @opt_hash[:kmer]) * BYTES_IN_INT)
- zero_ary_c(shared_ary, @q_size * BYTES_IN_INT)
+ zero_ary_c(oligo_ary, (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT)
+ zero_ary_c(shared_ary, @q_size * BYTES_IN_INT)
oligo_ary_size = str_to_oligo_ary_c(entry.seq, entry.len, oligo_ary, @opt_hash[:kmer], @opt_hash[:step])
score = new_score if new_score > score
end
+ if @opt_hash[:max_diversity]
+ best_score = score if i == 0
+ break if best_score - score > @opt_hash[:max_diversity]
+ end
+
yield Hit.new(q_id, s_id, score)
end
@q_ary = ""
beg = 0
- oligo_begs = Array.new(4 ** @opt_hash[:kmer], 0)
- oligo_ends = Array.new(4 ** @opt_hash[:kmer], 0)
+ oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
+ oligo_ends = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
oligo_hash.each do |oligo, list|
@q_ary << list.pack("I*")