From: martinahansen Date: Wed, 10 Oct 2012 12:20:38 +0000 (+0000) Subject: added dynamic mem allocation to findsim.rb X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=c147f898ba5bb383be2617b33ad0088ae1ac1537;p=biopieces.git added dynamic mem allocation to findsim.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1957 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/findsim.rb b/code_ruby/lib/maasha/findsim.rb index 49cf132..a3ed37d 100644 --- a/code_ruby/lib/maasha/findsim.rb +++ b/code_ruby/lib/maasha/findsim.rb @@ -26,12 +26,12 @@ require 'inline' require 'maasha/fasta' require 'maasha/align' -BYTES_IN_INT = 4 -BYTES_IN_FLOAT = 4 -BYTES_IN_HIT = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT # i.e. 12 -NUC_ALPH_SIZE = 4 # Alphabet size of nucleotides. -RESULT_ARY_MAX = 50_000_000 # Maximum size for the result_ary. -HIT_ARY_MAX = 100_000 # Maximum size for the hit_ary. +BYTES_IN_INT = 4 +BYTES_IN_FLOAT = 4 +BYTES_IN_HIT = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT # i.e. 12 +NUC_ALPH_SIZE = 4 # Alphabet size of nucleotides. +RESULT_ARY_BUFFER = 10_000_000 # Buffer for the result_ary. +HIT_ARY_BUFFER = 1_000_000 # Buffer for the hit_ary. # FindSim is an implementation of the SimRank logic proposed by Niels Larsen. # The purpose is to find similarities between query DNA/RNA sequences and a @@ -99,7 +99,7 @@ class FindSim time = Time.now oligo_ary = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT shared_ary = "\0" * @q_size * BYTES_IN_INT - result_ary = "\0" * RESULT_ARY_MAX * BYTES_IN_HIT + result_ary = "\0" * RESULT_ARY_BUFFER * BYTES_IN_HIT result_count = 0 Fasta.open(file, 'r') do |ios| @@ -119,6 +119,11 @@ class FindSim if ((s_index + 1) % 1000) == 0 and @opt_hash[:verbose] $stderr.puts "Searched #{s_index + 1} sequences in #{Time.now - time} seconds (#{result_count} hits)." end + + if result_ary.size / BYTES_IN_HIT - result_count < RESULT_ARY_BUFFER / 2 + result_ary << "\0" * RESULT_ARY_BUFFER * BYTES_IN_HIT + $stderr.puts "resizing to #{result_ary.size / BYTES_IN_HIT}" + end end end @@ -131,12 +136,12 @@ class FindSim def each sort_hits_c(@result, @result_count) - hit_ary = "\0" * HIT_ARY_MAX * BYTES_IN_HIT + hit_ary = "\0" * HIT_ARY_BUFFER * BYTES_IN_HIT hit_index = 0 (0 ... @q_size).each do |q_index| - zero_ary_c(hit_ary, HIT_ARY_MAX * BYTES_IN_HIT) + zero_ary_c(hit_ary, HIT_ARY_BUFFER * BYTES_IN_HIT) hit_ary_size = get_hits_c(@result, @result_count, hit_index, hit_ary, q_index) if @opt_hash[:max_hits]