git.donarmstrong.com Git - biopieces.git/commitdiff
added dynamic mem allocation to findsim.rb
author: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 10 Oct 2012 12:20:38 +0000 (12:20 +0000)
committer: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Wed, 10 Oct 2012 12:20:38 +0000 (12:20 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1957 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/findsim.rb

index 49cf132559ffd7c024e123e4ede2cfa17f73ac14..a3ed37d1d626d3c77a6799303a3b57703b6a77a8 100644 (file)
@@ -26,12 +26,12 @@ require 'inline'
 require 'maasha/fasta'
 require 'maasha/align'
 
-BYTES_IN_INT   = 4
-BYTES_IN_FLOAT = 4
-BYTES_IN_HIT   = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT   # i.e. 12
-NUC_ALPH_SIZE  = 4            # Alphabet size of nucleotides.
-RESULT_ARY_MAX = 50_000_000   # Maximum size for the result_ary.
-HIT_ARY_MAX    = 100_000      # Maximum size for the hit_ary.
+BYTES_IN_INT      = 4
+BYTES_IN_FLOAT    = 4
+BYTES_IN_HIT      = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT   # i.e. 12
+NUC_ALPH_SIZE     = 4            # Alphabet size of nucleotides.
+RESULT_ARY_BUFFER = 10_000_000   # Buffer for the result_ary.
+HIT_ARY_BUFFER    = 1_000_000    # Buffer for the hit_ary.
 
 # FindSim is an implementation of the SimRank logic proposed by Niels Larsen.
 # The purpose is to find similarities between query DNA/RNA sequences and a
@@ -99,7 +99,7 @@ class FindSim
     time         = Time.now
     oligo_ary    = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
     shared_ary   = "\0" * @q_size                             * BYTES_IN_INT
-    result_ary   = "\0" * RESULT_ARY_MAX                      * BYTES_IN_HIT
+    result_ary   = "\0" * RESULT_ARY_BUFFER                   * BYTES_IN_HIT
     result_count = 0
 
     Fasta.open(file, 'r') do |ios|
@@ -119,6 +119,11 @@ class FindSim
         if ((s_index + 1) % 1000) == 0 and @opt_hash[:verbose]
           $stderr.puts "Searched #{s_index + 1} sequences in #{Time.now - time} seconds (#{result_count} hits)."
         end
+
+        if result_ary.size / BYTES_IN_HIT - result_count < RESULT_ARY_BUFFER / 2
+          result_ary << "\0" * RESULT_ARY_BUFFER * BYTES_IN_HIT
+          $stderr.puts "resizing to #{result_ary.size / BYTES_IN_HIT}"
+        end
       end
     end
 
@@ -131,12 +136,12 @@ class FindSim
   def each
     sort_hits_c(@result, @result_count)
 
-    hit_ary = "\0" * HIT_ARY_MAX * BYTES_IN_HIT
+    hit_ary = "\0" * HIT_ARY_BUFFER * BYTES_IN_HIT
 
     hit_index = 0
 
     (0 ... @q_size).each do |q_index|
-      zero_ary_c(hit_ary, HIT_ARY_MAX * BYTES_IN_HIT)
+      zero_ary_c(hit_ary, HIT_ARY_BUFFER * BYTES_IN_HIT)
       hit_ary_size = get_hits_c(@result, @result_count, hit_index, hit_ary, q_index)
 
       if @opt_hash[:max_hits]