git.donarmstrong.com Git - biopieces.git/commitdiff
added max_diversity switch to findsim.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 25 Jun 2012 09:01:26 +0000 (09:01 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 25 Jun 2012 09:01:26 +0000 (09:01 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1844 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/findsim.rb

index 6b1ad23552ed1c434cfa161f042229102c7d8709..d33229d221bc3754f7a9a23673ad9dc384307e27 100644 (file)
@@ -28,7 +28,8 @@ require 'maasha/align'
 
 BYTES_IN_INT   = 4
 BYTES_IN_FLOAT = 4
-BYTES_IN_HIT   = 12
+BYTES_IN_HIT   = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT   # i.e. 12
+NUC_ALPH_SIZE  = 4            # Alphabet size of nucleotides.
 RESULT_ARY_MAX = 50_000_000   # Maximum size for the result_ary.
 HIT_ARY_MAX    = 100_000      # Maximum size for the hit_ary.
 
@@ -100,9 +101,9 @@ class FindSim
   # locating for each sequence all shared oligos with the query index.
   def search_db(file)
     time           = Time.now
-    oligo_ary      = "\0" * (4 ** @opt_hash[:kmer]) * BYTES_IN_INT
-    shared_ary     = "\0" * @q_size                 * BYTES_IN_INT
-    result_ary     = "\0" * RESULT_ARY_MAX          * BYTES_IN_HIT
+    oligo_ary      = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
+    shared_ary     = "\0" * @q_size                             * BYTES_IN_INT
+    result_ary     = "\0" * RESULT_ARY_MAX                      * BYTES_IN_HIT
     result_count   = 0
 
     Fasta.open(file, 'r') do |ios|
@@ -110,8 +111,8 @@ class FindSim
         @s_ids     << entry.seq_name if @opt_hash[:subject_ids]
         @s_entries << entry          if @opt_hash[:realign]
 
-        zero_ary_c(oligo_ary,  (4 ** @opt_hash[:kmer]) * BYTES_IN_INT)
-        zero_ary_c(shared_ary, @q_size                 * BYTES_IN_INT)
+        zero_ary_c(oligo_ary,  (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT)
+        zero_ary_c(shared_ary, @q_size                             * BYTES_IN_INT)
 
         oligo_ary_size = str_to_oligo_ary_c(entry.seq, entry.len, oligo_ary, @opt_hash[:kmer], @opt_hash[:step])
 
@@ -159,6 +160,11 @@ class FindSim
           score     = new_score if new_score > score
         end
 
+        if @opt_hash[:max_diversity]
+          best_score = score if i == 0
+          break if best_score - score > @opt_hash[:max_diversity]
+        end
+
         yield Hit.new(q_id, s_id, score)
       end
 
@@ -187,8 +193,8 @@ class FindSim
     @q_ary = ""
 
     beg        = 0
-    oligo_begs = Array.new(4 ** @opt_hash[:kmer], 0)
-    oligo_ends = Array.new(4 ** @opt_hash[:kmer], 0)
+    oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
+    oligo_ends = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
 
     oligo_hash.each do |oligo, list|
       @q_ary << list.pack("I*")