cleanup of findsim.rb

author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>

Tue, 9 Oct 2012 08:54:17 +0000 (08:54 +0000)

committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>

Tue, 9 Oct 2012 08:54:17 +0000 (08:54 +0000)
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 9 Oct 2012 08:54:17 +0000 (08:54 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 9 Oct 2012 08:54:17 +0000 (08:54 +0000)
diff --git a/code_ruby/lib/maasha/findsim.rb b/code_ruby/lib/maasha/findsim.rb

index 360bd9cf495bbe7c1c010a2f4bfed4e5541e8ebd..c1da3d43cdb7500235213001c8a42de44a24f605 100644 (file)
--- a/code_ruby/lib/maasha/findsim.rb
+++ b/code_ruby/lib/maasha/findsim.rb
@@ -40,11 +40,7 @@ HIT_ARY_MAX    = 100_000      # Maximum size for the hit_ary.
  # divided by the smallest number of unique oligoes in either the query or
  # database sequence. This yields a rough under estimate of similarity e.g. 50%
  # oligo similarity may correspond to 80% similarity on a nucleotide level
-# (needs clarification). The outcome of FindSim is a table with a row per
-# query sequence and the columns are the database hits sorted according to
-# similarity.
-#
-# Extensive use of inline C for speed.
+# (needs clarification). 
  class FindSim
    include Enumerable
  
@@ -100,11 +96,11 @@ class FindSim
    # Method to search database or subject sequences from a FASTA file by
    # locating for each sequence all shared oligos with the query index.
    def search_db(file)
-    time           = Time.now
-    oligo_ary      = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
-    shared_ary     = "\0" * @q_size                             * BYTES_IN_INT
-    result_ary     = "\0" * RESULT_ARY_MAX                      * BYTES_IN_HIT
-    result_count   = 0
+    time         = Time.now
+    oligo_ary    = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
+    shared_ary   = "\0" * @q_size                             * BYTES_IN_INT
+    result_ary   = "\0" * RESULT_ARY_MAX                      * BYTES_IN_HIT
+    result_count = 0
  
      Fasta.open(file, 'r') do |ios|
        ios.each_with_index do |entry, s_index|
@@ -192,8 +188,7 @@ class FindSim
    #    particular oligo.
    def create_query_index(q_total, oligo_hash)
      @q_total_ary = q_total.pack("I*")
-
-    @q_ary = ""
+    @q_ary       = ""
  
      beg        = 0
      oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
@@ -437,7 +432,7 @@ class FindSim
  
      # Method that counts all shared oligos/kmers between a subject sequence and
      # all query sequences. For each oligo in the subject sequence (s_ary) the
-    # index of all query sequences containing this oligo is found the the q_ary
+    # index of all query sequences containing this oligo is found for the q_ary
      # where this information is stored sequentially in intervals. For the
      # particula oligo the interval is looked up in the q_beg and q_end arrays.
      # Shared oligos are recorded in the shared_ary.
author	martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
	Tue, 9 Oct 2012 08:54:17 +0000 (08:54 +0000)
committer	martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
	Tue, 9 Oct 2012 08:54:17 +0000 (08:54 +0000)