From 447d29efe53556e3d237cb1e1e17b94fe817c574 Mon Sep 17 00:00:00 2001
From: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Date: Tue, 9 Oct 2012 08:54:17 +0000
Subject: [PATCH] cleanup of findsim.rb

git-svn-id: http://biopieces.googlecode.com/svn/trunk@1951 74ccb610-7750-0410-82ae-013aeee3265d
---
 code_ruby/lib/maasha/findsim.rb | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/code_ruby/lib/maasha/findsim.rb b/code_ruby/lib/maasha/findsim.rb
index 360bd9c..c1da3d4 100644
--- a/code_ruby/lib/maasha/findsim.rb
+++ b/code_ruby/lib/maasha/findsim.rb
@@ -40,11 +40,7 @@ HIT_ARY_MAX    = 100_000      # Maximum size for the hit_ary.
 # divided by the smallest number of unique oligoes in either the query or
 # database sequence. This yields a rough under estimate of similarity e.g. 50%
 # oligo similarity may correspond to 80% similarity on a nucleotide level
-# (needs clarification). The outcome of FindSim is a table with a row per
-# query sequence and the columns are the database hits sorted according to
-# similarity.
-#
-# Extensive use of inline C for speed.
+# (needs clarification).
 class FindSim
   include Enumerable
 
@@ -100,11 +96,11 @@ class FindSim
   # Method to search database or subject sequences from a FASTA file by
   # locating for each sequence all shared oligos with the query index.
   def search_db(file)
-    time           = Time.now
-    oligo_ary      = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
-    shared_ary     = "\0" * @q_size                             * BYTES_IN_INT
-    result_ary     = "\0" * RESULT_ARY_MAX                      * BYTES_IN_HIT
-    result_count   = 0
+    time         = Time.now
+    oligo_ary    = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
+    shared_ary   = "\0" * @q_size                             * BYTES_IN_INT
+    result_ary   = "\0" * RESULT_ARY_MAX                      * BYTES_IN_HIT
+    result_count = 0
 
     Fasta.open(file, 'r') do |ios|
       ios.each_with_index do |entry, s_index|
@@ -192,8 +188,7 @@ class FindSim
   #    particular oligo.
   def create_query_index(q_total, oligo_hash)
     @q_total_ary = q_total.pack("I*")
-
-    @q_ary = ""
+    @q_ary       = ""
 
     beg        = 0
     oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
@@ -437,7 +432,7 @@ class FindSim
 
     # Method that counts all shared oligos/kmers between a subject sequence and
     # all query sequences. For each oligo in the subject sequence (s_ary) the
-    # index of all query sequences containing this oligo is found the the q_ary
+    # index of all query sequences containing this oligo is found in the q_ary
     # where this information is stored sequentially in intervals. For the
     # particula oligo the interval is looked up in the q_beg and q_end arrays.
     # Shared oligos are recorded in the shared_ary.
-- 
2.39.5