From 447d29efe53556e3d237cb1e1e17b94fe817c574 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 9 Oct 2012 08:54:17 +0000 Subject: [PATCH] cleanup of findsim.rb git-svn-id: http://biopieces.googlecode.com/svn/trunk@1951 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/findsim.rb | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/code_ruby/lib/maasha/findsim.rb b/code_ruby/lib/maasha/findsim.rb index 360bd9c..c1da3d4 100644 --- a/code_ruby/lib/maasha/findsim.rb +++ b/code_ruby/lib/maasha/findsim.rb @@ -40,11 +40,7 @@ HIT_ARY_MAX = 100_000 # Maximum size for the hit_ary. # divided by the smallest number of unique oligoes in either the query or # database sequence. This yields a rough under estimate of similarity e.g. 50% # oligo similarity may correspond to 80% similarity on a nucleotide level -# (needs clarification). The outcome of FindSim is a table with a row per -# query sequence and the columns are the database hits sorted according to -# similarity. -# -# Extensive use of inline C for speed. +# (needs clarification). class FindSim include Enumerable @@ -100,11 +96,11 @@ class FindSim # Method to search database or subject sequences from a FASTA file by # locating for each sequence all shared oligos with the query index. def search_db(file) - time = Time.now - oligo_ary = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT - shared_ary = "\0" * @q_size * BYTES_IN_INT - result_ary = "\0" * RESULT_ARY_MAX * BYTES_IN_HIT - result_count = 0 + time = Time.now + oligo_ary = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT + shared_ary = "\0" * @q_size * BYTES_IN_INT + result_ary = "\0" * RESULT_ARY_MAX * BYTES_IN_HIT + result_count = 0 Fasta.open(file, 'r') do |ios| ios.each_with_index do |entry, s_index| @@ -192,8 +188,7 @@ class FindSim # particular oligo. def create_query_index(q_total, oligo_hash) @q_total_ary = q_total.pack("I*") - - @q_ary = "" + @q_ary = "" beg = 0 oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0) @@ -437,7 +432,7 @@ class FindSim # Method that counts all shared oligos/kmers between a subject sequence and # all query sequences. For each oligo in the subject sequence (s_ary) the - # index of all query sequences containing this oligo is found the the q_ary + # index of all query sequences containing this oligo is found for the q_ary # where this information is stored sequentially in intervals. For the # particula oligo the interval is looked up in the q_beg and q_end arrays. # Shared oligos are recorded in the shared_ary. -- 2.39.5