code_ruby/lib/maasha/findsim.rb

   1 # Copyright (C) 2007-2012 Martin A. Hansen.
   2
   3 # This program is free software; you can redistribute it and/or
   4 # modify it under the terms of the GNU General Public License
   5 # as published by the Free Software Foundation; either version 2
   6 # of the License, or (at your option) any later version.
   7
   8 # This program is distributed in the hope that it will be useful,
   9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 # GNU General Public License for more details.
  12
  13 # You should have received a copy of the GNU General Public License
  14 # along with this program; if not, write to the Free Software
  15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16
  17 # http://www.gnu.org/copyleft/gpl.html
  18
  19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  20
  21 # This software is part of the Biopieces framework (www.biopieces.org).
  22
  23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  24
  25 require 'inline'
  26 require 'maasha/fasta'
  27 require 'maasha/align'
  28
  29 BYTES_IN_INT   = 4
  30 BYTES_IN_FLOAT = 4
  31 BYTES_IN_HIT   = 12
  32 RESULT_ARY_MAX = 50_000_000   # Maximum size for the result_ary.
  33 HIT_ARY_MAX    = 100_000      # Maximum size for the hit_ary.
  34
  35 # FindSim is an implementation of the SimRank logic proposed by Niels Larsen.
  36 # The purpose is to find similarities between query DNA/RNA sequences and a
  37 # database of small subunit ribosomal DNA/RNA sequences. The sequence
  38 # similarities are calculated as the number of shared unique oligos/kmers
  39 # divided by the smallest number of unique oligoes in either the query or
  40 # database sequence. This yields a rough under estimate of similarity e.g. 50%
  41 # oligo similarity may correspond to 80% similarity on a nucleotide level
  42 # (needs clarification). The outcome of FindSim is a table with a row per
  43 # query sequence and the columns are the database hits sorted according to
  44 # similarity.
  45 #
  46 # Extensive use of inline C for speed.
  47 class FindSim
  48   include Enumerable
  49
  50   # Method to initialize FindSim Object.
  51   def initialize(opt_hash)
  52     @opt_hash     = opt_hash
  53     @q_size       = 0
  54     @q_ary        = nil
  55     @q_begs_ary   = nil
  56     @q_ends_ary   = nil
  57     @q_total_ary  = nil
  58     @result       = nil
  59     @result_count = 0
  60     @q_ids        = []
  61     @s_ids        = []
  62     @q_entries    = []
  63     @s_entries    = []
  64   end
  65
  66   # Method to load sequences from a query file in FASTA format
  67   # and index these for oligo based similarty search.
  68   def load_query(file)
  69     time       = Time.now
  70     q_total    = []
  71     oligo_hash = Hash.new { |h, k| h[k] = [] }
  72
  73     count = 0
  74
  75     Fasta.open(file, 'r') do |ios|
  76       ios.each do |entry|
  77         @q_ids     << entry.seq_name if @opt_hash[:query_ids]
  78         @q_entries << entry          if @opt_hash[:realign]
  79
  80         oligos = str_to_oligo_rb_ary_c(entry.seq, @opt_hash[:kmer], 1).uniq.sort
  81
  82         q_total << oligos.size
  83
  84         oligos.each do |oligo|
  85           oligo_hash[oligo] << count
  86         end
  87
  88         count += 1
  89       end
  90     end
  91
  92     @q_size = count
  93
  94     create_query_index(q_total, oligo_hash)
  95
  96     $stderr.puts "Loaded #{count} query sequences in #{(Time.now - time)} seconds." if @opt_hash[:verbose]
  97   end
  98
  99   # Method to search database or subject sequences from a FASTA file by
 100   # locating for each sequence all shared oligos with the query index.
 101   def search_db(file)
 102     time           = Time.now
 103     oligo_ary      = "\0" * (4 ** @opt_hash[:kmer]) * BYTES_IN_INT
 104     shared_ary     = "\0" * @q_size                 * BYTES_IN_INT
 105     result_ary     = "\0" * RESULT_ARY_MAX          * BYTES_IN_HIT
 106     result_count   = 0
 107
 108     Fasta.open(file, 'r') do |ios|
 109       ios.each_with_index do |entry, s_index|
 110         @s_ids     << entry.seq_name if @opt_hash[:subject_ids]
 111         @s_entries << entry          if @opt_hash[:realign]
 112
 113         zero_ary_c(oligo_ary,  (4 ** @opt_hash[:kmer]) * BYTES_IN_INT)
 114         zero_ary_c(shared_ary, @q_size                 * BYTES_IN_INT)
 115
 116         oligo_ary_size = str_to_oligo_ary_c(entry.seq, entry.len, oligo_ary, @opt_hash[:kmer], @opt_hash[:step])
 117
 118         count_shared_c(@q_ary, @q_begs_ary, @q_ends_ary, oligo_ary, oligo_ary_size, shared_ary)
 119
 120         result_count = calc_scores_c(@q_total_ary, shared_ary, @q_size, oligo_ary_size, result_ary, result_count, s_index, @opt_hash[:min_score])
 121
 122         if ((s_index + 1) % 1000) == 0 and @opt_hash[:verbose]
 123           $stderr.puts "Searched #{s_index + 1} sequences in #{Time.now - time} seconds (#{result_count} hits)."
 124         end
 125       end
 126     end
 127
 128     @result       = result_ary[0 ... result_count * BYTES_IN_HIT]
 129     @result_count = result_count
 130   end
 131
 132   # Method that for each query index yields all hits, sorted according to
 133   # decending score, as Hit objects.
 134   def each
 135     sort_hits_c(@result, @result_count)
 136
 137     hit_ary = "\0" * HIT_ARY_MAX * BYTES_IN_HIT
 138
 139     hit_index = 0
 140
 141     (0 ... @q_size).each do |q_index|
 142       zero_ary_c(hit_ary, HIT_ARY_MAX * BYTES_IN_HIT)
 143       hit_ary_size = get_hits_c(@result, @result_count, hit_index, hit_ary, q_index)
 144
 145       if @opt_hash[:max_hits]
 146         max = (hit_ary_size > @opt_hash[:max_hits]) ? @opt_hash[:max_hits] : hit_ary_size
 147       else
 148         max = hit_ary_size
 149       end
 150
 151       (0 ... max).each do |i|
 152         q_index, s_index, score = hit_ary[BYTES_IN_HIT * i ... BYTES_IN_HIT * i + BYTES_IN_HIT].unpack("IIF")
 153
 154         q_id = @opt_hash[:query_ids]   ? @q_ids[q_index] : q_index
 155         s_id = @opt_hash[:subject_ids] ? @s_ids[s_index] : s_index
 156
 157         if @opt_hash[:realign]
 158           new_score = Align.muscle([@q_entries[q_index], @s_entries[s_index]]).identity
 159           score     = new_score if new_score > score
 160         end
 161
 162         yield Hit.new(q_id, s_id, score)
 163       end
 164
 165       hit_index += hit_ary_size
 166     end
 167
 168     self
 169   end
 170
 171   private
 172
 173   # Method to create the query index for FindSim search. The index consists of
 174   # four lookup arrays which are stored as instance variables:
 175   # *  @q_total_ary holds per index the total of unique oligos for that
 176   #    particular query sequences.
 177   # *  @q_ary holds sequencially lists of query indexes so it is possible
 178   #    to lookup the list for a particular oligo and get all query indeces for
 179   #    query sequences containing this oligo.
 180   # *  @q_begs_ary holds per index the begin coordinate for the @q_ary list for
 181   #    a particular oligo.
 182   # *  @q_ends_ary holds per index the end coordinate for the @q_ary list for a
 183   #    particular oligo.
 184   def create_query_index(q_total, oligo_hash)
 185     @q_total_ary = q_total.pack("I*")
 186
 187     @q_ary = ""
 188
 189     beg        = 0
 190     oligo_begs = Array.new(4 ** @opt_hash[:kmer], 0)
 191     oligo_ends = Array.new(4 ** @opt_hash[:kmer], 0)
 192
 193     oligo_hash.each do |oligo, list|
 194       @q_ary << list.pack("I*")
 195       oligo_begs[oligo] = beg
 196       oligo_ends[oligo] = beg + list.size
 197
 198       beg += list.size
 199     end
 200
 201     @q_begs_ary = oligo_begs.pack("I*")
 202     @q_ends_ary = oligo_ends.pack("I*")
 203   end
 204
 205   # >>>>>>>>>>>>>>> RubyInline C code <<<<<<<<<<<<<<<
 206
 207   inline do |builder|
 208     # Bitmap lookup table where the index is ASCII values
 209     # and the following nucleotides are encoded as:
 210     # T or t = 1
 211     # U or u = 1
 212     # C or c = 2
 213     # G or g = 3
 214     builder.prefix %{
 215       unsigned char oligo_map[256] = {
 216         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 217         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 218         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 219         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 220         0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
 221         0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 222         0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
 223         0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 224         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 225         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 226         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 227         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 228         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 229         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 230         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 231         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 232       };
 233     }
 234
 235     # Defining a hit struct to hold hits consisting of query
 236     # sequence index, subject sequence index and score.
 237     builder.prefix %{
 238       typedef struct
 239       {
 240         unsigned int q_index;
 241         unsigned int s_index;
 242         float          score;
 243       } hit;
 244     }
 245
 246     # Qsort unsigned int comparison function.
 247     # Returns negative if b > a and positive if a > b.
 248     builder.prefix %{
 249       int uint_cmp(const void *a, const void *b)
 250       {
 251         const unsigned int *ia = (const unsigned int *) a;
 252         const unsigned int *ib = (const unsigned int *) b;
 253
 254         return *ia - *ib;
 255       }
 256     }
 257
 258     # Qsort hit struct comparision function of q_index.
 259     # Returns negative if b > a and positive if a > b.
 260     builder.prefix %{
 261       int hit_cmp_by_q_index(const void *a, const void *b)
 262       {
 263         hit *ia = (hit *) a;
 264         hit *ib = (hit *) b;
 265
 266         return (int) (ia->q_index - ib->q_index);
 267       }
 268     }
 269
 270     # Qsort hit struct comparision function of score.
 271     # Returns negative if b < a and positive if a < b.
 272     # We multiply with 1000 to maintain 2 decimal floats.
 273     builder.prefix %{
 274       int hit_cmp_by_score(const void *a, const void *b)
 275       {
 276         hit *ia = (hit *) a;
 277         hit *ib = (hit *) b;
 278
 279         return (int) (1000 * ib->score - 1000 * ia->score);
 280       }
 281     }
 282
 283     # Given a sorted unsigned integer removes all duplicate values
 284     # and move the unique values to the front of the array. Returns
 285     # the new size of the array.
 286     builder.prefix %{
 287       unsigned int uniq_ary(unsigned int *ary, unsigned int ary_size)
 288       {
 289         unsigned int new_ary_size = 0;
 290         unsigned int i            = 0;
 291         unsigned int j            = 0;
 292
 293         for (i = 1; i < ary_size; i++)
 294         {
 295           if (ary[i] != ary[j])
 296           {
 297             j++;
 298             ary[j] = ary[i]; // Move it to the front
 299           }
 300         }
 301
 302         new_ary_size = j + 1;
 303
 304         return new_ary_size;
 305       }
 306     }
 307
 308     # Method that given a byte array and its size in bytes
 309     # sets all bytes to 0.
 310     builder.c %{
 311       void zero_ary_c(
 312         VALUE _ary,       // Byte array to zero.
 313         VALUE _ary_size   // Size of array.
 314       )
 315       {
 316         char         *ary      = (char *) StringValuePtr(_ary);
 317         unsigned int  ary_size = FIX2UINT(_ary_size);
 318
 319         bzero(ary, ary_size);
 320       }
 321     }
 322
 323     # Method to sort array of hits according to q_index.
 324     builder.c %{
 325       void sort_hits_c(
 326         VALUE _ary,       // Array of hits.
 327         VALUE _ary_size   // Size of array.
 328       )
 329       {
 330         hit          *ary      = (hit *) StringValuePtr(_ary);
 331         unsigned int  ary_size = FIX2UINT(_ary_size);
 332
 333         qsort(ary, ary_size, sizeof(hit), hit_cmp_by_q_index);
 334       }
 335     }
 336
 337     # Method that given a string (char array) encodes all kmers overlapping
 338     # with a given step size as integers that are saved in an unsigned integer
 339     # array. Then the array is sorted and uniq'ed and the number of unique
 340     # elements is returned.
 341     builder.c %{
 342       VALUE str_to_oligo_ary_c(
 343         VALUE _str,        // DNA or RNA string.
 344         VALUE _str_size,   // String length.
 345         VALUE _ary,        // Array to store result in.
 346         VALUE _kmer,       // Size of kmers/oligos.
 347         VALUE _step        // Step size for overlapping kmers.
 348       )
 349       {
 350         unsigned char *str      = StringValuePtr(_str);
 351         unsigned int   str_size = FIX2UINT(_str_size);
 352         unsigned int  *ary      = (unsigned int *) StringValuePtr(_ary);
 353         unsigned int   kmer     = FIX2UINT(_kmer);
 354         unsigned int   step     = FIX2UINT(_step);
 355
 356         unsigned int ary_size = 0;
 357         unsigned int mask     = (1 << (2 * kmer)) - 1;
 358         unsigned int bin      = 0;
 359         unsigned int i        = 0;
 360
 361         for (i = 0; i < kmer; i++)
 362         {
 363           bin <<= 2;
 364           bin |= oligo_map[str[i]];
 365         }
 366
 367         ary[ary_size++] = bin;
 368
 369         for (i = kmer; i < str_size; i++)
 370         {
 371           bin <<= 2;
 372           bin |= oligo_map[str[i]];
 373
 374           if ((i % step) == 0) {
 375             ary[ary_size++] = (bin & mask);
 376           }
 377         }
 378
 379         qsort(ary, ary_size, sizeof(unsigned int), uint_cmp);
 380         ary_size = uniq_ary(ary, ary_size);
 381
 382         return UINT2NUM(ary_size);
 383       }
 384     }
 385
 386     # Method that given a string (char array) encodes all kmers overlapping
 387     # with a given step size as integers that are pushed onto a Ruby array
 388     # which is returned.
 389     # TODO should have an option for skipping oligos with ambiguity codes.
 390     builder.c %{
 391       VALUE str_to_oligo_rb_ary_c(
 392         VALUE _str,    // DNA or RNA string.
 393         VALUE _kmer,   // Size of kmer or oligo.
 394         VALUE _step    // Step size for overlapping kmers.
 395       )
 396       {
 397         char         *str   = StringValuePtr(_str);
 398         unsigned int  kmer  = FIX2UINT(_kmer);
 399         unsigned int  step  = FIX2UINT(_step);
 400
 401         VALUE         array = rb_ary_new();
 402         long          len   = strlen(str);
 403         unsigned int  mask  = (1 << (2 * kmer)) - 1;
 404         unsigned int  bin   = 0;
 405         unsigned int  i     = 0;
 406
 407         for (i = 0; i < kmer; i++)
 408         {
 409           bin <<= 2;
 410           bin |= oligo_map[str[i]];
 411         }
 412
 413         rb_ary_push(array, INT2FIX(bin));
 414
 415         for (i = kmer; i < len; i++)
 416         {
 417           bin <<= 2;
 418           bin |= oligo_map[str[i]];
 419
 420           if ((i % step) == 0) {
 421             rb_ary_push(array, INT2FIX((bin & mask)));
 422           }
 423         }
 424
 425         return array;
 426       }
 427     }
 428
 429     # Method that counts all shared oligos/kmers between a subject sequence and
 430     # all query sequences. For each oligo in the subject sequence (s_ary) the
 431     # index of all query sequences containing this oligo is found the the q_ary
 432     # where this information is stored sequentially in intervals. For the
 433     # particula oligo the interval is looked up in the q_beg and q_end arrays.
 434     # Shared oligos are recorded in the shared_ary.
 435     builder.c %{
 436       void count_shared_c(
 437         VALUE _q_ary,        // Lookup array with q_index lists.
 438         VALUE _q_begs_ary,   // Lookup array with interval begins of q_index lists.
 439         VALUE _q_ends_ary,   // Lookup array with interval ends of q_index lists.
 440         VALUE _s_ary,        // Subject sequence oligo array.
 441         VALUE _s_ary_size,   // Subject sequence oligo array size.
 442         VALUE _shared_ary    // Array with shared oligo counts.
 443       )
 444       {
 445         unsigned int *q_ary      = (unsigned int *) StringValuePtr(_q_ary);
 446         unsigned int *q_begs_ary = (unsigned int *) StringValuePtr(_q_begs_ary);
 447         unsigned int *q_ends_ary = (unsigned int *) StringValuePtr(_q_ends_ary);
 448         unsigned int *s_ary      = (unsigned int *) StringValuePtr(_s_ary);
 449         unsigned int  s_ary_size = FIX2UINT(_s_ary_size);
 450         unsigned int *shared_ary = (unsigned int *) StringValuePtr(_shared_ary);
 451
 452         unsigned int oligo = 0;
 453         unsigned int s     = 0;
 454         unsigned int q     = 0;
 455         unsigned int q_beg = 0;
 456         unsigned int q_end = 0;
 457
 458         for (s = 0; s < s_ary_size; s++)  // each oligo do
 459         {
 460           oligo = s_ary[s];
 461
 462           q_beg = q_begs_ary[oligo];
 463           q_end = q_ends_ary[oligo];
 464
 465           for (q = q_beg; q < q_end; q++)
 466           {
 467             shared_ary[q_ary[q]]++;
 468           }
 469         }
 470       }
 471     }
 472
 473     # Method to calculate the score for all query vs subject sequence comparisons.
 474     # The score is calculated as the number of unique shared oligos divided by the
 475     # smallest number of unique oligos in either the subject or query sequence.
 476     # Only scores greater than or equal to a given minimum is stored in the hit
 477     # array and the size of the hit array is returned.
 478     builder.c %{
 479       VALUE calc_scores_c(
 480         VALUE _q_total_ary,    // Lookup array with total oligo counts.
 481         VALUE _shared_ary,     // Lookup arary with shared oligo counts.
 482         VALUE _q_size,         // Number of query sequences.
 483         VALUE _s_count,        // Number of unique oligos in subject sequence.
 484         VALUE _hit_ary,        // Result array.
 485         VALUE _hit_ary_size,   // Result array size.
 486         VALUE _s_index,        // Subject sequence index.
 487         VALUE _min_score       // Minimum score to include.
 488       )
 489       {
 490         unsigned int *q_total_ary  = (unsigned int *) StringValuePtr(_q_total_ary);
 491         unsigned int *shared_ary   = (unsigned int *) StringValuePtr(_shared_ary);
 492         unsigned int  q_size       = FIX2UINT(_q_size);
 493         unsigned int  s_count      = FIX2UINT(_s_count);
 494         hit          *hit_ary      = (hit *) StringValuePtr(_hit_ary);
 495         unsigned int  hit_ary_size = FIX2UINT(_hit_ary_size);
 496         unsigned int  s_index      = FIX2UINT(_s_index);
 497         float         min_score    = (float) NUM2DBL(_min_score);
 498
 499         unsigned int  q_index      = 0;
 500         unsigned int  q_count      = 0;
 501         unsigned int  shared_count = 0;
 502         unsigned int  total_count  = 0;
 503         float         score        = 0;
 504         hit           new_score    = {0, 0, 0};
 505
 506         for (q_index = 0; q_index < q_size; q_index++)
 507         {
 508           shared_count = shared_ary[q_index];
 509           q_count      = q_total_ary[q_index];
 510           total_count  = (s_count < q_count) ? s_count : q_count;
 511
 512           score = shared_count / (float) total_count;
 513
 514           if (score >= min_score)
 515           {
 516             new_score.q_index = q_index;
 517             new_score.s_index = s_index;
 518             new_score.score   = score;
 519
 520             hit_ary[hit_ary_size] = new_score;
 521
 522             hit_ary_size++;
 523           }
 524         }
 525
 526         return UINT2NUM(hit_ary_size);
 527       }
 528     }
 529
 530     # Method that fetches all hits matching a given q_index value
 531     # from the result_ary. Since the result_ary is sorted according
 532     # to q_index we start at a given hit_index and fetch until
 533     # the next hit with a different q_index. The fetched hits are
 534     # stored in an array which is sorted according to decreasing
 535     # score. The size of this array is returned.
 536     builder.c %{
 537       VALUE get_hits_c(
 538         VALUE _result_ary,        // Result array
 539         VALUE _result_ary_size,   // Result array size
 540         VALUE _hit_index,         // Offset position in hit_ary to search from.
 541         VALUE _hit_ary,           // Array to store hits in.
 542         VALUE _q_index)           // q_index to fetch.
 543       {
 544         hit          *result_ary      = (hit *) StringValuePtr(_result_ary);
 545         unsigned int  result_ary_size = FIX2UINT(_result_ary_size);
 546         unsigned int  hit_index       = FIX2UINT(_hit_index);
 547         hit          *hit_ary         = (hit *) StringValuePtr(_hit_ary);
 548         unsigned int  q_index         = FIX2UINT(_q_index);
 549         hit           new_hit         = {0, 0, 0};
 550         unsigned int  hit_ary_size    = 0;
 551
 552         while ((hit_index + hit_ary_size) < result_ary_size)
 553         {
 554           new_hit = result_ary[hit_index + hit_ary_size];
 555
 556           if (new_hit.q_index == q_index)
 557           {
 558             hit_ary[hit_ary_size] = new_hit;
 559
 560             hit_ary_size++;
 561           }
 562           else
 563           {
 564             break;
 565           }
 566         }
 567
 568         qsort(hit_ary, hit_ary_size, sizeof(hit), hit_cmp_by_score);
 569
 570         return UINT2NUM(hit_ary_size);
 571       }
 572     }
 573   end
 574
 575   # >>>>>>>>>>>>>>> Embedded classes <<<<<<<<<<<<<<<
 576
 577   # Class for holding score information.
 578   class Hit
 579     attr_reader :q_id, :s_id, :score
 580
 581     # Method to initialize Hit object with
 582     # query and subject id a score.
 583     def initialize(q_id, s_id, score)
 584       @q_id  = q_id
 585       @s_id  = s_id
 586       @score = score
 587     end
 588
 589     # Method for outputting score objects.
 590     def to_s
 591       "#{@q_id}:#{@s_id}:#{@score.round(2)}"
 592     end
 593
 594     # Method to convert to a Biopiece record.
 595     def to_bp
 596       hash = {}
 597       hash[:REC_TYPE] = "findsim"
 598       hash[:Q_ID]     = @q_id
 599       hash[:S_ID]     = @s_id
 600       hash[:SCORE]    = @score.round(2)
 601
 602       hash
 603     end
 604   end
 605 end
 606
 607 __END__