code_ruby/lib/maasha/findsim.rb

   1 # Copyright (C) 2007-2012 Martin A. Hansen.
   2
   3 # This program is free software; you can redistribute it and/or
   4 # modify it under the terms of the GNU General Public License
   5 # as published by the Free Software Foundation; either version 2
   6 # of the License, or (at your option) any later version.
   7
   8 # This program is distributed in the hope that it will be useful,
   9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 # GNU General Public License for more details.
  12
  13 # You should have received a copy of the GNU General Public License
  14 # along with this program; if not, write to the Free Software
  15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16
  17 # http://www.gnu.org/copyleft/gpl.html
  18
  19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  20
  21 # This software is part of the Biopieces framework (www.biopieces.org).
  22
  23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  24
  25 require 'inline'
  26 require 'maasha/fasta'
  27 require 'maasha/align'
  28
  29 BYTES_IN_INT   = 4
  30 BYTES_IN_FLOAT = 4
  31 BYTES_IN_HIT   = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT   # i.e. 12
  32 NUC_ALPH_SIZE  = 4            # Alphabet size of nucleotides.
  33 RESULT_ARY_MAX = 50_000_000   # Maximum size for the result_ary.
  34 HIT_ARY_MAX    = 100_000      # Maximum size for the hit_ary.
  35
  36 # FindSim is an implementation of the SimRank logic proposed by Niels Larsen.
  37 # The purpose is to find similarities between query DNA/RNA sequences and a
  38 # database of small subunit ribosomal DNA/RNA sequences. The sequence
  39 # similarities are calculated as the number of shared unique oligos/kmers
  40 # divided by the smallest number of unique oligoes in either the query or
  41 # database sequence. This yields a rough under estimate of similarity e.g. 50%
  42 # oligo similarity may correspond to 80% similarity on a nucleotide level
  43 # (needs clarification). The outcome of FindSim is a table with a row per
  44 # query sequence and the columns are the database hits sorted according to
  45 # similarity.
  46 #
  47 # Extensive use of inline C for speed.
  48 class FindSim
  49   include Enumerable
  50
  51   # Method to initialize FindSim Object.
  52   def initialize(opt_hash)
  53     @opt_hash     = opt_hash
  54     @q_size       = 0
  55     @q_ary        = nil
  56     @q_begs_ary   = nil
  57     @q_ends_ary   = nil
  58     @q_total_ary  = nil
  59     @result       = nil
  60     @result_count = 0
  61     @q_ids        = []
  62     @s_ids        = []
  63     @q_entries    = []
  64     @s_entries    = []
  65   end
  66
  67   # Method to load sequences from a query file in FASTA format
  68   # and index these for oligo based similarty search.
  69   def load_query(file)
  70     time       = Time.now
  71     q_total    = []
  72     oligo_hash = Hash.new { |h, k| h[k] = [] }
  73
  74     count = 0
  75
  76     Fasta.open(file, 'r') do |ios|
  77       ios.each do |entry|
  78         @q_ids     << entry.seq_name if @opt_hash[:query_ids]
  79         @q_entries << entry          if @opt_hash[:realign]
  80
  81         oligos = str_to_oligo_rb_ary_c(entry.seq, @opt_hash[:kmer], 1).uniq.sort
  82
  83         q_total << oligos.size
  84
  85         oligos.each do |oligo|
  86           oligo_hash[oligo] << count
  87         end
  88
  89         count += 1
  90       end
  91     end
  92
  93     @q_size = count
  94
  95     create_query_index(q_total, oligo_hash)
  96
  97     $stderr.puts "Loaded #{count} query sequences in #{(Time.now - time)} seconds." if @opt_hash[:verbose]
  98   end
  99
 100   # Method to search database or subject sequences from a FASTA file by
 101   # locating for each sequence all shared oligos with the query index.
 102   def search_db(file)
 103     time           = Time.now
 104     oligo_ary      = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
 105     shared_ary     = "\0" * @q_size                             * BYTES_IN_INT
 106     result_ary     = "\0" * RESULT_ARY_MAX                      * BYTES_IN_HIT
 107     result_count   = 0
 108
 109     Fasta.open(file, 'r') do |ios|
 110       ios.each_with_index do |entry, s_index|
 111         @s_ids     << entry.seq_name if @opt_hash[:subject_ids]
 112         @s_entries << entry          if @opt_hash[:realign]
 113
 114         zero_ary_c(oligo_ary,  (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT)
 115         zero_ary_c(shared_ary, @q_size                             * BYTES_IN_INT)
 116
 117         oligo_ary_size = str_to_oligo_ary_c(entry.seq, entry.len, oligo_ary, @opt_hash[:kmer], @opt_hash[:step])
 118
 119         count_shared_c(@q_ary, @q_begs_ary, @q_ends_ary, oligo_ary, oligo_ary_size, shared_ary)
 120
 121         result_count = calc_scores_c(@q_total_ary, shared_ary, @q_size, oligo_ary_size, result_ary, result_count, s_index, @opt_hash[:min_score])
 122
 123         if ((s_index + 1) % 1000) == 0 and @opt_hash[:verbose]
 124           $stderr.puts "Searched #{s_index + 1} sequences in #{Time.now - time} seconds (#{result_count} hits)."
 125         end
 126       end
 127     end
 128
 129     @result       = result_ary[0 ... result_count * BYTES_IN_HIT]
 130     @result_count = result_count
 131   end
 132
 133   # Method that for each query index yields all hits, sorted according to
 134   # decending score, as Hit objects.
 135   def each
 136     sort_hits_c(@result, @result_count)
 137
 138     hit_ary = "\0" * HIT_ARY_MAX * BYTES_IN_HIT
 139
 140     hit_index = 0
 141
 142     (0 ... @q_size).each do |q_index|
 143       zero_ary_c(hit_ary, HIT_ARY_MAX * BYTES_IN_HIT)
 144       hit_ary_size = get_hits_c(@result, @result_count, hit_index, hit_ary, q_index)
 145
 146       if @opt_hash[:max_hits]
 147         max = (hit_ary_size > @opt_hash[:max_hits]) ? @opt_hash[:max_hits] : hit_ary_size
 148       else
 149         max = hit_ary_size
 150       end
 151
 152       (0 ... max).each do |i|
 153         q_index, s_index, score = hit_ary[BYTES_IN_HIT * i ... BYTES_IN_HIT * i + BYTES_IN_HIT].unpack("IIF")
 154
 155         q_id = @opt_hash[:query_ids]   ? @q_ids[q_index] : q_index
 156         s_id = @opt_hash[:subject_ids] ? @s_ids[s_index] : s_index
 157
 158         if @opt_hash[:realign]
 159           new_score = Align.muscle([@q_entries[q_index], @s_entries[s_index]]).identity
 160           score     = new_score if new_score > score
 161         end
 162
 163         if @opt_hash[:max_diversity]
 164           best_score = score if i == 0
 165           break if best_score - score > @opt_hash[:max_diversity]
 166         end
 167
 168         yield Hit.new(q_id, s_id, score)
 169       end
 170
 171       hit_index += hit_ary_size
 172     end
 173
 174     self
 175   end
 176
 177   private
 178
 179   # Method to create the query index for FindSim search. The index consists of
 180   # four lookup arrays which are stored as instance variables:
 181   # *  @q_total_ary holds per index the total of unique oligos for that
 182   #    particular query sequences.
 183   # *  @q_ary holds sequencially lists of query indexes so it is possible
 184   #    to lookup the list for a particular oligo and get all query indeces for
 185   #    query sequences containing this oligo.
 186   # *  @q_begs_ary holds per index the begin coordinate for the @q_ary list for
 187   #    a particular oligo.
 188   # *  @q_ends_ary holds per index the end coordinate for the @q_ary list for a
 189   #    particular oligo.
 190   def create_query_index(q_total, oligo_hash)
 191     @q_total_ary = q_total.pack("I*")
 192
 193     @q_ary = ""
 194
 195     beg        = 0
 196     oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
 197     oligo_ends = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
 198
 199     oligo_hash.each do |oligo, list|
 200       @q_ary << list.pack("I*")
 201       oligo_begs[oligo] = beg
 202       oligo_ends[oligo] = beg + list.size
 203
 204       beg += list.size
 205     end
 206
 207     @q_begs_ary = oligo_begs.pack("I*")
 208     @q_ends_ary = oligo_ends.pack("I*")
 209   end
 210
 211   # >>>>>>>>>>>>>>> RubyInline C code <<<<<<<<<<<<<<<
 212
 213   inline do |builder|
 214     # Bitmap lookup table where the index is ASCII values
 215     # and the following nucleotides are encoded as:
 216     # T or t = 1
 217     # U or u = 1
 218     # C or c = 2
 219     # G or g = 3
 220     builder.prefix %{
 221       unsigned char oligo_map[256] = {
 222         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 223         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 224         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 225         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 226         0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
 227         0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 228         0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
 229         0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 230         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 231         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 232         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 233         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 234         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 235         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 236         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 237         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 238       };
 239     }
 240
 241     # Defining a hit struct to hold hits consisting of query
 242     # sequence index, subject sequence index and score.
 243     builder.prefix %{
 244       typedef struct
 245       {
 246         unsigned int q_index;
 247         unsigned int s_index;
 248         float          score;
 249       } hit;
 250     }
 251
 252     # Qsort unsigned int comparison function.
 253     # Returns negative if b > a and positive if a > b.
 254     builder.prefix %{
 255       int uint_cmp(const void *a, const void *b)
 256       {
 257         const unsigned int *ia = (const unsigned int *) a;
 258         const unsigned int *ib = (const unsigned int *) b;
 259
 260         return *ia - *ib;
 261       }
 262     }
 263
 264     # Qsort hit struct comparision function of q_index.
 265     # Returns negative if b > a and positive if a > b.
 266     builder.prefix %{
 267       int hit_cmp_by_q_index(const void *a, const void *b)
 268       {
 269         hit *ia = (hit *) a;
 270         hit *ib = (hit *) b;
 271
 272         return (int) (ia->q_index - ib->q_index);
 273       }
 274     }
 275
 276     # Qsort hit struct comparision function of score.
 277     # Returns negative if b < a and positive if a < b.
 278     # We multiply with 1000 to maintain 2 decimal floats.
 279     builder.prefix %{
 280       int hit_cmp_by_score(const void *a, const void *b)
 281       {
 282         hit *ia = (hit *) a;
 283         hit *ib = (hit *) b;
 284
 285         return (int) (1000 * ib->score - 1000 * ia->score);
 286       }
 287     }
 288
 289     # Given a sorted unsigned integer removes all duplicate values
 290     # and move the unique values to the front of the array. Returns
 291     # the new size of the array.
 292     builder.prefix %{
 293       unsigned int uniq_ary(unsigned int *ary, unsigned int ary_size)
 294       {
 295         unsigned int new_ary_size = 0;
 296         unsigned int i            = 0;
 297         unsigned int j            = 0;
 298
 299         for (i = 1; i < ary_size; i++)
 300         {
 301           if (ary[i] != ary[j])
 302           {
 303             j++;
 304             ary[j] = ary[i]; // Move it to the front
 305           }
 306         }
 307
 308         new_ary_size = j + 1;
 309
 310         return new_ary_size;
 311       }
 312     }
 313
 314     # Method that given a byte array and its size in bytes
 315     # sets all bytes to 0.
 316     builder.c %{
 317       void zero_ary_c(
 318         VALUE _ary,       // Byte array to zero.
 319         VALUE _ary_size   // Size of array.
 320       )
 321       {
 322         char         *ary      = (char *) StringValuePtr(_ary);
 323         unsigned int  ary_size = FIX2UINT(_ary_size);
 324
 325         bzero(ary, ary_size);
 326       }
 327     }
 328
 329     # Method to sort array of hits according to q_index.
 330     builder.c %{
 331       void sort_hits_c(
 332         VALUE _ary,       // Array of hits.
 333         VALUE _ary_size   // Size of array.
 334       )
 335       {
 336         hit          *ary      = (hit *) StringValuePtr(_ary);
 337         unsigned int  ary_size = FIX2UINT(_ary_size);
 338
 339         qsort(ary, ary_size, sizeof(hit), hit_cmp_by_q_index);
 340       }
 341     }
 342
 343     # Method that given a string (char array) encodes all kmers overlapping
 344     # with a given step size as integers that are saved in an unsigned integer
 345     # array. Then the array is sorted and uniq'ed and the number of unique
 346     # elements is returned.
 347     builder.c %{
 348       VALUE str_to_oligo_ary_c(
 349         VALUE _str,        // DNA or RNA string.
 350         VALUE _str_size,   // String length.
 351         VALUE _ary,        // Array to store result in.
 352         VALUE _kmer,       // Size of kmers/oligos.
 353         VALUE _step        // Step size for overlapping kmers.
 354       )
 355       {
 356         unsigned char *str      = StringValuePtr(_str);
 357         unsigned int   str_size = FIX2UINT(_str_size);
 358         unsigned int  *ary      = (unsigned int *) StringValuePtr(_ary);
 359         unsigned int   kmer     = FIX2UINT(_kmer);
 360         unsigned int   step     = FIX2UINT(_step);
 361
 362         unsigned int ary_size = 0;
 363         unsigned int mask     = (1 << (2 * kmer)) - 1;
 364         unsigned int bin      = 0;
 365         unsigned int i        = 0;
 366
 367         for (i = 0; i < kmer; i++)
 368         {
 369           bin <<= 2;
 370           bin |= oligo_map[str[i]];
 371         }
 372
 373         ary[ary_size++] = bin;
 374
 375         for (i = kmer; i < str_size; i++)
 376         {
 377           bin <<= 2;
 378           bin |= oligo_map[str[i]];
 379
 380           if ((i % step) == 0) {
 381             ary[ary_size++] = (bin & mask);
 382           }
 383         }
 384
 385         qsort(ary, ary_size, sizeof(unsigned int), uint_cmp);
 386         ary_size = uniq_ary(ary, ary_size);
 387
 388         return UINT2NUM(ary_size);
 389       }
 390     }
 391
 392     # Method that given a string (char array) encodes all kmers overlapping
 393     # with a given step size as integers that are pushed onto a Ruby array
 394     # which is returned.
 395     # TODO should have an option for skipping oligos with ambiguity codes.
 396     builder.c %{
 397       VALUE str_to_oligo_rb_ary_c(
 398         VALUE _str,    // DNA or RNA string.
 399         VALUE _kmer,   // Size of kmer or oligo.
 400         VALUE _step    // Step size for overlapping kmers.
 401       )
 402       {
 403         char         *str   = StringValuePtr(_str);
 404         unsigned int  kmer  = FIX2UINT(_kmer);
 405         unsigned int  step  = FIX2UINT(_step);
 406
 407         VALUE         array = rb_ary_new();
 408         long          len   = strlen(str);
 409         unsigned int  mask  = (1 << (2 * kmer)) - 1;
 410         unsigned int  bin   = 0;
 411         unsigned int  i     = 0;
 412
 413         for (i = 0; i < kmer; i++)
 414         {
 415           bin <<= 2;
 416           bin |= oligo_map[str[i]];
 417         }
 418
 419         rb_ary_push(array, INT2FIX(bin));
 420
 421         for (i = kmer; i < len; i++)
 422         {
 423           bin <<= 2;
 424           bin |= oligo_map[str[i]];
 425
 426           if ((i % step) == 0) {
 427             rb_ary_push(array, INT2FIX((bin & mask)));
 428           }
 429         }
 430
 431         return array;
 432       }
 433     }
 434
 435     # Method that counts all shared oligos/kmers between a subject sequence and
 436     # all query sequences. For each oligo in the subject sequence (s_ary) the
 437     # index of all query sequences containing this oligo is found the the q_ary
 438     # where this information is stored sequentially in intervals. For the
 439     # particula oligo the interval is looked up in the q_beg and q_end arrays.
 440     # Shared oligos are recorded in the shared_ary.
 441     builder.c %{
 442       void count_shared_c(
 443         VALUE _q_ary,        // Lookup array with q_index lists.
 444         VALUE _q_begs_ary,   // Lookup array with interval begins of q_index lists.
 445         VALUE _q_ends_ary,   // Lookup array with interval ends of q_index lists.
 446         VALUE _s_ary,        // Subject sequence oligo array.
 447         VALUE _s_ary_size,   // Subject sequence oligo array size.
 448         VALUE _shared_ary    // Array with shared oligo counts.
 449       )
 450       {
 451         unsigned int *q_ary      = (unsigned int *) StringValuePtr(_q_ary);
 452         unsigned int *q_begs_ary = (unsigned int *) StringValuePtr(_q_begs_ary);
 453         unsigned int *q_ends_ary = (unsigned int *) StringValuePtr(_q_ends_ary);
 454         unsigned int *s_ary      = (unsigned int *) StringValuePtr(_s_ary);
 455         unsigned int  s_ary_size = FIX2UINT(_s_ary_size);
 456         unsigned int *shared_ary = (unsigned int *) StringValuePtr(_shared_ary);
 457
 458         unsigned int oligo = 0;
 459         unsigned int s     = 0;
 460         unsigned int q     = 0;
 461         unsigned int q_beg = 0;
 462         unsigned int q_end = 0;
 463
 464         for (s = 0; s < s_ary_size; s++)  // each oligo do
 465         {
 466           oligo = s_ary[s];
 467
 468           q_beg = q_begs_ary[oligo];
 469           q_end = q_ends_ary[oligo];
 470
 471           for (q = q_beg; q < q_end; q++)
 472           {
 473             shared_ary[q_ary[q]]++;
 474           }
 475         }
 476       }
 477     }
 478
 479     # Method to calculate the score for all query vs subject sequence comparisons.
 480     # The score is calculated as the number of unique shared oligos divided by the
 481     # smallest number of unique oligos in either the subject or query sequence.
 482     # Only scores greater than or equal to a given minimum is stored in the hit
 483     # array and the size of the hit array is returned.
 484     builder.c %{
 485       VALUE calc_scores_c(
 486         VALUE _q_total_ary,    // Lookup array with total oligo counts.
 487         VALUE _shared_ary,     // Lookup arary with shared oligo counts.
 488         VALUE _q_size,         // Number of query sequences.
 489         VALUE _s_count,        // Number of unique oligos in subject sequence.
 490         VALUE _hit_ary,        // Result array.
 491         VALUE _hit_ary_size,   // Result array size.
 492         VALUE _s_index,        // Subject sequence index.
 493         VALUE _min_score       // Minimum score to include.
 494       )
 495       {
 496         unsigned int *q_total_ary  = (unsigned int *) StringValuePtr(_q_total_ary);
 497         unsigned int *shared_ary   = (unsigned int *) StringValuePtr(_shared_ary);
 498         unsigned int  q_size       = FIX2UINT(_q_size);
 499         unsigned int  s_count      = FIX2UINT(_s_count);
 500         hit          *hit_ary      = (hit *) StringValuePtr(_hit_ary);
 501         unsigned int  hit_ary_size = FIX2UINT(_hit_ary_size);
 502         unsigned int  s_index      = FIX2UINT(_s_index);
 503         float         min_score    = (float) NUM2DBL(_min_score);
 504
 505         unsigned int  q_index      = 0;
 506         unsigned int  q_count      = 0;
 507         unsigned int  shared_count = 0;
 508         unsigned int  total_count  = 0;
 509         float         score        = 0;
 510         hit           new_score    = {0, 0, 0};
 511
 512         for (q_index = 0; q_index < q_size; q_index++)
 513         {
 514           shared_count = shared_ary[q_index];
 515           q_count      = q_total_ary[q_index];
 516           total_count  = (s_count < q_count) ? s_count : q_count;
 517
 518           score = shared_count / (float) total_count;
 519
 520           if (score >= min_score)
 521           {
 522             new_score.q_index = q_index;
 523             new_score.s_index = s_index;
 524             new_score.score   = score;
 525
 526             hit_ary[hit_ary_size] = new_score;
 527
 528             hit_ary_size++;
 529           }
 530         }
 531
 532         return UINT2NUM(hit_ary_size);
 533       }
 534     }
 535
 536     # Method that fetches all hits matching a given q_index value
 537     # from the result_ary. Since the result_ary is sorted according
 538     # to q_index we start at a given hit_index and fetch until
 539     # the next hit with a different q_index. The fetched hits are
 540     # stored in an array which is sorted according to decreasing
 541     # score. The size of this array is returned.
 542     builder.c %{
 543       VALUE get_hits_c(
 544         VALUE _result_ary,        // Result array
 545         VALUE _result_ary_size,   // Result array size
 546         VALUE _hit_index,         // Offset position in hit_ary to search from.
 547         VALUE _hit_ary,           // Array to store hits in.
 548         VALUE _q_index)           // q_index to fetch.
 549       {
 550         hit          *result_ary      = (hit *) StringValuePtr(_result_ary);
 551         unsigned int  result_ary_size = FIX2UINT(_result_ary_size);
 552         unsigned int  hit_index       = FIX2UINT(_hit_index);
 553         hit          *hit_ary         = (hit *) StringValuePtr(_hit_ary);
 554         unsigned int  q_index         = FIX2UINT(_q_index);
 555         hit           new_hit         = {0, 0, 0};
 556         unsigned int  hit_ary_size    = 0;
 557
 558         while ((hit_index + hit_ary_size) < result_ary_size)
 559         {
 560           new_hit = result_ary[hit_index + hit_ary_size];
 561
 562           if (new_hit.q_index == q_index)
 563           {
 564             hit_ary[hit_ary_size] = new_hit;
 565
 566             hit_ary_size++;
 567           }
 568           else
 569           {
 570             break;
 571           }
 572         }
 573
 574         qsort(hit_ary, hit_ary_size, sizeof(hit), hit_cmp_by_score);
 575
 576         return UINT2NUM(hit_ary_size);
 577       }
 578     }
 579   end
 580
 581   # >>>>>>>>>>>>>>> Embedded classes <<<<<<<<<<<<<<<
 582
 583   # Class for holding score information.
 584   class Hit
 585     attr_reader :q_id, :s_id, :score
 586
 587     # Method to initialize Hit object with
 588     # query and subject id a score.
 589     def initialize(q_id, s_id, score)
 590       @q_id  = q_id
 591       @s_id  = s_id
 592       @score = score
 593     end
 594
 595     # Method for outputting score objects.
 596     def to_s
 597       "#{@q_id}:#{@s_id}:#{@score.round(2)}"
 598     end
 599
 600     # Method to convert to a Biopiece record.
 601     def to_bp
 602       hash = {}
 603       hash[:REC_TYPE] = "findsim"
 604       hash[:Q_ID]     = @q_id
 605       hash[:S_ID]     = @s_id
 606       hash[:SCORE]    = @score.round(2)
 607
 608       hash
 609     end
 610   end
 611 end
 612
 613 __END__