code_ruby/lib/maasha/findsim.rb

   1 # Copyright (C) 2007-2012 Martin A. Hansen.
   2
   3 # This program is free software; you can redistribute it and/or
   4 # modify it under the terms of the GNU General Public License
   5 # as published by the Free Software Foundation; either version 2
   6 # of the License, or (at your option) any later version.
   7
   8 # This program is distributed in the hope that it will be useful,
   9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11 # GNU General Public License for more details.
  12
  13 # You should have received a copy of the GNU General Public License
  14 # along with this program; if not, write to the Free Software
  15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  16
  17 # http://www.gnu.org/copyleft/gpl.html
  18
  19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  20
  21 # This software is part of the Biopieces framework (www.biopieces.org).
  22
  23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
  24
  25 require 'inline'
  26 require 'maasha/fasta'
  27 require 'maasha/align'
  28
  29 BYTES_IN_INT      = 4
  30 BYTES_IN_FLOAT    = 4
  31 BYTES_IN_HIT      = 2 * BYTES_IN_INT + 1 * BYTES_IN_FLOAT   # i.e. 12
  32 NUC_ALPH_SIZE     = 4            # Alphabet size of nucleotides.
  33 RESULT_ARY_BUFFER = 10_000_000   # Buffer for the result_ary.
  34 HIT_ARY_BUFFER    = 1_000_000    # Buffer for the hit_ary.
  35
  36 # FindSim is an implementation of the SimRank logic proposed by Niels Larsen.
  37 # The purpose is to find similarities between query DNA/RNA sequences and a
  38 # database of small subunit ribosomal DNA/RNA sequences. The sequence
  39 # similarities are calculated as the number of shared unique oligos/kmers
  40 # divided by the smallest number of unique oligoes in either the query or
  41 # database sequence. This yields a rough under estimate of similarity e.g. 50%
  42 # oligo similarity may correspond to 80% similarity on a nucleotide level
  43 # (needs clarification).
  44 class FindSim
  45   include Enumerable
  46
  47   # Method to initialize FindSim Object.
  48   def initialize(opt_hash)
  49     @opt_hash     = opt_hash
  50     @q_size       = 0
  51     @q_ary        = nil
  52     @q_begs_ary   = nil
  53     @q_ends_ary   = nil
  54     @q_total_ary  = nil
  55     @result       = nil
  56     @result_count = 0
  57     @q_ids        = []
  58     @s_ids        = []
  59     @q_entries    = []
  60     @s_entries    = []
  61   end
  62
  63   # Method to load sequences from a query file in FASTA format
  64   # and index these for oligo based similarty search.
  65   def load_query(file)
  66     time       = Time.now
  67     q_total    = []
  68     oligo_hash = Hash.new { |h, k| h[k] = [] }
  69
  70     count = 0
  71
  72     Fasta.open(file, 'r') do |ios|
  73       ios.each do |entry|
  74         @q_ids     << entry.seq_name if @opt_hash[:query_ids]
  75         @q_entries << entry          if @opt_hash[:realign]
  76
  77         oligos = str_to_oligo_rb_ary_c(entry.seq, @opt_hash[:kmer], 1).uniq.sort
  78
  79         q_total << oligos.size
  80
  81         oligos.each do |oligo|
  82           oligo_hash[oligo] << count
  83         end
  84
  85         count += 1
  86       end
  87     end
  88
  89     @q_size = count
  90
  91     create_query_index(q_total, oligo_hash)
  92
  93     $stderr.puts "Loaded #{count} query sequences in #{(Time.now - time)} seconds." if @opt_hash[:verbose]
  94   end
  95
  96   # Method to search database or subject sequences from a FASTA file by
  97   # locating for each sequence all shared oligos with the query index.
  98   def search_db(file)
  99     time         = Time.now
 100     oligo_ary    = "\0" * (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT
 101     shared_ary   = "\0" * @q_size                             * BYTES_IN_INT
 102     result_ary   = "\0" * RESULT_ARY_BUFFER                   * BYTES_IN_HIT
 103     result_count = 0
 104
 105     Fasta.open(file, 'r') do |ios|
 106       ios.each_with_index do |entry, s_index|
 107         @s_ids     << entry.seq_name if @opt_hash[:subject_ids]
 108         @s_entries << entry          if @opt_hash[:realign]
 109
 110         zero_ary_c(oligo_ary,  (NUC_ALPH_SIZE ** @opt_hash[:kmer]) * BYTES_IN_INT)
 111         zero_ary_c(shared_ary, @q_size                             * BYTES_IN_INT)
 112
 113         oligo_ary_size = str_to_oligo_ary_c(entry.seq, entry.len, oligo_ary, @opt_hash[:kmer], @opt_hash[:step])
 114
 115         count_shared_c(@q_ary, @q_begs_ary, @q_ends_ary, oligo_ary, oligo_ary_size, shared_ary)
 116
 117         result_count = calc_scores_c(@q_total_ary, shared_ary, @q_size, oligo_ary_size, result_ary, result_count, s_index, @opt_hash[:min_score])
 118
 119         if ((s_index + 1) % 1000) == 0 and @opt_hash[:verbose]
 120           $stderr.puts "Searched #{s_index + 1} sequences in #{Time.now - time} seconds (#{result_count} hits)."
 121         end
 122
 123         if result_ary.size / BYTES_IN_HIT - result_count < RESULT_ARY_BUFFER / 2
 124           result_ary << "\0" * RESULT_ARY_BUFFER * BYTES_IN_HIT
 125         end
 126       end
 127     end
 128
 129     @result       = result_ary[0 ... result_count * BYTES_IN_HIT]
 130     @result_count = result_count
 131   end
 132
 133   # Method that for each query index yields all hits, sorted according to
 134   # decending score, as Hit objects.
 135   def each
 136     sort_hits_c(@result, @result_count)
 137
 138     hit_ary = "\0" * HIT_ARY_BUFFER * BYTES_IN_HIT
 139
 140     hit_index = 0
 141
 142     (0 ... @q_size).each do |q_index|
 143       zero_ary_c(hit_ary, HIT_ARY_BUFFER * BYTES_IN_HIT)
 144       hit_ary_size = get_hits_c(@result, @result_count, hit_index, hit_ary, q_index)
 145
 146       if @opt_hash[:max_hits]
 147         max = (hit_ary_size > @opt_hash[:max_hits]) ? @opt_hash[:max_hits] : hit_ary_size
 148       else
 149         max = hit_ary_size
 150       end
 151
 152       best_score = 0
 153
 154       (0 ... max).each do |i|
 155         q_index, s_index, score = hit_ary[BYTES_IN_HIT * i ... BYTES_IN_HIT * i + BYTES_IN_HIT].unpack("IIF")
 156
 157         q_id = @opt_hash[:query_ids]   ? @q_ids[q_index] : q_index
 158         s_id = @opt_hash[:subject_ids] ? @s_ids[s_index] : s_index
 159
 160         if @opt_hash[:realign]
 161           new_score = Align.muscle([@q_entries[q_index], @s_entries[s_index]]).identity
 162           score     = new_score if new_score > score
 163         end
 164
 165         if @opt_hash[:max_diversity]
 166           best_score = score if i == 0
 167
 168           break if best_score - score >= (@opt_hash[:max_diversity] / 100)
 169         end
 170
 171         yield Hit.new(q_id, s_id, score)
 172       end
 173
 174       hit_index += hit_ary_size
 175     end
 176
 177     self
 178   end
 179
 180   private
 181
 182   # Method to create the query index for FindSim search. The index consists of
 183   # four lookup arrays which are stored as instance variables:
 184   # *  @q_total_ary holds per index the total of unique oligos for that
 185   #    particular query sequences.
 186   # *  @q_ary holds sequencially lists of query indexes so it is possible
 187   #    to lookup the list for a particular oligo and get all query indeces for
 188   #    query sequences containing this oligo.
 189   # *  @q_begs_ary holds per index the begin coordinate for the @q_ary list for
 190   #    a particular oligo.
 191   # *  @q_ends_ary holds per index the end coordinate for the @q_ary list for a
 192   #    particular oligo.
 193   def create_query_index(q_total, oligo_hash)
 194     @q_total_ary = q_total.pack("I*")
 195     @q_ary       = ""
 196
 197     beg        = 0
 198     oligo_begs = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
 199     oligo_ends = Array.new(NUC_ALPH_SIZE ** @opt_hash[:kmer], 0)
 200
 201     oligo_hash.each do |oligo, list|
 202       @q_ary << list.pack("I*")
 203       oligo_begs[oligo] = beg
 204       oligo_ends[oligo] = beg + list.size
 205
 206       beg += list.size
 207     end
 208
 209     @q_begs_ary = oligo_begs.pack("I*")
 210     @q_ends_ary = oligo_ends.pack("I*")
 211   end
 212
 213   # >>>>>>>>>>>>>>> RubyInline C code <<<<<<<<<<<<<<<
 214
 215   inline do |builder|
 216     # Bitmap lookup table where the index is ASCII values
 217     # and the following nucleotides are encoded as:
 218     # T or t = 1
 219     # U or u = 1
 220     # C or c = 2
 221     # G or g = 3
 222     builder.prefix %{
 223       unsigned char oligo_map[256] = {
 224         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 225         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 226         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 227         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 228         0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
 229         0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 230         0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0,
 231         0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 232         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 233         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 234         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 235         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 236         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 237         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 238         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 239         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 240       };
 241     }
 242
 243     # Defining a hit struct to hold hits consisting of query
 244     # sequence index, subject sequence index and score.
 245     builder.prefix %{
 246       typedef struct
 247       {
 248         unsigned int q_index;
 249         unsigned int s_index;
 250         float          score;
 251       } hit;
 252     }
 253
 254     # Qsort unsigned int comparison function.
 255     # Returns negative if b > a and positive if a > b.
 256     builder.prefix %{
 257       int uint_cmp(const void *a, const void *b)
 258       {
 259         const unsigned int *ia = (const unsigned int *) a;
 260         const unsigned int *ib = (const unsigned int *) b;
 261
 262         return *ia - *ib;
 263       }
 264     }
 265
 266     # Qsort hit struct comparision function of q_index.
 267     # Returns negative if b > a and positive if a > b.
 268     builder.prefix %{
 269       int hit_cmp_by_q_index(const void *a, const void *b)
 270       {
 271         hit *ia = (hit *) a;
 272         hit *ib = (hit *) b;
 273
 274         return (int) (ia->q_index - ib->q_index);
 275       }
 276     }
 277
 278     # Qsort hit struct comparision function of score.
 279     # Returns negative if b < a and positive if a < b.
 280     # We multiply with 1000 to maintain 2 decimal floats.
 281     builder.prefix %{
 282       int hit_cmp_by_score(const void *a, const void *b)
 283       {
 284         hit *ia = (hit *) a;
 285         hit *ib = (hit *) b;
 286
 287         return (int) (1000 * ib->score - 1000 * ia->score);
 288       }
 289     }
 290
 291     # Given a sorted unsigned integer removes all duplicate values
 292     # and move the unique values to the front of the array. Returns
 293     # the new size of the array.
 294     builder.prefix %{
 295       unsigned int uniq_ary(unsigned int *ary, unsigned int ary_size)
 296       {
 297         unsigned int new_ary_size = 0;
 298         unsigned int i            = 0;
 299         unsigned int j            = 0;
 300
 301         for (i = 1; i < ary_size; i++)
 302         {
 303           if (ary[i] != ary[j])
 304           {
 305             j++;
 306             ary[j] = ary[i]; // Move it to the front
 307           }
 308         }
 309
 310         new_ary_size = j + 1;
 311
 312         return new_ary_size;
 313       }
 314     }
 315
 316     # Method that given a byte array and its size in bytes
 317     # sets all bytes to 0.
 318     builder.c %{
 319       void zero_ary_c(
 320         VALUE _ary,       // Byte array to zero.
 321         VALUE _ary_size   // Size of array.
 322       )
 323       {
 324         char         *ary      = (char *) StringValuePtr(_ary);
 325         unsigned int  ary_size = FIX2UINT(_ary_size);
 326
 327         bzero(ary, ary_size);
 328       }
 329     }
 330
 331     # Method to sort array of hits according to q_index.
 332     builder.c %{
 333       void sort_hits_c(
 334         VALUE _ary,       // Array of hits.
 335         VALUE _ary_size   // Size of array.
 336       )
 337       {
 338         hit          *ary      = (hit *) StringValuePtr(_ary);
 339         unsigned int  ary_size = FIX2UINT(_ary_size);
 340
 341         qsort(ary, ary_size, sizeof(hit), hit_cmp_by_q_index);
 342       }
 343     }
 344
 345     # Method that given a string (char array) encodes all kmers overlapping
 346     # with a given step size as integers that are saved in an unsigned integer
 347     # array. Then the array is sorted and uniq'ed and the number of unique
 348     # elements is returned.
 349     builder.c %{
 350       VALUE str_to_oligo_ary_c(
 351         VALUE _str,        // DNA or RNA string.
 352         VALUE _str_size,   // String length.
 353         VALUE _ary,        // Array to store result in.
 354         VALUE _kmer,       // Size of kmers/oligos.
 355         VALUE _step        // Step size for overlapping kmers.
 356       )
 357       {
 358         char *str               = StringValuePtr(_str);
 359         unsigned int   str_size = FIX2UINT(_str_size);
 360         unsigned int  *ary      = (unsigned int *) StringValuePtr(_ary);
 361         unsigned int   kmer     = FIX2UINT(_kmer);
 362         unsigned int   step     = FIX2UINT(_step);
 363
 364         unsigned int ary_size = 0;
 365         unsigned int mask     = (1 << (2 * kmer)) - 1;
 366         unsigned int bin      = 0;
 367         unsigned int i        = 0;
 368
 369         for (i = 0; i < kmer; i++)
 370         {
 371           bin <<= 2;
 372           bin |= oligo_map[str[i]];
 373         }
 374
 375         ary[ary_size++] = bin;
 376
 377         for (i = kmer; i < str_size; i++)
 378         {
 379           bin <<= 2;
 380           bin |= oligo_map[str[i]];
 381
 382           if ((i % step) == 0) {
 383             ary[ary_size++] = (bin & mask);
 384           }
 385         }
 386
 387         qsort(ary, ary_size, sizeof(unsigned int), uint_cmp);
 388         ary_size = uniq_ary(ary, ary_size);
 389
 390         return UINT2NUM(ary_size);
 391       }
 392     }
 393
 394     # Method that given a string (char array) encodes all kmers overlapping
 395     # with a given step size as integers that are pushed onto a Ruby array
 396     # which is returned.
 397     # TODO should have an option for skipping oligos with ambiguity codes.
 398     builder.c %{
 399       VALUE str_to_oligo_rb_ary_c(
 400         VALUE _str,    // DNA or RNA string.
 401         VALUE _kmer,   // Size of kmer or oligo.
 402         VALUE _step    // Step size for overlapping kmers.
 403       )
 404       {
 405         char         *str   = StringValuePtr(_str);
 406         unsigned int  kmer  = FIX2UINT(_kmer);
 407         unsigned int  step  = FIX2UINT(_step);
 408
 409         VALUE         array = rb_ary_new();
 410         long          len   = strlen(str);
 411         unsigned int  mask  = (1 << (2 * kmer)) - 1;
 412         unsigned int  bin   = 0;
 413         unsigned int  i     = 0;
 414
 415         for (i = 0; i < kmer; i++)
 416         {
 417           bin <<= 2;
 418           bin |= oligo_map[str[i]];
 419         }
 420
 421         rb_ary_push(array, INT2FIX(bin));
 422
 423         for (i = kmer; i < len; i++)
 424         {
 425           bin <<= 2;
 426           bin |= oligo_map[str[i]];
 427
 428           if ((i % step) == 0) {
 429             rb_ary_push(array, INT2FIX((bin & mask)));
 430           }
 431         }
 432
 433         return array;
 434       }
 435     }
 436
 437     # Method that counts all shared oligos/kmers between a subject sequence and
 438     # all query sequences. For each oligo in the subject sequence (s_ary) the
 439     # index of all query sequences containing this oligo is found for the q_ary
 440     # where this information is stored sequentially in intervals. For the
 441     # particula oligo the interval is looked up in the q_beg and q_end arrays.
 442     # Shared oligos are recorded in the shared_ary.
 443     builder.c %{
 444       void count_shared_c(
 445         VALUE _q_ary,        // Lookup array with q_index lists.
 446         VALUE _q_begs_ary,   // Lookup array with interval begins of q_index lists.
 447         VALUE _q_ends_ary,   // Lookup array with interval ends of q_index lists.
 448         VALUE _s_ary,        // Subject sequence oligo array.
 449         VALUE _s_ary_size,   // Subject sequence oligo array size.
 450         VALUE _shared_ary    // Array with shared oligo counts.
 451       )
 452       {
 453         unsigned int *q_ary      = (unsigned int *) StringValuePtr(_q_ary);
 454         unsigned int *q_begs_ary = (unsigned int *) StringValuePtr(_q_begs_ary);
 455         unsigned int *q_ends_ary = (unsigned int *) StringValuePtr(_q_ends_ary);
 456         unsigned int *s_ary      = (unsigned int *) StringValuePtr(_s_ary);
 457         unsigned int  s_ary_size = FIX2UINT(_s_ary_size);
 458         unsigned int *shared_ary = (unsigned int *) StringValuePtr(_shared_ary);
 459
 460         unsigned int oligo = 0;
 461         unsigned int s     = 0;
 462         unsigned int q     = 0;
 463         unsigned int q_beg = 0;
 464         unsigned int q_end = 0;
 465
 466         for (s = 0; s < s_ary_size; s++)  // each oligo do
 467         {
 468           oligo = s_ary[s];
 469
 470           q_beg = q_begs_ary[oligo];
 471           q_end = q_ends_ary[oligo];
 472
 473           for (q = q_beg; q < q_end; q++)
 474           {
 475             shared_ary[q_ary[q]]++;
 476           }
 477         }
 478       }
 479     }
 480
 481     # Method to calculate the score for all query vs subject sequence comparisons.
 482     # The score is calculated as the number of unique shared oligos divided by the
 483     # smallest number of unique oligos in either the subject or query sequence.
 484     # Only scores greater than or equal to a given minimum is stored in the hit
 485     # array and the size of the hit array is returned.
 486     builder.c %{
 487       VALUE calc_scores_c(
 488         VALUE _q_total_ary,    // Lookup array with total oligo counts.
 489         VALUE _shared_ary,     // Lookup arary with shared oligo counts.
 490         VALUE _q_size,         // Number of query sequences.
 491         VALUE _s_count,        // Number of unique oligos in subject sequence.
 492         VALUE _hit_ary,        // Result array.
 493         VALUE _hit_ary_size,   // Result array size.
 494         VALUE _s_index,        // Subject sequence index.
 495         VALUE _min_score       // Minimum score to include.
 496       )
 497       {
 498         unsigned int *q_total_ary  = (unsigned int *) StringValuePtr(_q_total_ary);
 499         unsigned int *shared_ary   = (unsigned int *) StringValuePtr(_shared_ary);
 500         unsigned int  q_size       = FIX2UINT(_q_size);
 501         unsigned int  s_count      = FIX2UINT(_s_count);
 502         hit          *hit_ary      = (hit *) StringValuePtr(_hit_ary);
 503         unsigned int  hit_ary_size = FIX2UINT(_hit_ary_size);
 504         unsigned int  s_index      = FIX2UINT(_s_index);
 505         float         min_score    = (float) NUM2DBL(_min_score);
 506
 507         unsigned int  q_index      = 0;
 508         unsigned int  q_count      = 0;
 509         unsigned int  shared_count = 0;
 510         unsigned int  total_count  = 0;
 511         float         score        = 0;
 512         hit           new_score    = {0, 0, 0};
 513
 514         for (q_index = 0; q_index < q_size; q_index++)
 515         {
 516           shared_count = shared_ary[q_index];
 517           q_count      = q_total_ary[q_index];
 518           total_count  = (s_count < q_count) ? s_count : q_count;
 519
 520           score = shared_count / (float) total_count;
 521
 522           if (score >= min_score)
 523           {
 524             new_score.q_index = q_index;
 525             new_score.s_index = s_index;
 526             new_score.score   = score;
 527
 528             hit_ary[hit_ary_size] = new_score;
 529
 530             hit_ary_size++;
 531           }
 532         }
 533
 534         return UINT2NUM(hit_ary_size);
 535       }
 536     }
 537
 538     # Method that fetches all hits matching a given q_index value
 539     # from the result_ary. Since the result_ary is sorted according
 540     # to q_index we start at a given hit_index and fetch until
 541     # the next hit with a different q_index. The fetched hits are
 542     # stored in an array which is sorted according to decreasing
 543     # score. The size of this array is returned.
 544     builder.c %{
 545       VALUE get_hits_c(
 546         VALUE _result_ary,        // Result array
 547         VALUE _result_ary_size,   // Result array size
 548         VALUE _hit_index,         // Offset position in hit_ary to search from.
 549         VALUE _hit_ary,           // Array to store hits in.
 550         VALUE _q_index)           // q_index to fetch.
 551       {
 552         hit          *result_ary      = (hit *) StringValuePtr(_result_ary);
 553         unsigned int  result_ary_size = FIX2UINT(_result_ary_size);
 554         unsigned int  hit_index       = FIX2UINT(_hit_index);
 555         hit          *hit_ary         = (hit *) StringValuePtr(_hit_ary);
 556         unsigned int  q_index         = FIX2UINT(_q_index);
 557         hit           new_hit         = {0, 0, 0};
 558         unsigned int  hit_ary_size    = 0;
 559
 560         while ((hit_index + hit_ary_size) < result_ary_size)
 561         {
 562           new_hit = result_ary[hit_index + hit_ary_size];
 563
 564           if (new_hit.q_index == q_index)
 565           {
 566             hit_ary[hit_ary_size] = new_hit;
 567
 568             hit_ary_size++;
 569           }
 570           else
 571           {
 572             break;
 573           }
 574         }
 575
 576         qsort(hit_ary, hit_ary_size, sizeof(hit), hit_cmp_by_score);
 577
 578         return UINT2NUM(hit_ary_size);
 579       }
 580     }
 581   end
 582
 583   # >>>>>>>>>>>>>>> Embedded classes <<<<<<<<<<<<<<<
 584
 585   # Class for holding score information.
 586   class Hit
 587     attr_reader :q_id, :s_id, :score
 588
 589     # Method to initialize Hit object with
 590     # query and subject id a score.
 591     def initialize(q_id, s_id, score)
 592       @q_id  = q_id
 593       @s_id  = s_id
 594       @score = score
 595     end
 596
 597     # Method for outputting score objects.
 598     def to_s
 599       "#{@q_id}:#{@s_id}:#{@score.round(2)}"
 600     end
 601
 602     # Method to convert to a Biopiece record.
 603     def to_bp
 604       hash = {}
 605       hash[:REC_TYPE] = "findsim"
 606       hash[:Q_ID]     = @q_id
 607       hash[:S_ID]     = @s_id
 608       hash[:SCORE]    = @score.round(2)
 609
 610       hash
 611     end
 612   end
 613 end
 614
 615 __END__