1 # Copyright (C) 2007-2013 Martin A. Hansen.
3 # This program is free software; you can redistribute it and/or
4 # modify it under the terms of the GNU General Public License
5 # as published by the Free Software Foundation; either version 2
6 # of the License, or (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU General Public License for more details.
13 # You should have received a copy of the GNU General Public License
14 # along with this program; if not, write to the Free Software
15 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # http://www.gnu.org/copyleft/gpl.html
19 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21 # This software is part of the Biopieces framework (www.biopieces.org).
23 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
26 def self.pair(entry1, entry2, options = {})
27 assemble = self.new(entry1, entry2, options)
31 def initialize(entry1, entry2, options)
35 @options[:mismatches_max] ||= 0
36 @options[:overlap_min] ||= 1
37 @options[:overlap_max] ||= entry1.length
38 @options[:overlap_max] = [@options[:overlap_max], entry1.length, entry2.length].min
42 overlap = @options[:overlap_max]
44 na_seq1 = NArray.to_na(@entry1.seq.downcase, "byte")
45 na_seq2 = NArray.to_na(@entry2.seq.downcase, "byte")
47 while overlap >= @options[:overlap_min]
48 hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true
50 if hamming_dist <= percent2real(overlap, @options[:mismatches_max])
51 entry_left = @entry1[0 ... @entry1.length - overlap]
52 entry_right = @entry2[overlap .. -1]
54 if @entry1.qual and @entry2.qual
55 entry_overlap1 = @entry1[-1 * overlap .. -1]
56 entry_overlap2 = @entry2[0 ... overlap]
58 entry_overlap = merge_overlap(entry_overlap1, entry_overlap2)
60 entry_overlap = @entry1[-1 * overlap .. -1]
63 entry_merged = entry_left + entry_overlap + entry_right
64 entry_merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}:hamming=#{hamming_dist}"
73 def percent2real(length, percent)
74 (length * percent * 0.01).round
77 def merge_overlap(entry_overlap1, entry_overlap2)
78 na_seq = NArray.byte(entry_overlap1.length, 2)
79 na_seq[true, 0] = NArray.to_na(entry_overlap1.seq.downcase, "byte")
80 na_seq[true, 1] = NArray.to_na(entry_overlap2.seq.downcase, "byte")
82 na_qual = NArray.byte(entry_overlap1.length, 2)
83 na_qual[true, 0] = NArray.to_na(entry_overlap1.qual, "byte")
84 na_qual[true, 1] = NArray.to_na(entry_overlap2.qual, "byte")
86 mask_xor = na_seq[true, 0] ^ na_seq[true, 1] > 0
87 mask_seq = ((na_qual * mask_xor).eq( (na_qual * mask_xor).max(1)))
90 merged.seq = (na_seq * mask_seq).max(1).to_s
91 merged.qual = na_qual.mean(1).round.to_type("byte").to_s