+# Copyright (C) 2007-2013 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This software is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+class Assemble
+ def self.pair(entry1, entry2, options = {})
+ assemble = self.new(entry1, entry2, options)
+ assemble.match
+ end
+
+ def initialize(entry1, entry2, options)
+ @entry1 = entry1
+ @entry2 = entry2
+ @options = options
+ @options[:mismatches_max] ||= 0
+ @options[:overlap_min] ||= 1
+ @options[:overlap_max] ||= entry1.length
+ @options[:overlap_max] = [@options[:overlap_max], entry1.length, entry2.length].min
+ end
+
+ def match
+ overlap = @options[:overlap_max]
+
+ na_seq1 = NArray.to_na(@entry1.seq.downcase, "byte")
+ na_seq2 = NArray.to_na(@entry2.seq.downcase, "byte")
+
+ while overlap >= @options[:overlap_min]
+ hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true
+
+ if hamming_dist <= percent2real(overlap, @options[:mismatches_max])
+ merged = @entry1 + @entry2[overlap .. -1]
+
+ if @entry1.qual and @entry2.qual
+ qual1 = @entry1.qual[@entry1.length - overlap .. -1]
+ qual2 = @entry2.qual[0 ... overlap]
+
+ na_qual1 = NArray.to_na(qual1, "byte")
+ na_qual2 = NArray.to_na(qual2, "byte")
+
+ qual = ((na_qual1 + na_qual2) / 2).to_s
+
+ merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}:hamming=#{hamming_dist}"
+ merged.qual[@entry1.length - overlap ... @entry1.length] = qual
+ end
+
+ return merged
+ end
+
+ overlap -= 1
+ end
+ end
+
+ def percent2real(length, percent)
+ (length * percent * 0.01).round
+ end
+end
+
+
+__END__
+