# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+# Class containing methods to assemble two overlapping sequences into a single sequence.
+ # Class method to assemble two Seq objects.
# Class-level convenience entry point: builds an assembler for the two
# entries with the given options and returns whatever its +match+ call yields.
def self.pair(entry1, entry2, options = {})
  new(entry1, entry2, options).match
end
# Class-level convenience entry point: builds an assembler for the two
# entries with the given options and returns whatever its +match+ call yields.
def self.pair(entry1, entry2, options = {})
  new(entry1, entry2, options).match
end
+ # Method to initialize an Assembly object.
def initialize(entry1, entry2, options)
# Keep references to the two entries that are to be assembled.
@entry1 = entry1
@entry2 = entry2
# NOTE(review): the signature and the two assignments repeat below, and no
# assignment to @options is visible in this view - TODO confirm against the
# full file before relying on this method as shown.
def initialize(entry1, entry2, options)
@entry1 = entry1
@entry2 = entry2
# Cap :overlap_max so that it never exceeds the length of either entry.
@options[:overlap_max] = [@options[:overlap_max], entry1.length, entry2.length].min
end
@options[:overlap_max] = [@options[:overlap_max], entry1.length, entry2.length].min
end
+ # Method to locate overlapping matches between two sequences.
def match
overlap = @options[:overlap_max]
def match
overlap = @options[:overlap_max]
while overlap >= @options[:overlap_min]
hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true
while overlap >= @options[:overlap_min]
hamming_dist = (na_seq1[-1 * overlap .. -1] ^ na_seq2[0 ... overlap]).count_true
- if hamming_dist <= percent2real(overlap, @options[:mismatches_max])
+ if hamming_dist <= (overlap * @options[:mismatches_max] * 0.01).round
entry_left = @entry1[0 ... @entry1.length - overlap]
entry_right = @entry2[overlap .. -1]
entry_left = @entry1[0 ... @entry1.length - overlap]
entry_right = @entry2[overlap .. -1]
- def percent2real(length, percent)
- (length * percent * 0.01).round
- end
-
+ # Method to merge sequence and quality scores in an overlap.
+ # The residue with the highest score at mismatch positions is selected.
+ # The quality scores of the overlap are the mean of the two sequences.
def merge_overlap(entry_overlap1, entry_overlap2)
na_seq = NArray.byte(entry_overlap1.length, 2)
na_seq[true, 0] = NArray.to_na(entry_overlap1.seq.downcase, "byte")
def merge_overlap(entry_overlap1, entry_overlap2)
na_seq = NArray.byte(entry_overlap1.length, 2)
na_seq[true, 0] = NArray.to_na(entry_overlap1.seq.downcase, "byte")