def initialize(entry1, entry2, options)
@entry1 = entry1
@entry2 = entry2
+ @overlap = 0
+ @offset1 = 0
+ @offset2 = 0
@options = options
@options[:mismatches_max] ||= 0
@options[:overlap_min] ||= 1
# Method to locate overlapping matches between two sequences.
def match
if @options[:overlap_max]
- overlap = [@options[:overlap_max], @entry1.length, @entry2.length].min
+ @overlap = [@options[:overlap_max], @entry1.length, @entry2.length].min
else
- overlap = [@entry1.length, @entry2.length].min
+ @overlap = [@entry1.length, @entry2.length].min
end
- while overlap >= @options[:overlap_min]
- mismatches_max = (overlap * @options[:mismatches_max] * 0.01).round
-
- offset1 = @entry2.length - overlap
- offset2 = 0
+ diff = @entry1.length - @entry2.length
+ diff = 0 if diff < 0
- if match_C(@entry1.seq, @entry2.seq, offset1, offset2, overlap, mismatches_max)
- entry_left = @entry1[0 ... @entry1.length - overlap]
- entry_right = @entry2[overlap .. -1]
+ @offset1 = @entry1.length - @overlap - diff
- if @entry1.qual and @entry2.qual
- entry_overlap1 = @entry1[-1 * overlap .. -1]
- entry_overlap2 = @entry2[0 ... overlap]
+ while @overlap >= @options[:overlap_min]
+ mismatches_max = (@overlap * @options[:mismatches_max] * 0.01).round
+
+ # puts "diff: #{diff} offset1: #{@offset1} offset2: #{@offset2} overlap: #{@overlap}"
- entry_overlap = merge_overlap(entry_overlap1, entry_overlap2)
- else
- entry_overlap = @entry1[-1 * overlap .. -1]
- end
-
- entry_left.seq.downcase!
- entry_overlap.seq.upcase!
- entry_right.seq.downcase!
+ if mismatches = match_C(@entry1.seq, @entry2.seq, @offset1, @offset2, @overlap, mismatches_max) and mismatches >= 0
entry_merged = entry_left + entry_overlap + entry_right
- entry_merged.seq_name = @entry1.seq_name + ":overlap=#{overlap}"
+ entry_merged.seq_name = @entry1.seq_name + ":overlap=#{@overlap}:hamming=#{mismatches}"
return entry_merged
end
- overlap -= 1
+ if diff > 0
+ diff -= 1
+ else
+ @overlap -= 1
+ end
+
+ @offset1 += 1
+ end
+ end
+
+ # Method to extract and downcase the left part of an assembled pair.
+ def entry_left
+ entry = @entry1[0 ... @offset1]
+ entry.seq.downcase!
+ entry
+ end
+
+ # Method to extract and downcase the right part of an assembled pair.
+ def entry_right
+ if @entry1.length > @offset1 + @overlap
+ entry = @entry1[@offset1 + @overlap .. -1]
+ else
+ entry = @entry2[@offset2 + @overlap .. -1]
+ end
+
+ entry.seq.downcase!
+ entry
+ end
+
+ # Method to extract and upcase the overlapping part of an assembled pair.
+ def entry_overlap
+ if @entry1.qual and @entry2.qual
+ entry_overlap1 = @entry1[@offset1 ... @offset1 + @overlap]
+ entry_overlap2 = @entry2[@offset2 ... @offset2 + @overlap]
+
+ entry = merge_overlap(entry_overlap1, entry_overlap2)
+ else
+ entry = @entry1[@offset1 ... @offset1 + @overlap]
end
+
+ entry.seq.upcase!
+ entry
end
# Method to merge sequence and quality scores in an overlap.
# C method for determining if two strings of equal length match
# given a maximum allowed mismatches and allowing for IUPAC
- # ambiguity codes. Returns true if match, else false.
+ # ambiguity codes. Returns number of mismatches is true if match, else false.
builder.c %{
VALUE match_C(
VALUE _string1, // String 1
match++;
if (match >= max_match) {
- return Qtrue;
+ return UINT2NUM(mismatch);
}
}
else
mismatch++;
if (mismatch > max_mismatch) {
- return Qfalse;
+ return INT2NUM(-1);
}
}
}
- return Qfalse;
+ return INT2NUM(-1);
}
}
end