# Method that locates an adaptor or part thereof in the sequence
# of a Seq object beginning from the right. Returns the location
# in the sequence that overlaps with the adaptor or -1 if the
- # adaptor was not found.
- def adaptor_locate_right(adaptor)
+ # adaptor was not found. The hd_percent is used to calculate the
+ # maximum hamming distance allowed for all possible overlaps.
+ def adaptor_locate_right(adaptor, hd_percent = 0)
+ raise SeqError, "Hamming distance percent out of range #{hd_percent}" unless (0 .. 100).include? hd_percent
pos = self.length - adaptor.length
while pos < self.length
- len = self.length - pos
- subseq = self.seq[pos ... pos + len].upcase
- subadaptor = adaptor[0 ... len].upcase
+ len = self.length - pos
+ subseq = self.seq[pos ... pos + len].upcase
+ subadaptor = adaptor[0 ... len].upcase
+ hamming_max = (len * hd_percent * 0.01).round
+ hamming_dist = String.hamming_dist(subseq, subadaptor)
+ return pos if hamming_dist <= hamming_max
- return pos if subseq == subadaptor
pos += 1
end
# Method that locates an adaptor or part thereof in the sequence
# of a Seq object beginning from the left. Returns the location
# in the sequence that overlaps with the adaptor or -1 if the
- # adaptor was not found.
- def adaptor_locate_left(adaptor)
+ # adaptor was not found. The hd_percent is used to calculate the
+ # maximum hamming distance allowed for all possible overlaps.
+ def adaptor_locate_left(adaptor, hd_percent = 0)
+ raise SeqError, "Hamming distance percent out of range #{hd_percent}" unless (0 .. 100).include? hd_percent
pos = adaptor.length
while pos > 0
- len = pos
- subseq = self.seq[0 ... len].upcase
- subadaptor = adaptor[adaptor.length - len ... adaptor.length].upcase
+ len = pos
+ subseq = self.seq[0 ... len].upcase
+ subadaptor = adaptor[adaptor.length - len ... adaptor.length].upcase
+ hamming_max = (len * hd_percent * 0.01).round
+ hamming_dist = String.hamming_dist(subseq, subadaptor)
pos -= 1
- return pos if subseq == subadaptor
+ return pos if hamming_dist <= hamming_max
end
-1
# Method that locates an adaptor or part thereof in the sequence
# of a Seq object beginning from the right and removes the adaptor
- # sequence if found.
- def adaptor_clip_right(adaptor)
- pos = self.adaptor_locate_right(adaptor)
+ # sequence if found. The hd_percent is used to calculate the
+ # maximum hamming distance allowed for all possible overlaps.
+ def adaptor_clip_right(adaptor, hd_percent = 0)
+ pos = self.adaptor_locate_right(adaptor, hd_percent)
if pos > 0
self.seq = self.seq[0 ... pos]
# Method that locates an adaptor or part thereof in the sequence
# of a Seq object beginning from the left and removes the adaptor
- # sequence if found.
- def adaptor_clip_left(adaptor)
- pos = self.adaptor_locate_left(adaptor)
+ # sequence if found. The hd_percent is used to calculate the
+ # maximum hamming distance allowed for all possible overlaps.
+ def adaptor_clip_left(adaptor, hd_percent = 0)
+ pos = self.adaptor_locate_left(adaptor, hd_percent)
if pos > 0
self.seq = self.seq[pos + 1 ... self.length]
assert_equal(25.00, @entry.soft_mask)
end
+ def test_Seq_adaptor_locate_right_with_bad_hamming_dist_raises
+ @entry.seq = "ATCG"
+ assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", -1) }
+ assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", 101) }
+ end
+
+ def test_Seq_adaptor_locate_right_with_ok_hamming_dist_dont_raise
+ @entry.seq = "ATCG"
+ assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 0) }
+ assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 50.5) }
+ assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 100) }
+ end
+
def test_Seq_adaptor_locate_right_returns_correctly
@entry.seq = "nnnnncgat"
assert_equal(-1, @entry.adaptor_locate_right("X"))
assert_equal(0, @entry.adaptor_locate_right("NNNNNCGAT"))
end
+ def test_Seq_adaptor_locate_right_with_hd_returns_correctly
+ @entry.seq = "nnnnncgat"
+ assert_equal(5, @entry.adaptor_locate_right("XGAT", 25))
+ assert_equal(5, @entry.adaptor_locate_right("XXAT", 50))
+ end
+
+ def test_Seq_adaptor_locate_left_with_bad_hamming_dist_raises
+ @entry.seq = "ATCG"
+ assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", -1) }
+ assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", 101) }
+ end
+
+ def test_Seq_adaptor_locate_left_with_ok_hamming_dist_dont_raise
+ @entry.seq = "ATCG"
+ assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 0) }
+ assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 50.5) }
+ assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 100) }
+ end
+
def test_Seq_adaptor_locate_left_returns_correctly
@entry.seq = "cgatnnnnn"
assert_equal(-1, @entry.adaptor_locate_left("X"))
assert_equal(8, @entry.adaptor_locate_left("CGATNNNNN"))
end
+ def test_Seq_adaptor_locate_left_with_hd_returns_correctly
+ @entry.seq = "cgatnnnnn"
+ assert_equal(3, @entry.adaptor_locate_left("XGAT", 25))
+ assert_equal(3, @entry.adaptor_locate_left("XXAT", 50))
+ end
+
def test_Seq_adaptor_clip_right_returns_correct_sequence
@entry.seq = "nnnnncgat"
@entry.adaptor_clip_right("cgat")
assert_equal( "nnnnn", @entry.seq)
end
+ def test_Seq_adaptor_clip_right_with_hd_returns_correct_sequence
+ @entry.seq = "nnnnncgat"
+ @entry.adaptor_clip_right("xgat", 25)
+ assert_equal( "nnnnn", @entry.seq)
+ end
+
def test_Seq_adaptor_clip_right_returns_correct_qual
@entry.seq = "nnnnncgat"
@entry.qual = "abcdefghi"
assert_equal( "abcde", @entry.qual)
end
+ def test_Seq_adaptor_clip_right_with_hd_returns_correct_qual
+ @entry.seq = "nnnnncgat"
+ @entry.qual = "abcdefghi"
+ @entry.adaptor_clip_right("xgat", 25)
+ assert_equal( "abcde", @entry.qual)
+ end
+
def test_Seq_adaptor_clip_left_returns_correct_sequence
@entry.seq = "cgatnnnnn"
@entry.adaptor_clip_left("cgat")
assert_equal( "nnnnn", @entry.seq)
end
+ def test_Seq_adaptor_clip_left_with_hd_returns_correct_sequence
+ @entry.seq = "cgatnnnnn"
+ @entry.adaptor_clip_left("cgax", 25)
+ assert_equal( "nnnnn", @entry.seq)
+ end
+
def test_Seq_adaptor_clip_left_returns_correct_qual
@entry.seq = "cgatnnnnn"
@entry.qual = "abcdefghi"
assert_equal( "efghi", @entry.qual)
end
+ def test_Seq_adaptor_clip_left_returns_correct_qual
+ @entry.seq = "cgatnnnnn"
+ @entry.qual = "abcdefghi"
+ @entry.adaptor_clip_left("cgax", 25)
+ assert_equal( "efghi", @entry.qual)
+ end
+
def test_Digest_new_raises_on_bad_pattern_residue
assert_raise(DigestError) { Digest.new(@entry, "X", 4) }
end