((self.seq.scan(/[a-z]/).size.to_f / (self.len - self.indels).to_f) * 100).round(2)
end
- # Method that locates an adaptor or part thereof in the sequence
- # of a Seq object beginning from the right. Returns the location
- # in the sequence that overlaps with the adaptor or nil if the
- # adaptor was not found. The mis_percent, ins_percent, and
- # del_percent indicate the maximum number of mismatches, insertions,
- # and deletions allowed in all possible overlaps.
- def adaptor_locate_right(adaptor, mis_percent = 0, ins_percent = 0, del_percent = 0)
+ # Method that finds an adaptor, or a leading part thereof, in the sequence of
+ # a Seq object. Returns a Match object if the adaptor was found, otherwise nil.
+ # The mis_percent, ins_percent, and del_percent arguments give the maximum
+ # mismatches, insertions, and deletions allowed, each as a percentage (0-100)
+ # of the length of the adaptor (or shortened adaptor) being matched.
+ def adaptor_find(adaptor, mis_percent = 0, ins_percent = 0, del_percent = 0)
raise SeqError, "Mismatch percent out of range #{mis_percent}" unless (0 .. 100).include? mis_percent
raise SeqError, "Insertion percent out of range #{ins_percent}" unless (0 .. 100).include? ins_percent
raise SeqError, "Deletion percent out of range #{del_percent}" unless (0 .. 100).include? del_percent
- pos = self.length - adaptor.length
+ pos = 0
- while pos < self.length
- len = self.length - pos
- subseq = self.seq[pos ... pos + len].upcase
- subadaptor = adaptor[0 ... len].upcase
- mis_max = (len * mis_percent * 0.01).round
- ins_max = (len * ins_percent * 0.01).round
- del_max = (len * del_percent * 0.01).round
+ while adaptor.length > 0
+ mis_max = (adaptor.length * mis_percent * 0.01).round
+ ins_max = (adaptor.length * ins_percent * 0.01).round
+ del_max = (adaptor.length * del_percent * 0.01).round
- matches = self.scan(adaptor, pos, mis_max, ins_max, del_max)
+ match = self.match(adaptor, pos, mis_max, ins_max, del_max)
- pp matches
+ return match unless match.nil?
- pos += 1
- end
- end
+ adaptor = adaptor[0 ... -1]
- # Method that locates an adaptor or part thereof in the sequence
- # of a Seq object beginning from the left. Returns the location
- # in the sequence that overlaps with the adaptor or -1 if the
- # adaptor was not found. The hd_percent is used to calculate the
- # maximum hamming distance allowed for all possible overlaps.
- def adaptor_locate_left(adaptor, hd_percent = 0)
- raise SeqError, "Hamming distance percent out of range #{hd_percent}" unless (0 .. 100).include? hd_percent
- pos = adaptor.length
-
- while pos > 0
- len = pos
- subseq = self.seq[0 ... len].upcase
- subadaptor = adaptor[adaptor.length - len ... adaptor.length].upcase
- hamming_max = (len * hd_percent * 0.01).round
-
- pos -= 1
-
- return pos if hamming_dist <= hamming_max
+ pos = self.len - adaptor.length
end
-
- -1
end
# Method that locates an adaptor or part thereof in the sequence
end
end
- # Method that locates an adaptor or part thereof in the sequence
- # of a Seq object beginning from the left and removes the adaptor
- # sequence if found. The hd_percent is used to calculate the
- # maximum hamming distance allowed for all possible overlaps.
- def adaptor_clip_left(adaptor, hd_percent = 0)
- pos = self.adaptor_locate_left(adaptor, hd_percent)
-
- if pos > 0
- self.seq = self.seq[pos + 1 ... self.length]
- self.qual = self.qual[pos + 1 ... self.qual.length] unless self.qual.nil?
- end
- end
-
# Method to convert the quality scores from a specified base
# to another base.
def convert_phred2illumina!
assert_equal(25.00, @entry.soft_mask)
end
-# def test_Seq_adaptor_locate_right_with_bad_hamming_dist_raises
-# flunk("adaptor location needs updating")
-# @entry.seq = "ATCG"
-# assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", -1) }
-# assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", 101) }
-# end
-#
-# def test_Seq_adaptor_locate_right_with_ok_hamming_dist_dont_raise
-# flunk("adaptor location needs updating")
-# @entry.seq = "ATCG"
-# assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 0) }
-# assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 50.5) }
-# assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 100) }
-# end
-#
-# def test_Seq_adaptor_locate_right_returns_correctly
-# flunk("adaptor location needs updating")
-# @entry.seq = "nnnnncgat"
-# assert_equal(-1, @entry.adaptor_locate_right("X"))
-# assert_equal(8, @entry.adaptor_locate_right("TX"))
-# assert_equal(7, @entry.adaptor_locate_right("ATX"))
-# assert_equal(6, @entry.adaptor_locate_right("GATX"))
-# assert_equal(5, @entry.adaptor_locate_right("CGATX"))
-# assert_equal(0, @entry.adaptor_locate_right("NNNNNCGAT"))
-# end
-#
-# def test_Seq_adaptor_locate_right_with_hd_returns_correctly
-# flunk("adaptor location needs updating")
-# @entry.seq = "nnnnncgat"
-# assert_equal(5, @entry.adaptor_locate_right("XGAT", 25))
-# assert_equal(5, @entry.adaptor_locate_right("XXAT", 50))
-# end
-#
-# def test_Seq_adaptor_locate_left_with_bad_hamming_dist_raises
-# flunk("adaptor location needs updating")
-# @entry.seq = "ATCG"
-# assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", -1) }
-# assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", 101) }
-# end
-#
-# def test_Seq_adaptor_locate_left_with_ok_hamming_dist_dont_raise
-# flunk("adaptor location needs updating")
-# @entry.seq = "ATCG"
-# assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 0) }
-# assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 50.5) }
-# assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 100) }
-# end
-#
-# def test_Seq_adaptor_locate_left_returns_correctly
-# flunk("adaptor location needs updating")
-# @entry.seq = "cgatnnnnn"
-# assert_equal(-1, @entry.adaptor_locate_left("X"))
-# assert_equal(0, @entry.adaptor_locate_left("XC"))
-# assert_equal(1, @entry.adaptor_locate_left("XCG"))
-# assert_equal(2, @entry.adaptor_locate_left("XCGA"))
-# assert_equal(3, @entry.adaptor_locate_left("XCGAT"))
-# assert_equal(8, @entry.adaptor_locate_left("CGATNNNNN"))
-# end
-#
-# def test_Seq_adaptor_locate_left_with_hd_returns_correctly
-# flunk("adaptor location needs updating")
-# @entry.seq = "cgatnnnnn"
-# assert_equal(3, @entry.adaptor_locate_left("XGAT", 25))
-# assert_equal(3, @entry.adaptor_locate_left("XXAT", 50))
-# end
-#
-# def test_Seq_adaptor_clip_right_returns_correct_sequence
-# flunk("adaptor location needs updating")
-# @entry.seq = "nnnnncgat"
-# @entry.adaptor_clip_right("cgat")
-# assert_equal( "nnnnn", @entry.seq)
-# end
-#
-# def test_Seq_adaptor_clip_right_with_hd_returns_correct_sequence
-# flunk("adaptor location needs updating")
-# @entry.seq = "nnnnncgat"
-# @entry.adaptor_clip_right("xgat", 25)
-# assert_equal( "nnnnn", @entry.seq)
-# end
-#
-# def test_Seq_adaptor_clip_right_returns_correct_qual
-# flunk("adaptor location needs updating")
-# @entry.seq = "nnnnncgat"
-# @entry.qual = "abcdefghi"
-# @entry.adaptor_clip_right("cgat")
-# assert_equal( "abcde", @entry.qual)
-# end
-#
-# def test_Seq_adaptor_clip_right_with_hd_returns_correct_qual
-# flunk("adaptor location needs updating")
-# @entry.seq = "nnnnncgat"
-# @entry.qual = "abcdefghi"
-# @entry.adaptor_clip_right("xgat", 25)
-# assert_equal( "abcde", @entry.qual)
-# end
-#
-# def test_Seq_adaptor_clip_left_returns_correct_sequence
-# flunk("adaptor location needs updating")
-# @entry.seq = "cgatnnnnn"
-# @entry.adaptor_clip_left("cgat")
-# assert_equal( "nnnnn", @entry.seq)
-# end
-#
-# def test_Seq_adaptor_clip_left_with_hd_returns_correct_sequence
-# flunk("adaptor location needs updating")
-# @entry.seq = "cgatnnnnn"
-# @entry.adaptor_clip_left("cgax", 25)
-# assert_equal( "nnnnn", @entry.seq)
-# end
-#
-# def test_Seq_adaptor_clip_left_returns_correct_qual
-# flunk("adaptor location needs updating")
-# @entry.seq = "cgatnnnnn"
-# @entry.qual = "abcdefghi"
-# @entry.adaptor_clip_left("cgat")
-# assert_equal( "efghi", @entry.qual)
-# end
-#
-# def test_Seq_adaptor_clip_left_with_len_returns_correct_qual
-# flunk("adaptor location needs updating")
-# @entry.seq = "cgatnnnnn"
-# @entry.qual = "abcdefghi"
-# @entry.adaptor_clip_left("cgax", 25)
-# assert_equal( "efghi", @entry.qual)
-# end
+ def test_Seq_adaptor_find_with_bad_mis_percent_raises
+ @entry.seq = "actagctagctacgtacg"
+ assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = -1) }
+ assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 101) }
+ end
+
+ def test_Seq_adaptor_find_with_ok_mis_percent_dont_raise
+ @entry.seq = "actagctagctacgtacg"
+ assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0) }
+ assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 100) }
+ end
+
+ def test_Seq_adaptor_find_with_bad_ins_percent_raises
+ @entry.seq = "actagctagctacgtacg"
+ assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = -1) }
+ assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 101) }
+ end
+
+ def test_Seq_adaptor_find_with_ok_ins_percent_dont_raise
+ @entry.seq = "actagctagctacgtacg"
+ assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0) }
+ assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 100) }
+ end
+
+ def test_Seq_adaptor_find_with_bad_del_percent_raises
+ @entry.seq = "actagctagctacgtacg"
+ assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = -1) }
+ assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 101) }
+ end
+
+ def test_Seq_adaptor_find_with_ok_del_percent_dont_raise
+ @entry.seq = "actagctagctacgtacg"
+ assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 0) }
+ assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 100) }
+ end
+
+ def test_Seq_adaptor_find_with_no_match_returns_nil
+ @entry.seq = "actaaggctagctacgtccg"
+ assert_nil(@entry.adaptor_find("TTTT"))
+ end
+
+ def test_Seq_adaptor_find_returns_correct_match
+ @entry.seq = "actaaggctagctacgtccg"
+ assert_equal(0, @entry.adaptor_find("actaa").pos)
+ assert_equal(7, @entry.adaptor_find("ctagc").pos)
+ assert_equal(15, @entry.adaptor_find("gtccg").pos)
+ assert_equal(17, @entry.adaptor_find("ccgTT").pos)
+ assert_equal(18, @entry.adaptor_find("cgTTT").pos)
+ assert_equal(19, @entry.adaptor_find("gTTTT").pos)
+ end
+
+ def test_Seq_adaptor_with_mis_percent_returns_correct_match
+ @entry.seq = "actaaggctagctacgtccg"
+ assert_equal(0, @entry.adaptor_find("GGGaag", mis_percent = 50).pos)
+ assert_equal(14, @entry.adaptor_find("cgtcTTTT", mis_percent = 50).pos)
+ end
+
+ def test_Seq_adaptor_with_ins_percent_returns_correct_match
+ @entry.seq = "actaaggctagctacgtccg"
+ assert_equal(0, @entry.adaptor_find("actGGGaag", mis_percent = 0, ins_percent = 50).pos)
+ assert_equal(15, @entry.adaptor_find("gtAccgTTTTT", mis_percent = 0, ins_percent = 10).pos)
+ end
+
+ def test_Seq_adaptor_with_del_percent_returns_correct_match
+ @entry.seq = "actaaggctagctacgtccg"
+ assert_equal(0, @entry.adaptor_find("actctag", mis_percent = 0, ins_percent = 0, del_percent = 50).pos)
+ end
end