From 3f150d1bb1a6b60e7fc8343d3723b01ab1e7efc8 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 29 Mar 2011 17:41:04 +0000 Subject: [PATCH] seq.rb update git-svn-id: http://biopieces.googlecode.com/svn/trunk@1310 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/Maasha/lib/seq.rb | 67 +++-------- code_ruby/Maasha/test/test_seq.rb | 192 +++++++++++------------------- 2 files changed, 81 insertions(+), 178 deletions(-) diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb index edae092..efba4ff 100644 --- a/code_ruby/Maasha/lib/seq.rb +++ b/code_ruby/Maasha/lib/seq.rb @@ -310,56 +310,30 @@ class Seq ((self.seq.scan(/[a-z]/).size.to_f / (self.len - self.indels).to_f) * 100).round(2) end - # Method that locates an adaptor or part thereof in the sequence - # of a Seq object beginning from the right. Returns the location - # in the sequence that overlaps with the adaptor or nil if the - # adaptor was not found. The mis_percent, ins_percent, and - # del_percent indicate the maximum number of mismatches, insertions, - # and deletions allowed in all possible overlaps. - def adaptor_locate_right(adaptor, mis_percent = 0, ins_percent = 0, del_percent = 0) + # Method that finds an adaptor or part thereof in the sequence of a Seq object. + # Returns a Match object if the adaptor was found otherwise nil. The mis_percent, + # ins_percent, and del_percent indicate the maximum number of mismatches, + # insertions, and deletions allowed in all possible overlaps. + def adaptor_find(adaptor, mis_percent = 0, ins_percent = 0, del_percent = 0) raise SeqError, "Mismatch percent out of range #{mis_percent}" unless (0 .. 100).include? mis_percent raise SeqError, "Insertion percent out of range #{ins_percent}" unless (0 .. 100).include? ins_percent raise SeqError, "Deletion percent out of range #{del_percent}" unless (0 .. 100).include? del_percent - pos = self.length - adaptor.length + pos = 0 - while pos < self.length - len = self.length - pos - subseq = self.seq[pos ... pos + len].upcase - subadaptor = adaptor[0 ... len].upcase - mis_max = (len * mis_percent * 0.01).round - ins_max = (len * ins_percent * 0.01).round - del_max = (len * del_percent * 0.01).round + while adaptor.length > 0 + mis_max = (adaptor.length * mis_percent * 0.01).round + ins_max = (adaptor.length * ins_percent * 0.01).round + del_max = (adaptor.length * del_percent * 0.01).round - matches = self.scan(adaptor, pos, mis_max, ins_max, del_max) + match = self.match(adaptor, pos, mis_max, ins_max, del_max) - pp matches + return match unless match.nil? - pos += 1 - end - end + adaptor = adaptor[0 ... -1] - # Method that locates an adaptor or part thereof in the sequence - # of a Seq object beginning from the left. Returns the location - # in the sequence that overlaps with the adaptor or -1 if the - # adaptor was not found. The hd_percent is used to calculate the - # maximum hamming distance allowed for all possible overlaps. - def adaptor_locate_left(adaptor, hd_percent = 0) - raise SeqError, "Hamming distance percent out of range #{hd_percent}" unless (0 .. 100).include? hd_percent - pos = adaptor.length - - while pos > 0 - len = pos - subseq = self.seq[0 ... len].upcase - subadaptor = adaptor[adaptor.length - len ... adaptor.length].upcase - hamming_max = (len * hd_percent * 0.01).round - - pos -= 1 - - return pos if hamming_dist <= hamming_max + pos = self.len - adaptor.length end - - -1 end # Method that locates an adaptor or part thereof in the sequence @@ -375,19 +349,6 @@ class Seq end end - # Method that locates an adaptor or part thereof in the sequence - # of a Seq object beginning from the left and removes the adaptor - # sequence if found. The hd_percent is used to calculate the - # maximum hamming distance allowed for all possible overlaps. - def adaptor_clip_left(adaptor, hd_percent = 0) - pos = self.adaptor_locate_left(adaptor, hd_percent) - - if pos > 0 - self.seq = self.seq[pos + 1 ... self.length] - self.qual = self.qual[pos + 1 ... self.qual.length] unless self.qual.nil? - end - end - # Method to convert the quality scores from a specified base # to another base. def convert_phred2illumina! diff --git a/code_ruby/Maasha/test/test_seq.rb b/code_ruby/Maasha/test/test_seq.rb index 62adb67..0520d06 100755 --- a/code_ruby/Maasha/test/test_seq.rb +++ b/code_ruby/Maasha/test/test_seq.rb @@ -332,131 +332,73 @@ class TestSeq < Test::Unit::TestCase assert_equal(25.00, @entry.soft_mask) end -# def test_Seq_adaptor_locate_right_with_bad_hamming_dist_raises -# flunk("adaptor location needs updating") -# @entry.seq = "ATCG" -# assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", -1) } -# assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", 101) } -# end -# -# def test_Seq_adaptor_locate_right_with_ok_hamming_dist_dont_raise -# flunk("adaptor location needs updating") -# @entry.seq = "ATCG" -# assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 0) } -# assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 50.5) } -# assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 100) } -# end -# -# def test_Seq_adaptor_locate_right_returns_correctly -# flunk("adaptor location needs updating") -# @entry.seq = "nnnnncgat" -# assert_equal(-1, @entry.adaptor_locate_right("X")) -# assert_equal(8, @entry.adaptor_locate_right("TX")) -# assert_equal(7, @entry.adaptor_locate_right("ATX")) -# assert_equal(6, @entry.adaptor_locate_right("GATX")) -# assert_equal(5, @entry.adaptor_locate_right("CGATX")) -# assert_equal(0, @entry.adaptor_locate_right("NNNNNCGAT")) -# end -# -# def test_Seq_adaptor_locate_right_with_hd_returns_correctly -# flunk("adaptor location needs updating") -# @entry.seq = "nnnnncgat" -# assert_equal(5, @entry.adaptor_locate_right("XGAT", 25)) -# assert_equal(5, @entry.adaptor_locate_right("XXAT", 50)) -# end -# -# def test_Seq_adaptor_locate_left_with_bad_hamming_dist_raises -# flunk("adaptor location needs updating") -# @entry.seq = "ATCG" -# assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", -1) } -# assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", 101) } -# end -# -# def test_Seq_adaptor_locate_left_with_ok_hamming_dist_dont_raise -# flunk("adaptor location needs updating") -# @entry.seq = "ATCG" -# assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 0) } -# assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 50.5) } -# assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 100) } -# end -# -# def test_Seq_adaptor_locate_left_returns_correctly -# flunk("adaptor location needs updating") -# @entry.seq = "cgatnnnnn" -# assert_equal(-1, @entry.adaptor_locate_left("X")) -# assert_equal(0, @entry.adaptor_locate_left("XC")) -# assert_equal(1, @entry.adaptor_locate_left("XCG")) -# assert_equal(2, @entry.adaptor_locate_left("XCGA")) -# assert_equal(3, @entry.adaptor_locate_left("XCGAT")) -# assert_equal(8, @entry.adaptor_locate_left("CGATNNNNN")) -# end -# -# def test_Seq_adaptor_locate_left_with_hd_returns_correctly -# flunk("adaptor location needs updating") -# @entry.seq = "cgatnnnnn" -# assert_equal(3, @entry.adaptor_locate_left("XGAT", 25)) -# assert_equal(3, @entry.adaptor_locate_left("XXAT", 50)) -# end -# -# def test_Seq_adaptor_clip_right_returns_correct_sequence -# flunk("adaptor location needs updating") -# @entry.seq = "nnnnncgat" -# @entry.adaptor_clip_right("cgat") -# assert_equal( "nnnnn", @entry.seq) -# end -# -# def test_Seq_adaptor_clip_right_with_hd_returns_correct_sequence -# flunk("adaptor location needs updating") -# @entry.seq = "nnnnncgat" -# @entry.adaptor_clip_right("xgat", 25) -# assert_equal( "nnnnn", @entry.seq) -# end -# -# def test_Seq_adaptor_clip_right_returns_correct_qual -# flunk("adaptor location needs updating") -# @entry.seq = "nnnnncgat" -# @entry.qual = "abcdefghi" -# @entry.adaptor_clip_right("cgat") -# assert_equal( "abcde", @entry.qual) -# end -# -# def test_Seq_adaptor_clip_right_with_hd_returns_correct_qual -# flunk("adaptor location needs updating") -# @entry.seq = "nnnnncgat" -# @entry.qual = "abcdefghi" -# @entry.adaptor_clip_right("xgat", 25) -# assert_equal( "abcde", @entry.qual) -# end -# -# def test_Seq_adaptor_clip_left_returns_correct_sequence -# flunk("adaptor location needs updating") -# @entry.seq = "cgatnnnnn" -# @entry.adaptor_clip_left("cgat") -# assert_equal( "nnnnn", @entry.seq) -# end -# -# def test_Seq_adaptor_clip_left_with_hd_returns_correct_sequence -# flunk("adaptor location needs updating") -# @entry.seq = "cgatnnnnn" -# @entry.adaptor_clip_left("cgax", 25) -# assert_equal( "nnnnn", @entry.seq) -# end -# -# def test_Seq_adaptor_clip_left_returns_correct_qual -# flunk("adaptor location needs updating") -# @entry.seq = "cgatnnnnn" -# @entry.qual = "abcdefghi" -# @entry.adaptor_clip_left("cgat") -# assert_equal( "efghi", @entry.qual) -# end -# -# def test_Seq_adaptor_clip_left_with_len_returns_correct_qual -# flunk("adaptor location needs updating") -# @entry.seq = "cgatnnnnn" -# @entry.qual = "abcdefghi" -# @entry.adaptor_clip_left("cgax", 25) -# assert_equal( "efghi", @entry.qual) -# end + def test_Seq_adaptor_find_with_bad_mis_percent_raises + @entry.seq = "actagctagctacgtacg" + assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = -1) } + assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 101) } + end + + def test_Seq_adaptor_find_with_ok_mis_percent_dont_raise + @entry.seq = "actagctagctacgtacg" + assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0) } + assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 100) } + end + + def test_Seq_adaptor_find_with_bad_ins_percent_raises + @entry.seq = "actagctagctacgtacg" + assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = -1) } + assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 101) } + end + + def test_Seq_adaptor_find_with_ok_ins_percent_dont_raise + @entry.seq = "actagctagctacgtacg" + assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0) } + assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 100) } + end + + def test_Seq_adaptor_find_with_bad_del_percent_raises + @entry.seq = "actagctagctacgtacg" + assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = -1) } + assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 101) } + end + + def test_Seq_adaptor_find_with_ok_del_percent_dont_raise + @entry.seq = "actagctagctacgtacg" + assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 0) } + assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 100) } + end + + def test_Seq_adaptor_find_with_no_match_returns_nil + @entry.seq = "actaaggctagctacgtccg" + assert_nil(@entry.adaptor_find("TTTT")) + end + + def test_Seq_adaptor_find_returns_correct_match + @entry.seq = "actaaggctagctacgtccg" + assert_equal(0, @entry.adaptor_find("actaa").pos) + assert_equal(7, @entry.adaptor_find("ctagc").pos) + assert_equal(15, @entry.adaptor_find("gtccg").pos) + assert_equal(17, @entry.adaptor_find("ccgTT").pos) + assert_equal(18, @entry.adaptor_find("cgTTT").pos) + assert_equal(19, @entry.adaptor_find("gTTTT").pos) + end + + def test_Seq_adaptor_with_mis_percent_returns_correct_match + @entry.seq = "actaaggctagctacgtccg" + assert_equal(0, @entry.adaptor_find("GGGaag", mis_percent = 50).pos) + assert_equal(14, @entry.adaptor_find("cgtcTTTT", mis_percent = 50).pos) + end + + def test_Seq_adaptor_with_ins_percent_returns_correct_match + @entry.seq = "actaaggctagctacgtccg" + assert_equal(0, @entry.adaptor_find("actGGGaag", mis_percent = 0, ins_percent = 50).pos) + assert_equal(15, @entry.adaptor_find("gtAccgTTTTT", mis_percent = 0, ins_percent = 10).pos) + end + + def test_Seq_adaptor_with_del_percent_returns_correct_match + @entry.seq = "actaaggctagctacgtccg" + assert_equal(0, @entry.adaptor_find("actctag", mis_percent = 0, ins_percent = 0, del_percent = 50).pos) + end end -- 2.39.2