git.donarmstrong.com Git - biopieces.git/commitdiff
seq.rb update
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 29 Mar 2011 17:41:04 +0000 (17:41 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 29 Mar 2011 17:41:04 +0000 (17:41 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1310 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/Maasha/lib/seq.rb
code_ruby/Maasha/test/test_seq.rb

index edae092fbde6d5573376cd694aab56a0ad891bba..efba4ff32d126a81e2dd9f2c6ac1a0a88b58348d 100644 (file)
@@ -310,56 +310,30 @@ class Seq
     ((self.seq.scan(/[a-z]/).size.to_f / (self.len - self.indels).to_f) * 100).round(2)
   end
 
-  # Method that locates an adaptor or part thereof in the sequence
-  # of a Seq object beginning from the right. Returns the location
-  # in the sequence that overlaps with the adaptor or nil if the
-  # adaptor was not found. The mis_percent, ins_percent, and
-  # del_percent indicate the maximum number of mismatches, insertions,
-  # and deletions allowed in all possible overlaps.
-  def adaptor_locate_right(adaptor, mis_percent = 0, ins_percent = 0, del_percent = 0)
+  # Method that finds an adaptor or part thereof in the sequence of a Seq object.
+  # Returns a Match object if the adaptor was found otherwise nil. The mis_percent,
+  # ins_percent, and del_percent indicate the maximum number of mismatches,
+  # insertions, and deletions allowed in all possible overlaps.
+  def adaptor_find(adaptor, mis_percent = 0, ins_percent = 0, del_percent = 0)
     raise SeqError, "Mismatch percent out of range #{mis_percent}"  unless (0 .. 100).include? mis_percent
     raise SeqError, "Insertion percent out of range #{ins_percent}" unless (0 .. 100).include? ins_percent
     raise SeqError, "Deletion percent out of range #{del_percent}"  unless (0 .. 100).include? del_percent
 
-    pos = self.length - adaptor.length
+    pos = 0
 
-    while pos < self.length
-      len        = self.length - pos
-      subseq     = self.seq[pos ... pos + len].upcase
-      subadaptor = adaptor[0 ... len].upcase
-      mis_max    = (len * mis_percent * 0.01).round
-      ins_max    = (len * ins_percent * 0.01).round
-      del_max    = (len * del_percent * 0.01).round
+    while adaptor.length > 0
+      mis_max = (adaptor.length * mis_percent * 0.01).round
+      ins_max = (adaptor.length * ins_percent * 0.01).round
+      del_max = (adaptor.length * del_percent * 0.01).round
 
-      matches = self.scan(adaptor, pos, mis_max, ins_max, del_max)
+      match = self.match(adaptor, pos, mis_max, ins_max, del_max)
 
-      pp matches
+      return match unless match.nil?
 
-      pos += 1
-    end
-  end
+      adaptor = adaptor[0 ... -1]
 
-  # Method that locates an adaptor or part thereof in the sequence
-  # of a Seq object beginning from the left. Returns the location
-  # in the sequence that overlaps with the adaptor or -1 if the
-  # adaptor was not found. The hd_percent is used to calculate the
-  # maximum hamming distance allowed for all possible overlaps.
-  def adaptor_locate_left(adaptor, hd_percent = 0)
-    raise SeqError, "Hamming distance percent out of range #{hd_percent}" unless (0 .. 100).include? hd_percent
-    pos = adaptor.length
-
-    while pos > 0
-      len          = pos
-      subseq       = self.seq[0 ... len].upcase
-      subadaptor   = adaptor[adaptor.length - len ... adaptor.length].upcase
-      hamming_max  = (len * hd_percent * 0.01).round
-
-      pos -= 1
-
-      return pos if hamming_dist <= hamming_max
+      pos = self.len - adaptor.length
     end
-    
-    -1
   end
 
   # Method that locates an adaptor or part thereof in the sequence
@@ -375,19 +349,6 @@ class Seq
     end
   end
 
-  # Method that locates an adaptor or part thereof in the sequence
-  # of a Seq object beginning from the left and removes the adaptor
-  # sequence if found. The hd_percent is used to calculate the
-  # maximum hamming distance allowed for all possible overlaps.
-  def adaptor_clip_left(adaptor, hd_percent = 0)
-    pos = self.adaptor_locate_left(adaptor, hd_percent)
-
-    if pos > 0
-      self.seq  = self.seq[pos + 1 ... self.length]
-      self.qual = self.qual[pos + 1 ... self.qual.length] unless self.qual.nil?
-    end
-  end
-
   # Method to convert the quality scores from a specified base
   # to another base.
   def convert_phred2illumina!
index 62adb67e7e7f99304d07b234d8e0c87df5e805a4..0520d06b1d9cc02112cd2a40e2378c74e4d19402 100755 (executable)
@@ -332,131 +332,73 @@ class TestSeq < Test::Unit::TestCase
     assert_equal(25.00, @entry.soft_mask)
   end
 
-#  def test_Seq_adaptor_locate_right_with_bad_hamming_dist_raises
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "ATCG"
-#    assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", -1) }
-#    assert_raise(SeqError) { @entry.adaptor_locate_right("ATCG", 101) }
-#  end
-#
-#  def test_Seq_adaptor_locate_right_with_ok_hamming_dist_dont_raise
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "ATCG"
-#    assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 0) }
-#    assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 50.5) }
-#    assert_nothing_raised { @entry.adaptor_locate_right("ATCG", 100) }
-#  end
-#
-#  def test_Seq_adaptor_locate_right_returns_correctly
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "nnnnncgat"
-#    assert_equal(-1, @entry.adaptor_locate_right("X"))
-#    assert_equal(8,  @entry.adaptor_locate_right("TX"))
-#    assert_equal(7,  @entry.adaptor_locate_right("ATX"))
-#    assert_equal(6,  @entry.adaptor_locate_right("GATX"))
-#    assert_equal(5,  @entry.adaptor_locate_right("CGATX"))
-#    assert_equal(0,  @entry.adaptor_locate_right("NNNNNCGAT"))
-#  end
-#
-#  def test_Seq_adaptor_locate_right_with_hd_returns_correctly
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "nnnnncgat"
-#    assert_equal(5, @entry.adaptor_locate_right("XGAT", 25))
-#    assert_equal(5, @entry.adaptor_locate_right("XXAT", 50))
-#  end
-#
-#  def test_Seq_adaptor_locate_left_with_bad_hamming_dist_raises
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "ATCG"
-#    assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", -1) }
-#    assert_raise(SeqError) { @entry.adaptor_locate_left("ATCG", 101) }
-#  end
-#
-#  def test_Seq_adaptor_locate_left_with_ok_hamming_dist_dont_raise
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "ATCG"
-#    assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 0) }
-#    assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 50.5) }
-#    assert_nothing_raised { @entry.adaptor_locate_left("ATCG", 100) }
-#  end
-#
-#  def test_Seq_adaptor_locate_left_returns_correctly
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "cgatnnnnn"
-#    assert_equal(-1, @entry.adaptor_locate_left("X"))
-#    assert_equal(0,  @entry.adaptor_locate_left("XC"))
-#    assert_equal(1,  @entry.adaptor_locate_left("XCG"))
-#    assert_equal(2,  @entry.adaptor_locate_left("XCGA"))
-#    assert_equal(3,  @entry.adaptor_locate_left("XCGAT"))
-#    assert_equal(8,  @entry.adaptor_locate_left("CGATNNNNN"))
-#  end
-#
-#  def test_Seq_adaptor_locate_left_with_hd_returns_correctly
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "cgatnnnnn"
-#    assert_equal(3, @entry.adaptor_locate_left("XGAT", 25))
-#    assert_equal(3, @entry.adaptor_locate_left("XXAT", 50))
-#  end
-#
-#  def test_Seq_adaptor_clip_right_returns_correct_sequence
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "nnnnncgat"
-#    @entry.adaptor_clip_right("cgat")
-#    assert_equal( "nnnnn", @entry.seq)
-#  end
-#
-#  def test_Seq_adaptor_clip_right_with_hd_returns_correct_sequence
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "nnnnncgat"
-#    @entry.adaptor_clip_right("xgat", 25)
-#    assert_equal( "nnnnn", @entry.seq)
-#  end
-#
-#  def test_Seq_adaptor_clip_right_returns_correct_qual
-#    flunk("adaptor location needs updating")
-#    @entry.seq  = "nnnnncgat"
-#    @entry.qual = "abcdefghi"
-#    @entry.adaptor_clip_right("cgat")
-#    assert_equal( "abcde", @entry.qual)
-#  end
-#
-#  def test_Seq_adaptor_clip_right_with_hd_returns_correct_qual
-#    flunk("adaptor location needs updating")
-#    @entry.seq  = "nnnnncgat"
-#    @entry.qual = "abcdefghi"
-#    @entry.adaptor_clip_right("xgat", 25)
-#    assert_equal( "abcde", @entry.qual)
-#  end
-#
-#  def test_Seq_adaptor_clip_left_returns_correct_sequence
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "cgatnnnnn"
-#    @entry.adaptor_clip_left("cgat")
-#    assert_equal( "nnnnn", @entry.seq)
-#  end
-#
-#  def test_Seq_adaptor_clip_left_with_hd_returns_correct_sequence
-#    flunk("adaptor location needs updating")
-#    @entry.seq = "cgatnnnnn"
-#    @entry.adaptor_clip_left("cgax", 25)
-#    assert_equal( "nnnnn", @entry.seq)
-#  end
-#
-#  def test_Seq_adaptor_clip_left_returns_correct_qual
-#    flunk("adaptor location needs updating")
-#    @entry.seq  = "cgatnnnnn"
-#    @entry.qual = "abcdefghi"
-#    @entry.adaptor_clip_left("cgat")
-#    assert_equal( "efghi", @entry.qual)
-#  end
-#
-#  def test_Seq_adaptor_clip_left_with_len_returns_correct_qual
-#    flunk("adaptor location needs updating")
-#    @entry.seq  = "cgatnnnnn"
-#    @entry.qual = "abcdefghi"
-#    @entry.adaptor_clip_left("cgax", 25)
-#    assert_equal( "efghi", @entry.qual)
-#  end
+  def test_Seq_adaptor_find_with_bad_mis_percent_raises
+    @entry.seq = "actagctagctacgtacg"
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = -1) }
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 101) }
+  end
+
+  def test_Seq_adaptor_find_with_ok_mis_percent_dont_raise
+    @entry.seq = "actagctagctacgtacg"
+    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0) }
+    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 100) }
+  end
+
+  def test_Seq_adaptor_find_with_bad_ins_percent_raises
+    @entry.seq = "actagctagctacgtacg"
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = -1) }
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 101) }
+  end
+
+  def test_Seq_adaptor_find_with_ok_ins_percent_dont_raise
+    @entry.seq = "actagctagctacgtacg"
+    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0) }
+    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 100) }
+  end
+
+  def test_Seq_adaptor_find_with_bad_del_percent_raises
+    @entry.seq = "actagctagctacgtacg"
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = -1) }
+    assert_raise(SeqError) { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 101) }
+  end
+
+  def test_Seq_adaptor_find_with_ok_del_percent_dont_raise
+    @entry.seq = "actagctagctacgtacg"
+    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 0) }
+    assert_nothing_raised { @entry.adaptor_find("tacg", mis_percent = 0, ins_percent = 0, del_percent = 100) }
+  end
+
+  def test_Seq_adaptor_find_with_no_match_returns_nil
+    @entry.seq = "actaaggctagctacgtccg"
+    assert_nil(@entry.adaptor_find("TTTT"))
+  end
+
+  def test_Seq_adaptor_find_returns_correct_match
+    @entry.seq = "actaaggctagctacgtccg"
+    assert_equal(0, @entry.adaptor_find("actaa").pos)
+    assert_equal(7, @entry.adaptor_find("ctagc").pos)
+    assert_equal(15, @entry.adaptor_find("gtccg").pos)
+    assert_equal(17, @entry.adaptor_find("ccgTT").pos)
+    assert_equal(18, @entry.adaptor_find("cgTTT").pos)
+    assert_equal(19, @entry.adaptor_find("gTTTT").pos)
+  end
+
+  def test_Seq_adaptor_with_mis_percent_returns_correct_match
+    @entry.seq = "actaaggctagctacgtccg"
+    assert_equal(0, @entry.adaptor_find("GGGaag", mis_percent = 50).pos)
+    assert_equal(14, @entry.adaptor_find("cgtcTTTT", mis_percent = 50).pos)
+  end
+
+  def test_Seq_adaptor_with_ins_percent_returns_correct_match
+    @entry.seq = "actaaggctagctacgtccg"
+    assert_equal(0, @entry.adaptor_find("actGGGaag", mis_percent = 0, ins_percent = 50).pos)
+    assert_equal(15, @entry.adaptor_find("gtAccgTTTTT", mis_percent = 0, ins_percent = 10).pos)
+  end
+
+  def test_Seq_adaptor_with_del_percent_returns_correct_match
+    @entry.seq = "actaaggctagctacgtccg"
+    assert_equal(0, @entry.adaptor_find("actctag", mis_percent = 0, ins_percent = 0, del_percent = 50).pos)
+  end
 end