]> git.donarmstrong.com Git - biopieces.git/commitdiff
added quality_trim to seq.rb along with unit tests
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 1 Nov 2011 15:20:47 +0000 (15:20 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Tue, 1 Nov 2011 15:20:47 +0000 (15:20 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1608 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/seq.rb
code_ruby/test/maasha/test_seq.rb

index c1ce2daed2ca8f773cb2d985afc4c04238a02939..b70cfc7f3047d15cc8cc44860dfd9727551069d8 100644 (file)
@@ -36,6 +36,8 @@ INDELS  = %w[. - _ ~]
 # Quality scores bases
 SCORE_PHRED    = 33
 SCORE_ILLUMINA = 64
+SCORE_MIN      = 0
+SCORE_MAX      = 40
 
 # Error class for all exceptions to do with Seq.
 class SeqError < StandardError; end
@@ -247,9 +249,9 @@ class Seq
     raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
 
     if self.is_dna?
-      self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
+      self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
     elsif self.is_rna?
-      self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
+      self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn')
     else
       raise SeqError, "Cannot complement sequence type: #{self.type}"
     end
@@ -328,22 +330,38 @@ class Seq
     self.subseq(start, length)
   end
 
-  def quality_trim(min)
-  end
-
   def quality_trim_right(min)
+    raise SeqError, "no sequence"      if self.seq.nil?
+    raise SeqError, "no quality score" if self.qual.nil?
+    raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min
+
+    regex_right = Regexp.new("[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+$")
+
+    self.qual.match(regex_right) do |m|
+      self.subseq!(0, $`.length) if $`.length > 0
+    end
+
+    self
   end
 
   def quality_trim_left(min)
-  end
+    raise SeqError, "no sequence"      if self.seq.nil?
+    raise SeqError, "no quality score" if self.qual.nil?
+    raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min
 
-  def quality_trim!(min)
-  end
+    regex_left  = Regexp.new("^[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+")
+
+    self.qual.match(regex_left) do |m|
+      self.subseq!(m.to_s.length, self.length - m.to_s.length) if self.length - m.to_s.length > 0
+    end
 
-  def quality_trim_right!(min)
+    self
   end
 
-  def quality_trim_left!(min)
+  def quality_trim(min)
+    self.quality_trim_right(min)
+    self.quality_trim_left(min)
+    self
   end
 
   # Method that returns the residue compositions of a sequence in
index 29990b7b5fe2d9fcbb78dea19afc8f9c485b70d0..cdc9294aee01febe7658618c0bf4c9eebbbfde3f 100755 (executable)
@@ -356,6 +356,86 @@ class TestSeq < Test::Unit::TestCase
     assert_equal("ATCG", @entry.subseq_rand(4).seq)
   end
 
+  def test_Seq_quality_trim_right_with_missing_seq_raises
+    @entry.qual = "hhhh"
+    assert_raise(SeqError) { @entry.quality_trim_right(20) }
+  end
+
+  def test_Seq_quality_trim_right_with_missing_qual_raises
+    @entry.seq = "ATCG"
+    assert_raise(SeqError) { @entry.quality_trim_right(20) }
+  end
+
+  def test_Seq_quality_trim_right_with_bad_min_raises
+    @entry.seq  = "ATCG"
+    @entry.qual = "hhhh"
+
+    [-1, 41].each do |min|
+      assert_raise(SeqError) { @entry.quality_trim_right(min) }
+    end
+  end
+
+  def test_Seq_quality_trim_right_with_ok_min_dont_raise
+    @entry.seq  = "ATCG"
+    @entry.qual = "hhhh"
+
+    [0, 40].each do |min|
+      assert_nothing_raised { @entry.quality_trim_right(min) }
+    end
+  end
+
+  def test_Seq_quality_trim_right_returns_correctly
+    @entry.seq  = "AAAAATCG"
+    @entry.qual = "hhhhhgfe"
+    @entry.quality_trim_right(38)
+    assert_equal("AAAAAT", @entry.seq) 
+    assert_equal("hhhhhg", @entry.qual) 
+  end
+
+  def test_Seq_quality_trim_left_with_missing_seq_raises
+    @entry.qual = "hhhh"
+    assert_raise(SeqError) { @entry.quality_trim_left(20) }
+  end
+
+  def test_Seq_quality_trim_left_with_missing_qual_raises
+    @entry.seq = "ATCG"
+    assert_raise(SeqError) { @entry.quality_trim_left(20) }
+  end
+
+  def test_Seq_quality_trim_left_with_bad_min_raises
+    @entry.seq  = "ATCG"
+    @entry.qual = "hhhh"
+
+    [-1, 41].each do |min|
+      assert_raise(SeqError) { @entry.quality_trim_left(min) }
+    end
+  end
+
+  def test_Seq_quality_trim_left_with_ok_min_dont_raise
+    @entry.seq  = "ATCG"
+    @entry.qual = "hhhh"
+
+    [0, 40].each do |min|
+      assert_nothing_raised { @entry.quality_trim_left(min) }
+    end
+  end
+
+  def test_Seq_quality_trim_left_returns_correctly
+    @entry.seq  = "GCTAAAAA"
+    @entry.qual = "efghhhhh"
+    @entry.quality_trim_left(38)
+    assert_equal("TAAAAA", @entry.seq) 
+    assert_equal("ghhhhh", @entry.qual) 
+  end
+
+  def test_Seq_quality_trim_returns_correctly
+    @entry.seq  = "GCTAAAAAGTG"
+    @entry.qual = "efghhhhhgfe"
+    @entry.quality_trim(38)
+    assert_equal("TAAAAAG", @entry.seq) 
+    assert_equal("ghhhhhg", @entry.qual) 
+  end
+
   def test_Seq_composition_returns_correctly
     @entry.seq = "AAAATTTCCG"
     assert_equal(4, @entry.composition["A"])