From af3a429640e1a47bec1c3ac363fd3d914f578b19 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 1 Nov 2011 15:20:47 +0000 Subject: [PATCH] added quality_trim to seq.rb along with unit tests git-svn-id: http://biopieces.googlecode.com/svn/trunk@1608 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/seq.rb | 38 +++++++++++---- code_ruby/test/maasha/test_seq.rb | 80 +++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+), 10 deletions(-) diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index c1ce2da..b70cfc7 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -36,6 +36,8 @@ INDELS = %w[. - _ ~] # Quality scores bases SCORE_PHRED = 33 SCORE_ILLUMINA = 64 +SCORE_MIN = 0 +SCORE_MAX = 40 # Error class for all exceptions to do with Seq. class SeqError < StandardError; end @@ -247,9 +249,9 @@ class Seq raise SeqError, "Cannot complement 0 length sequence" if self.length == 0 if self.is_dna? - self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' ) + self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn') elsif self.is_rna? - self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' ) + self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn') else raise SeqError, "Cannot complement sequence type: #{self.type}" end @@ -328,22 +330,38 @@ class Seq self.subseq(start, length) end - def quality_trim(min) - end - def quality_trim_right(min) + raise SeqError, "no sequence" if self.seq.nil? + raise SeqError, "no quality score" if self.qual.nil? + raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min + + regex_right = Regexp.new("[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+$") + + self.qual.match(regex_right) do |m| + self.subseq!(0, $`.length) if $`.length > 0 + end + + self end def quality_trim_left(min) - end + raise SeqError, "no sequence" if self.seq.nil? + raise SeqError, "no quality score" if self.qual.nil? + raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min - def quality_trim!(min) - end + regex_left = Regexp.new("^[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+") + + self.qual.match(regex_left) do |m| + self.subseq!(m.to_s.length, self.length - m.to_s.length) if self.length - m.to_s.length > 0 + end - def quality_trim_right!(min) + self end - def quality_trim_left!(min) + def quality_trim(min) + self.quality_trim_right(min) + self.quality_trim_left(min) + self end # Method that returns the residue compositions of a sequence in diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb index 29990b7..cdc9294 100755 --- a/code_ruby/test/maasha/test_seq.rb +++ b/code_ruby/test/maasha/test_seq.rb @@ -356,6 +356,86 @@ class TestSeq < Test::Unit::TestCase assert_equal("ATCG", @entry.subseq_rand(4).seq) end + def test_Seq_quality_trim_right_with_missing_seq_raises + @entry.qual = "hhhh" + assert_raise(SeqError) { @entry.quality_trim_right(20) } + end + + def test_Seq_quality_trim_right_with_missing_qual_raises + @entry.seq = "ATCG" + assert_raise(SeqError) { @entry.quality_trim_right(20) } + end + + def test_Seq_quality_trim_right_with_bad_min_raises + @entry.seq = "ATCG" + @entry.qual = "hhhh" + + [-1, 41].each do |min| + assert_raise(SeqError) { @entry.quality_trim_right(min) } + end + end + + def test_Seq_quality_trim_right_with_ok_min_dont_raise + @entry.seq = "ATCG" + @entry.qual = "hhhh" + + [0, 40].each do |min| + assert_nothing_raised { @entry.quality_trim_right(min) } + end + end + + def test_Seq_quality_trim_right_returns_correctly + @entry.seq = "AAAAATCG" + @entry.qual = "hhhhhgfe" + @entry.quality_trim_right(38) + assert_equal("AAAAAT", @entry.seq) + assert_equal("hhhhhg", @entry.qual) + end + + def test_Seq_quality_trim_left_with_missing_seq_raises + @entry.qual = "hhhh" + assert_raise(SeqError) { @entry.quality_trim_left(20) } + end + + def test_Seq_quality_trim_left_with_missing_qual_raises + @entry.seq = "ATCG" + assert_raise(SeqError) { @entry.quality_trim_left(20) } + end + + def test_Seq_quality_trim_left_with_bad_min_raises + @entry.seq = "ATCG" + @entry.qual = "hhhh" + + [-1, 41].each do |min| + assert_raise(SeqError) { @entry.quality_trim_left(min) } + end + end + + def test_Seq_quality_trim_left_with_ok_min_dont_raise + @entry.seq = "ATCG" + @entry.qual = "hhhh" + + [0, 40].each do |min| + assert_nothing_raised { @entry.quality_trim_left(min) } + end + end + + def test_Seq_quality_trim_left_returns_correctly + @entry.seq = "GCTAAAAA" + @entry.qual = "efghhhhh" + @entry.quality_trim_left(38) + assert_equal("TAAAAA", @entry.seq) + assert_equal("ghhhhh", @entry.qual) + end + + def test_Seq_quality_trim_returns_correctly + @entry.seq = "GCTAAAAAGTG" + @entry.qual = "efghhhhhgfe" + @entry.quality_trim(38) + assert_equal("TAAAAAG", @entry.seq) + assert_equal("ghhhhhg", @entry.qual) + end + def test_Seq_composition_returns_correctly @entry.seq = "AAAATTTCCG" assert_equal(4, @entry.composition["A"]) -- 2.39.2