From d4b9c1bec954d5ff3e6eb2f7dc6c1aaa1f7f810f Mon Sep 17 00:00:00 2001
From: martinahansen
Date: Mon, 4 Feb 2013 13:37:39 +0000
Subject: [PATCH] added qual_valid? method to Seq.rb

git-svn-id: http://biopieces.googlecode.com/svn/trunk@2088 74ccb610-7750-0410-82ae-013aeee3265d
---
 code_ruby/lib/maasha/seq.rb       | 23 +++++++++++++--
 code_ruby/test/maasha/test_seq.rb | 49 +++++++++++++++++++++++++++++++
 2 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb
index 45c13fe..f2d93d1 100644
--- a/code_ruby/lib/maasha/seq.rb
+++ b/code_ruby/lib/maasha/seq.rb
@@ -538,13 +538,30 @@ class Seq
   # Method that determines if a quality score string can be
   # absolutely identified as base 33.
   def qual_base33?
-    self.qual.match(/[!-:]/)
+    self.qual.match(/[!-:]/) ? true : false
   end
 
   # Method that determines if a quality score string can be
   # absolutely identified as base 64.
   def qual_base64?
-    self.qual.match(/[K-h]/)
+    self.qual.match(/[K-h]/) ? true : false
+  end
+
+  # Method to determine if every quality score is valid for the given encoding.
+  # Raises SeqError if qual is missing or the encoding is unknown.
+  def qual_valid?(encoding)
+    raise SeqError, "Missing qual" if self.qual.nil?
+    # \A and \z span the whole string; ^ and $ would accept any one valid line.
+    valid = case encoding.downcase
+            when "sanger"     then /\A[!-I]*\z/
+            when "454"        then /\A[@-h]*\z/
+            when "solexa"     then /\A[;-h]*\z/
+            when "illumina13" then /\A[@-h]*\z/
+            when "illumina15" then /\A[@-h]*\z/
+            when "illumina18" then /\A[!-J]*\z/
+            else raise SeqError, "unknown quality score encoding: #{encoding}"
+            end
+    self.qual.match(valid) ? true : false
   end
 
   # Method to convert quality scores inbetween formats.
@@ -552,7 +569,7 @@ class Seq
   # 454 base 64, range 0-40
   # Solexa base 64, range -5-40
   # Illumina13 base 64, range 0-40
-  # Illumina15 base 64, range 3-40
+  # Illumina15 base 64, range 0-40
   # Illumina18 base 33, range 0-41
   def convert_scores!(from, to)
     unless from == to
diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb
index d177d31..171e6b1 100755
--- a/code_ruby/test/maasha/test_seq.rb
+++ b/code_ruby/test/maasha/test_seq.rb
@@ -488,6 +488,55 @@ class TestSeq < Test::Unit::TestCase
     assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
   end
 
+  # qual score detection
+
+  def test_Seq_qual_base33_returns_correctly
+    # true only when some character lies in '!' (33) .. ':' (58)
+    @entry.qual = '!"#$%&\'()*+,-./0123456789:'
+    assert_equal(true, @entry.qual_base33?)
+    @entry.qual = 32.chr
+    assert_equal(false, @entry.qual_base33?)
+    @entry.qual = 59.chr
+    assert_equal(false, @entry.qual_base33?)
+  end
+
+  def test_Seq_qual_base64_returns_correctly
+    # true only when some character lies in 'K' (75) .. 'h' (104)
+    @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
+    assert_equal(true, @entry.qual_base64?)
+    @entry.qual = 74.chr
+    assert_equal(false, @entry.qual_base64?)
+    @entry.qual = 105.chr
+    assert_equal(false, @entry.qual_base64?)
+  end
+
+  def test_Seq_qual_valid_with_nil_qual_raises
+    assert_raise(SeqError) { @entry.qual_valid?("illumina18") }
+  end
+
+  def test_Seq_qual_valid_with_bad_encoding_raises
+    @entry.qual = "abc"
+    assert_raise(SeqError) { @entry.qual_valid?("foobar") }
+  end
+
+  def test_Seq_qual_valid_returns_correctly
+    tests = [["sanger",      0, 40, 33],
+             ["454",         0, 40, 64],
+             ["solexa",     -5, 40, 64],
+             ["illumina13",  0, 40, 64],
+             ["illumina15",  0, 40, 64],
+             ["illumina18",  0, 41, 33]]
+
+    tests.each do |encoding, min, max, base|
+      @entry.qual = (min + base).chr + (max + base).chr
+      assert_equal(true, @entry.qual_valid?(encoding))
+      @entry.qual = (min + base - 1).chr
+      assert_equal(false, @entry.qual_valid?(encoding))
+      @entry.qual = (max + base + 1).chr
+      assert_equal(false, @entry.qual_valid?(encoding))
+    end
+  end
+
   # convert sanger to ...
 
   def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK
-- 
2.39.2