git.donarmstrong.com Git - biopieces.git/commitdiff
added qual_valid? method to Seq.rb
author martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 4 Feb 2013 13:37:39 +0000 (13:37 +0000)
committer martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 4 Feb 2013 13:37:39 +0000 (13:37 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@2088 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/seq.rb
code_ruby/test/maasha/test_seq.rb

index 45c13fe6ded1fe1d20771b8003a58596f5ab41b1..f2d93d1fc5bbf86b6611e56e9eb9ce98d9185795 100644 (file)
@@ -538,13 +538,30 @@ class Seq
   # Method that determines if a quality score string can be
   # absolutely identified as base 33.
   def qual_base33?
-    self.qual.match(/[!-:]/)
+    self.qual.match(/[!-:]/) ? true : false
   end
 
   # Method that determines if a quality score string can be
   # absolutely identified as base 64.
   def qual_base64?
-    self.qual.match(/[K-h]/)
+    self.qual.match(/[K-h]/) ? true : false
+  end
+
+  # Method to determine if every character of the quality score string
+  # is within the range allowed by the given encoding.
+  def qual_valid?(encoding)
+    raise SeqError, "Missing qual" if self.qual.nil?
+
+    case encoding.downcase
+    when "sanger"     then return true if self.qual.match(/^[!-I]*$/)
+    when "454"        then return true if self.qual.match(/^[@-h]*$/)
+    when "solexa"     then return true if self.qual.match(/^[;-h]*$/)
+    when "illumina13" then return true if self.qual.match(/^[@-h]*$/)
+    when "illumina15" then return true if self.qual.match(/^[@-h]*$/)
+    when "illumina18" then return true if self.qual.match(/^[!-J]*$/)
+    else raise SeqError, "unknown quality score encoding: #{encoding}"
+    end
+
+    false
   end
 
   # Method to convert quality scores between formats.
@@ -552,7 +569,7 @@ class Seq
   # 454        base 64, range  0-40 
   # Solexa     base 64, range -5-40 
   # Illumina13 base 64, range  0-40 
-  # Illumina15 base 64, range  3-40 
+  # Illumina15 base 64, range  0-40 
   # Illumina18 base 33, range  0-41 
   def convert_scores!(from, to)
     unless from == to
index d177d311467901296a0d2d0aa90f01dce29ffd86..171e6b1ae9157205527a0613ef68fe65686bf4e7 100755 (executable)
@@ -488,6 +488,55 @@ class TestSeq < Test::Unit::TestCase
     assert_equal("-atCG", @entry.mask_seq_soft!(20).seq)
   end
 
+  # qual score detection
+
+  def test_Seq_qual_base33_returns_correctly
+    # self.qual.match(/[!-:]/)
+    @entry.qual = '!"#$%&\'()*+,-./0123456789:'
+    assert_equal(true,  @entry.qual_base33? )
+    @entry.qual = 32.chr
+    assert_equal(false, @entry.qual_base33? )
+    @entry.qual = 59.chr
+    assert_equal(false, @entry.qual_base33? )
+  end
+
+  def test_Seq_qual_base64_returns_correctly
+    # self.qual.match(/[K-h]/)
+    @entry.qual = 'KLMNOPQRSTUVWXYZ[\]^_`abcdefgh'
+    assert_equal(true,  @entry.qual_base64? )
+    @entry.qual = 74.chr
+    assert_equal(false, @entry.qual_base64? )
+    @entry.qual = 105.chr
+    assert_equal(false, @entry.qual_base64? )
+  end
+
+  def test_Seq_qual_valid_with_nil_qual_raises
+    assert_raise(SeqError) { @entry.qual_valid?("illumina1.8") }
+  end
+
+  def test_Seq_qual_valid_with_bad_encoding_raises
+    @entry.qual = "abc"
+    assert_raise(SeqError) { @entry.qual_valid?("foobar") }
+  end
+
+  def test_Seq_qual_valid_returns_correctly
+    tests = [["sanger",      0, 40, 33],
+             ["454",         0, 40, 64],
+             ["solexa",     -5, 40, 64],
+             ["illumina13",  0, 40, 64],
+             ["illumina15",  0, 40, 64],
+             ["illumina18",  0, 41, 33]]
+
+    tests.each do |test|
+      @entry.qual = (test[1] + test[-1]).chr + (test[2] + test[-1]).chr
+      assert_equal(true, @entry.qual_valid?(test[0]))
+      @entry.qual = (test[1] + test[-1] - 1).chr
+      assert_equal(false, @entry.qual_valid?(test[0]))
+      @entry.qual = (test[2] + test[-1] + 1).chr
+      assert_equal(false, @entry.qual_valid?(test[0]))
+    end
+  end
+
   # convert sanger to ...
 
   def test_Seq_convert_scores_bang_from_sanger_to_sanger_returns_OK