]> git.donarmstrong.com Git - biopieces.git/commitdiff
rewrote mask_seq_hard and mask_seq_soft methods to NArrays for speed
authormartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 23 Jan 2012 14:16:33 +0000 (14:16 +0000)
committermartinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Mon, 23 Jan 2012 14:16:33 +0000 (14:16 +0000)
git-svn-id: http://biopieces.googlecode.com/svn/trunk@1731 74ccb610-7750-0410-82ae-013aeee3265d

code_ruby/lib/maasha/seq.rb
code_ruby/test/maasha/test_seq.rb

index f4abff7c36aafba918a04b76d5e6f3544345225c..f311bf60d3a04ee97009179a1ff2e3fd684992e8 100644 (file)
@@ -26,6 +26,7 @@ require 'maasha/patternmatcher'
 require 'maasha/bits'
 require 'maasha/backtrack'
 require 'maasha/digest'
+require 'narray'
 #require 'maasha/patscan'
 
 # Residue alphabets
@@ -433,7 +434,7 @@ class Seq
 
   # Hard masks sequence residues where the corresponding quality score
   # is below a given cutoff.
-  def mask_seq_hard!(cutoff)
+  def mask_seq_hard_old(cutoff)
     seq    = self.seq.upcase
     scores = self.qual
     i      = 0
@@ -446,19 +447,40 @@ class Seq
     self.seq = seq
   end
 
+  # Hard masks sequence residues where the corresponding quality score
+  # is below a given cutoff.
+  def mask_seq_hard!(cutoff)
+    raise SeqError, "seq is nil"  if self.seq.nil?
+    raise SeqError, "qual is nil" if self.qual.nil?
+    raise SeqError, "Bad cutoff value: #{cutoff}" unless (SCORE_MIN .. SCORE_MAX).include? cutoff
+
+    na_seq  = NArray.to_na(self.seq, "byte")
+    na_qual = NArray.to_na(self.qual, "byte")
+    mask    = (na_qual - SCORE_ILLUMINA) < cutoff
+
+    na_seq[mask] = 'N'.ord
+
+    self.seq = na_seq.to_s
+
+    self
+  end
+
   # Soft masks sequence residues where the corresponding quality score
   # is below a given cutoff.
   def mask_seq_soft!(cutoff)
-    seq    = self.seq.upcase
-    scores = self.qual
-    i      = 0
+    raise SeqError, "seq is nil"  if self.seq.nil?
+    raise SeqError, "qual is nil" if self.qual.nil?
+    raise SeqError, "Bad cutoff value: #{cutoff}" unless (SCORE_MIN .. SCORE_MAX).include? cutoff
 
-    scores.each_char do |score|
-      seq[i] = seq[i].downcase if score.ord - SCORE_ILLUMINA < cutoff
-      i += 1 
-    end
+    na_seq  = NArray.to_na(self.seq, "byte")
+    na_qual = NArray.to_na(self.qual, "byte")
+    mask    = (na_qual - SCORE_ILLUMINA) < cutoff
 
-    self.seq = seq
+    na_seq[mask] ^= ' '.ord
+
+    self.seq = na_seq.to_s
+
+    self
   end
 
   # Method to convert the quality scores from a specified base
index a4d47ad203d601a54a5012855c1ca3942976e186..927937ec3684097c9a42cfa773dc48e832f97b3b 100755 (executable)
@@ -509,6 +509,74 @@ class TestSeq < Test::Unit::TestCase
     @entry.seq = "--AAAa"
     assert_equal(25.00, @entry.soft_mask)
   end
+
+  def test_Seq_mask_seq_hard_bang_with_nil_seq_raises
+    @entry.seq  = nil
+    @entry.qual = ""
+
+    assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_with_nil_qual_raises
+    @entry.seq  = ""
+    @entry.qual = nil
+
+    assert_raise(SeqError) { @entry.mask_seq_hard!(20) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_with_bad_cutoff_raises
+    assert_raise(SeqError) { @entry.mask_seq_hard!(-1) }
+    assert_raise(SeqError) { @entry.mask_seq_hard!(41) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_with_OK_cutoff_dont_raise
+    @entry.seq  = "ATCG"
+    @entry.qual = "RSTU"
+
+    assert_nothing_raised { @entry.mask_seq_hard!(0) }
+    assert_nothing_raised { @entry.mask_seq_hard!(40) }
+  end
+
+  def test_Seq_mask_seq_hard_bang_returns_correctly
+    @entry.seq  = "ATCG"
+    @entry.qual = "RSTU"
+
+    assert_equal("NNCG", @entry.mask_seq_hard!(20).seq)
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_nil_seq_raises
+    @entry.seq  = nil
+    @entry.qual = ""
+
+    assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_nil_qual_raises
+    @entry.seq  = ""
+    @entry.qual = nil
+
+    assert_raise(SeqError) { @entry.mask_seq_soft!(20) }
+  end
+
+  def test_Seq_mask_seq_soft_bang_returns_correctly
+    @entry.seq  = "ATCG"
+    @entry.qual = "RSTU"
+
+    assert_equal("atCG", @entry.mask_seq_soft!(20).seq)
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_bad_cutoff_raises
+    assert_raise(SeqError) { @entry.mask_seq_soft!(-1) }
+    assert_raise(SeqError) { @entry.mask_seq_soft!(41) }
+  end
+
+  def test_Seq_mask_seq_soft_bang_with_OK_cutoff_dont_raise
+    @entry.seq  = "ATCG"
+    @entry.qual = "RSTU"
+
+    assert_nothing_raised { @entry.mask_seq_soft!(0) }
+    assert_nothing_raised { @entry.mask_seq_soft!(40) }
+  end
 end