From 73481219c95124438f7020da8af9b21498d4a269 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Mon, 23 Jan 2012 14:16:33 +0000 Subject: [PATCH] rewrote mask_seq_hard and mask_seq_soft methods to NArrays for speed git-svn-id: http://biopieces.googlecode.com/svn/trunk@1731 74ccb610-7750-0410-82ae-013aeee3265d --- code_ruby/lib/maasha/seq.rb | 40 ++++++++++++++---- code_ruby/test/maasha/test_seq.rb | 68 +++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 9 deletions(-) diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index f4abff7..f311bf6 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -26,6 +26,7 @@ require 'maasha/patternmatcher' require 'maasha/bits' require 'maasha/backtrack' require 'maasha/digest' +require 'narray' #require 'maasha/patscan' # Residue alphabets @@ -433,7 +434,7 @@ class Seq # Hard masks sequence residues where the corresponding quality score # is below a given cutoff. - def mask_seq_hard!(cutoff) + def mask_seq_hard_old(cutoff) seq = self.seq.upcase scores = self.qual i = 0 @@ -446,19 +447,40 @@ class Seq self.seq = seq end + # Hard masks sequence residues where the corresponding quality score + # is below a given cutoff. + def mask_seq_hard!(cutoff) + raise SeqError, "seq is nil" if self.seq.nil? + raise SeqError, "qual is nil" if self.qual.nil? + raise SeqError, "Bad cutoff value: #{cutoff}" unless (SCORE_MIN .. SCORE_MAX).include? cutoff + + na_seq = NArray.to_na(self.seq, "byte") + na_qual = NArray.to_na(self.qual, "byte") + mask = (na_qual - SCORE_ILLUMINA) < cutoff + + na_seq[mask] = 'N'.ord + + self.seq = na_seq.to_s + + self + end + # Soft masks sequence residues where the corresponding quality score # is below a given cutoff. def mask_seq_soft!(cutoff) - seq = self.seq.upcase - scores = self.qual - i = 0 + raise SeqError, "seq is nil" if self.seq.nil? + raise SeqError, "qual is nil" if self.qual.nil? + raise SeqError, "Bad cutoff value: #{cutoff}" unless (SCORE_MIN .. SCORE_MAX).include? cutoff - scores.each_char do |score| - seq[i] = seq[i].downcase if score.ord - SCORE_ILLUMINA < cutoff - i += 1 - end + na_seq = NArray.to_na(self.seq, "byte") + na_qual = NArray.to_na(self.qual, "byte") + mask = (na_qual - SCORE_ILLUMINA) < cutoff - self.seq = seq + na_seq[mask] ^= ' '.ord + + self.seq = na_seq.to_s + + self end # Method to convert the quality scores from a specified base diff --git a/code_ruby/test/maasha/test_seq.rb b/code_ruby/test/maasha/test_seq.rb index a4d47ad..927937e 100755 --- a/code_ruby/test/maasha/test_seq.rb +++ b/code_ruby/test/maasha/test_seq.rb @@ -509,6 +509,74 @@ class TestSeq < Test::Unit::TestCase @entry.seq = "--AAAa" assert_equal(25.00, @entry.soft_mask) end + + def test_Seq_mask_seq_hard_bang_with_nil_seq_raises + @entry.seq = nil + @entry.qual = "" + + assert_raise(SeqError) { @entry.mask_seq_hard!(20) } + end + + def test_Seq_mask_seq_hard_bang_with_nil_qual_raises + @entry.seq = "" + @entry.qual = nil + + assert_raise(SeqError) { @entry.mask_seq_hard!(20) } + end + + def test_Seq_mask_seq_hard_bang_with_bad_cutoff_raises + assert_raise(SeqError) { @entry.mask_seq_hard!(-1) } + assert_raise(SeqError) { @entry.mask_seq_hard!(41) } + end + + def test_Seq_mask_seq_hard_bang_with_OK_cutoff_dont_raise + @entry.seq = "ATCG" + @entry.qual = "RSTU" + + assert_nothing_raised { @entry.mask_seq_hard!(0) } + assert_nothing_raised { @entry.mask_seq_hard!(40) } + end + + def test_Seq_mask_seq_hard_bang_returns_correctly + @entry.seq = "ATCG" + @entry.qual = "RSTU" + + assert_equal("NNCG", @entry.mask_seq_hard!(20).seq) + end + + def test_Seq_mask_seq_soft_bang_with_nil_seq_raises + @entry.seq = nil + @entry.qual = "" + + assert_raise(SeqError) { @entry.mask_seq_soft!(20) } + end + + def test_Seq_mask_seq_soft_bang_with_nil_qual_raises + @entry.seq = "" + @entry.qual = nil + + assert_raise(SeqError) { @entry.mask_seq_soft!(20) } + end + + def test_Seq_mask_seq_soft_bang_returns_correctly + @entry.seq = "ATCG" + @entry.qual = "RSTU" + + assert_equal("atCG", @entry.mask_seq_soft!(20).seq) + end + + def test_Seq_mask_seq_soft_bang_with_bad_cutoff_raises + assert_raise(SeqError) { @entry.mask_seq_soft!(-1) } + assert_raise(SeqError) { @entry.mask_seq_soft!(41) } + end + + def test_Seq_mask_seq_soft_bang_with_OK_cutoff_dont_raise + @entry.seq = "ATCG" + @entry.qual = "RSTU" + + assert_nothing_raised { @entry.mask_seq_soft!(0) } + assert_nothing_raised { @entry.mask_seq_soft!(40) } + end end -- 2.39.2