# Quality scores bases
SCORE_PHRED = 33
SCORE_ILLUMINA = 64
+SCORE_MIN = 0 # lower bound of the cutoff range accepted by the quality_trim_* methods
+SCORE_MAX = 40 # upper bound of the cutoff range accepted by the quality_trim_* methods
# Error class for all exceptions to do with Seq.
class SeqError < StandardError; end
raise SeqError, "Cannot complement 0 length sequence" if self.length == 0
if self.is_dna?
- self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn' )
+ self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'TCGAAYRWSKMDHBVNtcgaayrwskmdhbvn')
elsif self.is_rna?
- self.seq.tr!( 'AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn' )
+ self.seq.tr!('AGCUTRYWSMKHDVBNagcutrywsmkhdvbn', 'UCGAAYRWSKMDHBVNucgaayrwskmdhbvn')
else
raise SeqError, "Cannot complement sequence type: #{self.type}"
end
self.subseq(start, length)
end
- def quality_trim(min)
- end
-
def quality_trim_right(min)
+ raise SeqError, "no sequence" if self.seq.nil?
+ raise SeqError, "no quality score" if self.qual.nil?
+ raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min
+
+ regex_right = Regexp.new("[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+$")
+
+ self.qual.match(regex_right) do |m|
+ self.subseq!(0, $`.length) if $`.length > 0
+ end
+
+ self
end
def quality_trim_left(min)
- end
+ raise SeqError, "no sequence" if self.seq.nil?
+ raise SeqError, "no quality score" if self.qual.nil?
+ raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min
- def quality_trim!(min)
- end
+ regex_left = Regexp.new("^[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+")
+
+ self.qual.match(regex_left) do |m|
+ self.subseq!(m.to_s.length, self.length - m.to_s.length) if self.length - m.to_s.length > 0
+ end
- def quality_trim_right!(min)
+ self
end
- def quality_trim_left!(min)
+ # Trim low-quality positions from both ends of the entry, in place.
+ #
+ # min - cutoff handed unchanged to quality_trim_right, then quality_trim_left.
+ #
+ # Returns self.
+ def quality_trim(min)
+   # quality_trim_right returns self, so the left trim chains onto the same entry.
+   self.quality_trim_right(min).quality_trim_left(min)
+   self
end
# Method that returns the residue compositions of a sequence in
assert_equal("ATCG", @entry.subseq_rand(4).seq)
end
+ def test_Seq_quality_trim_right_with_missing_seq_raises
+ @entry.qual = "hhhh"
+ assert_raise(SeqError) { @entry.quality_trim_right(20) }
+ end
+
+ def test_Seq_quality_trim_right_with_missing_qual_raises
+ @entry.seq = "ATCG"
+ assert_raise(SeqError) { @entry.quality_trim_right(20) }
+ end
+
+ def test_Seq_quality_trim_right_with_bad_min_raises
+ @entry.seq = "ATCG"
+ @entry.qual = "hhhh"
+
+ [-1, 41].each do |min|
+ assert_raise(SeqError) { @entry.quality_trim_right(min) }
+ end
+ end
+
+ def test_Seq_quality_trim_right_with_ok_min_dont_raise
+ @entry.seq = "ATCG"
+ @entry.qual = "hhhh"
+
+ [0, 40].each do |min|
+ assert_nothing_raised { @entry.quality_trim_right(min) }
+ end
+ end
+
+ def test_Seq_quality_trim_right_returns_correctly
+ @entry.seq = "AAAAATCG"
+ @entry.qual = "hhhhhgfe"
+ @entry.quality_trim_right(38)
+ assert_equal("AAAAAT", @entry.seq)
+ assert_equal("hhhhhg", @entry.qual)
+ end
+
+ def test_Seq_quality_trim_left_with_missing_seq_raises
+ @entry.qual = "hhhh"
+ assert_raise(SeqError) { @entry.quality_trim_left(20) }
+ end
+
+ def test_Seq_quality_trim_left_with_missing_qual_raises
+ @entry.seq = "ATCG"
+ assert_raise(SeqError) { @entry.quality_trim_left(20) }
+ end
+
+ def test_Seq_quality_trim_left_with_bad_min_raises
+ @entry.seq = "ATCG"
+ @entry.qual = "hhhh"
+
+ [-1, 41].each do |min|
+ assert_raise(SeqError) { @entry.quality_trim_left(min) }
+ end
+ end
+
+ def test_Seq_quality_trim_left_with_ok_min_dont_raise
+ @entry.seq = "ATCG"
+ @entry.qual = "hhhh"
+
+ [0, 40].each do |min|
+ assert_nothing_raised { @entry.quality_trim_left(min) }
+ end
+ end
+
+ def test_Seq_quality_trim_left_returns_correctly
+ @entry.seq = "GCTAAAAA"
+ @entry.qual = "efghhhhh"
+ @entry.quality_trim_left(38)
+ assert_equal("TAAAAA", @entry.seq)
+ assert_equal("ghhhhh", @entry.qual)
+ end
+
+ def test_Seq_quality_trim_returns_correctly
+ @entry.seq = "GCTAAAAAGTG"
+ @entry.qual = "efghhhhhgfe"
+ @entry.quality_trim(38)
+ assert_equal("TAAAAAG", @entry.seq)
+ assert_equal("ghhhhhg", @entry.qual)
+ end
+
def test_Seq_composition_returns_correctly
@entry.seq = "AAAATTTCCG"
assert_equal(4, @entry.composition["A"])