# Residue alphabets (lower case).
DNA = %w[a t c g].freeze
RNA = %w[a u c g].freeze
PROTEIN = %w[f l s y c w p h q r i m t n k v a d e g].freeze

# Characters counted as indels; exactly one character per entry.
INDELS = %w[. - _ ~].freeze

# Quality scores bases
SCORE_PHRED = 33
# Make `len` an alias for `length`.
alias len length
# Return the number of indels in a sequence.
#
# Every character of the sequence that occurs in any INDELS entry is
# counted as one indel.
def indels
  # Split the joined entries into single characters so that an entry
  # holding several characters still contributes each of them.
  indel_regex = Regexp.union(INDELS.join.chars.to_a)
  seq.scan(indel_regex).size
end

# Method that returns true if a given sequence type is DNA.
#
# NOTE(review): as written here, the result of the type comparison is
# unused and the method returns whatever `seq_new` evaluates to. The
# trailing `seq_new` line looks like it belongs to a different method -
# confirm against the full file.
def is_dna?
self.type == 'dna'
seq_new
end
# Tally the residues of the sequence.
#
# The sequence is upper cased before counting, so the keys are upper case
# characters. Returns a Hash mapping residue => count; looking up a residue
# that does not occur yields 0.
def composition
  seq.upcase.each_char.with_object(Hash.new(0)) do |residue, counts|
    counts[residue] += 1
  end
end

# Method that returns the percentage of hard masked residues
# or N's in a sequence.
#
# Both "N" and "n" are counted. Indels are excluded from the denominator.
# Returns a Float rounded to two decimals, or 0.0 when the sequence holds
# no non-indel residues (instead of dividing 0.0 by 0.0, which gives NaN).
def hard_mask
  residues = len - indels
  return 0.0 unless residues > 0

  (seq.count('Nn').to_f / residues.to_f * 100).round(2)
end

# Method that returns the percentage of soft masked residues
# or lower cased residues in a sequence.
#
# Characters a-z are counted. Indels are excluded from the denominator.
# Returns a Float rounded to two decimals, or 0.0 when the sequence holds
# no non-indel residues (instead of dividing 0.0 by 0.0, which gives NaN).
def soft_mask
  residues = len - indels
  return 0.0 unless residues > 0

  (seq.count('a-z').to_f / residues.to_f * 100).round(2)
end

# Method to convert the quality scores from a specified base
# to another base.
#
# NOTE(review): the body visible here replaces self with the result of
# `wrap(width, delimit)`; `width` and `delimit` are not parameters of this
# method and nothing here touches quality scores. The body looks like it
# belongs to a different method - confirm against the full file.
def convert_phred2illumina!
self.replace(self.wrap(width, delimit))
end
- # Method that generates a random sequence of a given length by picking from self.residues; raises ArgumentError unless length is greater than zero.
- def generate(length)
- raise ArgumentError, "Cannot generate negative sequence length: #{length}." if length <= 0
-
- alph = self.residues
- Array.new(length) { alph[rand(alph.size)] }.join("")
- end
-
# Method that replaces sequence with a randomly generated sequence of a given length.
#
# NOTE(review): after the replace, the body calls
# `assert(@entry.is_protein? == true)`, which reads like a test assertion
# and becomes this method's return value. It looks like it belongs to a
# different definition - confirm against the full file.
def generate!(length)
self.replace(self.generate(length))
assert(@entry.is_protein? == true)
end
# length must report 4 for the four-residue sequence 'ATCG'.
def test_Seq_length_is_correct
  @entry.seq = 'ATCG'
  assert_equal(4, @entry.length)
end
# The four trailing characters ".", "-", "~" and "_" must each be counted
# as an indel.
def test_Seq_indels_is_correct
  sequence = 'ATCG.-~_'
  @entry.seq = sequence
  assert_equal(4, @entry.indels)
end

# With only the type set to 'dna', to_rna must raise SeqError (per the
# test name, because no sequence is present).
def test_Seq_to_rna_raises_if_no_sequence
  @entry.type = 'dna'
  assert_raise(SeqError) do
    @entry.to_rna
  end
end
end
# composition must report the count of every residue present and 0 for a
# residue ("X") that does not occur.
def test_Seq_composition_returns_correctly
  @entry.seq = "AAAATTTCCG"
  expected = { "A" => 4, "T" => 3, "C" => 2, "G" => 1, "X" => 0 }
  expected.each do |residue, count|
    assert_equal(count, @entry.composition[residue])
  end
end

# "N" and "n" are 2 of the 6 non-indel characters, i.e. 33.33 percent.
def test_Seq_hard_mask_returns_correctly
  @entry.seq = "--AAAANn"
  percent = @entry.hard_mask
  assert_equal(33.33, percent)
end

# The single lower case "a" is 1 of the 4 non-indel characters, i.e. 25 percent.
def test_Seq_soft_mask_returns_correctly
  @entry.seq = "--AAAa"
  percent = @entry.soft_mask
  assert_equal(25.00, percent)
end

# Digest.new must raise DigestError when called with the pattern "X".
def test_Digest_new_raises_on_bad_pattern_residue
  assert_raise(DigestError) do
    Digest.new(@entry, "X", 4)
  end
end