PROTEIN = %w[f l s y c w p h q r i m t n k v a d e g]
# Quality scores bases
-SCORE_PHRED = 33
-SCORE_SOLEXA = 64
+SCORE_PHRED = 33
+SCORE_ILLUMINA = 64
# Error class for all exceptions to do with Seq.
class SeqError < StandardError; end
# - seq_name: Name of the sequence.
# - seq: The sequence.
# - type: The sequence type - DNA, RNA, or protein
- # - qual: A Solexa type quality scores string.
+ # - qual: An Illumina type quality scores string.
def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
@seq_name = seq_name
@seq = seq
# Method to convert the quality scores from a specified base
# to another base.
- def convert!
+ def convert_phred2illumina!
self.qual.gsub! /./ do |score|
score_phred = score.ord - SCORE_PHRED
raise SeqError, "Bad Phred score: #{score} (#{score_phred})" unless (0 .. 40).include? score_phred
- score_solexa = score_phred + SCORE_SOLEXA
- score = score_solexa.chr
+ score_illumina = score_phred + SCORE_ILLUMINA
+ score = score_illumina.chr
end
end
+
+ # Method to convert the quality scores from Solexa odd/ratio to
+ # Illumina format.
+ def convert_solexa2illumina!
+ self.qual.gsub! /./ do |score|
+ score = solexa_char2illumina_char(score)
+ end
+ end
+
+ private
+
+ # Method to convert a Solexa score (odd ratio) to
+ # a phred (probability) integer score.
+ def solexa2phred(score)
+ (10.0 * Math.log(10.0 ** (score / 10.0) + 1.0, 10)).to_i
+ end
+
+ # Method to convert a Solexa score encoded using base
+ # 64 ASCII to a Phred score encoded using base 64 ASCII.
+ def solexa_char2illumina_char(char)
+ score_solexa = char.ord - 64
+ score_phred = solexa2phred(score_solexa)
+ (score_phred + 64).chr
+ end
end
# Error class for all exceptions to do with Digest.