casts = []
casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'solexa', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
PHRED_SCORES = Regexp.new('[!"#$%&\'()*+,-./0123456789:]')
options[:data_in].each do |file|
Fastq.open(file, mode='r') do |fastq|
fastq.each do |entry|
- entry.convert! if entry.qual.match PHRED_SCORES
+ entry.convert_phred2illumina! if entry.qual.match PHRED_SCORES
+ entry.convert_solexa2illumina! if options[:solexa]
bp.puts entry.to_bp
num += 1
PROTEIN = %w[f l s y c w p h q r i m t n k v a d e g]
# Quality scores bases
-SCORE_PHRED = 33
-SCORE_SOLEXA = 64
+SCORE_PHRED = 33
+SCORE_ILLUMINA = 64
# Error class for all exceptions to do with Seq.
class SeqError < StandardError; end
# - seq_name: Name of the sequence.
# - seq: The sequence.
# - type: The sequence type - DNA, RNA, or protein
- # - qual: A Solexa type quality scores string.
+ # - qual: An Illumina type quality scores string.
def initialize(seq_name = nil, seq = nil, type = nil, qual = nil)
@seq_name = seq_name
@seq = seq
# Method to convert the quality scores from a specified base
# to another base.
- def convert!
+ def convert_phred2illumina!
self.qual.gsub! /./ do |score|
score_phred = score.ord - SCORE_PHRED
raise SeqError, "Bad Phred score: #{score} (#{score_phred})" unless (0 .. 40).include? score_phred
- score_solexa = score_phred + SCORE_SOLEXA
- score = score_solexa.chr
+ score_illumina = score_phred + SCORE_ILLUMINA
+ score = score_illumina.chr
end
end
+
+ # Method to convert the quality scores from Solexa odd/ratio to
+ # Illumina format.
+ def convert_solexa2illumina!
+ self.qual.gsub! /./ do |score|
+ score = solexa_char2illumina_char(score)
+ end
+ end
+
+ private
+
+ # Method to convert a Solexa score (odd ratio) to
+ # a phred (probability) integer score.
+ def solexa2phred(score)
+ (10.0 * Math.log(10.0 ** (score / 10.0) + 1.0, 10)).to_i
+ end
+
+ # Method to convert a Solexa score encoded using base
+ # 64 ASCII to a Phred score encoded using base 64 ASCII.
+ def solexa_char2illumina_char(char)
+ score_solexa = char.ord - 64
+ score_phred = solexa2phred(score_solexa)
+ (score_phred + 64).chr
+ end
end
# Error class for all exceptions to do with Digest.