From: martinahansen Date: Tue, 10 Apr 2012 18:08:36 +0000 (+0000) Subject: making changes to accomodate Illumina18 qual scores X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=acc00ae480f88bfd38b1ebd60cae966130c3ddeb;p=biopieces.git making changes to accomodate Illumina18 qual scores git-svn-id: http://biopieces.googlecode.com/svn/trunk@1784 74ccb610-7750-0410-82ae-013aeee3265d --- diff --git a/code_ruby/lib/maasha/align.rb b/code_ruby/lib/maasha/align.rb index cdab55b..32532fe 100755 --- a/code_ruby/lib/maasha/align.rb +++ b/code_ruby/lib/maasha/align.rb @@ -223,7 +223,7 @@ class Align def na_add_entries @entries.each_with_index do |entry, i| @na_seq[true, i] = NArray.to_na(entry.seq.downcase.tr(TR_NUC, TR_HEX), "byte") - @na_qual[true, i] = NArray.to_na(entry.qual, "byte") - SCORE_ILLUMINA if @has_qual + @na_qual[true, i] = NArray.to_na(entry.qual, "byte") - SCORE_BASE if @has_qual end end @@ -336,7 +336,7 @@ class Align # Method to calculate a consensus quality from a Consensus object. def consensus_qual - (@na_qual.mean(1).round.to_type("byte") + SCORE_ILLUMINA).to_s + (@na_qual.mean(1).round.to_type("byte") + SCORE_BASE).to_s end end end diff --git a/code_ruby/lib/maasha/seq.rb b/code_ruby/lib/maasha/seq.rb index e4b0a94..2382218 100644 --- a/code_ruby/lib/maasha/seq.rb +++ b/code_ruby/lib/maasha/seq.rb @@ -36,10 +36,9 @@ PROTEIN = %w[f l s y c w p h q r i m t n k v a d e g] INDELS = %w[. - _ ~] # Quality scores bases -SCORE_PHRED = 33 -SCORE_ILLUMINA = 64 -SCORE_MIN = 0 -SCORE_MAX = 40 +SCORE_BASE = 64 +SCORE_MIN = 0 +SCORE_MAX = 40 # Error class for all exceptions to do with Seq. class SeqError < StandardError; end @@ -361,7 +360,7 @@ class Seq raise SeqError, "no quality score" if self.qual.nil? raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min - regex_right = Regexp.new("[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+$") + regex_right = Regexp.new("[#{(SCORE_BASE).chr}-#{(SCORE_BASE + min).chr}]+$") self.qual.match(regex_right) do |m| self.subseq!(0, $`.length) if $`.length > 0 @@ -375,7 +374,7 @@ class Seq raise SeqError, "no quality score" if self.qual.nil? raise SeqError, "minimum value: #{min} out of range #{SCORE_MIN} .. #{SCORE_MAX}" unless (SCORE_MIN .. SCORE_MAX).include? min - regex_left = Regexp.new("^[#{(SCORE_ILLUMINA).chr}-#{(SCORE_ILLUMINA + min).chr}]+") + regex_left = Regexp.new("^[#{(SCORE_BASE).chr}-#{(SCORE_BASE + min).chr}]+") self.qual.match(regex_left) do |m| self.subseq!(m.to_s.length, self.length - m.to_s.length) if self.length - m.to_s.length > 0 @@ -440,7 +439,7 @@ class Seq i = 0 scores.each_char do |score| - seq[i] = 'N' if score.ord - SCORE_ILLUMINA < cutoff + seq[i] = 'N' if score.ord - SCORE_BASE < cutoff i += 1 end @@ -456,7 +455,7 @@ class Seq na_seq = NArray.to_na(self.seq, "byte") na_qual = NArray.to_na(self.qual, "byte") - mask = (na_qual - SCORE_ILLUMINA) < cutoff + mask = (na_qual - SCORE_BASE) < cutoff mask *= na_seq.ne("-".ord) na_seq[mask] = 'N'.ord @@ -475,7 +474,7 @@ class Seq na_seq = NArray.to_na(self.seq, "byte") na_qual = NArray.to_na(self.qual, "byte") - mask = (na_qual - SCORE_ILLUMINA) < cutoff + mask = (na_qual - SCORE_BASE) < cutoff mask *= na_seq.ne("-".ord) na_seq[mask] ^= ' '.ord @@ -485,39 +484,38 @@ class Seq self end - # Method to convert the quality scores from a specified base - # to another base. - def convert_phred2illumina! - self.qual.gsub!(/./) do |score| - score_phred = score.ord - SCORE_PHRED - raise SeqError, "Bad Phred score: #{score} (#{score_phred})" unless (0 .. 41).include? score_phred - score_illumina = score_phred + SCORE_ILLUMINA - score = score_illumina.chr - end - end + # Method to convert quality scores inbetween formats. + # Sanger base 33, range 0-40 + # Solexa base 64, range -5-40 + # Illumina13 base 64, range 0-40 + # Illumina15 base 64, range 3-40 + # Illumina18 base 33, range 0-41 + def convert_scores!(from, to) + unless from == to + na_qual = NArray.to_na(self.qual, "byte") - # Method to convert the quality scores from Solexa odd/ratio to - # Illumina format. - def convert_solexa2illumina! - self.qual.gsub!(/./) do |score| - score = solexa_char2illumina_char(score) - end - end + case from.downcase + when "sanger" then na_qual -= 33 + when "solexa" then na_qual -= 64 + when "illumina13" then na_qual -= 64 + when "illumina15" then na_qual -= 64 + when "illumina18" then na_qual -= 33 + else raise SeqError, "unknown quality score encoding: #{from}" + end - private + case to.downcase + when "sanger" then na_qual += 33 + when "solexa" then na_qual += 64 + when "illumina13" then na_qual += 64 + when "illumina15" then na_qual += 64 + when "illumina18" then na_qual += 33 + else raise SeqError, "unknown quality score encoding: #{from}" + end - # Method to convert a Solexa score (odd ratio) to - # a phred (probability) integer score. - def solexa2phred(score) - (10.0 * Math.log(10.0 ** (score / 10.0) + 1.0, 10)).to_i - end + self.qual = na_qual.to_s + end - # Method to convert a Solexa score encoded using base - # 64 ASCII to a Phred score encoded using base 64 ASCII. - def solexa_char2illumina_char(char) - score_solexa = char.ord - 64 - score_phred = solexa2phred(score_solexa) - (score_phred + 64).chr + self end end