From b2cec0d7e409c133c2af442c861470a6d383b502 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Tue, 12 Oct 2010 14:31:38 +0000 Subject: [PATCH] added solexa switch to read_fastq git-svn-id: http://biopieces.googlecode.com/svn/trunk@1110 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/read_fastq | 4 +++- bp_test/out/read_fastq.out.3 | 5 +++++ bp_test/test/test_read_fastq | 4 ++++ code_ruby/Maasha/lib/seq.rb | 36 ++++++++++++++++++++++++++++++------ 4 files changed, 42 insertions(+), 7 deletions(-) create mode 100644 bp_test/out/read_fastq.out.3 diff --git a/bp_bin/read_fastq b/bp_bin/read_fastq index cb114ff..fd88bf0 100755 --- a/bp_bin/read_fastq +++ b/bp_bin/read_fastq @@ -34,6 +34,7 @@ require 'fastq' casts = [] casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'solexa', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} PHRED_SCORES = Regexp.new('[!"#$%&\'()*+,-./0123456789:]') @@ -52,7 +53,8 @@ if options.has_key? :data_in options[:data_in].each do |file| Fastq.open(file, mode='r') do |fastq| fastq.each do |entry| - entry.convert! if entry.qual.match PHRED_SCORES + entry.convert_phred2illumina! if entry.qual.match PHRED_SCORES + entry.convert_solexa2illumina! if options[:solexa] bp.puts entry.to_bp num += 1 diff --git a/bp_test/out/read_fastq.out.3 b/bp_test/out/read_fastq.out.3 new file mode 100644 index 0000000..ae6f2a8 --- /dev/null +++ b/bp_test/out/read_fastq.out.3 @@ -0,0 +1,5 @@ +SEQ_NAME: sanger +SEQ: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +SEQ_LEN: 41 +SCORES: CCDDEFFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh +--- diff --git a/bp_test/test/test_read_fastq b/bp_test/test/test_read_fastq index 82b220c..0e3f3df 100755 --- a/bp_test/test/test_read_fastq +++ b/bp_test/test/test_read_fastq @@ -13,3 +13,7 @@ clean run "$bp -i $in -n 1 -O $tmp" assert_no_diff $tmp $out.2 clean + +run "$bp -i $in -n 1 -s -O $tmp" +assert_no_diff $tmp $out.3 +clean diff --git a/code_ruby/Maasha/lib/seq.rb b/code_ruby/Maasha/lib/seq.rb index b7cc44b..52f7e44 100644 --- a/code_ruby/Maasha/lib/seq.rb +++ b/code_ruby/Maasha/lib/seq.rb @@ -4,8 +4,8 @@ RNA = %w[a u c g] PROTEIN = %w[f l s y c w p h q r i m t n k v a d e g] # Quality scores bases -SCORE_PHRED = 33 -SCORE_SOLEXA = 64 +SCORE_PHRED = 33 +SCORE_ILLUMINA = 64 # Error class for all exceptions to do with Seq. class SeqError < StandardError; end @@ -17,7 +17,7 @@ class Seq # - seq_name: Name of the sequence. # - seq: The sequence. # - type: The sequence type - DNA, RNA, or protein - # - qual: A Solexa type quality scores string. + # - qual: An Illumina type quality scores string. def initialize(seq_name = nil, seq = nil, type = nil, qual = nil) @seq_name = seq_name @seq = seq @@ -143,14 +143,38 @@ class Seq # Method to convert the quality scores from a specified base # to another base. - def convert! + def convert_phred2illumina! self.qual.gsub! /./ do |score| score_phred = score.ord - SCORE_PHRED raise SeqError, "Bad Phred score: #{score} (#{score_phred})" unless (0 .. 40).include? score_phred - score_solexa = score_phred + SCORE_SOLEXA - score = score_solexa.chr + score_illumina = score_phred + SCORE_ILLUMINA + score = score_illumina.chr end end + + # Method to convert the quality scores from Solexa odd/ratio to + # Illumina format. + def convert_solexa2illumina! + self.qual.gsub! /./ do |score| + score = solexa_char2illumina_char(score) + end + end + + private + + # Method to convert a Solexa score (odd ratio) to + # a phred (probability) integer score. + def solexa2phred(score) + (10.0 * Math.log(10.0 ** (score / 10.0) + 1.0, 10)).to_i + end + + # Method to convert a Solexa score encoded using base + # 64 ASCII to a Phred score encoded using base 64 ASCII. + def solexa_char2illumina_char(char) + score_solexa = char.ord - 64 + score_phred = solexa2phred(score_solexa) + (score_phred + 64).chr + end end # Error class for all exceptions to do with Digest. -- 2.39.2