From c50a8848af62038cbde81ae901e8e18cf35f67c7 Mon Sep 17 00:00:00 2001
From: martinahansen <martinahansen@74ccb610-7750-0410-82ae-013aeee3265d>
Date: Tue, 10 Apr 2012 19:04:36 +0000
Subject: [PATCH] upgraded read_fastq to handle Illumina 1.8 better: replaced the solexa flag with an encoding option that defaults to auto-detection

git-svn-id: http://biopieces.googlecode.com/svn/trunk@1787 74ccb610-7750-0410-82ae-013aeee3265d
---
 bp_bin/read_fastq | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/bp_bin/read_fastq b/bp_bin/read_fastq
index 8fec43b..4a94957 100755
--- a/bp_bin/read_fastq
+++ b/bp_bin/read_fastq
@@ -31,17 +31,18 @@
 require 'maasha/biopieces'
 require 'maasha/fastq'
 
-casts = []
-casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'num',     :short=>'n', :type=>'uint',   :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
-casts << {:long=>'solexa',  :short=>'s', :type=>'flag',   :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+allowed_enc = 'auto,sanger,solexa,illumina13,illumina15,illumina18'
 
-PHRED_SCORES = Regexp.new('[!"#$%&\'()*+,-./0123456789:]')
+casts = []
+casts << {:long=>'data_in',  :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil,    :allowed=>nil,         :disallowed=>nil}
+casts << {:long=>'num',      :short=>'n', :type=>'uint',   :mandatory=>false, :default=>nil,    :allowed=>nil,         :disallowed=>'0'}
+casts << {:long=>'encoding', :short=>'e', :type=>'string', :mandatory=>false, :default=>'auto', :allowed=>allowed_enc, :disallowed=>nil}
 
 options = Biopieces.options_parse(ARGV, casts)
 
-num  = 0
-last = false
+num      = 0
+last     = false
+encoding = options[:encoding]
 
 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
   unless options[:data_in].first == '-'
@@ -54,8 +55,17 @@ Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
     options[:data_in].each do |file|
       Fastq.open(file, mode='r') do |fastq|
         fastq.each do |entry|
-          entry.convert_phred2illumina!  if entry.qual.match PHRED_SCORES
-          entry.convert_solexa2illumina! if options[:solexa]
+          if encoding == 'auto' # detect once: 'encoding' is reassigned below, so later entries skip this branch
+            if entry.qual.match(/[!-:]/) # any score char in '!'..':' (ASCII 33-58): sanger or illumina18
+              encoding = 'illumina18'
+            elsif entry.qual.match(/[K-h]/) # any score char in 'K'..'h' (ASCII 75-104): solexa or illumina13 or illumina15
+              encoding = 'illumina13'
+            else # no char in either range, e.g. every score lies within ';'..'J' (ASCII 59-74)
+              raise SeqError, "Could not auto-detect quality score encoding"
+            end
+          end
+
+          entry.convert_scores!(encoding, 'illumina13')
           output.puts entry.to_bp
           num += 1
 
-- 
2.39.5