require 'maasha/biopieces'
require 'maasha/fastq'
-casts = []
-casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
-casts << {:long=>'solexa', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+allowed_enc = 'auto,sanger,solexa,illumina13,illumina15,illumina18' # comma-separated names passed as :allowed for the 'encoding' option
-PHRED_SCORES = Regexp.new('[!"#$%&\'()*+,-./0123456789:]')
+casts = [] # option specifications handed to Biopieces.options_parse
+casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'encoding', :short=>'e', :type=>'string', :mandatory=>false, :default=>'auto', :allowed=>allowed_enc, :disallowed=>nil} # added where the 'solexa' flag was removed; defaults to 'auto'
options = Biopieces.options_parse(ARGV, casts)
-num = 0
-last = false
+num = 0 # incremented after each entry is written to output
+last = false
+encoding = options[:encoding] # reassigned inside the read loop when 'auto' is resolved to a concrete encoding
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
unless options[:data_in].first == '-'
options[:data_in].each do |file|
Fastq.open(file, mode='r') do |fastq|
fastq.each do |entry|
- entry.convert_phred2illumina! if entry.qual.match PHRED_SCORES
- entry.convert_solexa2illumina! if options[:solexa]
+ if encoding == 'auto'
+ if entry.qual.match(/[!-:]/) # sanger or illumina18
+ encoding = 'illumina18'
+ elsif entry.qual.match(/[K-h]/) # solexa or illumina13 or illumina15
+ encoding = 'illumina13'
+ else
+ raise SeqError, "Could not auto-detect quality score encoding"
+ end
+ end
+
+ entry.convert_scores!(encoding, 'illumina13')
output.puts entry.to_bp
num += 1