#!/usr/bin/env ruby
-# Copyright (C) 2007-2010 Martin A. Hansen.
+# Copyright (C) 2007-2013 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
require 'maasha/biopieces'
require 'maasha/fastq'
# Command-line option specifications: one hash per option is appended to
# `casts`, which is later handed to the option parser together with ARGV.
# Lines marked '-' belong to the previous revision, '+' to the current one.
-casts = []
-casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
-casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
-casts << {:long=>'solexa', :short=>'s', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
# Comma-separated list used as the :allowed value of the 'encoding' option.
+allowed_enc = 'auto,base_33,base_64'
# Previous revision only: character class covering every character from '!'
# through ':'. Entries whose quality string contained such a character were
# passed through convert_phred2illumina!.
-PHRED_SCORES = Regexp.new('[!"#$%&\'()*+,-./0123456789:]')
# Current option set: data_in (-i), num (-n, '0' disallowed) and
# encoding (-e, defaults to 'auto'). The previous 'solexa' flag is not
# part of this set.
+casts = []
+casts << {:long=>'data_in', :short=>'i', :type=>'files!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'num', :short=>'n', :type=>'uint', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'encoding', :short=>'e', :type=>'string', :mandatory=>false, :default=>'auto', :allowed=>allowed_enc, :disallowed=>nil}
# Option parsing. The previous revision created a Biopieces instance and
# called #parse on it; the current revision calls Biopieces.options_parse
# directly with ARGV and the option specifications in `casts`.
-bp = Biopieces.new
+options = Biopieces.options_parse(ARGV, casts)
-options = bp.parse(ARGV, casts)
-
# Previous revision only: every record yielded by bp.each_record was passed
# to bp.puts ahead of the FASTQ reading code.
-bp.each_record do |record|
- bp.puts record
-end
# Upper bound for quality validation: the qual_valid? check further down is
# only executed while the entry counter is below MAX_TEST.
+MAX_TEST = 1_000
# Main body: reads FASTQ entries from the files in options[:data_in] and
# emits each one in to_bp form.
# num  - running count of entries emitted so far; it is not reset per file.
# last - set once options[:num] entries have been emitted, so that the
#        per-file loop stops in addition to the per-entry loop.
num = 0
last = false
# Previous revision (lines marked '-'): conversion was driven by the
# PHRED_SCORES match and the 'solexa' flag, and output went through bp.puts.
-if options.has_key? :data_in
- options[:data_in].each do |file|
- Fastq.open(file, mode='r') do |fastq|
- fastq.each do |entry|
- entry.convert_phred2illumina! if entry.qual.match PHRED_SCORES
- entry.convert_solexa2illumina! if options[:solexa]
- bp.puts entry.to_bp
- num += 1
-
- if options.has_key? :num and options[:num] == num
- last = true
- break
# Current revision (lines marked '+'): Biopieces.open yields the input and
# output objects for options[:stream_in] and options[:stream_out].
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
# Existing records are copied from input to output first, unless the first
# data_in element is '-'.
# NOTE(review): presumably '-' means the FASTQ data itself arrives on
# stdin, so the stream must not be consumed here - confirm.
+ unless options[:data_in] and options[:data_in].first == '-'
+ input.each_record do |record|
+ output.puts record
+ end
+ end
+
+ if options[:data_in]
+ options[:data_in].each do |file|
# The encoding is re-read from the options for every file, so the
# auto-detection below starts afresh for each file.
+ encoding = options[:encoding].downcase.to_sym
+
# `mode='r'` is a local-variable assignment used as a positional argument
# (it passes 'r'); it is not a keyword argument.
+ Fastq.open(file, mode='r') do |fastq|
+ fastq.each do |entry|
# While the encoding is still :auto, the current entry decides it; base_33
# is tested before base_64. Once set, later entries of the same file skip
# this branch.
+ if encoding == :auto
+ if entry.qual_base33?
+ encoding = :base_33
+ elsif entry.qual_base64?
+ encoding = :base_64
+ else
+ raise SeqError, "Could not auto-detect quality score encoding"
+ end
+ end
+
# NOTE(review): presumably converts the scores from `encoding` to base_33
# and then forces them into the valid base_33 range - confirm against
# maasha/fastq.
+ entry.qual_convert!(encoding, :base_33)
+ entry.qual_coerce!(:base_33)
+
# Validation only runs while fewer than MAX_TEST entries have been emitted.
+ if num < MAX_TEST
+ raise SeqError, "Quality score outside valid range" unless entry.qual_valid?(:base_33)
+ end
+
+ output.puts entry.to_bp
+ num += 1
+
# Stop once options[:num] entries have been emitted; if options[:num] is
# nil this comparison is never true and all entries are read.
+ if options[:num] == num
+ last = true
+ break
+ end
end
end
- end
- break if last
# The `break` above only leaves the per-entry loop; this one leaves the
# per-file loop as well.
+ break if last
+ end
end
end