#!/usr/bin/env ruby
-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# Trim sequence ends for residues with a low quality score.
+# Trim sequence ends by removing residues with a low quality score.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
require 'maasha/biopieces'
+require 'maasha/seq'
require 'pp'
casts = []
-casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>true, :default=>15, :allowed=>nil, :disallowed=>'0'}
-casts << {:long=>'trim', :short=>'t', :type=>'string', :mandatory=>true, :default=>'both', :allowed=>'left,right,both', :disallowed=>nil}
+casts << {:long=>'min_qual', :short=>'m', :type=>'uint', :mandatory=>true, :default=>20, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'min_len', :short=>'l', :type=>'uint', :mandatory=>true, :default=>3, :allowed=>nil, :disallowed=>'0'}
+casts << {:long=>'trim', :short=>'t', :type=>'string', :mandatory=>true, :default=>'both', :allowed=>'left,right,both', :disallowed=>nil}
options = Biopieces.options_parse(ARGV, casts)
-ILLUMINA_BASE = 64
-
-regex_left = Regexp.new("^[#{(ILLUMINA_BASE).chr}-#{(ILLUMINA_BASE + options[:min]).chr}]+")
-regex_right = Regexp.new("[#{(ILLUMINA_BASE).chr}-#{(ILLUMINA_BASE + options[:min]).chr}]+$")
-
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
if record.has_key? :SEQ and record.has_key? :SCORES
+ entry = Seq.new_bp(record)
+
case options[:trim]
- when /both/
- record[:SCORES].match(regex_right) do |m|
- record[:SEQ] = record[:SEQ][0 ... record[:SEQ].length - m.to_s.length]
- record[:SCORES] = $`
- record[:SEQ_LEN] = record[:SEQ].length
- record[:TRIM_LEFT] = m.to_s.length
- end
- record[:SCORES].match(regex_left) do |m|
- record[:SEQ] = record[:SEQ][m.to_s.length ... record[:SEQ].length]
- record[:SCORES] = $'
- record[:SEQ_LEN] = record[:SEQ].length
- record[:TRIM_RIGHT] = $'.length - 1
- end
- when /left/
- record[:SCORES].match(regex_left) do |m|
- record[:SEQ] = record[:SEQ][m.to_s.length ... record[:SEQ].length]
- record[:SCORES] = $'
- record[:SEQ_LEN] = record[:SEQ].length
- record[:TRIM_LEFT] = m.to_s.length
- end
- when /right/
- record[:SCORES].match(regex_right) do |m|
- record[:SEQ] = record[:SEQ][0 ... record[:SEQ].length - m.to_s.length]
- record[:SCORES] = $`
- record[:SEQ_LEN] = record[:SEQ].length
- record[:TRIM_RIGHT] = $`.length - 1
- end
+ when /both/ then entry.quality_trim!(options[:min_qual], options[:min_len])
+ when /left/ then entry.quality_trim_left!(options[:min_qual], options[:min_len])
+ when /right/ then entry.quality_trim_right!(options[:min_qual], options[:min_len])
end
+
+ record.merge! entry.to_bp
end
output.puts record