X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Ftrim_seq;h=c89eefb6d6ef95c18c0f89279a6370956e7d3874;hb=48bea5c28b89dc5586d0bddb338ccd6ba23aa1f9;hp=58967e34b666e4522ba5808be6c2cd521ea8c5a0;hpb=fd0247fd7e58675684dcba60322249dd27337f45;p=biopieces.git diff --git a/bp_bin/trim_seq b/bp_bin/trim_seq index 58967e3..c89eefb 100755 --- a/bp_bin/trim_seq +++ b/bp_bin/trim_seq @@ -1,6 +1,6 @@ #!/usr/bin/env ruby -# Copyright (C) 2007-2011 Martin A. Hansen. +# Copyright (C) 2007-2012 Martin A. Hansen. # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License @@ -24,56 +24,35 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Trim sequence ends for residues with a low quality score. +# Trim sequence ends removing residues with a low quality score. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< require 'maasha/biopieces' +require 'maasha/seq' require 'pp' casts = [] -casts << {:long=>'min', :short=>'m', :type=>'uint', :mandatory=>true, :default=>15, :allowed=>nil, :disallowed=>'0'} -casts << {:long=>'trim', :short=>'t', :type=>'string', :mandatory=>true, :default=>'both', :allowed=>'left,right,both', :disallowed=>nil} +casts << {:long=>'min_qual', :short=>'m', :type=>'uint', :mandatory=>true, :default=>20, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'min_len', :short=>'l', :type=>'uint', :mandatory=>true, :default=>3, :allowed=>nil, :disallowed=>'0'} +casts << {:long=>'trim', :short=>'t', :type=>'string', :mandatory=>true, :default=>'both', :allowed=>'left,right,both', :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) -ILLUMINA_BASE = 64 - -regex_left = Regexp.new("^[#{(ILLUMINA_BASE).chr}-#{(ILLUMINA_BASE + options[:min]).chr}]+") -regex_right = Regexp.new("[#{(ILLUMINA_BASE).chr}-#{(ILLUMINA_BASE + options[:min]).chr}]+$") +trim = options[:trim].to_sym Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| - if record.has_key? :SEQ and record.has_key? :SCORES - case options[:trim] - when /both/ - record[:SCORES].match(regex_right) do |m| - record[:SEQ] = record[:SEQ][0 ... record[:SEQ].length - m.to_s.length] - record[:SCORES] = $` - record[:SEQ_LEN] = record[:SEQ].length - record[:TRIM_LEFT] = m.to_s.length - end - record[:SCORES].match(regex_left) do |m| - record[:SEQ] = record[:SEQ][m.to_s.length ... record[:SEQ].length] - record[:SCORES] = $' - record[:SEQ_LEN] = record[:SEQ].length - record[:TRIM_RIGHT] = $'.length - 1 - end - when /left/ - record[:SCORES].match(regex_left) do |m| - record[:SEQ] = record[:SEQ][m.to_s.length ... record[:SEQ].length] - record[:SCORES] = $' - record[:SEQ_LEN] = record[:SEQ].length - record[:TRIM_LEFT] = m.to_s.length - end - when /right/ - record[:SCORES].match(regex_right) do |m| - record[:SEQ] = record[:SEQ][0 ... record[:SEQ].length - m.to_s.length] - record[:SCORES] = $` - record[:SEQ_LEN] = record[:SEQ].length - record[:TRIM_RIGHT] = $`.length - 1 - end + if record[:SEQ] and record[:SCORES] + entry = Seq.new_bp(record) + + case trim + when :both then entry.quality_trim!(options[:min_qual], options[:min_len]) + when :left then entry.quality_trim_left!(options[:min_qual], options[:min_len]) + when :right then entry.quality_trim_right!(options[:min_qual], options[:min_len]) end + + record.merge! entry.to_bp end output.puts record