git.donarmstrong.com Git - biopieces.git/blobdiff - bp_bin/trim_seq
refactoring of assemble_pairs
[biopieces.git] / bp_bin / trim_seq
index 58967e34b666e4522ba5808be6c2cd521ea8c5a0..c89eefb6d6ef95c18c0f89279a6370956e7d3874 100755 (executable)
@@ -1,6 +1,6 @@
 #!/usr/bin/env ruby
 
-# Copyright (C) 2007-2011 Martin A. Hansen.
+# Copyright (C) 2007-2012 Martin A. Hansen.
 
 # This program is free software; you can redistribute it and/or
 # modify it under the terms of the GNU General Public License
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
-# Trim sequence ends for residues with a low quality score.
+# Trim sequence ends removing residues with a low quality score.
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 require 'maasha/biopieces'
+require 'maasha/seq'
 require 'pp'
 
 casts = []
-casts << {:long=>'min',  :short=>'m', :type=>'uint',   :mandatory=>true, :default=>15,     :allowed=>nil, :disallowed=>'0'}
-casts << {:long=>'trim', :short=>'t', :type=>'string', :mandatory=>true, :default=>'both', :allowed=>'left,right,both', :disallowed=>nil}
+casts << {:long=>'min_qual', :short=>'m', :type=>'uint',   :mandatory=>true, :default=>20,     :allowed=>nil,               :disallowed=>'0'}
+casts << {:long=>'min_len',  :short=>'l', :type=>'uint',   :mandatory=>true, :default=>3,      :allowed=>nil,               :disallowed=>'0'}
+casts << {:long=>'trim',     :short=>'t', :type=>'string', :mandatory=>true, :default=>'both', :allowed=>'left,right,both', :disallowed=>nil}
 
 options = Biopieces.options_parse(ARGV, casts)
 
-ILLUMINA_BASE = 64
-
-regex_left  = Regexp.new("^[#{(ILLUMINA_BASE).chr}-#{(ILLUMINA_BASE + options[:min]).chr}]+")
-regex_right = Regexp.new("[#{(ILLUMINA_BASE).chr}-#{(ILLUMINA_BASE + options[:min]).chr}]+$")
+trim = options[:trim].to_sym
 
 Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
   input.each_record do |record|
-    if record.has_key? :SEQ and record.has_key? :SCORES
-      case options[:trim]
-      when /both/
-        record[:SCORES].match(regex_right) do |m|
-          record[:SEQ]       = record[:SEQ][0 ... record[:SEQ].length - m.to_s.length]
-          record[:SCORES]    = $`
-          record[:SEQ_LEN]   = record[:SEQ].length
-          record[:TRIM_LEFT] = m.to_s.length
-        end
-        record[:SCORES].match(regex_left) do |m|
-          record[:SEQ]        = record[:SEQ][m.to_s.length ... record[:SEQ].length]
-          record[:SCORES]     = $'
-          record[:SEQ_LEN]    = record[:SEQ].length
-          record[:TRIM_RIGHT] = $'.length - 1
-        end
-      when /left/
-        record[:SCORES].match(regex_left) do |m|
-          record[:SEQ]       = record[:SEQ][m.to_s.length ... record[:SEQ].length]
-          record[:SCORES]    = $'
-          record[:SEQ_LEN]   = record[:SEQ].length
-          record[:TRIM_LEFT] = m.to_s.length
-        end
-      when /right/
-        record[:SCORES].match(regex_right) do |m|
-          record[:SEQ]        = record[:SEQ][0 ... record[:SEQ].length - m.to_s.length]
-          record[:SCORES]     = $`
-          record[:SEQ_LEN]    = record[:SEQ].length
-          record[:TRIM_RIGHT] = $`.length - 1
-        end
+    if record[:SEQ] and record[:SCORES]
+      entry = Seq.new_bp(record)
+
+      case trim
+      when :both  then entry.quality_trim!(options[:min_qual], options[:min_len])
+      when :left  then entry.quality_trim_left!(options[:min_qual], options[:min_len])
+      when :right then entry.quality_trim_right!(options[:min_qual], options[:min_len])
       end
+
+      record.merge! entry.to_bp
     end
 
     output.puts record