X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fmask_seq;h=3cd162a264aae007105932017a4a59636e19ddfb;hb=44f810b56c353bfe302469a8313b0aada285b27e;hp=fe70a64a5d554612bbcff68ef8d44cafee280092;hpb=43d1572d3b70aee8426471eaf6d786a456cb2d3e;p=biopieces.git diff --git a/bp_bin/mask_seq b/bp_bin/mask_seq index fe70a64..3cd162a 100755 --- a/bp_bin/mask_seq +++ b/bp_bin/mask_seq @@ -24,47 +24,33 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Soft mask sequences in the stream based on Solexa/Illumina/Phred type quality scores. +# Mask sequences in the stream based on quality scores. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< +require 'maasha/biopieces' +require 'maasha/seq' -require 'biopieces' +casts = [] +casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'hardmask', :short=>'h', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} + +options = Biopieces.options_parse(ARGV, casts) -# Expading class Hash with possibly evil monkey patch. -class Hash - # Soft masks sequence residues where the corresponding quality score - # is below a given cutoff. - def mask_seq!(cutoff, base) - if self.has_key? :SEQ and self.has_key? :SCORES - seq = self[:SEQ].upcase - scores = self[:SCORES] - i = 0 +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? :SEQ + entry = Seq.new_bp(record) - scores.each_char do |score| - seq[i] = seq[i].downcase if score.ord - base < cutoff - i += 1 - end + options[:hardmask] ? entry.mask_seq_hard!(options[:cutoff]) : entry.mask_seq_soft!(options[:cutoff]) - self[:SEQ] = seq + record[:SEQ] = entry.seq end - self + output.puts record end end -casts = [] -casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil} -casts << {:long=>'base', :short=>'b', :type=>'uint', :mandatory=>false, :default=>64, :allowed=>"33,59,64", :disallowed=>nil} - -bp = Biopieces.new - -options = bp.parse(ARGV, casts) - -bp.each_record do |record| - bp.puts record.mask_seq!(options[:cutoff], options[:base]) -end - # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<