X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=bp_bin%2Fmask_seq;h=d9dd01a71e1e5b1019bf1dcf24634aefc51bb430;hb=48bea5c28b89dc5586d0bddb338ccd6ba23aa1f9;hp=011be4da735e79d6c2765613400b11cfc462f513;hpb=c4f14c511655d92281b6d70363de57b77a9b6045;p=biopieces.git diff --git a/bp_bin/mask_seq b/bp_bin/mask_seq index 011be4d..d9dd01a 100755 --- a/bp_bin/mask_seq +++ b/bp_bin/mask_seq @@ -24,45 +24,30 @@ # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< -# Soft mask sequences in the stream based on quality scores. +# Mask sequences in the stream based on quality scores. # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< - require 'maasha/biopieces' - -ILLUMINA_BASE = 64 - -# Expading class Hash with possibly evil monkey patch. -class Hash - # Soft masks sequence residues where the corresponding quality score - # is below a given cutoff. - def mask_seq!(cutoff) - if self.has_key? :SEQ and self.has_key? :SCORES - seq = self[:SEQ].upcase - scores = self[:SCORES] - i = 0 - - scores.each_char do |score| - seq[i] = seq[i].downcase if score.ord - ILLUMINA_BASE < cutoff - i += 1 - end - - self[:SEQ] = seq - end - - self - end -end +require 'maasha/seq' casts = [] -casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'hardmask', :short=>'h', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} options = Biopieces.options_parse(ARGV, casts) Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| input.each_record do |record| - output.puts record.mask_seq!(options[:cutoff]) + if record[:SEQ] + entry = Seq.new_bp(record) + + options[:hardmask] ? entry.mask_seq_hard!(options[:cutoff]) : entry.mask_seq_soft!(options[:cutoff]) + + record[:SEQ] = entry.seq + end + + output.puts record end end