# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-# Soft mask sequences in the stream based on quality scores.
+# Soft mask sequences in the stream based on quality scores, or hard mask them when the hardmask flag is given.
# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-
require 'maasha/biopieces'
-
-ILLUMINA_BASE = 64
-
-# Expading class Hash with possibly evil monkey patch.
-class Hash
- # Soft masks sequence residues where the corresponding quality score
- # is below a given cutoff.
- def mask_seq!(cutoff)
- if self.has_key? :SEQ and self.has_key? :SCORES
- seq = self[:SEQ].upcase
- scores = self[:SCORES]
- i = 0
-
- scores.each_char do |score|
- seq[i] = seq[i].downcase if score.ord - ILLUMINA_BASE < cutoff
- i += 1
- end
-
- self[:SEQ] = seq
- end
-
- self
- end
-end
+require 'maasha/seq'
casts = []
-casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'hardmask', :short=>'h', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
options = Biopieces.options_parse(ARGV, casts)
Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
input.each_record do |record|
- output.puts record.mask_seq!(options[:cutoff])
+ if record.has_key? :SEQ
+ entry = Seq.new_bp(record)
+
+ options[:hardmask] ? entry.mask_seq_hard!(options[:cutoff]) : entry.mask_seq_soft!(options[:cutoff])
+
+ record[:SEQ] = entry.seq
+ end
+
+ output.puts record
end
end