git.donarmstrong.com Git - biopieces.git/blobdiff - bp_bin/mask_seq
adding bzip2 support in ruby
[biopieces.git] / bp_bin / mask_seq
index f29b68f95ae0fe3f84f01e7395916e7e2cecd43d..d9dd01a71e1e5b1019bf1dcf24634aefc51bb430 100755 (executable)
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
-# Soft mask sequences in the stream based on quality scores.
+# Mask sequences in the stream based on quality scores.
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
-
 require 'maasha/biopieces'
+require 'maasha/seq'
+
+casts = []
+casts << {:long=>'cutoff',   :short=>'c', :type=>'int',  :mandatory=>false, :default=>20,  :allowed=>nil, :disallowed=>nil}
+casts << {:long=>'hardmask', :short=>'h', :type=>'flag', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil}
 
-ILLUMINA_BASE = 64
+options = Biopieces.options_parse(ARGV, casts)
 
-# Expading class Hash with possibly evil monkey patch.
-class Hash
-  # Soft masks sequence residues where the corresponding quality score
-  # is below a given cutoff.
-  def mask_seq!(cutoff)
-    if self.has_key? :SEQ and self.has_key? :SCORES
-      seq    = self[:SEQ].upcase
-      scores = self[:SCORES]
-      i      = 0
+Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output|
+  input.each_record do |record|
+    if record[:SEQ]
+      entry = Seq.new_bp(record)
 
-      scores.each_char do |score|
-        seq[i] = seq[i].downcase if score.ord - ILLUMINA_BASE < cutoff
-        i += 1
-      end
+      options[:hardmask] ? entry.mask_seq_hard!(options[:cutoff]) : entry.mask_seq_soft!(options[:cutoff])
 
-      self[:SEQ] = seq
+      record[:SEQ] = entry.seq
     end
 
-    self
+    output.puts record
   end
 end
 
-casts = []
-casts << {:long=>'cutoff', :short=>'c', :type=>'int', :mandatory=>false, :default=>20, :allowed=>nil, :disallowed=>nil}
-
-bp = Biopieces.new
-
-options = bp.parse(ARGV, casts)
-
-bp.each_record do |record|
-  bp.puts record.mask_seq!(options[:cutoff])
-end
-
 
 # >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<