#!/usr/bin/env ruby

# Provenance: recovered from a collapsed commit diff.
#   From:       martinahansen
#   Date:       Wed, 1 Sep 2010 13:35:30 +0000 (+0000)
#   Subject:    added mask_seq biopiece
#   Commit:     43d1572d3b70aee8426471eaf6d786a456cb2d3e (biopieces.git)
#   git-svn-id: http://biopieces.googlecode.com/svn/trunk@1071 74ccb610-7750-0410-82ae-013aeee3265d
#   File:       bp_bin/mask_seq (new file, mode 100755)

# Copyright (C) 2007-2010 Martin A. Hansen.

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# http://www.gnu.org/copyleft/gpl.html

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# This program is part of the Biopieces framework (www.biopieces.org).

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<

# Soft mask sequences in the stream based on Solexa/Illumina/Phred type quality scores.

# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


require 'biopieces'

# Expanding class Hash with possibly evil monkey patch.
class Hash
  # Soft masks sequence residues where the corresponding quality score
  # is below a given cutoff.
  #
  # The value under :SEQ is upcased first; then every position whose score
  # character satisfies (character code - base) < cutoff is downcased. The
  # result is stored back under :SEQ. Records lacking either the :SEQ or the
  # :SCORES key are left untouched.
  #
  # cutoff - Integer threshold compared against the decoded score.
  # base   - Integer subtracted from each score character's code point
  #          (presumably the ASCII offset of the quality encoding - confirm
  #          against the producer of :SCORES).
  #
  # Returns self, so the call can be passed straight to an output method.
  def mask_seq!(cutoff, base)
    return self unless key?(:SEQ) && key?(:SCORES)

    seq    = self[:SEQ].upcase
    scores = self[:SCORES]

    scores.each_char.with_index do |score, pos|
      # A scores string longer than the sequence has no residue left to mask;
      # indexing past the end would yield nil and crash on nil.downcase.
      break if pos >= seq.length

      seq[pos] = seq[pos].downcase if score.ord - base < cutoff
    end

    self[:SEQ] = seq

    self
  end
end

# Command line option definitions handed to Biopieces#parse.
casts = []
casts << {:long=>'cutoff', :short=>'c', :type=>'int',  :mandatory=>false, :default=>20, :allowed=>nil,        :disallowed=>nil}
casts << {:long=>'base',   :short=>'b', :type=>'uint', :mandatory=>false, :default=>64, :allowed=>"33,59,64", :disallowed=>nil}

bp = Biopieces.new

options = bp.parse(ARGV, casts)

# Mask every record in the stream and emit it.
bp.each_record do |record|
  bp.puts record.mask_seq!(options[:cutoff], options[:base])
end


# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<


__END__