From: martinahansen
Date: Tue, 2 Nov 2010 10:40:16 +0000 (+0000)
Subject: added clip_seq biopiece
X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=a66f9359480be4e44b608c682c355e6a725b051c;p=biopieces.git

added clip_seq biopiece

git-svn-id: http://biopieces.googlecode.com/svn/trunk@1152 74ccb610-7750-0410-82ae-013aeee3265d
---

diff --git a/bp_bin/clip_seq b/bp_bin/clip_seq
new file mode 100755
index 0000000..4af564c
--- /dev/null
+++ b/bp_bin/clip_seq
@@ -0,0 +1,71 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) 2007-2010 Martin A. Hansen.
+
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# http://www.gnu.org/copyleft/gpl.html
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# This program is part of the Biopieces framework (www.biopieces.org).
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+# Clip sequences in the stream based on soft masked sequence.
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+require 'biopieces'
+
+casts = []
+
+bp = Biopieces.new
+
+options = bp.parse(ARGV, casts)
+
+# Strip the soft masked (lowercase a-z) flanks from SEQ of every record that
+# has one, clip SCORES (when present) to the same span and refresh SEQ_LEN.
+# Records without SEQ are emitted unchanged.
+bp.each_record do |record|
+  if record.has_key? :SEQ
+    seq = record[:SEQ]
+
+    # Positions of the first and the last character outside a-z; both are
+    # nil when the sequence is empty or soft masked from end to end.
+    trim_beg = seq.index(/[^a-z]/)
+    trim_end = seq.rindex(/[^a-z]/)
+
+    if trim_beg
+      trim_end += 1 # exclusive end for the three-dot ranges below
+    else
+      # Nothing to keep: use an empty span instead of calling a method on nil.
+      trim_beg = trim_end = 0
+    end
+
+    record[:SEQ]     = seq[trim_beg ... trim_end]
+    record[:SEQ_LEN] = record[:SEQ].length
+    record[:SCORES]  = record[:SCORES][trim_beg ... trim_end] if record.has_key? :SCORES
+  end
+
+  bp.puts record
+end
+
+
+# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+
+
+__END__
diff --git a/bp_test/in/clip_seq.in b/bp_test/in/clip_seq.in
new file mode 100644
index 0000000..872d3fd
--- /dev/null
+++ b/bp_test/in/clip_seq.in
@@ -0,0 +1,5 @@
+SEQ_NAME: GP8WFI101DE0H9
+SEQ: tcagTCTACGTCTCTGGACTGtaactgac
+SEQ_LEN: 29
+SCORES: hhhhhhhhhhTQQOSWZ^^XMMMNNS`YY
+---
diff --git a/bp_test/out/clip_seq.out.1 b/bp_test/out/clip_seq.out.1
new file mode 100644
index 0000000..bc97e94
--- /dev/null
+++ b/bp_test/out/clip_seq.out.1
@@ -0,0 +1,5 @@
+SEQ_NAME: GP8WFI101DE0H9
+SEQ: TCTACGTCTCTGGACTG
+SEQ_LEN: 17
+SCORES: hhhhhhTQQOSWZ^^XM
+---
diff --git a/bp_test/test/test_clip_seq b/bp_test/test/test_clip_seq
new file mode 100755
index 0000000..61c5709
--- /dev/null
+++ b/bp_test/test/test_clip_seq
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+source "$BP_DIR/bp_test/lib/test.sh"
+
+run "$bp -I $in -O $tmp"
+assert_no_diff $tmp $out.1
+clean