From ed64328c7cb91daf65273e91be4bb373122798c4 Mon Sep 17 00:00:00 2001 From: martinahansen Date: Thu, 8 Dec 2011 14:39:10 +0000 Subject: [PATCH] added replace_vals biopiece git-svn-id: http://biopieces.googlecode.com/svn/trunk@1698 74ccb610-7750-0410-82ae-013aeee3265d --- bp_bin/replace_vals | 81 ++++++++++++++++++++++++++++++++++ bp_test/in/replace_vals.in.1 | 8 ++++ bp_test/in/replace_vals.in.2 | 2 + bp_test/out/replace_vals.out.1 | 8 ++++ bp_test/out/replace_vals.out.2 | 8 ++++ bp_test/out/replace_vals.out.3 | 8 ++++ bp_test/test/test_replace_vals | 15 +++++++ 7 files changed, 130 insertions(+) create mode 100755 bp_bin/replace_vals create mode 100644 bp_test/in/replace_vals.in.1 create mode 100644 bp_test/in/replace_vals.in.2 create mode 100644 bp_test/out/replace_vals.out.1 create mode 100644 bp_test/out/replace_vals.out.2 create mode 100644 bp_test/out/replace_vals.out.3 create mode 100755 bp_test/test/test_replace_vals diff --git a/bp_bin/replace_vals b/bp_bin/replace_vals new file mode 100755 index 0000000..356a633 --- /dev/null +++ b/bp_bin/replace_vals @@ -0,0 +1,81 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2007-2011 Martin A. Hansen. + +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ +# http://www.gnu.org/copyleft/gpl.html + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# This program is part of the Biopieces framework (www.biopieces.org). + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> DESCRIPTION <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +# Replace values of a specified key for each record in the stream. + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + +require 'maasha/biopieces' + +casts = [] +casts << {:long=>'key', :short=>'k', :type=>'string', :mandatory=>true, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'search', :short=>'s', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'replace', :short=>'r', :type=>'string', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'file', :short=>'f', :type=>'file!', :mandatory=>false, :default=>nil, :allowed=>nil, :disallowed=>nil} +casts << {:long=>'search_col', :short=>'S', :type=>'uint', :mandatory=>false, :default=>1, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'replace_col', :short=>'R', :type=>'uint', :mandatory=>false, :default=>2, :allowed=>nil, :disallowed=>"0"} +casts << {:long=>'delimiter', :short=>'d', :type=>'string', :mandatory=>false, :default=>'\s+', :allowed=>nil, :disallowed=>nil} + +options = Biopieces.options_parse(ARGV, casts) +key = options[:key].to_sym +delimiter = Regexp.new(options[:delimiter]) + +raise ArgumentError, "both --search and --replace must be specified" if options[:search] and not options[:replace] +raise ArgumentError, "both --search and --replace must be specified" if options[:replace] and not options[:search] + +replace_hash = {} + +if options[:search] + replace_hash[options[:search].to_s] = options[:replace].to_s +end + +if options[:file] + File.open(options[:file], "r") do |ios| + ios.each_line do |line| + unless line[0] == '#' + fields = 
line.chomp.split(delimiter) + replace_hash[fields[options[:search_col] - 1]] = fields[options[:replace_col] - 1] + end + end + end +end + +Biopieces.open(options[:stream_in], options[:stream_out]) do |input, output| + input.each_record do |record| + if record.has_key? key + if replace_hash.has_key? record[key] + record[key] = replace_hash[record[key]] + end + end + + output.puts record + end +end + +# >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< + + +__END__ diff --git a/bp_test/in/replace_vals.in.1 b/bp_test/in/replace_vals.in.1 new file mode 100644 index 0000000..9cc928f --- /dev/null +++ b/bp_test/in/replace_vals.in.1 @@ -0,0 +1,8 @@ +SEQ_NAME: test1 +SEQ: AAGTGTATGAGCCCAGTCGCCCTA +SEQ_LEN: 24 +--- +SEQ_NAME: test2 +SEQ: CGGGAACCTGATCAGCTGTCTACA +SEQ_LEN: 24 +--- diff --git a/bp_test/in/replace_vals.in.2 b/bp_test/in/replace_vals.in.2 new file mode 100644 index 0000000..762f91b --- /dev/null +++ b/bp_test/in/replace_vals.in.2 @@ -0,0 +1,2 @@ +test1 foo +bar test2 diff --git a/bp_test/out/replace_vals.out.1 b/bp_test/out/replace_vals.out.1 new file mode 100644 index 0000000..f668d94 --- /dev/null +++ b/bp_test/out/replace_vals.out.1 @@ -0,0 +1,8 @@ +SEQ_NAME: foo +SEQ: AAGTGTATGAGCCCAGTCGCCCTA +SEQ_LEN: 24 +--- +SEQ_NAME: test2 +SEQ: CGGGAACCTGATCAGCTGTCTACA +SEQ_LEN: 24 +--- diff --git a/bp_test/out/replace_vals.out.2 b/bp_test/out/replace_vals.out.2 new file mode 100644 index 0000000..f668d94 --- /dev/null +++ b/bp_test/out/replace_vals.out.2 @@ -0,0 +1,8 @@ +SEQ_NAME: foo +SEQ: AAGTGTATGAGCCCAGTCGCCCTA +SEQ_LEN: 24 +--- +SEQ_NAME: test2 +SEQ: CGGGAACCTGATCAGCTGTCTACA +SEQ_LEN: 24 +--- diff --git a/bp_test/out/replace_vals.out.3 b/bp_test/out/replace_vals.out.3 new file mode 100644 index 0000000..0055caf --- /dev/null +++ b/bp_test/out/replace_vals.out.3 @@ -0,0 +1,8 @@ +SEQ_NAME: test1 +SEQ: AAGTGTATGAGCCCAGTCGCCCTA +SEQ_LEN: 24 +--- +SEQ_NAME: bar +SEQ: CGGGAACCTGATCAGCTGTCTACA +SEQ_LEN: 24 +--- diff --git 
a/bp_test/test/test_replace_vals b/bp_test/test/test_replace_vals new file mode 100755 index 0000000..6e94f45 --- /dev/null +++ b/bp_test/test/test_replace_vals @@ -0,0 +1,15 @@ +#!/bin/bash + +source "$BP_DIR/bp_test/lib/test.sh" + +run "$bp -I $in.1 -k SEQ_NAME -s test1 -r foo -O $tmp" +assert_no_diff $tmp $out.1 +clean + +run "$bp -I $in.1 -k SEQ_NAME -f $in.2 -O $tmp" +assert_no_diff $tmp $out.2 +clean + +run "$bp -I $in.1 -k SEQ_NAME -f $in.2 -S 2 -R 1 -O $tmp" +assert_no_diff $tmp $out.3 +clean -- 2.39.5