From 73807809b5938fd4961e5e14ab181829691c17b8 Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Mon, 27 Feb 2017 08:15:40 -0800 Subject: [PATCH] add VCF rs grep --- vcf_rs_grep | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100755 vcf_rs_grep diff --git a/vcf_rs_grep b/vcf_rs_grep new file mode 100755 index 0000000..8021b03 --- /dev/null +++ b/vcf_rs_grep @@ -0,0 +1,117 @@ +#!/usr/bin/perl +# vcf_rs_grep greps RS from a VCF file +# and is released under the terms of the GNU GPL version 3, or any +# later version, at your option. See the file README and COPYING for +# more information. +# Copyright 2017 by Don Armstrong . + + +use warnings; +use strict; + +use Getopt::Long; +use Pod::Usage; + +=head1 NAME + +vcf_rs_grep - greps RS from a VCF file + +=head1 SYNOPSIS + +vcf_rs_grep [options] vcf_file.gz < rs_list |gzip -c > vcf_greped.gz + + Options: + --debug, -d debugging level (Default 0) + --help, -h display this help + --man, -m display manual + +=head1 OPTIONS + +=over + +=item B<--debug, -d> + +Debug verbosity. (Default 0) + +=item B<--help, -h> + +Display brief usage information. + +=item B<--man, -m> + +Display this manual. + +=back + +=head1 EXAMPLES + +vcf_rs_grep vcf_file.gz < rs_list |gzip -c > vcf_greped.gz + +=cut + + +use vars qw($DEBUG); + +my %options = (debug => 0, + help => 0, + man => 0, + ); + +GetOptions(\%options, + 'debug|d+','help|h|?','man|m'); + +pod2usage() if $options{help}; +pod2usage({verbose=>2}) if $options{man}; + +$DEBUG = $options{debug}; + +my @USAGE_ERRORS; +if (@ARGV!=1) { + push @USAGE_ERRORS,"You must provide exactly one VCF file to read"; +} + +pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS; + + +sub open_compressed_file { + my ($file) = @_; + my $fh; + my $mode = '<:encoding(UTF-8)'; + my @opts; + if ($file =~ /\.gz$/) { + $mode = '-|:encoding(UTF-8)'; + push @opts,'gzip','-dc'; + } + if ($file =~ /\.xz$/) { + $mode = '-|:encoding(UTF-8)'; + push @opts,'xz','-dc'; + } + if ($file =~ /\.bz2$/) { + $mode = '-|:encoding(UTF-8)'; + push @opts,'bzip2','-dc'; + } + open($fh,$mode,@opts,$file); + return $fh; +} + +my $vcf = open_compressed_file($ARGV[0]) or + die "Unable to open file $ARGV[0]"; + +my %rsids; +while () { + chomp; + $rsids{$_} = 1; +} + +while (<$vcf>) { + if (/^#/o) { + print $_; + next; + } + $_ =~ /^\S+\s+\S+\s+(\S+)/o; + next unless $1; + next unless exists $rsids{$1} and $rsids{$1}; + print $_; +} + +__END__ -- 2.39.2