2 # vcf_rs_grep greps the records matching a list of rsIDs out of a VCF file
3 # and is released under the terms of the GNU GPL version 3, or any
4 # later version, at your option. See the file README and COPYING for
6 # Copyright 2017 by Don Armstrong <don@donarmstrong.com>.
17 vcf_rs_grep - greps the records matching a list of rsIDs out of a VCF file
21 vcf_rs_grep [options] vcf_file.gz < rs_list |gzip -c > vcf_greped.gz
24 --debug, -d debugging level (Default 0)
25 --help, -h display this help
26 --man, -m display manual
34 Debug verbosity. (Default 0)
38 Display brief usage information.
48 vcf_rs_grep vcf_file.gz < rs_list |gzip -c > vcf_greped.gz
# Command-line option handling.
#
# %options holds the parsed options; debug defaults to 0.  The option specs
# below declare "debug" (alias d) as an incrementing counter, "help" (aliases
# h and ?) and "man" (alias m) as flags.
# NOTE(review): the call that consumes these specs is not visible in this
# excerpt; presumably Getopt::Long's GetOptions -- confirm.
55 my %options = (debug => 0,
62 'debug|d+','help|h|?','man|m');
# --help: print the usage message and exit.
64 pod2usage() if $options{help};
# --man: verbose level 2 asks pod2usage for the full manual page.
65 pod2usage({verbose=>2}) if $options{man};
# Copy the requested debug level into $DEBUG.
67 $DEBUG = $options{debug};
# Queue a usage error about the VCF file argument.
# NOTE(review): the condition guarding this push is not visible here;
# presumably it checks that @ARGV holds exactly one element -- confirm.
71 push @USAGE_ERRORS,"You must provide exactly one VCF file to read";
# If any usage errors were queued, hand them to pod2usage as one
# newline-separated message.
74 pod2usage(join("\n",@USAGE_ERRORS)) if @USAGE_ERRORS;
# open_compressed_file($file)
#
# Open $file for reading as UTF-8 text and return the filehandle.
#
# A file whose name ends in .gz, .xz or .bz2 is read through the matching
# decompressor (gzip, xz or bzip2, each run with -dc).  The list form of a
# piped open is used, so the file name is passed to the decompressor as a
# single argument and is never interpreted by a shell.  Any other file is
# opened directly.
#
# Returns the filehandle on success.  Returns false (undef in scalar
# context) when open() fails, leaving $! as open() set it, so callers can
# write: open_compressed_file($file) or die "...: $!".
#
# Note that a decompressor which starts successfully but then fails (for
# example because the file does not exist) is only reported when the handle
# is closed.
sub open_compressed_file {
    my ($file) = @_;
    my $fh;
    my $mode = '<:encoding(UTF-8)';
    my @opts;
    if ($file =~ /\.gz$/) {
        $mode = '-|:encoding(UTF-8)';
        push @opts,'gzip','-dc';
    }
    elsif ($file =~ /\.xz$/) {
        $mode = '-|:encoding(UTF-8)';
        push @opts,'xz','-dc';
    }
    elsif ($file =~ /\.bz2$/) {
        $mode = '-|:encoding(UTF-8)';
        push @opts,'bzip2','-dc';
    }
    # open() turns $fh into a glob reference even when it fails, so the
    # handle alone cannot signal failure; check open()'s own return value.
    open($fh,$mode,@opts,$file) or return;
    return $fh;
}
# Open the VCF file named by the first command-line argument.
# NOTE(review): unlike the merge-file message below, this die message does
# not include $!.
98 my $vcf = open_compressed_file($ARGV[0]) or
99 die "Unable to open file $ARGV[0]";
# Optional merge table, read only when $options{merge} is defined.
# NOTE(review): no "merge" entry appears in the option specs or in the
# option summary visible in this excerpt -- confirm it is declared and
# documented.
108 if (defined $options{merge}) {
109 my $merge = open_compressed_file($options{merge})
110 or die "Unable to open file $options{merge}: $!";
# Each merge line is split on tabs; the first two fields are taken as the
# old and new ids and the third is discarded.  Both ids are handled with an
# "rs" prefix added.
113 my ($old,$new,undef) = split /\t/;
# Only old ids that are already keys of %rsids are of interest.
114 next unless exists $rsids{'rs'.$old};
# Remember the old -> new mapping and add the new id to %rsids with value 1.
115 $merge_rsids{'rs'.$old} = 'rs'.$new;
116 $rsids{'rs'.$new} = 1;
# Capture the third whitespace-separated field of the current line into $1
# (presumably the VCF ID column -- confirm).
# NOTE(review): the result of this match is not checked in the visible
# code; when a match fails, $1 keeps its value from the last successful
# match, so the lookup below would test a stale id.  The /o flag has no
# effect on a pattern that interpolates no variables.
126 $_ =~ /^\S+\s+\S+\s+(\S+)/o;
# Skip lines whose captured id is not a key of %rsids with a true value.
128 next unless exists $rsids{$1} and $rsids{$1};
# Collect every id whose value in %rsids is still 1.
# NOTE(review): presumably the value is changed when an id is seen in the
# VCF; the code doing so is not visible in this excerpt.
134 for my $rsid (keys %rsids) {
135 if ($rsids{$rsid} == 1) {
136 push @unused_rsids,$rsid;
# Report the collected ids on STDERR, one per line.
140 print STDERR "The following rsids were not found\n";
141 print STDERR map {$_."\n"} @unused_rsids;