From 036b8f518d47f646da1328205f12dcd08acec83e Mon Sep 17 00:00:00 2001
From: Heng Li <lh3@live.co.uk>
Date: Wed, 1 Sep 2010 13:26:10 +0000
Subject: [PATCH] Utilities for processing VCF

---
 bcftools/vcfutils.pl | 59 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100755 bcftools/vcfutils.pl

diff --git a/bcftools/vcfutils.pl b/bcftools/vcfutils.pl
new file mode 100755
index 0000000..bf77049
--- /dev/null
+++ b/bcftools/vcfutils.pl
@@ -0,0 +1,59 @@
+#!/usr/bin/perl -w
+
+# Author: lh3
+
+use strict;
+use warnings;
+use Getopt::Std;
+
+&main;
+exit;
+
+sub main {
+  my $version = '0.1.0';
+  &usage if (@ARGV < 1);
+  my $command = shift(@ARGV);
+  my %func = (subsam=>\&subsam);
+  die("Unknown command \"$command\".\n") if (!defined($func{$command}));
+  &{$func{$command}};
+}
+
+sub subsam {
+  die(qq/Usage: vcfutils.pl subsam <in.vcf> [samples]\n/) if (@ARGV == 0);
+  my ($fh, %h);
+  my $fn = shift(@ARGV);
+  my @col;
+  open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
+  $h{$_} = 1 for (@ARGV);
+  while (<$fh>) {
+	if (/^##/) {
+	  print;
+	} elsif (/^#/) {
+	  my @t = split;
+	  my @s = @t[0..8]; # all fixed fields + FORMAT
+	  for (9 .. $#t) {
+		if ($h{$t[$_]}) {
+		  push(@s, $t[$_]);
+		  push(@col, $_);
+		}
+	  }
+	  pop(@s) if (@s == 9); # no sample selected; remove the FORMAT field
+	  print join("\t", @s), "\n";
+	} else {
+	  my @t = split;
+	  if (@col == 0) {
+		print join("\t", @t[0..7]), "\n";
+	  } else {
+		print join("\t", @t[0..8], map {$t[$_]} @col), "\n";
+	  }
+	}
+  }
+  close($fh);
+}
+
+sub usage {
+  die(qq/
+Usage:   vcfutils.pl <command> [<arguments>]\n
+Command: subsam       get a subset of samples
+\n/);
+}
-- 
2.39.5