From: Petr Danecek Date: Tue, 5 Oct 2010 12:59:36 +0000 (+0000) Subject: Convert VCF output of "bcftools view -bgcv" to a valid VCF file X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=fdeef62cd249a30ca931a8e8f81454fa7e59b4da;p=samtools.git Convert VCF output of "bcftools view -bgcv" to a valid VCF file --- diff --git a/bcftools/bcf-fix.pl b/bcftools/bcf-fix.pl new file mode 100755 index 0000000..61c6136 --- /dev/null +++ b/bcftools/bcf-fix.pl @@ -0,0 +1,101 @@ +#!/usr/bin/perl -w + +use strict; +use warnings; +use Carp; + +my $opts = parse_params(); +bcf_fix(); + +exit; + +#-------------------------------- + +sub error +{ + my (@msg) = @_; + if ( scalar @msg ) { confess @msg; } + die + "Usage: bcftools view test.bcf | bcf-fix.pl > test.vcf\n", + "Options:\n", + " -h, -?, --help This help message.\n", + "\n"; +} + + +sub parse_params +{ + my $opts = {}; + while (my $arg=shift(@ARGV)) + { + if ( $arg eq '-?' || $arg eq '-h' || $arg eq '--help' ) { error(); } + error("Unknown parameter \"$arg\". Run -h for help.\n"); + } + return $opts; +} + +sub bcf_fix +{ + while (my $line=) + { + if ( $line=~/^#CHROM/ ) + { + print +qq[##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +]; + print $line; + } + elsif ( $line=~/^#/ ) + { + print $line; + } + else + { + my @items = split(/\t/,$line); + my @tags = split(/:/,$items[8]); # FORMAT tags + + my $nidx=2; + my @idxs; # Mapping which defines new ordering: $idxs[$inew]=$iold; GT comes first, PL second + for (my $i=0; $i<@tags; $i++) + { + if ( $tags[$i] eq 'GT' ) { $idxs[0]=$i; } + elsif ( $tags[$i] eq 'PL' ) { $idxs[1]=$i; } + else { $idxs[$nidx++]=$i; } + } + if ( !exists($tags[0]) or !exists($tags[1]) ) { error("FIXME: expected GT and PL in the format field.\n"); } + + # First fix the FORMAT column + $items[8] = 'GT:GL'; + for (my $i=2; $i<@tags; $i++) + { + $items[8] .= ':'.$tags[$idxs[$i]]; + } + + # Now all the genotype columns + for (my $iitem=9; $iitem<@items; $iitem++) + { + @tags = split(/:/,$items[$iitem]); + $items[$iitem] = $tags[$idxs[0]] .':'; + + # GL=-PL/10 + my ($a,$b,$c) = split(/,/,$tags[$idxs[1]]); + $items[$iitem] .= sprintf "%.2f,%.2f,%.2f",-$a/10.,-$b/10.,-$c/10.; + + for (my $itag=2; $itag<@tags; $itag++) + { + $items[$iitem] .= ':'.$tags[$idxs[$itag]]; + } + } + print join("\t",@items); + } + } +} +