From fa23b4bbb0521525ce6e47fdb35cf5f589e691ad Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 4 Jun 2009 12:01:10 +0000 Subject: [PATCH] * samtools.pl-0.2.2 * added pileup2fq --- misc/samtools.pl | 77 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/misc/samtools.pl b/misc/samtools.pl index ac5f34a..4ddc6d1 100755 --- a/misc/samtools.pl +++ b/misc/samtools.pl @@ -6,11 +6,11 @@ use strict; use warnings; use Getopt::Std; -my $version = '0.2.1'; +my $version = '0.2.2'; &usage if (@ARGV < 1); my $command = shift(@ARGV); -my %func = (snpFilter=>\&snpFilter, indelFilter=>\&indelFilter, showALEN=>\&showALEN); +my %func = (snpFilter=>\&snpFilter, indelFilter=>\&indelFilter, showALEN=>\&showALEN, pileup2fq=>\&pileup2fq); die("Unknown command \"$command\".\n") if (!defined($func{$command})); &{$func{$command}}; @@ -140,11 +140,84 @@ Options: -d INT minimum depth to call a SNP [$opts{d}] map { print $_->{L} if ($_->{F}) } @last; } +sub pileup2fq { + my %opts = (d=>3, D=>255, Q=>40, G=>50, l=>10); + getopts('d:D:Q:G:l:', \%opts); + die(qq/ +Usage: samtools.pl pileup2fq [options] + +Options: -d INT minimum depth [$opts{d}] + -D INT maximum depth [$opts{D}] + -Q INT min RMS mapQ [$opts{Q}] + -G INT minimum indel score [$opts{G}] + -l INT indel filter winsize [$opts{l}] +/) if (@ARGV == 0 && -t STDIN); + + my ($last_chr, $seq, $qual, @gaps, $last_pos); + my $_Q = $opts{Q}; + my $_d = $opts{d}; + my $_D = $opts{D}; + + $last_chr = ''; + while (<>) { + my @t = split; + if ($last_chr ne $t[0]) { + &p2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}) if ($last_chr); + $last_chr = $t[0]; + $last_pos = 0; + $seq = ''; $qual = ''; + @gaps = (); + } + if ($t[1] - $last_pos != 1) { + $seq .= 'n' x ($t[1] - $last_pos - 1); + $qual .= '!' x ($t[1] - $last_pos - 1); + } + if ($t[2] eq '*') { + push(@gaps, $t[1]) if ($t[5] >= $opts{G}); + } else { + $seq .= ($t[6] >= $_Q && $t[7] >= $_d && $t[7] <= $_D)? uc($t[3]) : lc($t[3]); + my $q = $t[4] + 33; + $q = 126 if ($q > 126); + $qual .= chr($q); + } + $last_pos = $t[1]; + } + &p2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}); +} + +sub p2q_post_process { + my ($chr, $seq, $qual, $gaps, $l) = @_; + &p2q_filter_gaps($seq, $gaps, $l); + print "\@$chr\n"; &p2q_print_str($seq); + print "+\n"; &p2q_print_str($qual); +} + +sub p2q_filter_gaps { + my ($seq, $gaps, $l) = @_; + for my $g (@$gaps) { + my $x = $g > $l? $g - $l : 0; + substr($$seq, $x, $l + $l) = lc(substr($$seq, $x, $l + $l)); + } +} + +sub p2q_print_str { + my ($s) = @_; + my $l = length($$s); + for (my $i = 0; $i < $l; $i += 60) { + print substr($$s, $i, 60), "\n"; + } +} + +# +# Usage +# + sub usage { die(qq/ Usage: samtools.pl []\n Command: indelFilter filter indels generated by `pileup -c' snpFilter filter SNPs generated by `pileup -c' + pileup2fq generate fastq from `pileup -c' showALEN print alignment length (ALEN) following CIGAR \n/); } -- 2.39.2