X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=misc%2Fsoap2sam.pl;h=b37135e811c6300d734488ae5a112f421bb920b5;hb=9f118264ea012adc21a46d7c03eaad4f9ce7d4d4;hp=0f99987940345887dd5b63979c55ee92c069d48a;hpb=38c2703b74ff9a045933206123c86c4e8f999d14;p=samtools.git diff --git a/misc/soap2sam.pl b/misc/soap2sam.pl index 0f99987..b37135e 100755 --- a/misc/soap2sam.pl +++ b/misc/soap2sam.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl -w # Contact: lh3 -# Version: 0.1.0 +# Version: 0.1.1 use strict; use warnings; @@ -36,14 +36,15 @@ sub mating { sub soap2sam { my %opts = (); getopts("p", \%opts); - die("Usage: soap2sam.pl [-p] \n") if (@ARGV == 0); + die("Usage: soap2sam.pl [-p] \n") if (@ARGV == 0 && -t STDIN); my $is_paired = defined($opts{p}); # core loop my @s1 = (); my @s2 = (); my ($s_last, $s_curr) = (\@s1, \@s2); while (<>) { - &soap2sam_aux($_, $s_curr, $is_paired); + s/[\177-\377]|[\000-\010]|[\012-\040]//g; + next if (&soap2sam_aux($_, $s_curr, $is_paired) < 0); if (@$s_last != 0 && $s_last->[0] eq $s_curr->[0]) { &mating($s_last, $s_curr); print join("\t", @$s_last), "\n"; @@ -60,8 +61,11 @@ sub soap2sam { sub soap2sam_aux { my ($line, $s, $is_paired) = @_; chomp($line); - my @t = split("\t", $line); + my @t = split(/\s+/, $line); + return -1 if (@t < 9 || $line =~ /^\s/ || !$t[0]); @$s = (); + # fix SOAP-2.1.x bugs + @t = @t[0..2,4..$#t] unless ($t[3] =~ /^\d+$/); # read name $s->[0] = $t[0]; $s->[0] =~ s/\/[12]$//g; @@ -70,7 +74,8 @@ sub soap2sam_aux { $s->[1] |= 1 | 1<<($t[4] eq 'a'? 6 : 7); $s->[1] |= 2 if ($is_paired); # read & quality - $s->[9] = $t[1]; $s->[10] = $t[2]; + $s->[9] = $t[1]; + $s->[10] = (length($t[2]) > length($t[1]))? substr($t[2], 0, length($t[1])) : $t[2]; # cigar $s->[5] = length($s->[9]) . "M"; # coor @@ -100,4 +105,5 @@ sub soap2sam_aux { $md = length($t[1]); } push(@$s, "MD:Z:$md"); + return 0; }