From: Heng Li Date: Thu, 26 Feb 2009 14:53:03 +0000 (+0000) Subject: * wgsim-0.2.1 X-Git-Url: https://git.donarmstrong.com/?a=commitdiff_plain;h=b24678a4c94649a6b089d32c8cef1c682ac42a1f;p=samtools.git * wgsim-0.2.1 * fixed a bug about color read coordinates * fixed a bug in read names * wgsim_eval.pl-0.1.3 * make the script work with color reads --- diff --git a/ChangeLog b/ChangeLog index 31ec757..887cc60 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,97 @@ +------------------------------------------------------------------------ +r129 | lh3lh3 | 2009-02-18 22:23:27 +0000 (Wed, 18 Feb 2009) | 3 lines +Changed paths: + M /trunk/samtools/bam_index.c + M /trunk/samtools/bamtk.c + + * samtools-0.1.2-9 + * fixed a bug in bam_fetch, caused by completely contained adjacent chunks + +------------------------------------------------------------------------ +r128 | bhandsaker | 2009-02-18 19:06:57 +0000 (Wed, 18 Feb 2009) | 2 lines +Changed paths: + M /trunk/samtools/bamtk.c + +Fix annoying segv when invalid region specified. + +------------------------------------------------------------------------ +r127 | lh3lh3 | 2009-02-17 10:49:55 +0000 (Tue, 17 Feb 2009) | 2 lines +Changed paths: + D /trunk/samtools/misc/indel_filter.pl + A /trunk/samtools/misc/samtools.pl + + * move indel_filter.pl to samtools.pl + +------------------------------------------------------------------------ +r126 | lh3lh3 | 2009-02-14 21:22:30 +0000 (Sat, 14 Feb 2009) | 3 lines +Changed paths: + M /trunk/samtools/bam_mate.c + M /trunk/samtools/bamtk.c + + * samtools-0.1.2-7 + * fixed a bug in fixmate: SE reads are flagged as BAM_FMUNMAP + +------------------------------------------------------------------------ +r125 | lh3lh3 | 2009-02-13 09:54:45 +0000 (Fri, 13 Feb 2009) | 3 lines +Changed paths: + M /trunk/samtools/bam_stat.c + M /trunk/samtools/bamtk.c + + * samtools-0.1.2-7 + * fixed a minor bug in flagstat + +------------------------------------------------------------------------ +r124 | lh3lh3 | 2009-02-12 11:15:32 +0000 (Thu, 12 Feb 2009) | 3 lines +Changed paths: + M /trunk/samtools/bam_maqcns.c + M /trunk/samtools/bamtk.c + M /trunk/samtools/misc/indel_filter.pl + + * samtools-0.1.2-6 + * improve indel caller by setting maximum window size + +------------------------------------------------------------------------ +r123 | lh3lh3 | 2009-02-12 10:30:29 +0000 (Thu, 12 Feb 2009) | 2 lines +Changed paths: + M /trunk/samtools/bam_plcmd.c + M /trunk/samtools/bamtk.c + + * output max mapping quality in indel line + +------------------------------------------------------------------------ +r122 | lh3lh3 | 2009-02-11 10:59:10 +0000 (Wed, 11 Feb 2009) | 2 lines +Changed paths: + M /trunk/samtools/misc/maq2sam.c + +fixed a bug in generating tag AM + +------------------------------------------------------------------------ +r121 | lh3lh3 | 2009-02-03 10:43:11 +0000 (Tue, 03 Feb 2009) | 2 lines +Changed paths: + M /trunk/samtools/bam_index.c + M /trunk/samtools/bamtk.c + +fixed a potential memory problem in indexing + +------------------------------------------------------------------------ +r120 | bhandsaker | 2009-02-02 15:52:52 +0000 (Mon, 02 Feb 2009) | 2 lines +Changed paths: + M /trunk/samtools/Makefile + +Pass LIBS to recursive targets to facilitate building at Broad. + +------------------------------------------------------------------------ +r119 | lh3lh3 | 2009-02-02 10:12:15 +0000 (Mon, 02 Feb 2009) | 4 lines +Changed paths: + M /trunk/samtools/ChangeLog + M /trunk/samtools/bam_plcmd.c + M /trunk/samtools/bam_stat.c + M /trunk/samtools/bamtk.c + + * samtools-0.1.2-3 + * fixed a bug in generating GLFv2 for indels + * improve flagstat report a little bit + ------------------------------------------------------------------------ r118 | lh3lh3 | 2009-01-29 12:33:23 +0000 (Thu, 29 Jan 2009) | 3 lines Changed paths: diff --git a/misc/wgsim.c b/misc/wgsim.c index 1ae7572..46c9ff9 100644 --- a/misc/wgsim.c +++ b/misc/wgsim.c @@ -39,7 +39,7 @@ #include #include -#define PACKAGE_VERSION "0.2.0" +#define PACKAGE_VERSION "0.2.1" const uint8_t nst_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -352,14 +352,11 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N, n_sub[0] = n_sub[1] = n_indel[0] = n_indel[1] = n_err[0] = n_err[1] = 0; #define __gen_read(x, start, iter) do { \ - for (i = (start), k = 0, ext_coor[x] = -1; i >= 0 && i < seq.l && k < s[x]; iter) { \ + for (i = (start), k = 0, ext_coor[x] = -10; i >= 0 && i < seq.l && k < s[x]; iter) { \ int c = target[i], mut_type = c & mutmsk; \ if (ext_coor[x] < 0) { \ + if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) continue; \ ext_coor[x] = i; \ - if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) { \ - ext_coor[x] = -1; \ - break; \ - } \ } \ if (mut_type == DELETE) ++n_indel[x]; \ else if (mut_type == NOCHANGE || mut_type == SUBSTITUTE) { \ @@ -372,7 +369,7 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N, tmp_seq[x][k++] = ins & 0x3; \ } \ } \ - if (k != s[x]) ext_coor[x] = -1; \ + if (k != s[x]) ext_coor[x] = -10; \ } while (0) if (!IS_SOLID) { @@ -388,6 +385,7 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N, } else { // FF pair __gen_read(0, pos, ++i); __gen_read(1, pos + d - 1 - s[1], ++i); + ++ext_coor[0]; ++ext_coor[1]; } // change to color sequence: (0,1,2,3) -> (A,C,G,T) for (j = 0; j < 2; ++j) { @@ -423,8 +421,9 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N, for (j = 0; j < 2; ++j) { for (i = 0; i < s[j]; ++i) qstr[i] = Q; qstr[i] = 0; - fprintf(fpo[j], "@%s_%u_%u_%d:%d:%d_%llx/%d\n", name, ext_coor[0]+1, ext_coor[1]+1, - n_err[j], n_sub[j], n_indel[j], (long long)ii, j==0? is_flip+1 : 2-is_flip); + fprintf(fpo[j], "@%s_%u_%u_%d:%d:%d_%d:%d:%d_%llx/%d\n", name, ext_coor[0]+1, ext_coor[1]+1, + n_err[0], n_sub[0], n_indel[0], n_err[1], n_sub[1], n_indel[1], + (long long)ii, j==0? is_flip+1 : 2-is_flip); for (i = 0; i < s[j]; ++i) fputc("ACGTN"[(int)tmp_seq[j][i]], fpo[j]); fprintf(fpo[j], "\n+\n%s\n", qstr); diff --git a/misc/wgsim_eval.pl b/misc/wgsim_eval.pl index fe91a3d..ecc53cc 100755 --- a/misc/wgsim_eval.pl +++ b/misc/wgsim_eval.pl @@ -1,7 +1,7 @@ #!/usr/bin/perl -w # Contact: lh3 -# Version: 0.1.2 +# Version: 0.1.3 use strict; use warnings; @@ -12,10 +12,13 @@ exit; sub wgsim_eval { my %opts; - getopts('p', \%opts); - die("Usage: wgsim_eval.pl [-p] \n") if (@ARGV == 0 && -t STDIN); + getopts('pc', \%opts); + die("Usage: wgsim_eval.pl [-pc] \n") if (@ARGV == 0 && -t STDIN); my (@c0, @c1); - my $max_q = 0; + my ($max_q, $flag) = (0, 0); + my $gap = 5; + $flag |= 1 if (defined $opts{p}); + $flag |= 2 if (defined $opts{c}); while (<>) { my @t = split; my $line = $_; @@ -34,20 +37,30 @@ sub wgsim_eval { if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) { if ($1 ne $chr) { # different chr $is_correct = 0; - } elsif ($2 < $3) { # should always be true if reads are generated from wgsim - if ($t[1] & 0x10) { # reverse - $is_correct = 0 if (abs($3 - $rght) > 5); # in case of indels that are close to the end of a reads + } else { + if ($flag & 2) { + if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward + $is_correct = 0 if (abs($2 - $left) > $gap); + } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse + $is_correct = 0 if (abs($3 - $rght) > $gap); + } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward + $is_correct = 0 if (abs($3 - $left) > $gap); + } else { # R3, reverse + $is_correct = 0 if (abs($2 - $rght) > $gap); + } } else { - $is_correct = 0 if (abs($2 - $left) > 5); + if ($t[1] & 0x10) { # reverse + $is_correct = 0 if (abs($3 - $rght) > $gap); # in case of indels that are close to the end of a reads + } else { + $is_correct = 0 if (abs($2 - $left) > $gap); + } } - } else { - die("[wgsim_eval] bug in wgsim?\n"); } } else { die("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n"); } ++$c1[$q] unless ($is_correct); - print STDERR $line if (defined($opts{p}) && !$is_correct && $q > 0); + print STDERR $line if (($flag&1) && !$is_correct && $q > 0); } # print my ($cc0, $cc1) = (0, 0);