]> git.donarmstrong.com Git - samtools.git/commitdiff
* wgsim-0.2.1
authorHeng Li <lh3@live.co.uk>
Thu, 26 Feb 2009 14:53:03 +0000 (14:53 +0000)
committerHeng Li <lh3@live.co.uk>
Thu, 26 Feb 2009 14:53:03 +0000 (14:53 +0000)
 * fixed a bug about color read coordinates
 * fixed a bug in read names
 * wgsim_eval.pl-0.1.3
 * make the script work with color reads

ChangeLog
misc/wgsim.c
misc/wgsim_eval.pl

index 31ec75786fbfdf319788140d8f4bd16ab4b2a36e..887cc605a03818d3767347536069ce0dd0f27151 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,97 @@
+------------------------------------------------------------------------
+r129 | lh3lh3 | 2009-02-18 22:23:27 +0000 (Wed, 18 Feb 2009) | 3 lines
+Changed paths:
+   M /trunk/samtools/bam_index.c
+   M /trunk/samtools/bamtk.c
+
+ * samtools-0.1.2-9
+ * fixed a bug in bam_fetch, caused by completely contained adjacent chunks
+
+------------------------------------------------------------------------
+r128 | bhandsaker | 2009-02-18 19:06:57 +0000 (Wed, 18 Feb 2009) | 2 lines
+Changed paths:
+   M /trunk/samtools/bamtk.c
+
+Fix annoying segv when invalid region specified.
+
+------------------------------------------------------------------------
+r127 | lh3lh3 | 2009-02-17 10:49:55 +0000 (Tue, 17 Feb 2009) | 2 lines
+Changed paths:
+   D /trunk/samtools/misc/indel_filter.pl
+   A /trunk/samtools/misc/samtools.pl
+
+ * move indel_filter.pl to samtools.pl
+
+------------------------------------------------------------------------
+r126 | lh3lh3 | 2009-02-14 21:22:30 +0000 (Sat, 14 Feb 2009) | 3 lines
+Changed paths:
+   M /trunk/samtools/bam_mate.c
+   M /trunk/samtools/bamtk.c
+
+ * samtools-0.1.2-7
+ * fixed a bug in fixmate: SE reads are flagged as BAM_FMUNMAP
+
+------------------------------------------------------------------------
+r125 | lh3lh3 | 2009-02-13 09:54:45 +0000 (Fri, 13 Feb 2009) | 3 lines
+Changed paths:
+   M /trunk/samtools/bam_stat.c
+   M /trunk/samtools/bamtk.c
+
+ * samtools-0.1.2-7
+ * fixed a minor bug in flagstat
+
+------------------------------------------------------------------------
+r124 | lh3lh3 | 2009-02-12 11:15:32 +0000 (Thu, 12 Feb 2009) | 3 lines
+Changed paths:
+   M /trunk/samtools/bam_maqcns.c
+   M /trunk/samtools/bamtk.c
+   M /trunk/samtools/misc/indel_filter.pl
+
+ * samtools-0.1.2-6
+ * improve indel caller by setting maximum window size
+
+------------------------------------------------------------------------
+r123 | lh3lh3 | 2009-02-12 10:30:29 +0000 (Thu, 12 Feb 2009) | 2 lines
+Changed paths:
+   M /trunk/samtools/bam_plcmd.c
+   M /trunk/samtools/bamtk.c
+
+ * output max mapping quality in indel line
+
+------------------------------------------------------------------------
+r122 | lh3lh3 | 2009-02-11 10:59:10 +0000 (Wed, 11 Feb 2009) | 2 lines
+Changed paths:
+   M /trunk/samtools/misc/maq2sam.c
+
+fixed a bug in generating tag AM
+
+------------------------------------------------------------------------
+r121 | lh3lh3 | 2009-02-03 10:43:11 +0000 (Tue, 03 Feb 2009) | 2 lines
+Changed paths:
+   M /trunk/samtools/bam_index.c
+   M /trunk/samtools/bamtk.c
+
+fixed a potential memory problem in indexing
+
+------------------------------------------------------------------------
+r120 | bhandsaker | 2009-02-02 15:52:52 +0000 (Mon, 02 Feb 2009) | 2 lines
+Changed paths:
+   M /trunk/samtools/Makefile
+
+Pass LIBS to recursive targets to facilitate building at Broad.
+
+------------------------------------------------------------------------
+r119 | lh3lh3 | 2009-02-02 10:12:15 +0000 (Mon, 02 Feb 2009) | 4 lines
+Changed paths:
+   M /trunk/samtools/ChangeLog
+   M /trunk/samtools/bam_plcmd.c
+   M /trunk/samtools/bam_stat.c
+   M /trunk/samtools/bamtk.c
+
+ * samtools-0.1.2-3
+ * fixed a bug in generating GLFv2 for indels
+ * improve flagstat report a little bit
+
 ------------------------------------------------------------------------
 r118 | lh3lh3 | 2009-01-29 12:33:23 +0000 (Thu, 29 Jan 2009) | 3 lines
 Changed paths:
index 1ae7572a6a6aff77b04acfea9f6fd194f3ee6f3e..46c9ff9bcda84686a124f37e6cf8de57915cbb54 100644 (file)
@@ -39,7 +39,7 @@
 #include <ctype.h>
 #include <string.h>
 
-#define PACKAGE_VERSION "0.2.0"
+#define PACKAGE_VERSION "0.2.1"
 
 const uint8_t nst_nt4_table[256] = {
        4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4, 
@@ -352,14 +352,11 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N,
                        n_sub[0] = n_sub[1] = n_indel[0] = n_indel[1] = n_err[0] = n_err[1] = 0;
 
 #define __gen_read(x, start, iter) do {                                                                        \
-                               for (i = (start), k = 0, ext_coor[x] = -1; i >= 0 && i < seq.l && k < s[x]; iter) {     \
+                               for (i = (start), k = 0, ext_coor[x] = -10; i >= 0 && i < seq.l && k < s[x]; iter) {    \
                                        int c = target[i], mut_type = c & mutmsk;                       \
                                        if (ext_coor[x] < 0) {                                                          \
+                                               if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) continue; \
                                                ext_coor[x] = i;                                                                \
-                                               if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) { \
-                                                       ext_coor[x] = -1;                                                       \
-                                                       break;                                                                          \
-                                               }                                                                                               \
                                        }                                                                                                       \
                                        if (mut_type == DELETE) ++n_indel[x];                           \
                                        else if (mut_type == NOCHANGE || mut_type == SUBSTITUTE) { \
@@ -372,7 +369,7 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N,
                                                        tmp_seq[x][k++] = ins & 0x3;                            \
                                        }                                                                                                       \
                                }                                                                                                               \
-                               if (k != s[x]) ext_coor[x] = -1                                               \
+                               if (k != s[x]) ext_coor[x] = -10;                                               \
                        } while (0)
 
                        if (!IS_SOLID) {
@@ -388,6 +385,7 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N,
                                } else { // FF pair
                                        __gen_read(0, pos, ++i);
                                        __gen_read(1, pos + d - 1 - s[1], ++i);
+                                       ++ext_coor[0]; ++ext_coor[1];
                                }
                                // change to color sequence: (0,1,2,3) -> (A,C,G,T)
                                for (j = 0; j < 2; ++j) {
@@ -423,8 +421,9 @@ void wgsim_core(FILE *fpout1, FILE *fpout2, FILE *fp_fa, int is_hap, uint64_t N,
                        for (j = 0; j < 2; ++j) {
                                for (i = 0; i < s[j]; ++i) qstr[i] = Q;
                                qstr[i] = 0;
-                               fprintf(fpo[j], "@%s_%u_%u_%d:%d:%d_%llx/%d\n", name, ext_coor[0]+1, ext_coor[1]+1,
-                                               n_err[j], n_sub[j], n_indel[j], (long long)ii, j==0? is_flip+1 : 2-is_flip);
+                               fprintf(fpo[j], "@%s_%u_%u_%d:%d:%d_%d:%d:%d_%llx/%d\n", name, ext_coor[0]+1, ext_coor[1]+1,
+                                               n_err[0], n_sub[0], n_indel[0], n_err[1], n_sub[1], n_indel[1],
+                                               (long long)ii, j==0? is_flip+1 : 2-is_flip);
                                for (i = 0; i < s[j]; ++i)
                                        fputc("ACGTN"[(int)tmp_seq[j][i]], fpo[j]);
                                fprintf(fpo[j], "\n+\n%s\n", qstr);
index fe91a3d32403f19c8e5c5a8d330a94fd69f728e6..ecc53cca5163d8050ddb9159085d6b7f0ad3b9c3 100755 (executable)
@@ -1,7 +1,7 @@
 #!/usr/bin/perl -w
 
 # Contact: lh3
-# Version: 0.1.2
+# Version: 0.1.3
 
 use strict;
 use warnings;
@@ -12,10 +12,13 @@ exit;
 
 sub wgsim_eval {
   my %opts;
-  getopts('p', \%opts);
-  die("Usage: wgsim_eval.pl [-p] <in.sam>\n") if (@ARGV == 0 && -t STDIN);
+  getopts('pc', \%opts);
+  die("Usage: wgsim_eval.pl [-pc] <in.sam>\n") if (@ARGV == 0 && -t STDIN);
   my (@c0, @c1);
-  my $max_q = 0;
+  my ($max_q, $flag) = (0, 0);
+  my $gap = 5;
+  $flag |= 1 if (defined $opts{p});
+  $flag |= 2 if (defined $opts{c});
   while (<>) {
        my @t = split;
        my $line = $_;
@@ -34,20 +37,30 @@ sub wgsim_eval {
        if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) {
          if ($1 ne $chr) { # different chr
                $is_correct = 0;
-         } elsif ($2 < $3) { # should always be true if reads are generated from wgsim
-               if ($t[1] & 0x10) { # reverse
-                 $is_correct = 0 if (abs($3 - $rght) > 5); # in case of indels that are close to the end of a reads
+         } else {
+               if ($flag & 2) {
+                 if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward
+                       $is_correct = 0 if (abs($2 - $left) > $gap);
+                 } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse
+                       $is_correct = 0 if (abs($3 - $rght) > $gap);
+                 } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward
+                       $is_correct = 0 if (abs($3 - $left) > $gap);
+                 } else { # R3, reverse
+                       $is_correct = 0 if (abs($2 - $rght) > $gap);
+                 }
                } else {
-                 $is_correct = 0 if (abs($2 - $left) > 5);
+                 if ($t[1] & 0x10) { # reverse
+                       $is_correct = 0 if (abs($3 - $rght) > $gap); # in case of indels that are close to the end of a reads
+                 } else {
+                       $is_correct = 0 if (abs($2 - $left) > $gap);
+                 }
                }
-         } else {
-               die("[wgsim_eval] bug in wgsim?\n");
          }
        } else {
          die("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n");
        }
        ++$c1[$q] unless ($is_correct);
-       print STDERR $line if (defined($opts{p}) && !$is_correct && $q > 0);
+       print STDERR $line if (($flag&1) && !$is_correct && $q > 0);
   }
   # print
   my ($cc0, $cc1) = (0, 0);