From 140d53dfdfe32bfa3ae4b24f1f33d071f366054f Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 29 Aug 2009 19:36:41 +0000 Subject: [PATCH] * samtools-0.1.5-32 (r449) * fillmd: fixed a bug in modifying MD/NM tags * in import, give a warning if the read is aligned but there is no CIGAR. --- bam_import.c | 5 ++++- bam_md.c | 11 +++++++---- bamtk.c | 2 +- misc/samtools.pl | 7 ++++++- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/bam_import.c b/bam_import.c index 19cd75f..1dc906e 100644 --- a/bam_import.c +++ b/bam_import.c @@ -374,7 +374,10 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b))); doff += c->n_cigar * 4; } else { - if (!(c->flag&BAM_FUNMAP)) parse_error(fp->n_lines, "mapped sequence without CIGAR"); + if (!(c->flag&BAM_FUNMAP)) { + fprintf(stderr, "Parse warning at line %lld: mapped sequence without CIGAR\n", (long long)fp->n_lines); + c->flag |= BAM_FUNMAP; + } c->bin = bam_reg2bin(c->pos, c->pos + 1); } } diff --git a/bam_md.c b/bam_md.c index ead2346..e2fdff8 100644 --- a/bam_md.c +++ b/bam_md.c @@ -16,10 +16,6 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal) uint8_t *old_md, *old_nm; int32_t old_nm_i = -1, nm = 0; - old_md = bam_aux_get(b, "MD"); - old_nm = bam_aux_get(b, "NM"); - if (c->flag & BAM_FUNMAP) return; - if (old_nm) old_nm_i = bam_aux2i(old_nm); str = (kstring_t*)calloc(1, sizeof(kstring_t)); for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { int j, l = cigar[i]>>4, op = cigar[i]&0xf; @@ -57,12 +53,19 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal) } } ksprintf(str, "%d", u); + // update NM + old_nm = bam_aux_get(b, "NM"); + if (c->flag & BAM_FUNMAP) return; + if (old_nm) old_nm_i = bam_aux2i(old_nm); if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); else if (nm != old_nm_i) { + uint8_t *old_data = b->data; fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm); bam_aux_del(b, old_nm); bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); } + // update MD + old_md = bam_aux_get(b, "MD"); if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); else { int is_diff = 0; diff --git a/bamtk.c b/bamtk.c index c6a5714..41b3bc8 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.5-31 (r448)" +#define PACKAGE_VERSION "0.1.5-32 (r449)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/misc/samtools.pl b/misc/samtools.pl index 8c64de8..86b285c 100755 --- a/misc/samtools.pl +++ b/misc/samtools.pl @@ -302,7 +302,11 @@ sub uniqcmp { $p->[0][5]-$p->[1][5], "\n" if ($z && defined($opts{p}) && ($p->[0][3] >= $opts{q} || $p->[1][3] >= $opts{q})); } elsif (defined($p->[0])) { ++$cnt[$p->[0][3]>=$opts{q}? 6 : 7]; + print STDERR "$x\t$p->[0][1]:$p->[0][2]\t$p->[0][3]\t$p->[0][4]\t*\t0\t*\t", + $p->[0][5], "\n" if (defined($opts{p}) && $p->[0][3] >= $opts{q}); } else { + print STDERR "$x\t*\t0\t*\t$p->[1][1]:$p->[1][2]\t$p->[1][3]\t$p->[1][4]\t", + -$p->[1][5], "\n" if (defined($opts{p}) && $p->[1][3] >= $opts{q}); ++$cnt[$p->[1][3]>=$opts{q}? 8 : 9]; } } @@ -326,7 +330,8 @@ sub uniqcmp_aux { while (<$fh>) { my @t = split; next if (@t < 11); - my $l = ($t[5] =~ /^(\d+)S/)? $1 : 0; +# my $l = ($t[5] =~ /^(\d+)S/)? $1 : 0; + my $l = 0; my ($x, $nm) = (0, 0); $nm = $1 if (/NM:i:(\d+)/); $_ = $t[5]; -- 2.39.2