From 140d53dfdfe32bfa3ae4b24f1f33d071f366054f Mon Sep 17 00:00:00 2001
From: Heng Li <lh3@live.co.uk>
Date: Sat, 29 Aug 2009 19:36:41 +0000
Subject: [PATCH]  * samtools-0.1.5-32 (r449)  * fillmd: fixed a bug in
 modifying MD/NM tags  * in import, give a warning if the read is aligned but
 there is no CIGAR.

---
 bam_import.c     |  5 ++++-
 bam_md.c         | 11 +++++++----
 bamtk.c          |  2 +-
 misc/samtools.pl |  7 ++++++-
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/bam_import.c b/bam_import.c
index 19cd75f..1dc906e 100644
--- a/bam_import.c
+++ b/bam_import.c
@@ -374,7 +374,10 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b)
 			c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b)));
 			doff += c->n_cigar * 4;
 		} else {
-			if (!(c->flag&BAM_FUNMAP)) parse_error(fp->n_lines, "mapped sequence without CIGAR");
+			if (!(c->flag&BAM_FUNMAP)) {
+				fprintf(stderr, "Parse warning at line %lld: mapped sequence without CIGAR\n", (long long)fp->n_lines);
+				c->flag |= BAM_FUNMAP;
+			}
 			c->bin = bam_reg2bin(c->pos, c->pos + 1);
 		}
 	}
diff --git a/bam_md.c b/bam_md.c
index ead2346..e2fdff8 100644
--- a/bam_md.c
+++ b/bam_md.c
@@ -16,10 +16,6 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal)
 	uint8_t *old_md, *old_nm;
 	int32_t old_nm_i = -1, nm = 0;
 
-	old_md = bam_aux_get(b, "MD");
-	old_nm = bam_aux_get(b, "NM");
-	if (c->flag & BAM_FUNMAP) return;
-	if (old_nm) old_nm_i = bam_aux2i(old_nm);
 	str = (kstring_t*)calloc(1, sizeof(kstring_t));
 	for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
 		int j, l = cigar[i]>>4, op = cigar[i]&0xf;
@@ -57,12 +53,19 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal)
 		}
 	}
 	ksprintf(str, "%d", u);
+	// update NM
+	old_nm = bam_aux_get(b, "NM");
+	if (c->flag & BAM_FUNMAP) return;
+	if (old_nm) old_nm_i = bam_aux2i(old_nm);
 	if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
 	else if (nm != old_nm_i) {
+		uint8_t *old_data = b->data;
 		fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm);
 		bam_aux_del(b, old_nm);
 		bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
 	}
+	// update MD
+	old_md = bam_aux_get(b, "MD");
 	if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
 	else {
 		int is_diff = 0;
diff --git a/bamtk.c b/bamtk.c
index c6a5714..41b3bc8 100644
--- a/bamtk.c
+++ b/bamtk.c
@@ -9,7 +9,7 @@
 #endif
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.5-31 (r448)"
+#define PACKAGE_VERSION "0.1.5-32 (r449)"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);
diff --git a/misc/samtools.pl b/misc/samtools.pl
index 8c64de8..86b285c 100755
--- a/misc/samtools.pl
+++ b/misc/samtools.pl
@@ -302,7 +302,11 @@ sub uniqcmp {
 		$p->[0][5]-$p->[1][5], "\n" if ($z && defined($opts{p}) && ($p->[0][3] >= $opts{q} || $p->[1][3] >= $opts{q}));
 	} elsif (defined($p->[0])) {
 	  ++$cnt[$p->[0][3]>=$opts{q}? 6 : 7];
+	  print STDERR "$x\t$p->[0][1]:$p->[0][2]\t$p->[0][3]\t$p->[0][4]\t*\t0\t*\t",
+		$p->[0][5], "\n" if (defined($opts{p}) && $p->[0][3] >= $opts{q});
 	} else {
+	  print STDERR "$x\t*\t0\t*\t$p->[1][1]:$p->[1][2]\t$p->[1][3]\t$p->[1][4]\t",
+		-$p->[1][5], "\n" if (defined($opts{p}) && $p->[1][3] >= $opts{q});
 	  ++$cnt[$p->[1][3]>=$opts{q}? 8 : 9];
 	}
   }
@@ -326,7 +330,8 @@ sub uniqcmp_aux {
   while (<$fh>) {
 	my @t = split;
 	next if (@t < 11);
-	my $l = ($t[5] =~ /^(\d+)S/)? $1 : 0;
+#	my $l = ($t[5] =~ /^(\d+)S/)? $1 : 0;
+	my $l = 0;
 	my ($x, $nm) = (0, 0);
 	$nm = $1 if (/NM:i:(\d+)/);
 	$_ = $t[5];
-- 
2.39.5