From 6762f5307ddcbc61cb6752db2371f8112f31a988 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 24 Apr 2009 20:27:26 +0000 Subject: [PATCH] * samtools-0.1.3-5 (r243) * fixed a long existing bug which may cause memory leak * check MD * consensus calling now works with "=", but indel calling not --- bam.h | 6 ++---- bam_maqcns.c | 5 +++-- bam_md.c | 26 +++++++++++++++++++++----- bamtk.c | 2 +- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/bam.h b/bam.h index 92e7a36..b607f96 100644 --- a/bam.h +++ b/bam.h @@ -179,7 +179,6 @@ typedef struct { @field data_len current length of bam1_t::data @field m_data maximum length of bam1_t::data @field data all variable-length data, concatenated; structure: cigar-qname-seq-qual-aux - @field hash hash table for fast retrieval of tag-value pairs; private @discussion Notes: @@ -191,7 +190,6 @@ typedef struct { bam1_core_t core; int l_aux, data_len, m_data; uint8_t *data; - void *hash; } bam1_t; #define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0) @@ -401,8 +399,8 @@ extern "C" { @abstract Free the memory allocated for an alignment. @param b pointer to an alignment */ -#define bam_destroy1(b) do { \ - if ((b)->hash) free((b)->data); free(b); \ +#define bam_destroy1(b) do { \ + free((b)->data); free(b); \ } while (0) /*! diff --git a/bam_maqcns.c b/bam_maqcns.c index 60a1945..482f33c 100644 --- a/bam_maqcns.c +++ b/bam_maqcns.c @@ -142,13 +142,14 @@ glf1_t *bam_maqcns_glfgen(int _n, const bam_pileup1_t *pl, uint8_t ref_base, bam } for (i = n = 0; i < _n; ++i) { const bam_pileup1_t *p = pl + i; - uint32_t q, x = 0; + uint32_t q, x = 0, qq; if (p->is_del || (p->b->core.flag&BAM_FUNMAP)) continue; q = (uint32_t)bam1_qual(p->b)[p->qpos]; x |= (uint32_t)bam1_strand(p->b) << 18 | q << 8 | p->b->core.qual; if (p->b->core.qual < q) q = p->b->core.qual; x |= q << 24; - q = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; + qq = bam1_seqi(bam1_seq(p->b), p->qpos); + q = bam_nt16_nt4_table[qq? qq : ref_base]; if (!p->is_del && q < 4) x |= 1 << 21 | q << 16; bm->aux->info[n++] = x; } diff --git a/bam_md.c b/bam_md.c index 303a83d..a7d2ab4 100644 --- a/bam_md.c +++ b/bam_md.c @@ -1,4 +1,6 @@ #include +#include +#include #include "faidx.h" #include "bam.h" #include "kstring.h" @@ -8,13 +10,14 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal) uint8_t *seq = bam1_seq(b); uint32_t *cigar = bam1_cigar(b); bam1_core_t *c = &b->core; - int i, x, y, u = 0, has_md = 0; + int i, x, y, u = 0; kstring_t *str; + uint8_t *old_md; - if (bam_aux_get(b, "MD")) has_md = 1; - if (has_md == 1 && !is_equal) return; // no need to add MD - str = (kstring_t*)calloc(1, sizeof(kstring_t)); + old_md = bam_aux_get(b, "MD"); if (c->flag & BAM_FUNMAP) return; + if (old_md && !is_equal) return; // no need to add MD + str = (kstring_t*)calloc(1, sizeof(kstring_t)); for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { int j, l = cigar[i]>>4, op = cigar[i]&0xf; if (op == BAM_CMATCH) { @@ -40,6 +43,7 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal) if (ref[x+j] == 0) break; kputc(ref[x+j], str); } + u = 0; if (j < l) break; x += l; } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) { @@ -49,7 +53,18 @@ void bam_fillmd1(bam1_t *b, char *ref, int is_equal) } } ksprintf(str, "%d", u); - if (!has_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); + if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); + else { + int is_diff = 0; + if (strlen((char*)old_md+1) == str->l) { + for (i = 0; i < str->l; ++i) + if (toupper(old_md[i+1]) != toupper(str->s[i])) + break; + if (i < str->l) is_diff = 1; + } else is_diff = 1; + if (is_diff) + fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' != '%s'\n", bam1_qname(b), old_md+1, str->s); + } free(str->s); free(str); } @@ -91,6 +106,7 @@ int bam_fillmd(int argc, char *argv[]) } bam_destroy1(b); + free(ref); fai_destroy(fai); bam_header_destroy(header); bam_close(fp); bam_close(fpout); diff --git a/bamtk.c b/bamtk.c index b10c339..fa256ae 100644 --- a/bamtk.c +++ b/bamtk.c @@ -3,7 +3,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.3-4 (r242)" +#define PACKAGE_VERSION "0.1.3-5 (r243)" #endif int bam_taf2baf(int argc, char *argv[]); -- 2.39.5