From ecbde30918d2dc8ed3fcbad5e4cff44d0977dc34 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 2 Mar 2009 10:39:09 +0000 Subject: [PATCH] * samtools-0.1.2-10 * fixed a bug in import: core.bin is undefined for unmapped reads * this bug can be alleviated (not completely solved) in bam_index.c * update to GLFv3: pos is changed to offset for better compression --- Makefile | 3 ++- bam_import.c | 2 +- bam_index.c | 2 +- bam_plcmd.c | 15 ++++++++------- bamtk.c | 2 +- glf.c | 14 ++++++++------ glf.h | 4 ++-- 7 files changed, 23 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index f74baa5..6b60a7b 100644 --- a/Makefile +++ b/Makefile @@ -46,13 +46,14 @@ razip.o:razf.h bam.o:bam.h razf.h bam_endian.h bam_import.o:bam.h kseq.h khash.h razf.h bam_pileup.o:bam.h razf.h ksort.h -bam_plcmd.o:bam.h faidx.h bam_maqcns.h +bam_plcmd.o:bam.h faidx.h bam_maqcns.h glf.h bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h bam_lpileup.o:bam.h ksort.h bam_tview.o:bam.h faidx.h bam_maqcns.h bam_maqcns.o:bam.h ksort.h bam_maqcns.h bam_sort.o:bam.h ksort.h razf.h razf.o:razf.h +glf.o:glf.h faidx.o:faidx.h razf.h khash.h faidx_main.o:faidx.h razf.h diff --git a/bam_import.c b/bam_import.c index 4b7bb21..7742d2f 100644 --- a/bam_import.c +++ b/bam_import.c @@ -209,7 +209,7 @@ int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b) if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation"); c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b))); doff += c->n_cigar * 4; - } + } else c->bin = bam_reg2bin(c->pos, c->pos + 1); } { // mtid, mpos, isize ret = ks_getuntil(ks, 0, str, &dret); c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid; diff --git a/bam_index.c b/bam_index.c index 59deca0..7670c2f 100644 --- a/bam_index.c +++ b/bam_index.c @@ -374,7 +374,7 @@ static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[MAX_BIN]) static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b) { uint32_t rbeg = b->core.pos; - uint32_t rend = bam_calend(&b->core, bam1_cigar(b)); + uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1; return (rend > beg && rbeg < end); } diff --git a/bam_plcmd.c b/bam_plcmd.c index 2803659..2fe32b4 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -20,7 +20,7 @@ typedef struct { faidx_t *fai; khash_t(64) *hash; uint32_t format; - int tid, len; + int tid, len, last_pos; int mask; char *ref; glfFile fp; // for glf output only @@ -58,7 +58,7 @@ static int glt_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, bam_maqindel_ret_t *r = 0; int rb; glf1_t *g; - glf2_t g2; + glf3_t g2; if (d->fai == 0) { fprintf(stderr, "[glt_func] reference sequence is required for generating GLT. Abort!\n"); exit(1); @@ -66,9 +66,9 @@ static int glt_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, if (d->hash && kh_get(64, d->hash, (uint64_t)tid<<32|pos) == kh_end(d->hash)) return 0; if (d->fai && (int)tid != d->tid) { if (d->ref) { // then write the end mark - memset(&g2, 0, sizeof(glf2_t)); + memset(&g2, 0, sizeof(glf3_t)); g2.type = GLF_TYPE_END; - bgzf_write(d->fp, &g2, sizeof(glf2_t)); + bgzf_write(d->fp, &g2, sizeof(glf3_t)); } glf_ref_write(d->fp, d->h->target_name[tid]); // write reference free(d->ref); @@ -79,8 +79,9 @@ static int glt_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, g = bam_maqcns_glfgen(n, pu, bam_nt16_table[rb], d->c); memcpy(&g2, g, sizeof(glf1_t)); g2.type = GLF_TYPE_NORMAL; - g2.pos = pos; - bgzf_write(d->fp, &g2, sizeof(glf2_t)); + g2.offset = pos - d->last_pos; + d->last_pos = pos; + bgzf_write(d->fp, &g2, sizeof(glf3_t)); r = bam_maqindel(n, pos, d->ido, pu, d->ref); if (r) { // then write indel line int g3 = 3 * n, min; @@ -96,7 +97,7 @@ static int glt_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, *(int16_t*)(g2.lk + 3) = (int16_t)r->indel1; *(int16_t*)(g2.lk + 5) = (int16_t)r->indel2; g2.min_lk = min < 255? min : 255; - bgzf_write(d->fp, &g2, sizeof(glf2_t)); + bgzf_write(d->fp, &g2, sizeof(glf3_t)); if (r->indel1) bgzf_write(d->fp, r->s[0]+1, r->indel1>0? r->indel1 : -r->indel1); if (r->indel2) bgzf_write(d->fp, r->s[1]+1, r->indel2>0? r->indel2 : -r->indel2); bam_maqindel_ret_destroy(r); diff --git a/bamtk.c b/bamtk.c index 950eae0..9ebaac6 100644 --- a/bamtk.c +++ b/bamtk.c @@ -3,7 +3,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.2-9" +#define PACKAGE_VERSION "0.1.2-10" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/glf.c b/glf.c index 302f9ef..0d09180 100644 --- a/glf.c +++ b/glf.c @@ -74,10 +74,10 @@ void glf_ref_write(glfFile fp, const char *str) bgzf_write(fp, str, n); } -void glf_view_normal(const char *ref_name, glf2_t *g1) +void glf_view_normal(const char *ref_name, glf3_t *g1, int pos) { int j; - printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, g1->pos + 1, "XACMGRSVTWYHKDBN"[g1->ref_base], + printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1, "XACMGRSVTWYHKDBN"[g1->ref_base], g1->depth, g1->max_mapQ, g1->min_lk); for (j = 0; j != 10; ++j) printf("\t%d", g1->lk[j]); printf("\n"); @@ -99,18 +99,20 @@ void glf_view(glfFile fp) { glf_header_t *h; char *name, *str; - glf2_t g2; + glf3_t g2; int max; h = glf_header_read(fp); str = 0; max = 0; while ((name = glf_ref_read(fp)) != 0) { - while (bgzf_read(fp, &g2, sizeof(glf2_t))) { + int pos = 0; + while (bgzf_read(fp, &g2, sizeof(glf3_t))) { if (g2.type == GLF_TYPE_END) break; - else if (g2.type == GLF_TYPE_NORMAL) glf_view_normal(name, &g2); + pos += g2.offset; + if (g2.type == GLF_TYPE_NORMAL) glf_view_normal(name, &g2, pos); else if (g2.type == GLF_TYPE_INDEL) { int16_t indel1, indel2; - printf("%s\t%d\t*\t%d\t%d\t%d\t", name, g2.pos + 1, g2.depth, g2.max_mapQ, g2.min_lk); + printf("%s\t%d\t*\t%d\t%d\t%d\t", name, pos + 1, g2.depth, g2.max_mapQ, g2.min_lk); printf("%d\t%d\t%d\t", g2.lk[0], g2.lk[1], g2.lk[2]); indel1 = *(int16_t*)(g2.lk + 3); indel2 = *(int16_t*)(g2.lk + 5); diff --git a/glf.h b/glf.h index af72a48..99acd3a 100644 --- a/glf.h +++ b/glf.h @@ -21,8 +21,8 @@ typedef struct { unsigned char max_mapQ; /** maximum mapping quality */ unsigned char lk[10]; /** log likelihood ratio, capped at 255 */ unsigned min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ - unsigned pos; /** this is ***ZERO-BASED*** coordinate */ -} glf2_t; + unsigned offset; /** the first base in a chromosome has offset zero. */ +} glf3_t; typedef struct { int32_t l_text; -- 2.39.2