From 77734b9975b7f86b4ce3e977729b0329c3414dd7 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 9 Mar 2012 02:31:51 +0000 Subject: [PATCH] Incoporated changes from Nils Homer. * Better error checking in bam_sort.c * Parse "K", "M" and "G" in the sort command * Better color-space handling * Bugfix of wrong integer conversion New commands are not added. --- Makefile | 10 ++++++---- bam_color.c | 24 +++++++++++++++++++++--- bam_index.c | 7 ++++++- bam_sort.c | 27 ++++++++++++++++++++++++++- bam_tview.c | 3 +-- kstring.h | 3 ++- sam.c | 2 +- sam_header.c | 10 ++++++++-- 8 files changed, 71 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index 99f7eca..fdcddd6 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,13 @@ CC= gcc -CFLAGS= -g -Wall -O2 #-m64 #-arch ppc +CFLAGS= -g -Wall -O2 +#LDFLAGS= -Wl,-rpath,\$$ORIGIN/../lib DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1 KNETFILE_O= knetfile.o LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \ $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o bam_cat.o -AOBJS= bam_tview.o bam_plcmd.o sam_view.o \ - bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ +AOBJS= bam_tview.o bam_plcmd.o sam_view.o \ + bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \ cut_target.o phase.o bam2depth.o padding.o PROG= samtools @@ -16,6 +17,7 @@ LIBPATH= LIBCURSES= -lcurses # -lXCurses .SUFFIXES:.c .o +.PHONY: all lib .c.o: $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@ @@ -41,7 +43,7 @@ libbam.a:$(LOBJS) $(AR) -csru $@ $(LOBJS) samtools:lib-recur $(AOBJS) - $(CC) $(CFLAGS) -o $@ $(AOBJS) -Lbcftools $(LIBPATH) libbam.a -lbcf $(LIBCURSES) -lm -lz + $(CC) $(CFLAGS) -o $@ $(AOBJS) $(LDFLAGS) libbam.a -Lbcftools -lbcf $(LIBPATH) $(LIBCURSES) -lm -lz razip:razip.o razf.o $(KNETFILE_O) $(CC) $(CFLAGS) -o $@ razf.o razip.o $(KNETFILE_O) -lz diff --git a/bam_color.c b/bam_color.c index ce637f7..8b86e2f 100644 --- a/bam_color.c +++ b/bam_color.c @@ -19,8 +19,14 @@ char bam_aux_getCSi(bam1_t *b, int i) cs = bam_aux2Z(c); // adjust for strandedness and leading adaptor - if(bam1_strand(b)) i = strlen(cs) - 1 - i; - else i++; + if(bam1_strand(b)) { + i = strlen(cs) - 1 - i; + // adjust for leading hard clip + uint32_t cigar = bam1_cigar(b)[0]; + if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) { + i -= cigar >> BAM_CIGAR_SHIFT; + } + } else { i++; } return cs[i]; } @@ -42,7 +48,14 @@ char bam_aux_getCQi(bam1_t *b, int i) cq = bam_aux2Z(c); // adjust for strandedness - if(bam1_strand(b)) i = strlen(cq) - 1 - i; + if(bam1_strand(b)) { + i = strlen(cq) - 1 - i; + // adjust for leading hard clip + uint32_t cigar = bam1_cigar(b)[0]; + if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) { + i -= (cigar >> BAM_CIGAR_SHIFT); + } + } return cq[i]; } @@ -98,6 +111,11 @@ char bam_aux_getCEi(bam1_t *b, int i) // adjust for strandedness and leading adaptor if(bam1_strand(b)) { //reverse strand cs_i = strlen(cs) - 1 - i; + // adjust for leading hard clip + uint32_t cigar = bam1_cigar(b)[0]; + if((cigar & BAM_CIGAR_MASK) == BAM_CHARD_CLIP) { + cs_i -= cigar >> BAM_CIGAR_SHIFT; + } // get current color cur_color = cs[cs_i]; // get previous base. Note: must rc adaptor diff --git a/bam_index.c b/bam_index.c index 9610a26..d6b94e2 100644 --- a/bam_index.c +++ b/bam_index.c @@ -159,9 +159,14 @@ bam_index_t *bam_index_core(bamFile fp) bam1_core_t *c; uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor; + h = bam_header_read(fp); + if(h == 0) { + fprintf(stderr, "[bam_index_core] Invalid BAM header."); + return NULL; + } + idx = (bam_index_t*)calloc(1, sizeof(bam_index_t)); b = (bam1_t*)calloc(1, sizeof(bam1_t)); - h = bam_header_read(fp); c = &b->core; idx->n = h->n_targets; diff --git a/bam_sort.c b/bam_sort.c index abf8d4f..e3a6c47 100644 --- a/bam_sort.c +++ b/bam_sort.c @@ -418,6 +418,31 @@ void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t m bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0); } + +size_t bam_sort_get_max_mem(char *max_mem_string) +{ + char c; + size_t max_mem; + size_t multiplier=1; + c=max_mem_string[strlen(max_mem_string)-1]; + switch(c) { + case 'G': + multiplier*=1024; + case 'M': + multiplier*=1024; + case 'K': + multiplier*=1024; + case 'B': + max_mem_string[strlen(max_mem_string)-1]='\0'; + break; + default: + break; + } + max_mem = multiplier * atol(max_mem_string); + // max_mem should be checked that it was not zero after atol! + return max_mem; +} + int bam_sort(int argc, char *argv[]) { size_t max_mem = 500000000; @@ -426,7 +451,7 @@ int bam_sort(int argc, char *argv[]) switch (c) { case 'o': is_stdout = 1; break; case 'n': is_by_qname = 1; break; - case 'm': max_mem = atol(optarg); break; + case 'm': max_mem = bam_sort_get_max_mem(optarg); break; } } if (optind + 2 > argc) { diff --git a/bam_tview.c b/bam_tview.c index 4eea955..1324815 100644 --- a/bam_tview.c +++ b/bam_tview.c @@ -114,7 +114,6 @@ int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void if (!p->is_del) { if (tv->base_for == TV_BASE_COLOR_SPACE && (c = bam_aux_getCSi(p->b, p->qpos))) { - c = bam_aux_getCSi(p->b, p->qpos); // assume that if we found one color, we will be able to get the color error if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? ',' : '.'; } else { @@ -304,7 +303,7 @@ static void tv_win_goto(tview_t *tv, int *tid, int *pos) int c = wgetch(tv->wgoto); wrefresh(tv->wgoto); if (c == KEY_BACKSPACE || c == '\010' || c == '\177') { - --l; + if(l > 0) --l; } else if (c == KEY_ENTER || c == '\012' || c == '\015') { int _tid = -1, _beg, _end; if (str[0] == '=') { diff --git a/kstring.h b/kstring.h index 89a7920..d490f58 100644 --- a/kstring.h +++ b/kstring.h @@ -112,7 +112,8 @@ static inline int kputw(int c, kstring_t *s) char buf[16]; int l, x; if (c == 0) return kputc('0', s); - for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if(c < 0) for (l = 0, x = c; x < 0; x /= 10) buf[l++] = '0' - (x%10); + else for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; if (c < 0) buf[l++] = '-'; if (s->l + l + 1 >= s->m) { s->m = s->l + l + 2; diff --git a/sam.c b/sam.c index f026bc8..ba3158f 100644 --- a/sam.c +++ b/sam.c @@ -79,7 +79,7 @@ samfile_t *samopen(const char *fn, const char *mode, const void *aux) } else { // text // open file fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout; - if (fp->x.tamr == 0) goto open_err_ret; + if (fp->x.tamw == 0) goto open_err_ret; if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2; else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2; else fp->type |= BAM_OFDEC<<2; diff --git a/sam_header.c b/sam_header.c index f4c8a3b..d348d10 100644 --- a/sam_header.c +++ b/sam_header.c @@ -434,8 +434,14 @@ static int sam_header_line_validate(HeaderLine *hline) tag = tags->data; if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) ) { - debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); - return 0; + // Lower case tags are user-defined values. + if( !(islower(tag->key[0]) || islower(tag->key[1])) ) + { + // Neither is lower case, but tag was not recognized. + debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]); + // return 0; // Even unknown tags are allowed - for forward compatibility with new attributes + } + // else - allow user defined tag } tags = tags->next; } -- 2.39.2