From: Heng Li Date: Sun, 18 Mar 2012 15:43:30 +0000 (-0400) Subject: multi-threading working on toy examples X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=d46afb500a437323dda82920f9fb18324b2dbec0 multi-threading working on toy examples --- diff --git a/Makefile b/Makefile index 4699878..0d3eb4c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ CC= gcc CFLAGS= -g -Wall -O2 #LDFLAGS= -Wl,-rpath,\$$ORIGIN/../lib -DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -DBGZF_CACHE -D_CURSES_LIB=1 +DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1 KNETFILE_O= knetfile.o LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \ @@ -43,13 +43,16 @@ libbam.a:$(LOBJS) $(AR) -csru $@ $(LOBJS) samtools:lib-recur $(AOBJS) - $(CC) $(CFLAGS) -o $@ $(AOBJS) $(LDFLAGS) libbam.a -Lbcftools -lbcf $(LIBPATH) $(LIBCURSES) -lm -lz + $(CC) $(CFLAGS) -o $@ $(AOBJS) $(LDFLAGS) libbam.a -Lbcftools -lbcf $(LIBPATH) $(LIBCURSES) -lm -lz -lpthread razip:razip.o razf.o $(KNETFILE_O) $(CC) $(CFLAGS) -o $@ razf.o razip.o $(KNETFILE_O) -lz bgzip:bgzip.o bgzf.o $(KNETFILE_O) - $(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(KNETFILE_O) -lz + $(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(KNETFILE_O) -lz -lpthread + +bgzf.o:bgzf.h + $(CC) -c $(CFLAGS) $(DFLAGS) -DBGZF_CACHE $(INCLUDES) bgzf.c -o $@ razip.o:razf.h bam.o:bam.h razf.h bam_endian.h kstring.h sam_header.h diff --git a/bcftools/Makefile b/bcftools/Makefile index 9b6f863..be831de 100644 --- a/bcftools/Makefile +++ b/bcftools/Makefile @@ -31,7 +31,7 @@ libbcf.a:$(LOBJS) $(AR) -csru $@ $(LOBJS) bcftools:lib $(AOBJS) - $(CC) $(CFLAGS) -o $@ $(AOBJS) -L. $(LIBPATH) -lbcf -lm -lz + $(CC) $(CFLAGS) -o $@ $(AOBJS) -L. $(LIBPATH) -lbcf -lm -lz -lpthread bcf.o:bcf.h vcf.o:bcf.h diff --git a/bgzf.c b/bgzf.c index 566a1e3..281aac0 100644 --- a/bgzf.c +++ b/bgzf.c @@ -219,7 +219,7 @@ static int inflate_block(BGZF* fp, int block_length) zs.next_in = fp->compressed_block + 18; zs.avail_in = block_length - 16; zs.next_out = fp->uncompressed_block; - zs.avail_out = BGZF_BLOCK_SIZE; + zs.avail_out = BGZF_MAX_BLOCK_SIZE; if (inflateInit2(&zs, -15) != Z_OK) { fp->errcode |= BGZF_ERR_ZLIB; @@ -310,7 +310,7 @@ int bgzf_read_block(BGZF *fp) int count, size = 0, block_length, remaining; int64_t block_address; block_address = _bgzf_tell((_bgzf_file_t)fp->fp); - if (load_block_from_cache(fp, block_address)) return 0; + if (fp->cache_size && load_block_from_cache(fp, block_address)) return 0; count = _bgzf_read(fp->fp, header, sizeof(header)); if (count == 0) { // no data read fp->block_length = 0; @@ -385,14 +385,15 @@ typedef struct mtaux_t { worker_t *w; } mtaux_t; -void bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) +int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) { int i; mtaux_t *mt; - if (!fp->is_write || fp->mt || n_threads <= 1) return; + if (!fp->is_write || fp->mt || n_threads <= 1) return -1; mt = calloc(1, sizeof(mtaux_t)); mt->n_threads = n_threads; mt->n_blks = n_threads * n_sub_blks; + mt->len = calloc(mt->n_blks, sizeof(int)); mt->blk = calloc(mt->n_blks, sizeof(void*)); for (i = 0; i < mt->n_blks; ++i) mt->blk[i] = malloc(BGZF_MAX_BLOCK_SIZE); @@ -407,6 +408,7 @@ void bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks) pthread_attr_init(&mt->attr); pthread_attr_setdetachstate(&mt->attr, PTHREAD_CREATE_JOINABLE); fp->mt = mt; + return 0; } static void mt_destroy(mtaux_t *mt) @@ -433,11 +435,21 @@ static void *mt_worker(void *data) return 0; } +static void mt_queue(BGZF *fp) +{ + mtaux_t *mt = (mtaux_t*)fp->mt; + assert(mt->curr < mt->n_blks); // guaranteed by the caller + memcpy(mt->blk[mt->curr], fp->uncompressed_block, fp->block_offset); + mt->len[mt->curr] = fp->block_offset; + fp->block_offset = 0; + ++mt->curr; +} + static int mt_flush(BGZF *fp) { int i; mtaux_t *mt = (mtaux_t*)fp->mt; - if (mt->curr == 0) return 0; + if (fp->block_offset) mt_queue(fp); // guaranteed that assertion does not fail for (i = 0; i < mt->n_threads; ++i) pthread_create(&mt->tid[i], &mt->attr, mt_worker, &mt->w[i]); for (i = 0; i < mt->n_threads; ++i) { pthread_join(mt->tid[i], 0); @@ -450,14 +462,13 @@ static int mt_flush(BGZF *fp) return 0; } -static int mt_push_blk(BGZF *fp) +static int mt_lazy_flush(BGZF *fp) { mtaux_t *mt = (mtaux_t*)fp->mt; - memcpy(mt->blk[mt->curr], fp->uncompressed_block, fp->block_offset); - mt->len[mt->curr] = fp->block_offset; - fp->block_offset = 0; - if (++mt->curr == mt->n_blks) mt_flush(fp); - return 0; + mt_queue(fp); + if (mt->curr == mt->n_blks) + return mt_flush(fp); + return -1; } static ssize_t mt_write(BGZF *fp, const void *data, ssize_t length) @@ -468,7 +479,7 @@ static ssize_t mt_write(BGZF *fp, const void *data, ssize_t length) int copy_length = BGZF_BLOCK_SIZE - fp->block_offset < rest? BGZF_BLOCK_SIZE - fp->block_offset : rest; memcpy(fp->uncompressed_block + fp->block_offset, input, copy_length); fp->block_offset += copy_length; input += copy_length; rest -= copy_length; - if (fp->block_offset == BGZF_BLOCK_SIZE) mt_push_blk(fp); + if (fp->block_offset == BGZF_BLOCK_SIZE) mt_lazy_flush(fp); } return length - rest; } @@ -495,7 +506,7 @@ int bgzf_flush(BGZF *fp) int bgzf_flush_try(BGZF *fp, ssize_t size) { if (fp->block_offset + size > BGZF_BLOCK_SIZE) { - if (fp->mt) return mt_push_blk(fp); + if (fp->mt) return mt_lazy_flush(fp); else return bgzf_flush(fp); } return -1; diff --git a/bgzf.h b/bgzf.h index 7d928a7..2a70bb9 100644 --- a/bgzf.h +++ b/bgzf.h @@ -1,7 +1,7 @@ /* The MIT License Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - 2011 Attractive Chaos + 2011, 2012 Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -32,7 +32,7 @@ #include #include -#define BGZF_BLOCK_SIZE 0xff00 +#define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE #define BGZF_MAX_BLOCK_SIZE 0x10000 #define BGZF_ERR_ZLIB 1 @@ -190,7 +190,7 @@ extern "C" { */ int bgzf_read_block(BGZF *fp); - void bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks); + int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks); #ifdef __cplusplus } diff --git a/sam.c b/sam.c index ba3158f..fa11df6 100644 --- a/sam.c +++ b/sam.c @@ -36,6 +36,13 @@ static void append_header_text(bam_header_t *header, char* text, int len) header->text[header->l_text] = 0; } +int samthreads(samfile_t *fp, int n_threads, int n_sub_blks) +{ + if (!(fp->type&1) || (fp->type&2)) return -1; + bgzf_mt(fp->x.bam, n_threads, n_sub_blks); + return 0; +} + samfile_t *samopen(const char *fn, const char *mode, const void *aux) { samfile_t *fp; diff --git a/sam.h b/sam.h index 0b87194..0495501 100644 --- a/sam.h +++ b/sam.h @@ -90,6 +90,7 @@ extern "C" { int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data); char *samfaipath(const char *fn_ref); + int samthreads(samfile_t *fp, int n_threads, int n_sub_blks); #ifdef __cplusplus } diff --git a/sam_view.c b/sam_view.c index 18b1282..b9bde44 100644 --- a/sam_view.c +++ b/sam_view.c @@ -119,14 +119,14 @@ static int usage(int is_long_help); int main_samview(int argc, char *argv[]) { int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, compress_level = -1, is_bamout = 0, is_count = 0; - int of_type = BAM_OFDEC, is_long_help = 0; + int of_type = BAM_OFDEC, is_long_help = 0, n_threads = 0; int count = 0; samfile_t *in = 0, *out = 0; char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0, *fn_rg = 0; /* parse command-line options */ strcpy(in_mode, "r"); strcpy(out_mode, "w"); - while ((c = getopt(argc, argv, "SbBct:h1Ho:q:f:F:ul:r:xX?T:R:L:s:Q:")) >= 0) { + while ((c = getopt(argc, argv, "SbBct:h1Ho:q:f:F:ul:r:xX?T:R:L:s:Q:@:")) >= 0) { switch (c) { case 's': g_subsam = atof(optarg); break; case 'c': is_count = 1; break; @@ -151,6 +151,7 @@ int main_samview(int argc, char *argv[]) case 'T': fn_ref = strdup(optarg); is_bamin = 0; break; case 'B': bam_no_B = 1; break; case 'Q': g_qual_scale = atoi(optarg); break; + case '@': n_threads = strtol(optarg, 0, 0); break; default: return usage(is_long_help); } } @@ -208,6 +209,7 @@ int main_samview(int argc, char *argv[]) ret = 1; goto view_end; } + if (n_threads > 1) samthreads(out, n_threads, 16); if (is_header_only) goto view_end; // no need to print alignments if (argc == optind + 1) { // convert/print the entire file