works

[samtools.git] / bgzf.c
diff --git a/bgzf.c b/bgzf.c

index 281aac0d932191b0faa99361772fefdfce5e094b..880d5afcbabee58a68fa8a998e68bc5509c6462f 100644 (file)
--- a/bgzf.c
+++ b/bgzf.c
@@ -267,7 +267,7 @@ static int load_block_from_cache(BGZF *fp, int64_t block_address)
         if (fp->block_length != 0) fp->block_offset = 0;
         fp->block_address = block_address;
         fp->block_length = p->size;
-       memcpy(fp->uncompressed_block, p->block, BGZF_BLOCK_SIZE);
+       memcpy(fp->uncompressed_block, p->block, BGZF_MAX_BLOCK_SIZE);
         _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET);
         return p->size;
  }
@@ -278,8 +278,8 @@ static void cache_block(BGZF *fp, int size)
         khint_t k;
         cache_t *p;
         khash_t(cache) *h = (khash_t(cache)*)fp->cache;
-       if (BGZF_BLOCK_SIZE >= fp->cache_size) return;
-       if ((kh_size(h) + 1) * BGZF_BLOCK_SIZE > fp->cache_size) {
+       if (BGZF_MAX_BLOCK_SIZE >= fp->cache_size) return;
+       if ((kh_size(h) + 1) * BGZF_MAX_BLOCK_SIZE > fp->cache_size) {
                 /* A better way would be to remove the oldest block in the
                  * cache, but here we remove a random one for simplicity. This
                  * should not have a big impact on performance. */
@@ -295,8 +295,8 @@ static void cache_block(BGZF *fp, int size)
         p = &kh_val(h, k);
         p->size = fp->block_length;
         p->end_offset = fp->block_address + size;
-       p->block = malloc(BGZF_BLOCK_SIZE);
-       memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_BLOCK_SIZE);
+       p->block = malloc(BGZF_MAX_BLOCK_SIZE);
+       memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_MAX_BLOCK_SIZE);
  }
  #else
  static void free_cache(BGZF *fp) {}
@@ -373,22 +373,54 @@ typedef struct {
         BGZF *fp;
         struct mtaux_t *mt;
         void *buf;
-       int i, errcode;
+       int i, errcode, toproc;
  } worker_t;
  
  typedef struct mtaux_t {
-       int n_threads, n_blks, curr;
+       int n_threads, n_blks, curr, done; // done: set to 1 by mt_destroy() to tell the workers to quit
+       volatile int proc_cnt; // workers that finished the current round; reset by the master, incremented atomically in worker_aux()
         void **blk;
         int *len;
-       pthread_t *tid;
-       pthread_attr_t attr;
         worker_t *w;
+       pthread_t *tid; // tid[0] stays unused: worker 0 runs on the master thread
+       pthread_mutex_t lock; // held while reading/writing worker_t.toproc and done
+       pthread_cond_t cv; // broadcast when workers are asked to compress or to quit
  } mtaux_t;
  
+static int worker_aux(worker_t *w) // run one round for worker w; returns 1 if the worker must quit, 0 after processing its blocks
+{
+       int i, stop = 0;
+       // wait for condition: to process (w->toproc) or all done (w->mt->done)
+       pthread_mutex_lock(&w->mt->lock);
+       while (!w->toproc && !w->mt->done)
+               pthread_cond_wait(&w->mt->cv, &w->mt->lock);
+       if (w->mt->done) stop = 1;
+       w->toproc = 0; // consume the request so the next call blocks until signalled again
+       pthread_mutex_unlock(&w->mt->lock);
+       if (stop) return 1; // to quit the thread
+       w->errcode = 0;
+       for (i = w->i; i < w->mt->curr; i += w->mt->n_threads) { // worker k handles blocks k, k+n_threads, ...
+               int clen = BGZF_MAX_BLOCK_SIZE;
+               if (bgzf_compress(w->buf, &clen, w->mt->blk[i], w->mt->len[i], w->fp->compress_level) != 0)
+                       w->errcode |= BGZF_ERR_ZLIB;
+               memcpy(w->mt->blk[i], w->buf, clen); // copy the output buffer back over the input block
+               w->mt->len[i] = clen;
+       }
+       __sync_fetch_and_add(&w->mt->proc_cnt, 1); // report completion; the previous count is not needed
+       return 0;
+}
+
+static void *mt_worker(void *data) // pthread start routine; data points to this thread's worker_t
+{
+       while (!worker_aux((worker_t*)data)) {} // keep serving rounds until worker_aux() returns non-zero
+       return 0;
+}
+
  int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
  {
         int i;
         mtaux_t *mt;
+       pthread_attr_t attr;
         if (!fp->is_write || fp->mt || n_threads <= 1) return -1;
         mt = calloc(1, sizeof(mtaux_t));
         mt->n_threads = n_threads;
@@ -397,7 +429,7 @@ int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
         mt->blk = calloc(mt->n_blks, sizeof(void*));
         for (i = 0; i < mt->n_blks; ++i)
                 mt->blk[i] = malloc(BGZF_MAX_BLOCK_SIZE);
-       mt->tid = calloc(mt->n_threads, sizeof(pthread_t));
+       mt->tid = calloc(mt->n_threads, sizeof(pthread_t)); // tid[0] is not used, as the worker 0 is launched by the master
         mt->w = calloc(mt->n_threads, sizeof(worker_t));
         for (i = 0; i < mt->n_threads; ++i) {
                 mt->w[i].i = i;
@@ -405,8 +437,12 @@ int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
                 mt->w[i].fp = fp;
                 mt->w[i].buf = malloc(BGZF_MAX_BLOCK_SIZE);
         }
-       pthread_attr_init(&mt->attr);
-       pthread_attr_setdetachstate(&mt->attr, PTHREAD_CREATE_JOINABLE);
+       pthread_attr_init(&attr);
+       pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
+       pthread_mutex_init(&mt->lock, 0);
+       pthread_cond_init(&mt->cv, 0);
+       for (i = 1; i < mt->n_threads; ++i) // worker 0 is effectively launched by the master thread
+               pthread_create(&mt->tid[i], &attr, mt_worker, &mt->w[i]);
         fp->mt = mt;
         return 0;
  }
@@ -414,27 +450,21 @@ int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks)
  static void mt_destroy(mtaux_t *mt)
  {
         int i;
+       // shut down the workers: set done under the lock and wake every thread waiting on cv
+       pthread_mutex_lock(&mt->lock);
+       mt->done = 1; mt->proc_cnt = 0;
+       pthread_cond_broadcast(&mt->cv);
+       pthread_mutex_unlock(&mt->lock);
+       for (i = 1; i < mt->n_threads; ++i) pthread_join(mt->tid[i], 0); // tid[0] was never created: worker 0 runs on the master thread
+       // free other data allocated on heap (done only after the worker threads have been joined)
         for (i = 0; i < mt->n_blks; ++i) free(mt->blk[i]);
         for (i = 0; i < mt->n_threads; ++i) free(mt->w[i].buf);
         free(mt->blk); free(mt->len); free(mt->w); free(mt->tid);
+       pthread_cond_destroy(&mt->cv); // sync objects are destroyed before mt itself is freed
+       pthread_mutex_destroy(&mt->lock);
         free(mt);
  }
  
-static void *mt_worker(void *data)
-{
-       int i;
-       worker_t *w = (worker_t*)data;
-       w->errcode = 0;
-       for (i = w->i; i < w->mt->curr; i += w->mt->n_threads) {
-               int clen = BGZF_MAX_BLOCK_SIZE;
-               if (bgzf_compress(w->buf, &clen, w->mt->blk[i], w->mt->len[i], w->fp->compress_level) != 0)
-                       w->errcode |= BGZF_ERR_ZLIB;
-               memcpy(w->mt->blk[i], w->buf, clen);
-               w->mt->len[i] = clen;
-       }
-       return 0;
-}
-
  static void mt_queue(BGZF *fp)
  {
         mtaux_t *mt = (mtaux_t*)fp->mt;
@@ -450,11 +480,18 @@ static int mt_flush(BGZF *fp)
         int i;
         mtaux_t *mt = (mtaux_t*)fp->mt;
         if (fp->block_offset) mt_queue(fp); // guaranteed that assertion does not fail
-       for (i = 0; i < mt->n_threads; ++i) pthread_create(&mt->tid[i], &mt->attr, mt_worker, &mt->w[i]);
-       for (i = 0; i < mt->n_threads; ++i) {
-               pthread_join(mt->tid[i], 0);
-               fp->errcode |= mt->w[i].errcode;
-       }
+       // signal all the workers to compress
+       pthread_mutex_lock(&mt->lock);
+       for (i = 0; i < mt->n_threads; ++i) mt->w[i].toproc = 1;
+       mt->proc_cnt = 0;
+       pthread_cond_broadcast(&mt->cv);
+       pthread_mutex_unlock(&mt->lock);
+       // worker 0 is doing things here
+       worker_aux(&mt->w[0]);
+       // wait for all the threads to complete
+       while (mt->proc_cnt < mt->n_threads);
+       // dump data to disk
+       for (i = 0; i < mt->n_threads; ++i) fp->errcode |= mt->w[i].errcode;
         for (i = 0; i < mt->curr; ++i)
                 if (fwrite(mt->blk[i], 1, mt->len[i], fp->fp) != mt->len[i])
                         fp->errcode |= BGZF_ERR_IO;
@@ -465,7 +502,7 @@ static int mt_flush(BGZF *fp)
  static int mt_lazy_flush(BGZF *fp)
  {
         mtaux_t *mt = (mtaux_t*)fp->mt;
-       mt_queue(fp);
+       if (fp->block_offset) mt_queue(fp);
         if (mt->curr == mt->n_blks)
                 return mt_flush(fp);
         return -1;