From 4e0a2f896484bc0792e81b7036a17a3f4b15d2dd Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sat, 17 Mar 2012 23:16:54 -0400 Subject: [PATCH] updated with the latest bgzf library --- Makefile | 2 +- bam_cat.c | 9 +- bam_reheader.c | 7 +- bcftools/bcf.c | 3 - bcftools/main.c | 11 +- bgzf.c | 899 +++++++++++++++++++----------------------------- bgzf.h | 262 ++++++++------ 7 files changed, 526 insertions(+), 667 deletions(-) diff --git a/Makefile b/Makefile index fdcddd6..4699878 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ CC= gcc CFLAGS= -g -Wall -O2 #LDFLAGS= -Wl,-rpath,\$$ORIGIN/../lib -DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=1 +DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -DBGZF_CACHE -D_CURSES_LIB=1 KNETFILE_O= knetfile.o LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \ diff --git a/bam_cat.c b/bam_cat.c index 0fde045..a7502b9 100644 --- a/bam_cat.c +++ b/bam_cat.c @@ -59,6 +59,7 @@ all:bam_cat #include #include +#include "knetfile.h" #include "bgzf.h" #include "bam.h" @@ -97,7 +98,7 @@ int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]); return -1; } - if (in->open_mode != 'r') return -1; + if (in->is_write) return -1; old = bam_header_read(in); if (h == 0 && i == 0) bam_header_write(fp, old); @@ -109,10 +110,10 @@ int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam j=0; #ifdef _USE_KNETFILE - fp_file=fp->x.fpw; - while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) { + fp_file = fp->fp; + while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) { #else - fp_file=fp->file; + fp_file = fp->fp; while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) { #endif if(len #include +#include "knetfile.h" #include "bgzf.h" #include "bam.h" @@ -11,7 +12,7 @@ int 
bam_reheader(BGZF *in, const bam_header_t *h, int fd) bam_header_t *old; int len; uint8_t *buf; - if (in->open_mode != 'r') return -1; + if (in->is_write) return -1; buf = malloc(BUF_SIZE); old = bam_header_read(in); fp = bgzf_fdopen(fd, "w"); @@ -21,8 +22,8 @@ int bam_reheader(BGZF *in, const bam_header_t *h, int fd) bgzf_flush(fp); } #ifdef _USE_KNETFILE - while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) - fwrite(buf, 1, len, fp->x.fpw); + while ((len = knet_read(in->fp, buf, BUF_SIZE)) > 0) + fwrite(buf, 1, len, fp->fp); #else while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) fwrite(buf, 1, len, fp->file); diff --git a/bcftools/bcf.c b/bcftools/bcf.c index a538e6e..0524408 100644 --- a/bcftools/bcf.c +++ b/bcftools/bcf.c @@ -13,9 +13,6 @@ bcf_t *bcf_open(const char *fn, const char *mode) } else { b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), mode); } -#ifndef BCF_LITE - b->fp->owned_file = 1; -#endif return b; } diff --git a/bcftools/main.c b/bcftools/main.c index fcd94b8..eda6217 100644 --- a/bcftools/main.c +++ b/bcftools/main.c @@ -2,6 +2,7 @@ #include #include #include +#include "knetfile.h" #include "bcf.h" #include "kseq.h" @@ -29,12 +30,12 @@ int bcf_cat(int n, char * const *fn) if (i == 0) bcf_hdr_write(out, h); bcf_hdr_destroy(h); #ifdef _USE_KNETFILE - fstat(knet_fileno(in->fp->x.fpr), &s); + fstat(knet_fileno((knetFile*)in->fp->fp), &s); end = s.st_size - 28; - while (knet_tell(in->fp->x.fpr) < end) { - int size = knet_tell(in->fp->x.fpr) + BUF_SIZE < end? BUF_SIZE : end - knet_tell(in->fp->x.fpr); - knet_read(in->fp->x.fpr, buf, size); - fwrite(buf, 1, size, out->fp->x.fpw); + while (knet_tell((knetFile*)in->fp->fp) < end) { + int size = knet_tell((knetFile*)in->fp->fp) + BUF_SIZE < end? 
BUF_SIZE : end - knet_tell((knetFile*)in->fp->fp); + knet_read(in->fp->fp, buf, size); + fwrite(buf, 1, size, out->fp->fp); } #else abort(); // FIXME: not implemented diff --git a/bgzf.c b/bgzf.c index 216cd04..9badafb 100644 --- a/bgzf.c +++ b/bgzf.c @@ -1,6 +1,7 @@ /* The MIT License Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + 2011 Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,400 +22,234 @@ THE SOFTWARE. */ -/* - 2009-06-29 by lh3: cache recent uncompressed blocks. - 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP. - 2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */ - #include #include #include #include -#include +#include #include -#include #include "bgzf.h" -#include "khash.h" +#ifdef _USE_KNETFILE +#include "knetfile.h" +typedef knetFile *_bgzf_file_t; +#define _bgzf_open(fn, mode) knet_open(fn, mode) +#define _bgzf_dopen(fp, mode) knet_dopen(fp, mode) +#define _bgzf_close(fp) knet_close(fp) +#define _bgzf_fileno(fp) ((fp)->fd) +#define _bgzf_tell(fp) knet_tell(fp) +#define _bgzf_seek(fp, offset, whence) knet_seek(fp, offset, whence) +#define _bgzf_read(fp, buf, len) knet_read(fp, buf, len) +#define _bgzf_write(fp, buf, len) knet_write(fp, buf, len) +#else // ~defined(_USE_KNETFILE) +#if defined(_WIN32) || defined(_MSC_VER) +#define ftello(fp) ftell(fp) +#define fseeko(fp, offset, whence) fseek(fp, offset, whence) +#else // ~defined(_WIN32) +extern off_t ftello(FILE *stream); +extern int fseeko(FILE *stream, off_t offset, int whence); +#endif // ~defined(_WIN32) +typedef FILE *_bgzf_file_t; +#define _bgzf_open(fn, mode) fopen(fn, mode) +#define _bgzf_dopen(fp, mode) fdopen(fp, mode) +#define _bgzf_close(fp) fclose(fp) +#define _bgzf_fileno(fp) fileno(fp) +#define _bgzf_tell(fp) ftello(fp) +#define _bgzf_seek(fp, offset, whence) 
fseeko(fp, offset, whence) +#define _bgzf_read(fp, buf, len) fread(buf, 1, len, fp) +#define _bgzf_write(fp, buf, len) fwrite(buf, 1, len, fp) +#endif // ~define(_USE_KNETFILE) + +#define BLOCK_HEADER_LENGTH 18 +#define BLOCK_FOOTER_LENGTH 8 + + +/* BGZF/GZIP header (speciallized from RFC 1952; little endian): + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ + | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN| + +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ +*/ +static const uint8_t g_magic[19] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\0\0"; + +#ifdef BGZF_CACHE typedef struct { int size; uint8_t *block; int64_t end_offset; } cache_t; +#include "khash.h" KHASH_MAP_INIT_INT64(cache, cache_t) - -#if defined(_WIN32) || defined(_MSC_VER) -#define ftello(fp) ftell(fp) -#define fseeko(fp, offset, whence) fseek(fp, offset, whence) -#else -extern off_t ftello(FILE *stream); -extern int fseeko(FILE *stream, off_t offset, int whence); #endif -typedef int8_t bgzf_byte_t; - -static const int DEFAULT_BLOCK_SIZE = 64 * 1024; -static const int MAX_BLOCK_SIZE = 64 * 1024; - -static const int BLOCK_HEADER_LENGTH = 18; -static const int BLOCK_FOOTER_LENGTH = 8; - -static const int GZIP_ID1 = 31; -static const int GZIP_ID2 = 139; -static const int CM_DEFLATE = 8; -static const int FLG_FEXTRA = 4; -static const int OS_UNKNOWN = 255; -static const int BGZF_ID1 = 66; // 'B' -static const int BGZF_ID2 = 67; // 'C' -static const int BGZF_LEN = 2; -static const int BGZF_XLEN = 6; // BGZF_LEN+4 - -static const int GZIP_WINDOW_BITS = -15; // no zlib header -static const int Z_DEFAULT_MEM_LEVEL = 8; - - -inline -void -packInt16(uint8_t* buffer, uint16_t value) -{ - buffer[0] = value; - buffer[1] = value >> 8; -} - -inline -int -unpackInt16(const uint8_t* buffer) +static inline void packInt16(uint8_t *buffer, uint16_t value) { - return (buffer[0] | (buffer[1] << 8)); + buffer[0] = value; + buffer[1] = value >> 8; } -inline -void 
-packInt32(uint8_t* buffer, uint32_t value) +static inline int unpackInt16(const uint8_t *buffer) { - buffer[0] = value; - buffer[1] = value >> 8; - buffer[2] = value >> 16; - buffer[3] = value >> 24; + return buffer[0] | buffer[1] << 8; } -static inline -int -bgzf_min(int x, int y) +static inline void packInt32(uint8_t *buffer, uint32_t value) { - return (x < y) ? x : y; + buffer[0] = value; + buffer[1] = value >> 8; + buffer[2] = value >> 16; + buffer[3] = value >> 24; } -static -void -report_error(BGZF* fp, const char* message) { - fp->error = message; -} - -int bgzf_check_bgzf(const char *fn) +static BGZF *bgzf_read_init() { - BGZF *fp; - uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377"; - int n; - - if ((fp = bgzf_open(fn, "r")) == 0) - { - fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn); - return -1; - } - -#ifdef _USE_KNETFILE - n = knet_read(fp->x.fpr, buf, 10); -#else - n = fread(buf, 1, 10, fp->file); + BGZF *fp; + fp = calloc(1, sizeof(BGZF)); + fp->is_write = 0; + fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE); + fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE); +#ifdef BGZF_CACHE + fp->cache = kh_init(cache); #endif - bgzf_close(fp); - - if ( n!=10 ) - return -1; - - if ( !memcmp(magic, buf, 10) ) return 1; - return 0; + return fp; } -static BGZF *bgzf_read_init() +static BGZF *bgzf_write_init(int compress_level) // compress_level==-1 for the default level { BGZF *fp; fp = calloc(1, sizeof(BGZF)); - fp->uncompressed_block_size = MAX_BLOCK_SIZE; - fp->uncompressed_block = malloc(MAX_BLOCK_SIZE); - fp->compressed_block_size = MAX_BLOCK_SIZE; - fp->compressed_block = malloc(MAX_BLOCK_SIZE); - fp->cache_size = 0; - fp->cache = kh_init(cache); + fp->is_write = 1; + fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE); + fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE); + fp->compress_level = compress_level < 0? 
Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 + if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; return fp; } - -static -BGZF* -open_read(int fd) +// get the compress level from the mode string +static int mode2level(const char *__restrict mode) { -#ifdef _USE_KNETFILE - knetFile *file = knet_dopen(fd, "r"); -#else - FILE* file = fdopen(fd, "r"); -#endif - BGZF* fp; - if (file == 0) return 0; - fp = bgzf_read_init(); - fp->file_descriptor = fd; - fp->open_mode = 'r'; -#ifdef _USE_KNETFILE - fp->x.fpr = file; -#else - fp->file = file; -#endif - return fp; + int i, compress_level = -1; + for (i = 0; mode[i]; ++i) + if (mode[i] >= '0' && mode[i] <= '9') break; + if (mode[i]) compress_level = (int)mode[i] - '0'; + if (strchr(mode, 'u')) compress_level = 0; + return compress_level; } -static -BGZF* -open_write(int fd, int compress_level) // compress_level==-1 for the default level +BGZF *bgzf_open(const char *path, const char *mode) { - FILE* file = fdopen(fd, "w"); - BGZF* fp; - if (file == 0) return 0; - fp = malloc(sizeof(BGZF)); - fp->file_descriptor = fd; - fp->open_mode = 'w'; - fp->owned_file = 0; - fp->compress_level = compress_level < 0? 
Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1 - if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION; -#ifdef _USE_KNETFILE - fp->x.fpw = file; -#else - fp->file = file; -#endif - fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE; - fp->uncompressed_block = NULL; - fp->compressed_block_size = MAX_BLOCK_SIZE; - fp->compressed_block = malloc(MAX_BLOCK_SIZE); - fp->block_address = 0; - fp->block_offset = 0; - fp->block_length = 0; - fp->error = NULL; - return fp; + BGZF *fp = 0; + assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE); + if (strchr(mode, 'r') || strchr(mode, 'R')) { + _bgzf_file_t fpr; + if ((fpr = _bgzf_open(path, "r")) == 0) return 0; + fp = bgzf_read_init(); + fp->fp = fpr; + } else if (strchr(mode, 'w') || strchr(mode, 'W')) { + FILE *fpw; + if ((fpw = fopen(path, "w")) == 0) return 0; + fp = bgzf_write_init(mode2level(mode)); + fp->fp = fpw; + } + return fp; } -BGZF* -bgzf_open(const char* __restrict path, const char* __restrict mode) +BGZF *bgzf_dopen(int fd, const char *mode) { - BGZF* fp = NULL; - if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. 
*/ -#ifdef _USE_KNETFILE - knetFile *file = knet_open(path, mode); - if (file == 0) return 0; + BGZF *fp = 0; + assert(compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE); + if (strchr(mode, 'r') || strchr(mode, 'R')) { + _bgzf_file_t fpr; + if ((fpr = _bgzf_dopen(fd, "r")) == 0) return 0; fp = bgzf_read_init(); - fp->file_descriptor = -1; - fp->open_mode = 'r'; - fp->x.fpr = file; -#else - int fd, oflag = O_RDONLY; -#ifdef _WIN32 - oflag |= O_BINARY; -#endif - fd = open(path, oflag); - if (fd == -1) return 0; - fp = open_read(fd); -#endif - } else if (strchr(mode, 'w') || strchr(mode, 'W')) { - int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC; -#ifdef _WIN32 - oflag |= O_BINARY; -#endif - fd = open(path, oflag, 0666); - if (fd == -1) return 0; - { // set compress_level - int i; - for (i = 0; mode[i]; ++i) - if (mode[i] >= '0' && mode[i] <= '9') break; - if (mode[i]) compress_level = (int)mode[i] - '0'; - if (strchr(mode, 'u')) compress_level = 0; - } - fp = open_write(fd, compress_level); - } - if (fp != NULL) fp->owned_file = 1; - return fp; + fp->fp = fpr; + } else if (strchr(mode, 'w') || strchr(mode, 'W')) { + FILE *fpw; + if ((fpw = fdopen(fd, "w")) == 0) return 0; + fp = bgzf_write_init(mode2level(mode)); + fp->fp = fpw; + } + return fp; } -BGZF* -bgzf_fdopen(int fd, const char * __restrict mode) +static int bgzf_compress(void *_dst, int *dlen, void *src, int slen, int level) { - if (fd == -1) return 0; - if (mode[0] == 'r' || mode[0] == 'R') { - return open_read(fd); - } else if (mode[0] == 'w' || mode[0] == 'W') { - int i, compress_level = -1; - for (i = 0; mode[i]; ++i) - if (mode[i] >= '0' && mode[i] <= '9') break; - if (mode[i]) compress_level = (int)mode[i] - '0'; - if (strchr(mode, 'u')) compress_level = 0; - return open_write(fd, compress_level); - } else { - return NULL; - } + uint32_t crc; + z_stream zs; + uint8_t *dst = (uint8_t*)_dst; + + // compress the body + zs.zalloc = NULL; zs.zfree = NULL; + zs.next_in = src; + 
zs.avail_in = slen; + zs.next_out = dst + BLOCK_HEADER_LENGTH; + zs.avail_out = *dlen - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; + if (deflateInit2(&zs, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) return -1; // -15 to disable zlib header/footer + if (deflate(&zs, Z_FINISH) != Z_STREAM_END) return -1; + if (deflateEnd(&zs) != Z_OK) return -1; + *dlen = zs.total_out + BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; + // write the header + memcpy(dst, g_magic, BLOCK_HEADER_LENGTH); // the last two bytes are a place holder for the length of the block + packInt16(&dst[16], *dlen - 1); // write the compressed length; -1 to fit 2 bytes + // write the footer + crc = crc32(crc32(0L, NULL, 0L), src, slen); + packInt32((uint8_t*)&dst[*dlen - 8], crc); + packInt32((uint8_t*)&dst[*dlen - 4], slen); + return 0; } -static -int -deflate_block(BGZF* fp, int block_length) +// Deflate the block in fp->uncompressed_block into fp->compressed_block. Also adds an extra field that stores the compressed block length. +static int deflate_block(BGZF *fp, int block_length) { - // Deflate the block in fp->uncompressed_block into fp->compressed_block. - // Also adds an extra field that stores the compressed block length. 
- - bgzf_byte_t* buffer = fp->compressed_block; - int buffer_size = fp->compressed_block_size; - - // Init gzip header - buffer[0] = GZIP_ID1; - buffer[1] = GZIP_ID2; - buffer[2] = CM_DEFLATE; - buffer[3] = FLG_FEXTRA; - buffer[4] = 0; // mtime - buffer[5] = 0; - buffer[6] = 0; - buffer[7] = 0; - buffer[8] = 0; - buffer[9] = OS_UNKNOWN; - buffer[10] = BGZF_XLEN; - buffer[11] = 0; - buffer[12] = BGZF_ID1; - buffer[13] = BGZF_ID2; - buffer[14] = BGZF_LEN; - buffer[15] = 0; - buffer[16] = 0; // placeholder for block length - buffer[17] = 0; - - // loop to retry for blocks that do not compress enough - int input_length = block_length; - int compressed_length = 0; - while (1) { - z_stream zs; - zs.zalloc = NULL; - zs.zfree = NULL; - zs.next_in = fp->uncompressed_block; - zs.avail_in = input_length; - zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH]; - zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH; - - int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED, - GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY); - if (status != Z_OK) { - report_error(fp, "deflate init failed"); - return -1; - } - status = deflate(&zs, Z_FINISH); - if (status != Z_STREAM_END) { - deflateEnd(&zs); - if (status == Z_OK) { - // Not enough space in buffer. - // Can happen in the rare case the input doesn't compress enough. - // Reduce the amount of input until it fits. 
- input_length -= 1024; - if (input_length <= 0) { - // should never happen - report_error(fp, "input reduction failed"); - return -1; - } - continue; - } - report_error(fp, "deflate failed"); - return -1; - } - status = deflateEnd(&zs); - if (status != Z_OK) { - report_error(fp, "deflate end failed"); - return -1; - } - compressed_length = zs.total_out; - compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH; - if (compressed_length > MAX_BLOCK_SIZE) { - // should never happen - report_error(fp, "deflate overflow"); - return -1; - } - break; - } - - packInt16((uint8_t*)&buffer[16], compressed_length-1); - uint32_t crc = crc32(0L, NULL, 0L); - crc = crc32(crc, fp->uncompressed_block, input_length); - packInt32((uint8_t*)&buffer[compressed_length-8], crc); - packInt32((uint8_t*)&buffer[compressed_length-4], input_length); - - int remaining = block_length - input_length; - if (remaining > 0) { - if (remaining > input_length) { - // should never happen (check so we can use memcpy) - report_error(fp, "remainder too large"); - return -1; - } - memcpy(fp->uncompressed_block, - fp->uncompressed_block + input_length, - remaining); - } - fp->block_offset = remaining; - return compressed_length; + int comp_size = BGZF_MAX_BLOCK_SIZE; + if (bgzf_compress(fp->compressed_block, &comp_size, fp->uncompressed_block, block_length, fp->compress_level) != 0) { + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + fp->block_offset = 0; + return comp_size; } -static -int -inflate_block(BGZF* fp, int block_length) +// Inflate the block in fp->compressed_block into fp->uncompressed_block +static int inflate_block(BGZF* fp, int block_length) { - // Inflate the block in fp->compressed_block into fp->uncompressed_block - - z_stream zs; - int status; - zs.zalloc = NULL; - zs.zfree = NULL; - zs.next_in = fp->compressed_block + 18; - zs.avail_in = block_length - 16; - zs.next_out = fp->uncompressed_block; - zs.avail_out = fp->uncompressed_block_size; - - status = inflateInit2(&zs, 
GZIP_WINDOW_BITS); - if (status != Z_OK) { - report_error(fp, "inflate init failed"); - return -1; - } - status = inflate(&zs, Z_FINISH); - if (status != Z_STREAM_END) { - inflateEnd(&zs); - report_error(fp, "inflate failed"); - return -1; - } - status = inflateEnd(&zs); - if (status != Z_OK) { - report_error(fp, "inflate failed"); - return -1; - } - return zs.total_out; + z_stream zs; + zs.zalloc = NULL; + zs.zfree = NULL; + zs.next_in = fp->compressed_block + 18; + zs.avail_in = block_length - 16; + zs.next_out = fp->uncompressed_block; + zs.avail_out = BGZF_BLOCK_SIZE; + + if (inflateInit2(&zs, -15) != Z_OK) { + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + if (inflate(&zs, Z_FINISH) != Z_STREAM_END) { + inflateEnd(&zs); + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + if (inflateEnd(&zs) != Z_OK) { + fp->errcode |= BGZF_ERR_ZLIB; + return -1; + } + return zs.total_out; } -static -int -check_header(const bgzf_byte_t* header) +static int check_header(const uint8_t *header) { - return (header[0] == GZIP_ID1 && - header[1] == (bgzf_byte_t) GZIP_ID2 && - header[2] == Z_DEFLATED && - (header[3] & FLG_FEXTRA) != 0 && - unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN && - header[12] == BGZF_ID1 && - header[13] == BGZF_ID2 && - unpackInt16((uint8_t*)&header[14]) == BGZF_LEN); + return (header[0] == 31 && header[1] == 139 && header[2] == 8 && (header[3] & 4) != 0 + && unpackInt16((uint8_t*)&header[10]) == 6 + && header[12] == 'B' && header[13] == 'C' + && unpackInt16((uint8_t*)&header[14]) == 2); } +#ifdef BGZF_CACHE static void free_cache(BGZF *fp) { khint_t k; khash_t(cache) *h = (khash_t(cache)*)fp->cache; - if (fp->open_mode != 'r') return; + if (fp->is_write) return; for (k = kh_begin(h); k < kh_end(h); ++k) if (kh_exist(h, k)) free(kh_val(h, k).block); kh_destroy(cache, h); @@ -431,12 +266,8 @@ static int load_block_from_cache(BGZF *fp, int64_t block_address) if (fp->block_length != 0) fp->block_offset = 0; fp->block_address = block_address; fp->block_length = 
p->size; - memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE); -#ifdef _USE_KNETFILE - knet_seek(fp->x.fpr, p->end_offset, SEEK_SET); -#else - fseeko(fp->file, p->end_offset, SEEK_SET); -#endif + memcpy(fp->uncompressed_block, p->block, BGZF_BLOCK_SIZE); + _bgzf_seek((_bgzf_file_t)fp->fp, p->end_offset, SEEK_SET); return p->size; } @@ -446,8 +277,8 @@ static void cache_block(BGZF *fp, int size) khint_t k; cache_t *p; khash_t(cache) *h = (khash_t(cache)*)fp->cache; - if (MAX_BLOCK_SIZE >= fp->cache_size) return; - if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) { + if (BGZF_BLOCK_SIZE >= fp->cache_size) return; + if ((kh_size(h) + 1) * BGZF_BLOCK_SIZE > fp->cache_size) { /* A better way would be to remove the oldest block in the * cache, but here we remove a random one for simplicity. This * should not have a big impact on performance. */ @@ -463,201 +294,140 @@ static void cache_block(BGZF *fp, int size) p = &kh_val(h, k); p->size = fp->block_length; p->end_offset = fp->block_address + size; - p->block = malloc(MAX_BLOCK_SIZE); - memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE); + p->block = malloc(BGZF_BLOCK_SIZE); + memcpy(kh_val(h, k).block, fp->uncompressed_block, BGZF_BLOCK_SIZE); } +#else +static void free_cache(BGZF *fp) {} +static int load_block_from_cache(BGZF *fp, int64_t block_address) {return 0;} +static void cache_block(BGZF *fp, int size) {} +#endif -int -bgzf_read_block(BGZF* fp) +int bgzf_read_block(BGZF *fp) { - bgzf_byte_t header[BLOCK_HEADER_LENGTH]; + uint8_t header[BLOCK_HEADER_LENGTH], *compressed_block; int count, size = 0, block_length, remaining; -#ifdef _USE_KNETFILE - int64_t block_address = knet_tell(fp->x.fpr); - if (load_block_from_cache(fp, block_address)) return 0; - count = knet_read(fp->x.fpr, header, sizeof(header)); -#else - int64_t block_address = ftello(fp->file); + int64_t block_address; + block_address = _bgzf_tell((_bgzf_file_t)fp->fp); if (load_block_from_cache(fp, block_address)) return 0; - 
count = fread(header, 1, sizeof(header), fp->file); -#endif - if (count == 0) { - fp->block_length = 0; - return 0; - } + count = _bgzf_read(fp->fp, header, sizeof(header)); + if (count == 0) { // no data read + fp->block_length = 0; + return 0; + } + if (count != sizeof(header) || !check_header(header)) { + fp->errcode |= BGZF_ERR_HEADER; + return -1; + } size = count; - if (count != sizeof(header)) { - report_error(fp, "read failed"); - return -1; - } - if (!check_header(header)) { - report_error(fp, "invalid block header"); - return -1; - } - block_length = unpackInt16((uint8_t*)&header[16]) + 1; - bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block; - memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); - remaining = block_length - BLOCK_HEADER_LENGTH; -#ifdef _USE_KNETFILE - count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining); -#else - count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file); -#endif - if (count != remaining) { - report_error(fp, "read failed"); - return -1; - } + block_length = unpackInt16((uint8_t*)&header[16]) + 1; // +1 because when writing this number, we used "-1" + compressed_block = (uint8_t*)fp->compressed_block; + memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); + remaining = block_length - BLOCK_HEADER_LENGTH; + count = _bgzf_read(fp->fp, &compressed_block[BLOCK_HEADER_LENGTH], remaining); + if (count != remaining) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } size += count; - count = inflate_block(fp, block_length); - if (count < 0) return -1; - if (fp->block_length != 0) { - // Do not reset offset if this read follows a seek. - fp->block_offset = 0; - } - fp->block_address = block_address; - fp->block_length = count; + if ((count = inflate_block(fp, block_length)) < 0) return -1; + if (fp->block_length != 0) fp->block_offset = 0; // Do not reset offset if this read follows a seek. 
+ fp->block_address = block_address; + fp->block_length = count; cache_block(fp, size); - return 0; + return 0; } -int -bgzf_read(BGZF* fp, void* data, int length) +ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length) { - if (length <= 0) { - return 0; - } - if (fp->open_mode != 'r') { - report_error(fp, "file not open for reading"); - return -1; - } - - int bytes_read = 0; - bgzf_byte_t* output = data; - while (bytes_read < length) { - int copy_length, available = fp->block_length - fp->block_offset; - bgzf_byte_t *buffer; - if (available <= 0) { - if (bgzf_read_block(fp) != 0) { - return -1; - } - available = fp->block_length - fp->block_offset; - if (available <= 0) { - break; - } - } - copy_length = bgzf_min(length-bytes_read, available); - buffer = fp->uncompressed_block; - memcpy(output, buffer + fp->block_offset, copy_length); - fp->block_offset += copy_length; - output += copy_length; - bytes_read += copy_length; - } - if (fp->block_offset == fp->block_length) { -#ifdef _USE_KNETFILE - fp->block_address = knet_tell(fp->x.fpr); -#else - fp->block_address = ftello(fp->file); -#endif - fp->block_offset = 0; - fp->block_length = 0; - } - return bytes_read; + ssize_t bytes_read = 0; + uint8_t *output = data; + if (length <= 0) return 0; + assert(fp->is_write == 0); + while (bytes_read < length) { + int copy_length, available = fp->block_length - fp->block_offset; + uint8_t *buffer; + if (available <= 0) { + if (bgzf_read_block(fp) != 0) return -1; + available = fp->block_length - fp->block_offset; + if (available <= 0) break; + } + copy_length = length - bytes_read < available? 
length - bytes_read : available; + buffer = fp->uncompressed_block; + memcpy(output, buffer + fp->block_offset, copy_length); + fp->block_offset += copy_length; + output += copy_length; + bytes_read += copy_length; + } + if (fp->block_offset == fp->block_length) { + fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); + fp->block_offset = fp->block_length = 0; + } + return bytes_read; } -int bgzf_flush(BGZF* fp) +int bgzf_flush(BGZF *fp) { - while (fp->block_offset > 0) { - int count, block_length; + if (!fp->is_write) return 0; + while (fp->block_offset > 0) { + int block_length; block_length = deflate_block(fp, fp->block_offset); - if (block_length < 0) return -1; -#ifdef _USE_KNETFILE - count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); -#else - count = fwrite(fp->compressed_block, 1, block_length, fp->file); -#endif - if (count != block_length) { - report_error(fp, "write failed"); - return -1; - } - fp->block_address += block_length; - } - return 0; + if (block_length < 0) return -1; + if (fwrite(fp->compressed_block, 1, block_length, fp->fp) != block_length) { + fp->errcode |= BGZF_ERR_IO; // possibly truncated file + return -1; + } + fp->block_address += block_length; + } + return 0; } -int bgzf_flush_try(BGZF *fp, int size) +int bgzf_flush_try(BGZF *fp, ssize_t size) { - if (fp->block_offset + size > fp->uncompressed_block_size) + if (fp->block_offset + size > BGZF_BLOCK_SIZE) return bgzf_flush(fp); return -1; } -int bgzf_write(BGZF* fp, const void* data, int length) +ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length) { - const bgzf_byte_t *input = data; - int block_length, bytes_written; - if (fp->open_mode != 'w') { - report_error(fp, "file not open for writing"); - return -1; - } - - if (fp->uncompressed_block == NULL) - fp->uncompressed_block = malloc(fp->uncompressed_block_size); - - input = data; - block_length = fp->uncompressed_block_size; - bytes_written = 0; - while (bytes_written < length) { - int copy_length = 
bgzf_min(block_length - fp->block_offset, length - bytes_written); - bgzf_byte_t* buffer = fp->uncompressed_block; - memcpy(buffer + fp->block_offset, input, copy_length); - fp->block_offset += copy_length; - input += copy_length; - bytes_written += copy_length; - if (fp->block_offset == block_length) { - if (bgzf_flush(fp) != 0) { - break; - } - } - } - return bytes_written; + const uint8_t *input = data; + int block_length = BGZF_BLOCK_SIZE, bytes_written; + assert(fp->is_write); + input = data; + bytes_written = 0; + while (bytes_written < length) { + uint8_t* buffer = fp->uncompressed_block; + int copy_length = block_length - fp->block_offset < length - bytes_written? block_length - fp->block_offset : length - bytes_written; + memcpy(buffer + fp->block_offset, input, copy_length); + fp->block_offset += copy_length; + input += copy_length; + bytes_written += copy_length; + if (fp->block_offset == block_length && bgzf_flush(fp)) break; + } + return bytes_written; } int bgzf_close(BGZF* fp) { - if (fp->open_mode == 'w') { - if (bgzf_flush(fp) != 0) return -1; - { // add an empty block - int count, block_length = deflate_block(fp, 0); -#ifdef _USE_KNETFILE - count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); -#else - count = fwrite(fp->compressed_block, 1, block_length, fp->file); -#endif + int ret, count, block_length; + if (fp == 0) return -1; + if (fp->is_write) { + if (bgzf_flush(fp) != 0) return -1; + block_length = deflate_block(fp, 0); // write an empty block + count = fwrite(fp->compressed_block, 1, block_length, fp->fp); + if (fflush(fp->fp) != 0) { + fp->errcode |= BGZF_ERR_IO; + return -1; } -#ifdef _USE_KNETFILE - if (fflush(fp->x.fpw) != 0) { -#else - if (fflush(fp->file) != 0) { -#endif - report_error(fp, "flush failed"); - return -1; - } - } - if (fp->owned_file) { -#ifdef _USE_KNETFILE - int ret; - if (fp->open_mode == 'w') ret = fclose(fp->x.fpw); - else ret = knet_close(fp->x.fpr); - if (ret != 0) return -1; -#else - if 
(fclose(fp->file) != 0) return -1; -#endif - } - free(fp->uncompressed_block); - free(fp->compressed_block); + } + ret = fp->is_write? fclose(fp->fp) : _bgzf_close(fp->fp); + if (ret != 0) return -1; + free(fp->uncompressed_block); + free(fp->compressed_block); free_cache(fp); - free(fp); - return 0; + free(fp); + return 0; } void bgzf_set_cache_size(BGZF *fp, int cache_size) @@ -670,17 +440,10 @@ int bgzf_check_EOF(BGZF *fp) static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0"; uint8_t buf[28]; off_t offset; -#ifdef _USE_KNETFILE - offset = knet_tell(fp->x.fpr); - if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1; - knet_read(fp->x.fpr, buf, 28); - knet_seek(fp->x.fpr, offset, SEEK_SET); -#else - offset = ftello(fp->file); - if (fseeko(fp->file, -28, SEEK_END) != 0) return -1; - fread(buf, 1, 28, fp->file); - fseeko(fp->file, offset, SEEK_SET); -#endif + offset = _bgzf_tell((_bgzf_file_t)fp->fp); + if (_bgzf_seek(fp->fp, -28, SEEK_END) < 0) return 0; + _bgzf_read(fp->fp, buf, 28); + _bgzf_seek(fp->fp, offset, SEEK_SET); return (memcmp(magic, buf, 28) == 0)? 
1 : 0; } @@ -689,26 +452,82 @@ int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) int block_offset; int64_t block_address; - if (fp->open_mode != 'r') { - report_error(fp, "file not open for read"); - return -1; - } - if (where != SEEK_SET) { - report_error(fp, "unimplemented seek option"); - return -1; + if (fp->is_write || where != SEEK_SET) { + fp->errcode |= BGZF_ERR_MISUSE; + return -1; + } + block_offset = pos & 0xFFFF; + block_address = pos >> 16; + if (_bgzf_seek(fp->fp, block_address, SEEK_SET) < 0) { + fp->errcode |= BGZF_ERR_IO; + return -1; + } + fp->block_length = 0; // indicates current block has not been loaded + fp->block_address = block_address; + fp->block_offset = block_offset; + return 0; +} + +int bgzf_is_bgzf(const char *fn) +{ + uint8_t buf[16]; + int n; + _bgzf_file_t fp; + if ((fp = _bgzf_open(fn, "r")) == 0) return 0; + n = _bgzf_read(fp, buf, 16); + _bgzf_close(fp); + if (n != 16) return 0; + return memcmp(g_magic, buf, 16) == 0? 1 : 0; +} + +int bgzf_getc(BGZF *fp) +{ + int c; + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) return -2; /* error */ + if (fp->block_length == 0) return -1; /* end-of-file */ + } + c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; + if (fp->block_offset == fp->block_length) { + fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); + fp->block_offset = 0; + fp->block_length = 0; } - block_offset = pos & 0xFFFF; - block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL; -#ifdef _USE_KNETFILE - if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) { -#else - if (fseeko(fp->file, block_address, SEEK_SET) != 0) { + return c; +} + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #endif - report_error(fp, "seek failed"); - return -1; - } - fp->block_length = 0; // indicates current block is not loaded - fp->block_address = block_address; - fp->block_offset = block_offset; - return 0; + +int 
bgzf_getline(BGZF *fp, int delim, kstring_t *str) +{ + int l, state = 0; + unsigned char *buf = (unsigned char*)fp->uncompressed_block; + str->l = 0; + do { + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) { state = -2; break; } + if (fp->block_length == 0) { state = -1; break; } + } + for (l = fp->block_offset; l < fp->block_length && buf[l] != delim; ++l); + if (l < fp->block_length) state = 1; + l -= fp->block_offset; + if (str->l + l + 1 >= str->m) { + str->m = str->l + l + 2; + kroundup32(str->m); + str->s = (char*)realloc(str->s, str->m); + } + memcpy(str->s + str->l, buf + fp->block_offset, l); + str->l += l; + fp->block_offset += l + 1; + if (fp->block_offset >= fp->block_length) { + fp->block_address = _bgzf_tell((_bgzf_file_t)fp->fp); + fp->block_offset = 0; + fp->block_length = 0; + } + } while (state == 0); + if (str->l == 0 && state < 0) return state; + str->s[str->l] = 0; + return str->l; } diff --git a/bgzf.h b/bgzf.h index 7295f37..4f69225 100644 --- a/bgzf.h +++ b/bgzf.h @@ -1,6 +1,7 @@ /* The MIT License Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + 2011 Attractive Chaos Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -21,137 +22,176 @@ THE SOFTWARE. */ +/* The BGZF library was originally written by Bob Handsaker from the Broad + * Institute. It was later improved by the SAMtools developers. 
*/ + #ifndef __BGZF_H #define __BGZF_H #include #include #include -#ifdef _USE_KNETFILE -#include "knetfile.h" -#endif -//typedef int8_t bool; +#define BGZF_BLOCK_SIZE 0xff00 +#define BGZF_MAX_BLOCK_SIZE 0x10000 + +#define BGZF_ERR_ZLIB 1 +#define BGZF_ERR_HEADER 2 +#define BGZF_ERR_IO 4 +#define BGZF_ERR_MISUSE 8 typedef struct { - int file_descriptor; - char open_mode; // 'r' or 'w' - int16_t owned_file, compress_level; -#ifdef _USE_KNETFILE - union { - knetFile *fpr; - FILE *fpw; - } x; -#else - FILE* file; -#endif - int uncompressed_block_size; - int compressed_block_size; - void* uncompressed_block; - void* compressed_block; - int64_t block_address; - int block_length; - int block_offset; + int errcode:30, is_write:2; + int compress_level, n_threads; int cache_size; - const char* error; + int block_length, block_offset; + int64_t block_address; + void *uncompressed_block, *compressed_block; void *cache; // a pointer to a hash table + void *fp; // actual file handler; FILE* on writing; FILE* or knetFile* on reading } BGZF; +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + #ifdef __cplusplus extern "C" { #endif -/* - * Open an existing file descriptor for reading or writing. - * Mode must be either "r" or "w". - * A subsequent bgzf_close will not close the file descriptor. - * Returns null on error. - */ -BGZF* bgzf_fdopen(int fd, const char* __restrict mode); - -/* - * Open the specified file for reading or writing. - * Mode must be either "r" or "w". - * Returns null on error. - */ -BGZF* bgzf_open(const char* path, const char* __restrict mode); - -/* - * Close the BGZ file and free all associated resources. - * Does not close the underlying file descriptor if created with bgzf_fdopen. - * Returns zero on success, -1 on error. - */ -int bgzf_close(BGZF* fp); - -/* - * Read up to length bytes from the file storing into data. - * Returns the number of bytes actually read. 
- * Returns zero on end of file. - * Returns -1 on error. - */ -int bgzf_read(BGZF* fp, void* data, int length); - -/* - * Write length bytes from data to the file. - * Returns the number of bytes written. - * Returns -1 on error. - */ -int bgzf_write(BGZF* fp, const void* data, int length); - -/* - * Return a virtual file pointer to the current location in the file. - * No interpetation of the value should be made, other than a subsequent - * call to bgzf_seek can be used to position the file at the same point. - * Return value is non-negative on success. - * Returns -1 on error. - */ -#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) - -/* - * Set the file to read from the location specified by pos, which must - * be a value previously returned by bgzf_tell for this file (but not - * necessarily one returned by this file handle). - * The where argument must be SEEK_SET. - * Seeking on a file opened for write is not supported. - * Returns zero on success, -1 on error. - */ -int64_t bgzf_seek(BGZF* fp, int64_t pos, int where); - -/* - * Set the cache size. Zero to disable. By default, caching is - * disabled. The recommended cache size for frequent random access is - * about 8M bytes. - */ -void bgzf_set_cache_size(BGZF *fp, int cache_size); - -int bgzf_check_EOF(BGZF *fp); -int bgzf_read_block(BGZF* fp); -int bgzf_flush(BGZF* fp); -int bgzf_flush_try(BGZF *fp, int size); -int bgzf_check_bgzf(const char *fn); + /****************** + * Basic routines * + ******************/ + + /** + * Open an existing file descriptor for reading or writing. + * + * @param fd file descriptor + * @param mode mode matching /[rwu0-9]+/: 'r' for reading, 'w' for writing and a digit specifies + * the zlib compression level; if both 'r' and 'w' are present, 'w' is ignored. 
+ * @return BGZF file handler; 0 on error + */ + BGZF* bgzf_dopen(int fd, const char *mode); + + #define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility + + /** + * Open the specified file for reading or writing. + */ + BGZF* bgzf_open(const char* path, const char *mode); + + /** + * Close the BGZF and free all associated resources. + * + * @param fp BGZF file handler + * @return 0 on success and -1 on error + */ + int bgzf_close(BGZF *fp); + + /** + * Read up to _length_ bytes from the file storing into _data_. + * + * @param fp BGZF file handler + * @param data data array to read into + * @param length size of data to read + * @return number of bytes actually read; 0 on end-of-file and -1 on error + */ + ssize_t bgzf_read(BGZF *fp, void *data, ssize_t length); + + /** + * Write _length_ bytes from _data_ to the file. + * + * @param fp BGZF file handler + * @param data data array to write + * @param length size of data to write + * @return number of bytes actually written; -1 on error + */ + ssize_t bgzf_write(BGZF *fp, const void *data, ssize_t length); + + /** + * Write the data in the buffer to the file. + */ + int bgzf_flush(BGZF *fp); + + /** + * Return a virtual file pointer to the current location in the file. + * No interpretation of the value should be made, other than a subsequent + * call to bgzf_seek can be used to position the file at the same point. + * Return value is non-negative on success. + */ + #define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) + + /** + * Set the file to read from the location specified by _pos_. 
+ * + * @param fp BGZF file handler + * @param pos virtual file offset returned by bgzf_tell() + * @param whence must be SEEK_SET + * @return 0 on success and -1 on error + */ + int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence); + + /** + * Check if the BGZF end-of-file (EOF) marker is present + * + * @param fp BGZF file handler opened for reading + * @return 1 if EOF is present; 0 if not or on I/O error + */ + int bgzf_check_EOF(BGZF *fp); + + /** + * Check if a file is in the BGZF format + * + * @param fn file name + * @return 1 if _fn_ is BGZF; 0 if not or on I/O error + */ + int bgzf_is_bgzf(const char *fn); + + /********************* + * Advanced routines * + *********************/ + + /** + * Set the cache size. Only effective when compiled with -DBGZF_CACHE. + * + * @param fp BGZF file handler + * @param size size of cache in bytes; 0 to disable caching (default) + */ + void bgzf_set_cache_size(BGZF *fp, int size); + + /** + * Flush the file if the remaining buffer size is smaller than _size_ + */ + int bgzf_flush_try(BGZF *fp, ssize_t size); + + /** + * Read one byte from a BGZF file. It is faster than bgzf_read() + * @param fp BGZF file handler + * @return byte read; -1 on end-of-file; -2 on error + */ + int bgzf_getc(BGZF *fp); + + /** + * Read one line from a BGZF file. It is faster than bgzf_getc() + * + * @param fp BGZF file handler + * @param delim delimiter + * @param str string to write to; must be initialized + * @return length of the string (0 for an empty line); -1 on end-of-file; -2 on error + */ + int bgzf_getline(BGZF *fp, int delim, kstring_t *str); + + /** + * Read the next BGZF block. 
+ */ + int bgzf_read_block(BGZF *fp); #ifdef __cplusplus } #endif -static inline int bgzf_getc(BGZF *fp) -{ - int c; - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) return -2; /* error */ - if (fp->block_length == 0) return -1; /* end-of-file */ - } - c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - if (fp->block_offset == fp->block_length) { -#ifdef _USE_KNETFILE - fp->block_address = knet_tell(fp->x.fpr); -#else - fp->block_address = ftello(fp->file); -#endif - fp->block_offset = 0; - fp->block_length = 0; - } - return c; -} - #endif -- 2.39.2