From 3ddb3942053df00fdae714e77cbc2f5618db617e Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 25 Jun 2009 19:44:52 +0000 Subject: [PATCH] * samtools-0.1.4-16 (r360) * report more information in index when the input is not sorted * change the behaviour of knet_seek() such that it returns 0 on success * support knetfile library in BGZF --- Makefile | 4 +-- bam_import.c | 1 + bam_index.c | 3 ++- bamtk.c | 2 +- bgzf.c | 75 +++++++++++++++++++++++++++++++++++++++++++++------- bgzf.h | 10 +++++++ knetfile.c | 16 ++++++----- knetfile.h | 2 +- 8 files changed, 92 insertions(+), 21 deletions(-) diff --git a/Makefile b/Makefile index c529c7e..0a2791e 100644 --- a/Makefile +++ b/Makefile @@ -2,9 +2,9 @@ CC= gcc CXX= g++ CFLAGS= -g -Wall -O2 #-m64 #-arch ppc CXXFLAGS= $(CFLAGS) -DFLAGS= -D_FILE_OFFSET_BITS=64 #-D_NO_CURSES +DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE #-D_NO_CURSES LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ - bam_pileup.o bam_lpileup.o bam_md.o glf.o razf.o faidx.o + bam_pileup.o bam_lpileup.o bam_md.o glf.o razf.o faidx.o knetfile.o AOBJS= bam_sort.o bam_tview.o bam_maqcns.o bam_plcmd.o sam_view.o \ bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ bamtk.o diff --git a/bam_import.c b/bam_import.c index c7f8667..fccaa02 100644 --- a/bam_import.c +++ b/bam_import.c @@ -154,6 +154,7 @@ int sam_header_parse_rg(bam_header_t *h) int n = 0; // free + if (h == 0) return 0; bam_strmap_destroy(h->rg2lib); h->rg2lib = 0; if (h->l_text < 3) return 0; // parse @RG lines diff --git a/bam_index.c b/bam_index.c index 4a41f52..347189d 100644 --- a/bam_index.c +++ b/bam_index.c @@ -155,7 +155,8 @@ bam_index_t *bam_index_core(bamFile fp) last_tid = c->tid; last_bin = 0xffffffffu; } else if (last_coor > c->pos) { - fprintf(stderr, "[bam_index_core] the alignment is not sorted. Abort!\n"); + fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n", + bam1_qname(b), last_coor, c->pos, c->tid+1); exit(1); } if (b->core.tid >= 0 && b->core.bin < 4681) insert_offset2(&idx->index2[b->core.tid], b, last_off); diff --git a/bamtk.c b/bamtk.c index 0bcf4a4..a1e87c2 100644 --- a/bamtk.c +++ b/bamtk.c @@ -3,7 +3,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.4-15 (r354)" +#define PACKAGE_VERSION "0.1.4-16 (r360)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/bgzf.c b/bgzf.c index 49a78f1..b3d25f7 100644 --- a/bgzf.c +++ b/bgzf.c @@ -83,26 +83,36 @@ report_error(BGZF* fp, const char* message) { fp->error = message; } +static BGZF *bgzf_read_init() +{ + BGZF *fp; + fp = calloc(1, sizeof(BGZF)); + fp->uncompressed_block_size = MAX_BLOCK_SIZE; + fp->uncompressed_block = malloc(MAX_BLOCK_SIZE); + fp->compressed_block_size = MAX_BLOCK_SIZE; + fp->compressed_block = malloc(MAX_BLOCK_SIZE); + return fp; +} + static BGZF* open_read(int fd) { +#ifdef _USE_KNETFILE + knetFile *file = knet_dopen(fd, "r"); +#else FILE* file = fdopen(fd, "r"); +#endif BGZF* fp; if (file == 0) return 0; - fp = malloc(sizeof(BGZF)); + fp = bgzf_read_init(); fp->file_descriptor = fd; fp->open_mode = 'r'; - fp->owned_file = 0; fp->is_uncompressed = 0; +#ifdef _USE_KNETFILE + fp->x.fpr = file; +#else fp->file = file; - fp->uncompressed_block_size = MAX_BLOCK_SIZE; - fp->uncompressed_block = malloc(MAX_BLOCK_SIZE); - fp->compressed_block_size = MAX_BLOCK_SIZE; - fp->compressed_block = malloc(MAX_BLOCK_SIZE); - fp->block_address = 0; - fp->block_offset = 0; - fp->block_length = 0; - fp->error = NULL; +#endif return fp; } @@ -117,7 +127,11 @@ open_write(int fd, bool is_uncompressed) fp->file_descriptor = fd; fp->open_mode = 'w'; fp->owned_file = 0; fp->is_uncompressed = is_uncompressed; +#ifdef _USE_KNETFILE + fp->x.fpw = file; +#else fp->file = file; +#endif fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE; fp->uncompressed_block = NULL; fp->compressed_block_size = MAX_BLOCK_SIZE; @@ -134,10 +148,19 @@ bgzf_open(const char* __restrict path, const char* __restrict mode) { BGZF* fp = NULL; if (mode[0] == 'r' || mode[0] == 'R') { /* The reading mode is preferred. */ +#ifdef _USE_KNETFILE + knetFile *file = knet_open(path, mode); + if (file == 0) return 0; + fp = bgzf_read_init(); + fp->file_descriptor = -1; + fp->open_mode = 'r'; + fp->x.fpr = file; +#else int oflag = O_RDONLY; int fd = open(path, oflag); if (fd == -1) return 0; fp = open_read(fd); +#endif } else if (mode[0] == 'w' || mode[0] == 'W') { int oflag = O_WRONLY | O_CREAT | O_TRUNC; int fd = open(path, oflag, 0644); @@ -318,8 +341,13 @@ int read_block(BGZF* fp) { byte header[BLOCK_HEADER_LENGTH]; +#ifdef _USE_KNETFILE + int64_t block_address = knet_tell(fp->x.fpr); + int count = knet_read(fp->x.fpr, header, sizeof(header)); +#else int64_t block_address = ftello(fp->file); int count = fread(header, 1, sizeof(header), fp->file); +#endif if (count == 0) { fp->block_length = 0; return 0; @@ -336,7 +364,11 @@ read_block(BGZF* fp) byte* compressed_block = (byte*) fp->compressed_block; memcpy(compressed_block, header, BLOCK_HEADER_LENGTH); int remaining = block_length - BLOCK_HEADER_LENGTH; +#ifdef _USE_KNETFILE + count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining); +#else count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file); +#endif if (count != remaining) { report_error(fp, "read failed"); return -1; @@ -386,7 +418,11 @@ bgzf_read(BGZF* fp, void* data, int length) bytes_read += copy_length; } if (fp->block_offset == fp->block_length) { +#ifdef _USE_KNETFILE + fp->block_address = knet_tell(fp->x.fpr); +#else fp->block_address = ftello(fp->file); +#endif fp->block_offset = 0; fp->block_length = 0; } @@ -402,7 +438,11 @@ flush_block(BGZF* fp) if (block_length < 0) { return -1; } +#ifdef _USE_KNETFILE + int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw); +#else int count = fwrite(fp->compressed_block, 1, block_length, fp->file); +#endif if (count != block_length) { report_error(fp, "write failed"); return -1; @@ -450,15 +490,26 @@ bgzf_close(BGZF* fp) if (flush_block(fp) != 0) { return -1; } +#ifdef _USE_KNETFILE + if (fflush(fp->x.fpw) != 0) { +#else if (fflush(fp->file) != 0) { +#endif report_error(fp, "flush failed"); return -1; } } if (fp->owned_file) { +#ifdef _USE_KNETFILE + int ret; + if (fp->open_mode == 'w') ret = fclose(fp->x.fpw); + else ret = knet_close(fp->x.fpr); + if (ret != 0) return -1; +#else if (fclose(fp->file) != 0) { return -1; } +#endif } free(fp->uncompressed_block); free(fp->compressed_block); @@ -485,7 +536,11 @@ bgzf_seek(BGZF* fp, int64_t pos, int where) } int block_offset = pos & 0xFFFF; int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL; +#ifdef _USE_KNETFILE + if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) { +#else if (fseeko(fp->file, block_address, SEEK_SET) != 0) { +#endif report_error(fp, "seek failed"); return -1; } diff --git a/bgzf.h b/bgzf.h index ca5e7ab..68375b4 100644 --- a/bgzf.h +++ b/bgzf.h @@ -16,6 +16,9 @@ #include #include #include +#ifdef _USE_KNETFILE +#include "knetfile.h" +#endif //typedef int8_t bool; @@ -23,7 +26,14 @@ typedef struct { int file_descriptor; char open_mode; // 'r' or 'w' bool owned_file, is_uncompressed; +#ifdef _USE_KNETFILE + union { + knetFile *fpr; + FILE *fpw; + } x; +#else FILE* file; +#endif int uncompressed_block_size; int compressed_block_size; void* uncompressed_block; diff --git a/knetfile.c b/knetfile.c index dbec205..322885a 100644 --- a/knetfile.c +++ b/knetfile.c @@ -208,22 +208,26 @@ off_t knet_read(knetFile *fp, void *buf, off_t len) return l; } -off_t knet_seek(knetFile *fp, off_t off, int whence) +int knet_seek(knetFile *fp, off_t off, int whence) { if (fp->type == KNF_TYPE_LOCAL) { - fp->offset = lseek(fp->fd, off, whence); - return fp->offset; + if (lseek(fp->fd, off, whence) == -1) { + perror("lseek"); + return -1; + } + fp->offset = off; + return 0; } if (fp->type == KNF_TYPE_FTP) { - if (whence != SEEK_SET) { + if (whence != SEEK_SET) { // FIXME: we can surely allow SEEK_CUR and SEEK_END in future fprintf(stderr, "[knet_seek] only SEEK_SET is supported for FTP. Offset is unchanged.\n"); return -1; } if (!fp->no_reconnect) kftp_reconnect(fp); kftp_connect_file(fp, off); - return fp->offset; + return 0; } - return 0; + return -1; } int knet_close(knetFile *fp) diff --git a/knetfile.h b/knetfile.h index 3309902..7fc86c2 100644 --- a/knetfile.h +++ b/knetfile.h @@ -30,7 +30,7 @@ extern "C" { knetFile *knet_open(const char *fn, const char *mode); knetFile *knet_dopen(int fd, const char *mode); off_t knet_read(knetFile *fp, void *buf, off_t len); - off_t knet_seek(knetFile *fp, off_t off, int whence); + int knet_seek(knetFile *fp, off_t off, int whence); int knet_close(knetFile *fp); #ifdef __cplusplus -- 2.39.2