git.donarmstrong.com Git - samtools.git/commitdiff
* samtools-0.1.4-16 (r360)
author Heng Li <lh3@live.co.uk>
Thu, 25 Jun 2009 19:44:52 +0000 (19:44 +0000)
committer Heng Li <lh3@live.co.uk>
Thu, 25 Jun 2009 19:44:52 +0000 (19:44 +0000)
 * report more information in index when the input is not sorted
 * change the behaviour of knet_seek() such that it returns 0 on success
 * support knetfile library in BGZF

Makefile
bam_import.c
bam_index.c
bamtk.c
bgzf.c
bgzf.h
knetfile.c
knetfile.h

diff --git a/Makefile b/Makefile
index c529c7e302b9b5ad4e354f898821096b3dd7db4b..0a2791e8810435f244e310ec8e64aaa46331269a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,9 +2,9 @@ CC=                     gcc
 CXX=           g++
 CFLAGS=                -g -Wall -O2 #-m64 #-arch ppc
 CXXFLAGS=      $(CFLAGS)
-DFLAGS=                -D_FILE_OFFSET_BITS=64 #-D_NO_CURSES
+DFLAGS=                -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE #-D_NO_CURSES
 LOBJS=         bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
-                       bam_pileup.o bam_lpileup.o bam_md.o glf.o razf.o faidx.o
+                       bam_pileup.o bam_lpileup.o bam_md.o glf.o razf.o faidx.o knetfile.o
 AOBJS=         bam_sort.o bam_tview.o bam_maqcns.o bam_plcmd.o sam_view.o      \
                        bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o     \
                        bamtk.o
diff --git a/bam_import.c b/bam_import.c
index c7f866701edd87f37e18fbc9768ed63f68be7f9e..fccaa022208131b27093a2b44f32e74d13a469c0 100644 (file)
--- a/bam_import.c
+++ b/bam_import.c
@@ -154,6 +154,7 @@ int sam_header_parse_rg(bam_header_t *h)
        int n = 0;
 
        // free
+       if (h == 0) return 0;
        bam_strmap_destroy(h->rg2lib); h->rg2lib = 0;
        if (h->l_text < 3) return 0;
        // parse @RG lines
diff --git a/bam_index.c b/bam_index.c
index 4a41f5264a67148c75f1f9be6d7e6880e5b848a6..347189d63e397d4d433cc42783aea3a07545e9e2 100644 (file)
--- a/bam_index.c
+++ b/bam_index.c
@@ -155,7 +155,8 @@ bam_index_t *bam_index_core(bamFile fp)
                        last_tid = c->tid;
                        last_bin = 0xffffffffu;
                } else if (last_coor > c->pos) {
-                       fprintf(stderr, "[bam_index_core] the alignment is not sorted. Abort!\n");
+                       fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n",
+                                       bam1_qname(b), last_coor, c->pos, c->tid+1);
                        exit(1);
                }
                if (b->core.tid >= 0 && b->core.bin < 4681) insert_offset2(&idx->index2[b->core.tid], b, last_off);
diff --git a/bamtk.c b/bamtk.c
index 0bcf4a4fafdca0704eafbea295d477abf4084ef4..a1e87c20eb7aaeeef2aa7cc51c7147182d7a2be3 100644 (file)
--- a/bamtk.c
+++ b/bamtk.c
@@ -3,7 +3,7 @@
 #include "bam.h"
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.4-15 (r354)"
+#define PACKAGE_VERSION "0.1.4-16 (r360)"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);
diff --git a/bgzf.c b/bgzf.c
index 49a78f13e1b053a6da30c37be76d5a1769a0a530..b3d25f781c0b18e53fc9e9707ccb6350e175a627 100644 (file)
--- a/bgzf.c
+++ b/bgzf.c
@@ -83,26 +83,36 @@ report_error(BGZF* fp, const char* message) {
     fp->error = message;
 }
 
+static BGZF *bgzf_read_init()
+{
+       BGZF *fp;
+       fp = calloc(1, sizeof(BGZF));
+    fp->uncompressed_block_size = MAX_BLOCK_SIZE;
+    fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
+    fp->compressed_block_size = MAX_BLOCK_SIZE;
+    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
+       return fp;
+}
+
 static
 BGZF*
 open_read(int fd)
 {
+#ifdef _USE_KNETFILE
+    knetFile *file = knet_dopen(fd, "r");
+#else
     FILE* file = fdopen(fd, "r");
+#endif
     BGZF* fp;
        if (file == 0) return 0;
-       fp = malloc(sizeof(BGZF));
+       fp = bgzf_read_init();
     fp->file_descriptor = fd;
     fp->open_mode = 'r';
-    fp->owned_file = 0; fp->is_uncompressed = 0;
+#ifdef _USE_KNETFILE
+    fp->x.fpr = file;
+#else
     fp->file = file;
-    fp->uncompressed_block_size = MAX_BLOCK_SIZE;
-    fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
-    fp->compressed_block_size = MAX_BLOCK_SIZE;
-    fp->compressed_block = malloc(MAX_BLOCK_SIZE);
-    fp->block_address = 0;
-    fp->block_offset = 0;
-    fp->block_length = 0;
-    fp->error = NULL;
+#endif
     return fp;
 }
 
@@ -117,7 +127,11 @@ open_write(int fd, bool is_uncompressed)
     fp->file_descriptor = fd;
     fp->open_mode = 'w';
     fp->owned_file = 0; fp->is_uncompressed = is_uncompressed;
+#ifdef _USE_KNETFILE
+    fp->x.fpw = file;
+#else
     fp->file = file;
+#endif
     fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
     fp->uncompressed_block = NULL;
     fp->compressed_block_size = MAX_BLOCK_SIZE;
@@ -134,10 +148,19 @@ bgzf_open(const char* __restrict path, const char* __restrict mode)
 {
     BGZF* fp = NULL;
     if (mode[0] == 'r' || mode[0] == 'R') { /* The reading mode is preferred. */
+#ifdef _USE_KNETFILE
+               knetFile *file = knet_open(path, mode);
+               if (file == 0) return 0;
+               fp = bgzf_read_init();
+               fp->file_descriptor = -1;
+               fp->open_mode = 'r';
+               fp->x.fpr = file;
+#else
                int oflag = O_RDONLY;
                int fd = open(path, oflag);
                if (fd == -1) return 0;
         fp = open_read(fd);
+#endif
     } else if (mode[0] == 'w' || mode[0] == 'W') {
                int oflag = O_WRONLY | O_CREAT | O_TRUNC;
                int fd = open(path, oflag, 0644);
@@ -318,8 +341,13 @@ int
 read_block(BGZF* fp)
 {
     byte header[BLOCK_HEADER_LENGTH];
+#ifdef _USE_KNETFILE
+    int64_t block_address = knet_tell(fp->x.fpr);
+    int count = knet_read(fp->x.fpr, header, sizeof(header));
+#else
     int64_t block_address = ftello(fp->file);
     int count = fread(header, 1, sizeof(header), fp->file);
+#endif
     if (count == 0) {
         fp->block_length = 0;
         return 0;
@@ -336,7 +364,11 @@ read_block(BGZF* fp)
     byte* compressed_block = (byte*) fp->compressed_block;
     memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
     int remaining = block_length - BLOCK_HEADER_LENGTH;
+#ifdef _USE_KNETFILE
+    count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
+#else
     count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
+#endif
     if (count != remaining) {
         report_error(fp, "read failed");
         return -1;
@@ -386,7 +418,11 @@ bgzf_read(BGZF* fp, void* data, int length)
         bytes_read += copy_length;
     }
     if (fp->block_offset == fp->block_length) {
+#ifdef _USE_KNETFILE
+        fp->block_address = knet_tell(fp->x.fpr);
+#else
         fp->block_address = ftello(fp->file);
+#endif
         fp->block_offset = 0;
         fp->block_length = 0;
     }
@@ -402,7 +438,11 @@ flush_block(BGZF* fp)
         if (block_length < 0) {
             return -1;
         }
+#ifdef _USE_KNETFILE
+        int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
+#else
         int count = fwrite(fp->compressed_block, 1, block_length, fp->file);
+#endif
         if (count != block_length) {
             report_error(fp, "write failed");
             return -1;
@@ -450,15 +490,26 @@ bgzf_close(BGZF* fp)
         if (flush_block(fp) != 0) {
             return -1;
         }
+#ifdef _USE_KNETFILE
+        if (fflush(fp->x.fpw) != 0) {
+#else
         if (fflush(fp->file) != 0) {
+#endif
             report_error(fp, "flush failed");
             return -1;
         }
     }
     if (fp->owned_file) {
+#ifdef _USE_KNETFILE
+               int ret;
+               if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
+               else ret = knet_close(fp->x.fpr);
+        if (ret != 0) return -1;
+#else
         if (fclose(fp->file) != 0) {
             return -1;
         }
+#endif
     }
     free(fp->uncompressed_block);
     free(fp->compressed_block);
@@ -485,7 +536,11 @@ bgzf_seek(BGZF* fp, int64_t pos, int where)
     }
     int block_offset = pos & 0xFFFF;
     int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
+#ifdef _USE_KNETFILE
+    if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
+#else
     if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
+#endif
         report_error(fp, "seek failed");
         return -1;
     }
diff --git a/bgzf.h b/bgzf.h
index ca5e7ab08b53723491b18534a48e03b7188fbba7..68375b4e07f523ebe6c6b2efdb50b2951977695a 100644 (file)
--- a/bgzf.h
+++ b/bgzf.h
@@ -16,6 +16,9 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <zlib.h>
+#ifdef _USE_KNETFILE
+#include "knetfile.h"
+#endif
 
 //typedef int8_t bool;
 
@@ -23,7 +26,14 @@ typedef struct {
     int file_descriptor;
     char open_mode;  // 'r' or 'w'
     bool owned_file, is_uncompressed;
+#ifdef _USE_KNETFILE
+       union {
+               knetFile *fpr;
+               FILE *fpw;
+       } x;
+#else
     FILE* file;
+#endif
     int uncompressed_block_size;
     int compressed_block_size;
     void* uncompressed_block;
diff --git a/knetfile.c b/knetfile.c
index dbec205d2c29a0a562a5e5d337a3e914c3de7c21..322885a379642d19b638a9b0164b153cdcf5ed0b 100644 (file)
--- a/knetfile.c
+++ b/knetfile.c
@@ -208,22 +208,26 @@ off_t knet_read(knetFile *fp, void *buf, off_t len)
        return l;
 }
 
-off_t knet_seek(knetFile *fp, off_t off, int whence)
+int knet_seek(knetFile *fp, off_t off, int whence)
 {
        if (fp->type == KNF_TYPE_LOCAL) {
-               fp->offset = lseek(fp->fd, off, whence);
-               return fp->offset;
+               if (lseek(fp->fd, off, whence) == -1) {
+                       perror("lseek");
+                       return -1;
+               }
+               fp->offset = off;
+               return 0;
        }
        if (fp->type == KNF_TYPE_FTP) {
-               if (whence != SEEK_SET) {
+               if (whence != SEEK_SET) { // FIXME: we can surely allow SEEK_CUR and SEEK_END in future
                        fprintf(stderr, "[knet_seek] only SEEK_SET is supported for FTP. Offset is unchanged.\n");
                        return -1;
                }
                if (!fp->no_reconnect) kftp_reconnect(fp);
                kftp_connect_file(fp, off);
-               return fp->offset;
+               return 0;
        }
-       return 0;
+       return -1;
 }
 
 int knet_close(knetFile *fp)
diff --git a/knetfile.h b/knetfile.h
index 3309902304b23c3d4a7c95edfd4de5a8bf60e9d7..7fc86c244490e0d179ee24f80b79b94dcd905139 100644 (file)
--- a/knetfile.h
+++ b/knetfile.h
@@ -30,7 +30,7 @@ extern "C" {
        knetFile *knet_open(const char *fn, const char *mode);
        knetFile *knet_dopen(int fd, const char *mode);
        off_t knet_read(knetFile *fp, void *buf, off_t len);
-       off_t knet_seek(knetFile *fp, off_t off, int whence);
+       int knet_seek(knetFile *fp, off_t off, int whence);
        int knet_close(knetFile *fp);
 
 #ifdef __cplusplus