git.donarmstrong.com Git - samtools.git/blobdiff - faidx.c
* samtools-0.1.6-2 (r457)
[samtools.git] / faidx.c
diff --git a/faidx.c b/faidx.c
index 9302dfb4d6480bb94295d65e7fc7fcf4c4644332..055445f266cbdb429fe65c2aeb58c763af86d1d9 100644 (file)
--- a/faidx.c
+++ b/faidx.c
@@ -1,5 +1,4 @@
 #include <ctype.h>
-#include <assert.h>
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -15,8 +14,13 @@ KHASH_MAP_INIT_STR(s, faidx1_t)
 #ifndef _NO_RAZF
 #include "razf.h"
 #else
+#ifdef _WIN32
+#define ftello(fp) ftell(fp)
+#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
+#else
 extern off_t ftello(FILE *stream);
 extern int fseeko(FILE *stream, off_t offset, int whence);
+#endif
 #define RAZF FILE
 #define razf_read(fp, buf, size) fread(buf, 1, size, fp)
 #define razf_open(fn, mode) fopen(fn, mode)
@@ -66,6 +70,12 @@ faidx_t *fai_build_core(RAZF *rz)
        name = 0; l_name = m_name = 0;
        len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;
        while (razf_read(rz, &c, 1)) {
+               if (c == '\n') { // an empty line
+                       if (state == 1) {
+                               offset = razf_tell(rz);
+                               continue;
+                       } else if ((state == 0 && len < 0) || state == 2) continue;
+               }
                if (c == '>') { // fasta header
                        if (len >= 0)
                                fai_insert_index(idx, name, len, line_len, line_blen, offset);
@@ -79,14 +89,19 @@ faidx_t *fai_build_core(RAZF *rz)
                                name[l_name++] = c;
                        }
                        name[l_name] = '\0';
-                       assert(ret);
+                       if (ret == 0) {
+                               fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
+                               free(name); fai_destroy(idx);
+                               return 0;
+                       }
                        if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n');
                        state = 1; len = 0;
                        offset = razf_tell(rz);
                } else {
                        if (state == 3) {
-                               fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'. Abort!\n", name);
-                               exit(1);
+                               fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
+                               free(name); fai_destroy(idx);
+                               return 0;
                        }
                        if (state == 2) state = 3;
                        l1 = l2 = 0;
@@ -95,13 +110,15 @@ faidx_t *fai_build_core(RAZF *rz)
                                if (isgraph(c)) ++l2;
                        } while ((ret = razf_read(rz, &c, 1)) && c != '\n');
                        if (state == 3 && l2) {
-                               fprintf(stderr, "[fai_build_core] different line length in sequence '%s'. Abort!\n", name);
-                               exit(1);
+                               fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
+                               free(name); fai_destroy(idx);
+                               return 0;
                        }
                        ++l1; len += l2;
                        if (l2 >= 0x10000) {
-                               fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'. Abort!\n", name);
-                               exit(1);
+                               fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'.\n", name);
+                               free(name); fai_destroy(idx);
+                               return 0;
                        }
                        if (state == 1) line_len = l1, line_blen = l2, state = 0;
                        else if (state == 0) {
@@ -122,7 +139,11 @@ void fai_save(const faidx_t *fai, FILE *fp)
                faidx1_t x;
                k = kh_get(s, fai->hash, fai->name[i]);
                x = kh_value(fai->hash, k);
+#ifdef _WIN32
+               fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len);
+#else
                fprintf(fp, "%s\t%d\t%lld\t%d\t%d\n", fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len);
+#endif
        }
 }
 
@@ -131,14 +152,22 @@ faidx_t *fai_read(FILE *fp)
        faidx_t *fai;
        char *buf, *p;
        int len, line_len, line_blen;
+#ifdef _WIN32
+       long offset;
+#else
        long long offset;
+#endif
        fai = (faidx_t*)calloc(1, sizeof(faidx_t));
        fai->hash = kh_init(s);
        buf = (char*)calloc(0x10000, 1);
        while (!feof(fp) && fgets(buf, 0x10000, fp)) {
                for (p = buf; *p && isgraph(*p); ++p);
                *p = 0; ++p;
+#ifdef _WIN32
+               sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len);
+#else
                sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len);
+#endif
                fai_insert_index(fai, buf, len, line_len, line_blen, offset);
        }
        free(buf);
@@ -155,7 +184,7 @@ void fai_destroy(faidx_t *fai)
        free(fai);
 }
 
-void fai_build(const char *fn)
+int fai_build(const char *fn)
 {
        char *str;
        RAZF *rz;
@@ -164,15 +193,24 @@ void fai_build(const char *fn)
        str = (char*)calloc(strlen(fn) + 5, 1);
        sprintf(str, "%s.fai", fn);
        rz = razf_open(fn, "r");
-       assert(rz);
+       if (rz == 0) {
+               fprintf(stderr, "[fai_build] fail to open the FASTA file.\n");
+               free(str);
+               return -1;
+       }
        fai = fai_build_core(rz);
        razf_close(rz);
-       fp = fopen(str, "w");
-       assert(fp);
+       fp = fopen(str, "wb");
+       if (fp == 0) {
+               fprintf(stderr, "[fai_build] fail to write FASTA index.\n");
+               fai_destroy(fai); free(str);
+               return -1;
+       }
        fai_save(fai, fp);
        fclose(fp);
        free(str);
        fai_destroy(fai);
+       return 0;
 }
 
 faidx_t *fai_load(const char *fn)
@@ -182,22 +220,25 @@ faidx_t *fai_load(const char *fn)
        faidx_t *fai;
        str = (char*)calloc(strlen(fn) + 5, 1);
        sprintf(str, "%s.fai", fn);
-       fp = fopen(str, "r");
+       fp = fopen(str, "rb");
        if (fp == 0) {
                fprintf(stderr, "[fai_load] build FASTA index.\n");
                fai_build(fn);
                fp = fopen(str, "r");
                if (fp == 0) {
+                       fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
                        free(str);
                        return 0;
                }
        }
        fai = fai_read(fp);
        fclose(fp);
-       fai->rz = razf_open(fn, "r");
-       if (fai->rz == 0) return 0;
-       assert(fai->rz);
+       fai->rz = razf_open(fn, "rb");
        free(str);
+       if (fai->rz == 0) {
+               fprintf(stderr, "[fai_load] fail to open FASTA file.\n");
+               return 0;
+       }
        return fai;
 }
 
@@ -265,7 +306,7 @@ int faidx_main(int argc, char *argv[])
                        char *s;
                        faidx_t *fai;
                        fai = fai_load(argv[1]);
-                       assert(fai);
+                       if (fai == 0) return 1;
                        for (i = 2; i != argc; ++i) {
                                printf(">%s\n", argv[i]);
                                s = fai_fetch(fai, argv[i], &l);