X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=faidx.c;h=055445f266cbdb429fe65c2aeb58c763af86d1d9;hb=c18ba0d59383e9bc2cf7d36c227617b03faeab25;hp=9302dfb4d6480bb94295d65e7fc7fcf4c4644332;hpb=635998cfe030da5f3dbec42a6daa3ca82fa5c871;p=samtools.git diff --git a/faidx.c b/faidx.c index 9302dfb..055445f 100644 --- a/faidx.c +++ b/faidx.c @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -15,8 +14,13 @@ KHASH_MAP_INIT_STR(s, faidx1_t) #ifndef _NO_RAZF #include "razf.h" #else +#ifdef _WIN32 +#define ftello(fp) ftell(fp) +#define fseeko(fp, offset, whence) fseek(fp, offset, whence) +#else extern off_t ftello(FILE *stream); extern int fseeko(FILE *stream, off_t offset, int whence); +#endif #define RAZF FILE #define razf_read(fp, buf, size) fread(buf, 1, size, fp) #define razf_open(fn, mode) fopen(fn, mode) @@ -66,6 +70,12 @@ faidx_t *fai_build_core(RAZF *rz) name = 0; l_name = m_name = 0; len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0; while (razf_read(rz, &c, 1)) { + if (c == '\n') { // an empty line + if (state == 1) { + offset = razf_tell(rz); + continue; + } else if ((state == 0 && len < 0) || state == 2) continue; + } if (c == '>') { // fasta header if (len >= 0) fai_insert_index(idx, name, len, line_len, line_blen, offset); @@ -79,14 +89,19 @@ faidx_t *fai_build_core(RAZF *rz) name[l_name++] = c; } name[l_name] = '\0'; - assert(ret); + if (ret == 0) { + fprintf(stderr, "[fai_build_core] the last entry has no sequence\n"); + free(name); fai_destroy(idx); + return 0; + } if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n'); state = 1; len = 0; offset = razf_tell(rz); } else { if (state == 3) { - fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'. Abort!\n", name); - exit(1); + fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name); + free(name); fai_destroy(idx); + return 0; } if (state == 2) state = 3; l1 = l2 = 0; @@ -95,13 +110,15 @@ faidx_t *fai_build_core(RAZF *rz) if (isgraph(c)) ++l2; } while ((ret = razf_read(rz, &c, 1)) && c != '\n'); if (state == 3 && l2) { - fprintf(stderr, "[fai_build_core] different line length in sequence '%s'. Abort!\n", name); - exit(1); + fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name); + free(name); fai_destroy(idx); + return 0; } ++l1; len += l2; if (l2 >= 0x10000) { - fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'. Abort!\n", name); - exit(1); + fprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence '%s'.\n", name); + free(name); fai_destroy(idx); + return 0; } if (state == 1) line_len = l1, line_blen = l2, state = 0; else if (state == 0) { @@ -122,7 +139,11 @@ void fai_save(const faidx_t *fai, FILE *fp) faidx1_t x; k = kh_get(s, fai->hash, fai->name[i]); x = kh_value(fai->hash, k); +#ifdef _WIN32 + fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len); +#else fprintf(fp, "%s\t%d\t%lld\t%d\t%d\n", fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len); +#endif } } @@ -131,14 +152,22 @@ faidx_t *fai_read(FILE *fp) faidx_t *fai; char *buf, *p; int len, line_len, line_blen; +#ifdef _WIN32 + long offset; +#else long long offset; +#endif fai = (faidx_t*)calloc(1, sizeof(faidx_t)); fai->hash = kh_init(s); buf = (char*)calloc(0x10000, 1); while (!feof(fp) && fgets(buf, 0x10000, fp)) { for (p = buf; *p && isgraph(*p); ++p); *p = 0; ++p; +#ifdef _WIN32 + sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len); +#else sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len); +#endif fai_insert_index(fai, buf, len, line_len, line_blen, offset); } free(buf); @@ -155,7 +184,7 @@ void fai_destroy(faidx_t *fai) free(fai); } -void fai_build(const char *fn) +int fai_build(const char *fn) { char *str; RAZF *rz; @@ -164,15 +193,24 @@ void fai_build(const char *fn) str = (char*)calloc(strlen(fn) + 5, 1); sprintf(str, "%s.fai", fn); rz = razf_open(fn, "r"); - assert(rz); + if (rz == 0) { + fprintf(stderr, "[fai_build] fail to open the FASTA file.\n"); + free(str); + return -1; + } fai = fai_build_core(rz); razf_close(rz); - fp = fopen(str, "w"); - assert(fp); + fp = fopen(str, "wb"); + if (fp == 0) { + fprintf(stderr, "[fai_build] fail to write FASTA index.\n"); + fai_destroy(fai); free(str); + return -1; + } fai_save(fai, fp); fclose(fp); free(str); fai_destroy(fai); + return 0; } faidx_t *fai_load(const char *fn) @@ -182,22 +220,25 @@ faidx_t *fai_load(const char *fn) faidx_t *fai; str = (char*)calloc(strlen(fn) + 5, 1); sprintf(str, "%s.fai", fn); - fp = fopen(str, "r"); + fp = fopen(str, "rb"); if (fp == 0) { fprintf(stderr, "[fai_load] build FASTA index.\n"); fai_build(fn); fp = fopen(str, "r"); if (fp == 0) { + fprintf(stderr, "[fai_load] fail to open FASTA index.\n"); free(str); return 0; } } fai = fai_read(fp); fclose(fp); - fai->rz = razf_open(fn, "r"); - if (fai->rz == 0) return 0; - assert(fai->rz); + fai->rz = razf_open(fn, "rb"); free(str); + if (fai->rz == 0) { + fprintf(stderr, "[fai_load] fail to open FASTA file.\n"); + return 0; + } return fai; } @@ -265,7 +306,7 @@ int faidx_main(int argc, char *argv[]) char *s; faidx_t *fai; fai = fai_load(argv[1]); - assert(fai); + if (fai == 0) return 1; for (i = 2; i != argc; ++i) { printf(">%s\n", argv[i]); s = fai_fetch(fai, argv[i], &l);