#define razf_seek(fp, offset, whence) fseeko(fp, offset, whence)
#define razf_tell(fp) ftello(fp)
#endif
+#ifdef _USE_KNETFILE
+#include "knetfile.h"
+#endif
struct __faidx_t {
RAZF *rz;
sprintf(str, "%s.fai", fn);
rz = razf_open(fn, "r");
if (rz == 0) {
- fprintf(stderr, "[fai_build] fail to open the FASTA file.\n");
+ // razf_open() was given fn, so report fn; str holds the "<fn>.fai" index name.
+ fprintf(stderr, "[fai_build] fail to open the FASTA file %s\n",fn);
free(str);
return -1;
}
razf_close(rz);
fp = fopen(str, "wb");
if (fp == 0) {
- fprintf(stderr, "[fai_build] fail to write FASTA index.\n");
+ fprintf(stderr, "[fai_build] fail to write FASTA index %s\n",str);
fai_destroy(fai); free(str);
return -1;
}
return 0;
}
+#ifdef _USE_KNETFILE
+/*
+ * Open a local copy of a remote file, downloading it first if necessary.
+ *
+ * fn is the remote URL.  The local file name is whatever follows the last
+ * '/' in fn (the whole string when fn contains no '/'), so the copy lives in
+ * the current working directory.
+ *
+ * If a file with that name can already be opened for reading, it is returned
+ * as-is and nothing is downloaded.  Otherwise the URL is opened with
+ * knet_open(), its contents are copied to the local name, and the local file
+ * is reopened for reading.
+ *
+ * Returns a FILE* open for reading, or NULL on failure (a message is written
+ * to stderr).  When the download itself fails, the partial local file is
+ * removed so that a later call does not mistake it for a complete copy.
+ */
+FILE *download_and_open(const char *fn)
+{
+	const int buf_size = 1 * 1024 * 1024;
+	uint8_t *buf;
+	FILE *fp;
+	knetFile *fp_remote;
+	const char *url = fn;
+	const char *p;
+	int l;
+	int failed = 0;
+
+	// Local name = last path component of the URL
+	p = strrchr(fn, '/');
+	if (p) fn = p + 1;
+
+	// First try to open a local copy
+	fp = fopen(fn, "r");
+	if (fp)
+		return fp;
+
+	// If failed, download from remote and open
+	fp_remote = knet_open(url, "rb");
+	if (fp_remote == 0) {
+		fprintf(stderr, "[download_from_remote] fail to open remote file %s\n",url);
+		return NULL;
+	}
+	if ((fp = fopen(fn, "wb")) == 0) {
+		fprintf(stderr, "[download_from_remote] fail to create file in the working directory %s\n",fn);
+		knet_close(fp_remote);
+		return NULL;
+	}
+	buf = (uint8_t*)calloc(buf_size, 1);
+	if (buf == 0) {
+		fprintf(stderr, "[download_from_remote] fail to allocate the download buffer\n");
+		failed = 1;
+	} else {
+		// Stop on any non-positive count: a negative value, should knet_read()
+		// report one, must never reach fwrite() as a (huge, unsigned) size.
+		while ((l = knet_read(fp_remote, buf, buf_size)) > 0) {
+			if (fwrite(buf, 1, l, fp) != (size_t)l) {
+				fprintf(stderr, "[download_from_remote] fail to write to file %s\n",fn);
+				failed = 1;
+				break;
+			}
+		}
+		if (l < 0) {
+			fprintf(stderr, "[download_from_remote] fail to read remote file %s\n",url);
+			failed = 1;
+		}
+		free(buf);
+	}
+	// fclose() flushes buffered data, so a failure here also means a bad copy
+	if (fclose(fp) != 0 && !failed) {
+		fprintf(stderr, "[download_from_remote] fail to write to file %s\n",fn);
+		failed = 1;
+	}
+	knet_close(fp_remote);
+
+	if (failed) {
+		remove(fn);
+		return NULL;
+	}
+	return fopen(fn, "r");
+}
+#endif
+
faidx_t *fai_load(const char *fn)
{
char *str;
faidx_t *fai;
str = (char*)calloc(strlen(fn) + 5, 1);
sprintf(str, "%s.fai", fn);
- fp = fopen(str, "rb");
+
+#ifdef _USE_KNETFILE
+ if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)
+ {
+ fp = download_and_open(str);
+ if ( !fp )
+ {
+ fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str);
+ free(str);
+ return 0;
+ }
+ }
+ else
+#endif
+ fp = fopen(str, "rb");
if (fp == 0) {
fprintf(stderr, "[fai_load] build FASTA index.\n");
fai_build(fn);
- fp = fopen(str, "r");
+ fp = fopen(str, "rb");
if (fp == 0) {
fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
free(str);
return 0;
}
}
+
fai = fai_read(fp);
fclose(fp);
+
fai->rz = razf_open(fn, "rb");
free(str);
if (fai->rz == 0) {
l = 0;
s = (char*)malloc(end - beg + 2);
razf_seek(fai->rz, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET);
- while (razf_read(fai->rz, &c, 1) == 1 && l < end - beg)
+ while (razf_read(fai->rz, &c, 1) == 1 && l < end - beg && !fai->rz->z_err)
if (isgraph(c)) s[l++] = c;
s[l] = '\0';
*len = l;
return kftp_connect(ftp);
}
-// initialize ->type, ->host and ->retr
+// initialize ->type, ->host, ->retr and ->size
knetFile *kftp_parse_url(const char *fn, const char *mode)
{
knetFile *fp;
strncpy(fp->host, fn + 6, l);
fp->retr = calloc(strlen(p) + 8, 1);
sprintf(fp->retr, "RETR %s\r\n", p);
- fp->seek_offset = -1;
+ fp->size_cmd = calloc(strlen(p) + 8, 1);
+ sprintf(fp->size_cmd, "SIZE %s\r\n", p);
+ fp->seek_offset = 0;
return fp;
}
// place ->fd at offset off
int kftp_connect_file(knetFile *fp)
{
int ret;
+ long long file_size;
if (fp->fd != -1) {
netclose(fp->fd);
if (fp->no_reconnect) kftp_get_response(fp);
}
kftp_pasv_prep(fp);
- if (fp->offset) {
+ kftp_send_cmd(fp, fp->size_cmd, 1);
+ if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
+ {
+ fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
+ return -1;
+ } else fp->file_size = file_size;
+ if (fp->offset>=0) {
char tmp[32];
#ifndef _WIN32
sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
return 0;
}
+
/**************************
* HTTP specific routines *
**************************/
}
fp->type = KNF_TYPE_HTTP;
fp->ctrl_fd = fp->fd = -1;
- fp->seek_offset = -1;
+ fp->seek_offset = 0;
return fp;
}
fp->fd = socket_connect(fp->host, fp->port);
buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
- if (fp->offset)
- l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
+ l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
l += sprintf(buf + l, "\r\n");
netwrite(fp->fd, buf, l);
l = 0;
return -1;
}
ret = strtol(buf + 8, &p, 0); // HTTP return code
- if (ret == 200 && fp->offset) { // 200 (complete result); then skip beginning of the file
+ if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
off_t rest = fp->offset;
while (rest) {
off_t l = rest < 0x10000? rest : 0x10000;
return l;
}
-int knet_seek(knetFile *fp, off_t off, int whence)
+off_t knet_seek(knetFile *fp, off_t off, int whence)
{
if (whence == SEEK_SET && off == fp->offset) return 0;
if (fp->type == KNF_TYPE_LOCAL) {
return -1;
}
fp->offset = offset;
- return 0;
- } else if (fp->type == KNF_TYPE_FTP || fp->type == KNF_TYPE_HTTP) {
- if (whence != SEEK_SET) { // FIXME: we can surely allow SEEK_CUR and SEEK_END in future
- fprintf(stderr, "[knet_seek] only SEEK_SET is supported for FTP/HTTP. Offset is unchanged.\n");
+ return fp->offset;
+ }
+ else if (fp->type == KNF_TYPE_FTP)
+ {
+ if (whence==SEEK_CUR)
+ fp->offset += off;
+ else if (whence==SEEK_SET)
+ fp->offset = off;
+ else if ( whence==SEEK_END)
+ fp->offset = fp->file_size+off;
+ fp->is_ready = 0;
+ return fp->offset;
+ }
+ else if (fp->type == KNF_TYPE_HTTP)
+ {
+ if (whence == SEEK_END) { // FIXME: SEEK_CUR is handled below; SEEK_END is still unsupported for HTTP
+ fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
return -1;
}
- fp->offset = off;
+ if (whence==SEEK_CUR)
+ fp->offset += off;
+ else if (whence==SEEK_SET)
+ fp->offset = off;
fp->is_ready = 0;
- return 0;
+ return fp->offset;
}
return -1;
}
#include <unistd.h>
#include "razf.h"
+
#if ZLIB_VERNUM < 0x1221
struct _gz_header_s {
int text;
}
#endif
+#ifdef _USE_KNETFILE
+static void load_zindex(RAZF *rz, knetFile *fp){
+#else
static void load_zindex(RAZF *rz, int fd){
+#endif
int32_t i, v32;
int is_be;
if(!rz->load_index) return;
if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex));
is_be = is_big_endian();
+#ifdef _USE_KNETFILE
+ knet_read(fp, &rz->index->size, sizeof(int));
+#else
read(fd, &rz->index->size, sizeof(int));
+#endif
if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
rz->index->cap = rz->index->size;
v32 = rz->index->size / RZ_BIN_SIZE + 1;
rz->index->bin_offsets = malloc(sizeof(int64_t) * v32);
+#ifdef _USE_KNETFILE
+ knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32);
+#else
read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
+#endif
rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size);
+#ifdef _USE_KNETFILE
+ knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size);
+#else
read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size);
+#endif
if(!is_be){
for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
#endif
rz = calloc(1, sizeof(RAZF));
rz->mode = 'w';
+#ifdef _USE_KNETFILE
+ rz->x.fpw = fd;
+#else
rz->filedes = fd;
+#endif
rz->stream = calloc(sizeof(z_stream), 1);
rz->inbuf = malloc(RZ_BUFFER_SIZE);
rz->outbuf = malloc(RZ_BUFFER_SIZE);
deflate(rz->stream, Z_NO_FLUSH);
rz->out += tout - rz->stream->avail_out;
if(rz->stream->avail_out) break;
+#ifdef _USE_KNETFILE
+ write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#else
write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#endif
rz->stream->avail_out = RZ_BUFFER_SIZE;
rz->stream->next_out = rz->outbuf;
if(rz->stream->avail_in == 0) break;
rz->buf_off = rz->buf_len = 0;
}
if(rz->stream->avail_out){
+#ifdef _USE_KNETFILE
+ write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#else
write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#endif
rz->stream->avail_out = RZ_BUFFER_SIZE;
rz->stream->next_out = rz->outbuf;
}
deflate(rz->stream, Z_FULL_FLUSH);
rz->out += tout - rz->stream->avail_out;
if(rz->stream->avail_out == 0){
+#ifdef _USE_KNETFILE
+ write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#else
write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#endif
rz->stream->avail_out = RZ_BUFFER_SIZE;
rz->stream->next_out = rz->outbuf;
} else break;
deflate(rz->stream, Z_FINISH);
rz->out += tout - rz->stream->avail_out;
if(rz->stream->avail_out < RZ_BUFFER_SIZE){
+#ifdef _USE_KNETFILE
+ write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#else
write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
+#endif
rz->stream->avail_out = RZ_BUFFER_SIZE;
rz->stream->next_out = rz->outbuf;
} else break;
return n;
}
+#ifdef _USE_KNETFILE
+static RAZF* razf_open_r(knetFile *fp, int _load_index){
+#else
static RAZF* razf_open_r(int fd, int _load_index){
+#endif
RAZF *rz;
int ext_off, ext_len;
int n, is_be, ret;
int64_t end;
unsigned char c[] = "RAZF";
+ rz = calloc(1, sizeof(RAZF));
+ rz->mode = 'r';
+#ifdef _USE_KNETFILE
+ rz->x.fpr = fp;
+#else
#ifdef _WIN32
setmode(fd, O_BINARY);
#endif
- rz = calloc(1, sizeof(RAZF));
- rz->mode = 'r';
rz->filedes = fd;
+#endif
rz->stream = calloc(sizeof(z_stream), 1);
rz->inbuf = malloc(RZ_BUFFER_SIZE);
rz->outbuf = malloc(RZ_BUFFER_SIZE);
rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
+#ifdef _USE_KNETFILE
+ n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
+#else
n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
+#endif
ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
if(ret == 0){
PLAIN_FILE:
}
rz->load_index = _load_index;
rz->file_type = FILE_TYPE_RZ;
+#ifdef _USE_KNETFILE
+ if(knet_seek(fp, -16, SEEK_END) == -1){
+#else
if(lseek(fd, -16, SEEK_END) == -1){
+#endif
UNSEEKABLE:
rz->seekable = 0;
rz->index = NULL;
} else {
is_be = is_big_endian();
rz->seekable = 1;
+#ifdef _USE_KNETFILE
+ knet_read(fp, &end, sizeof(int64_t));
+#else
read(fd, &end, sizeof(int64_t));
+#endif
if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
else rz->src_end = end;
+
+#ifdef _USE_KNETFILE
+ knet_read(fp, &end, sizeof(int64_t));
+#else
read(fd, &end, sizeof(int64_t));
+#endif
if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
else rz->end = end;
if(n > rz->end){
n = rz->end;
}
if(rz->end > rz->src_end){
+#ifdef _USE_KNETFILE
+ knet_seek(fp, rz->in, SEEK_SET);
+#else
lseek(fd, rz->in, SEEK_SET);
+#endif
goto UNSEEKABLE;
}
+#ifdef _USE_KNETFILE
+ if(knet_seek(fp, rz->end, SEEK_SET) != rz->end){
+ knet_seek(fp, rz->in, SEEK_SET);
+#else
if(lseek(fd, rz->end, SEEK_SET) != rz->end){
lseek(fd, rz->in, SEEK_SET);
+#endif
goto UNSEEKABLE;
}
+#ifdef _USE_KNETFILE
+ load_zindex(rz, fp);
+ knet_seek(fp, n, SEEK_SET);
+#else
load_zindex(rz, fd);
lseek(fd, n, SEEK_SET);
+#endif
}
return rz;
}
+#ifdef _USE_KNETFILE
+/*
+ * Descriptor-based open for _USE_KNETFILE builds.
+ * Read mode is not implemented here: a notice is printed and NULL returned.
+ * Write mode hands the descriptor to razf_open_w().  Any other mode gives NULL.
+ */
+RAZF* razf_dopen(int fd, const char *mode){
+	if (strstr(mode, "r") != NULL) {
+		fprintf(stderr,"[razf_dopen] implement me\n");
+		return NULL;
+	}
+	if (strstr(mode, "w") != NULL)
+		return razf_open_w(fd);
+	return NULL;
+}
+
+/*
+ * Placeholder for _USE_KNETFILE builds: not implemented.
+ * Prints a notice to stderr and always returns NULL; fd and mode are unused.
+ */
+RAZF* razf_dopen2(int fd, const char *mode)
+{
+	(void)fd;
+	(void)mode;
+	fputs("[razf_dopen2] implement me\n", stderr);
+	return NULL;
+}
+#else
RAZF* razf_dopen(int fd, const char *mode){
if(strstr(mode, "r")) return razf_open_r(fd, 1);
else if(strstr(mode, "w")) return razf_open_w(fd);
else if(strstr(mode, "w")) return razf_open_w(fd);
else return NULL;
}
+#endif
static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
int fd;
RAZF *rz;
if(strstr(mode, "r")){
+#ifdef _USE_KNETFILE
+ knetFile *fd = knet_open(filename, "r");
+ if (fd == 0) {
+ fprintf(stderr, "[_razf_open] fail to open remote file %s\n", filename);
+ return NULL;
+ }
+#else
#ifdef _WIN32
fd = open(filename, O_RDONLY | O_BINARY);
#else
fd = open(filename, O_RDONLY);
+#endif
#endif
if(fd < 0) return NULL;
rz = razf_open_r(fd, _load_index);
switch(rz->file_type){
case FILE_TYPE_PLAIN:
if(rz->end == 0x7fffffffffffffffLL){
+#ifdef _USE_KNETFILE
+ if((n = knet_seek(rz->x.fpr, 0, SEEK_CUR)) == -1) return 0;
+ rz->end = knet_seek(rz->x.fpr, 0, SEEK_END);
+ knet_seek(rz->x.fpr, n, SEEK_SET);
+#else
if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
rz->end = lseek(rz->filedes, 0, SEEK_END);
lseek(rz->filedes, n, SEEK_SET);
+#endif
}
*u_size = *c_size = rz->end;
return 1;
int ret, tin;
if(rz->z_eof || rz->z_err) return 0;
if (rz->file_type == FILE_TYPE_PLAIN) {
+#ifdef _USE_KNETFILE
+ ret = knet_read(rz->x.fpr, data, size);
+#else
ret = read(rz->filedes, data, size);
+#endif
if (ret == 0) rz->z_eof = 1;
return ret;
}
if(rz->stream->avail_in == 0){
if(rz->in >= rz->end){ rz->z_eof = 1; break; }
if(rz->end - rz->in < RZ_BUFFER_SIZE){
+#ifdef _USE_KNETFILE
+ rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
+#else
rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
+#endif
} else {
+#ifdef _USE_KNETFILE
+ rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
+#else
rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
+#endif
}
if(rz->stream->avail_in == 0){
rz->z_eof = 1;
ret = inflate(rz->stream, Z_BLOCK);
rz->in += tin - rz->stream->avail_in;
if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
- fprintf(stderr, "[_razf_read] inflate error: %d (at %s:%d)\n", ret, __FILE__, __LINE__);
+ fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
rz->z_err = 1;
break;
}
}
if(rz->buf_flush) continue;
rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
- if(rz->z_eof) break;
+ if(rz->z_eof || rz->z_err) break;
}
rz->out += ori_size - size;
return ori_size - size;
}
static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
+#ifdef _USE_KNETFILE
+ knet_seek(rz->x.fpr, in, SEEK_SET);
+#else
lseek(rz->filedes, in, SEEK_SET);
+#endif
rz->in = in;
rz->out = out;
rz->block_pos = in;
if(rz->file_type == FILE_TYPE_PLAIN){
rz->buf_off = rz->buf_len = 0;
pos = block_start + block_offset;
+#ifdef _USE_KNETFILE
+ pos = knet_seek(rz->x.fpr, pos, SEEK_SET);
+#else
pos = lseek(rz->filedes, pos, SEEK_SET);
+#endif
rz->out = rz->in = pos;
return pos;
}
if (where == SEEK_CUR) pos += rz->out;
else if (where == SEEK_END) pos += rz->src_end;
if(rz->file_type == FILE_TYPE_PLAIN){
+#ifdef _USE_KNETFILE
+ seek_pos = knet_seek(rz->x.fpr, pos, SEEK_SET);
+#else
seek_pos = lseek(rz->filedes, pos, SEEK_SET);
+#endif
rz->buf_off = rz->buf_len = 0;
rz->out = rz->in = seek_pos;
return seek_pos;
#ifndef _RZ_READONLY
razf_end_flush(rz);
deflateEnd(rz->stream);
+#ifdef _USE_KNETFILE
+ save_zindex(rz, rz->x.fpw);
+ if(is_big_endian()){
+ write(rz->x.fpw, &rz->in, sizeof(int64_t));
+ write(rz->x.fpw, &rz->out, sizeof(int64_t));
+ } else {
+ uint64_t v64 = byte_swap_8((uint64_t)rz->in);
+ write(rz->x.fpw, &v64, sizeof(int64_t));
+ v64 = byte_swap_8((uint64_t)rz->out);
+ write(rz->x.fpw, &v64, sizeof(int64_t));
+ }
+#else
save_zindex(rz, rz->filedes);
if(is_big_endian()){
write(rz->filedes, &rz->in, sizeof(int64_t));
v64 = byte_swap_8((uint64_t)rz->out);
write(rz->filedes, &v64, sizeof(int64_t));
}
+#endif
#endif
} else if(rz->mode == 'r'){
if(rz->stream) inflateEnd(rz->stream);
free(rz->index);
}
free(rz->stream);
+#ifdef _USE_KNETFILE
+ if (rz->mode == 'r')
+ knet_close(rz->x.fpr);
+ if (rz->mode == 'w')
+ close(rz->x.fpw);
+#else
close(rz->filedes);
+#endif
free(rz);
}