13 #include <arpa/inet.h>
14 #include <sys/socket.h>
/*
 * Block on a single descriptor with select(), giving up after 5 seconds.
 *
 * fd      - descriptor to wait on (select() is called with nfds = fd+1).
 * is_read - non-zero: the fd set is handed to select() as the read set.
 *           NOTE(review): callers pass 0 when they want to write, so the
 *           zero case presumably installs the set as the write set - confirm.
 *
 * A select() failure (-1) is reported through perror("select").
 * NOTE(review): every caller in this file treats a result <= 0 as
 * "descriptor not ready", i.e. timeout and error are handled alike.
 */
19 static int socket_wait(int fd, int is_read)
// fdr/fdw start as null pointers so that only the requested direction is watched.
21 fd_set fds, *fdr = 0, *fdw = 0;
24 tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
27 if (is_read) fdr = &fds;
29 ret = select(fd+1, fdr, fdw, 0, &tv);
30 if (ret == -1) perror("select");
/*
 * socket_connect (getaddrinfo-based variant): resolve host/port and open a
 * connected stream socket.
 *
 * host, port - node and service strings passed straight to getaddrinfo().
 *
 * Any failing step goes through __err_connect(), which prints the failing
 * call's name with perror() and returns -1.
 * Only the first entry of the getaddrinfo() result list is tried.
 */
35 static int socket_connect(const char *host, const char *port)
// Error exit: report, release the addrinfo list, return -1.
// NOTE(review): the macro does not close fd, so a socket created before a
// failing setsockopt()/connect() stays open.
// NOTE(review): on a getaddrinfo() failure `res` has not been filled in, yet
// the macro still hands it to freeaddrinfo() - confirm and guard.
37 #define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)
// l_onoff = 0, l_linger = 0
40 struct linger lng = { 0, 0 };
41 struct addrinfo hints, *res;
42 memset(&hints, 0, sizeof(struct addrinfo));
// AF_UNSPEC: accept whichever address family the resolver returns.
43 hints.ai_family = AF_UNSPEC;
44 hints.ai_socktype = SOCK_STREAM;
// NOTE(review): getaddrinfo() reports errors through its return value, not
// errno, so the perror() text may not describe the actual failure.
45 if (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");
46 if ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");
47 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");
48 if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("setsockopt");
49 if (connect(fd, res->ai_addr, res->ai_addrlen) != 0) __err_connect("connect");
57 return WSAStartup(MAKEWORD(2, 2), &wsaData);
59 void knet_win32_destroy()
/*
 * socket_connect (SOCKET-returning variant): create an IPv4 TCP socket,
 * resolve `host` through the hostent API and connect to it.
 *
 * host - host name or address string.
 * port - decimal port string, converted with atoi().
 *
 * Failures are reported with perror() and yield -1 via __err_connect().
 */
63 static SOCKET socket_connect(const char *host, const char *port)
// Error exit: report and return -1.
// NOTE(review): fd is not closed here, so a socket created before a later
// failing step stays open.
65 #define __err_connect(func) do { perror(func); return -1; } while (0)
69 struct linger lng = { 0, 0 };
70 struct sockaddr_in server;
71 struct hostent *hp = 0;
73 if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
74 if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
75 if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
// A host starting with a letter is looked up by name.
77 if (isalpha(host[0])) hp = gethostbyname(host);
// Address-string path: parse with inet_addr(), then look the address up
// (presumably the alternative to the name lookup above - confirm).
80 addr.s_addr = inet_addr(host);
81 hp = gethostbyaddr((char*)&addr, 4, AF_INET);
83 if (hp == 0) __err_connect("gethost");
// Use the first address of the hostent entry.
85 server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
86 server.sin_family= AF_INET;
// NOTE(review): atoi() gives no error indication for a malformed port string.
87 server.sin_port = htons(atoi(port));
88 if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
89 // freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
/*
 * Read from a socket into buf, accumulating across several netread() calls.
 *
 * fd  - socket to read from.
 * buf - destination buffer; data is appended at offset l.
 * len - number of bytes requested.
 *
 * Each pass first waits for readability through socket_wait(); reading stops
 * when the socket is not ready or when netread() returns 0.
 */
94 static off_t my_netread(int fd, void *buf, off_t len)
// rest: bytes still wanted; l: bytes stored so far.
96 off_t rest = len, curr, l = 0;
98 if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
// NOTE(review): `buf + l` is arithmetic on void*, a compiler extension rather
// than standard C.
99 curr = netread(fd, buf + l, rest);
// NOTE(review): only 0 is tested; a negative (error) result would be added to
// l and subtracted from rest below.
100 if (curr == 0) break;
101 l += curr; rest -= curr;
106 /*************************
107 * FTP specific routines *
108 *************************/
/*
 * Read one reply from the FTP control connection into ftp->response and
 * return its numeric value.
 *
 * Returns 0 when the control socket does not become readable, -1 when fewer
 * than 2 bytes were collected, otherwise the strtol() value of the reply.
 */
110 static int kftp_get_response(knetFile *ftp)
115 if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
116 while (netread(ftp->ctrl_fd, &c, 1)) { // FIXME: this is *VERY BAD* for unbuffered I/O
// Grow the reply buffer by doubling, starting at 256 bytes.
118 if (n >= ftp->max_response) {
119 ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256;
// NOTE(review): the realloc() result is stored unchecked; on failure the old
// buffer is lost and the store below dereferences a null pointer.
120 ftp->response = realloc(ftp->response, ftp->max_response);
122 ftp->response[n++] = c;
// Stop once the buffer begins with three digits whose following character is
// not '-' (a '-' in that position marks a multi-line reply in FTP).
124 if (n >= 4 && isdigit(ftp->response[0]) && isdigit(ftp->response[1]) && isdigit(ftp->response[2])
125 && ftp->response[3] != '-') break;
130 if (n < 2) return -1;
// Terminate the string two bytes before the end (presumably dropping the
// trailing CR LF - confirm).
131 ftp->response[n-2] = 0;
132 return strtol(ftp->response, &p, 0);
/*
 * Send one command string on the FTP control connection.
 *
 * cmd    - NUL-terminated command; the caller supplies the trailing "\r\n".
 * is_get - non-zero: read the server reply and return kftp_get_response()'s
 *          result; zero: return 0 right after writing.
 *
 * Returns -1 when the control socket does not become writable.
 */
135 static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
137 if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
// NOTE(review): the netwrite() result is ignored, so a short or failed write
// goes unnoticed.
138 netwrite(ftp->ctrl_fd, cmd, strlen(cmd));
139 return is_get? kftp_get_response(ftp) : 0;
/*
 * Issue PASV and record the data-connection endpoint announced by the server
 * in ftp->pasv_ip and ftp->pasv_port.
 *
 * Returns -1 when the reply contains no '('.
 */
142 static int kftp_pasv_prep(knetFile *ftp)
// NOTE(review): the result of the PASV exchange is not checked before
// ftp->response is scanned.
146 kftp_send_cmd(ftp, "PASV\r\n", 1);
// Find the opening parenthesis that precedes the six comma-separated numbers.
147 for (p = ftp->response; *p && *p != '('; ++p);
148 if (*p != '(') return -1;
// NOTE(review): the sscanf() match count is not checked; a malformed reply
// leaves some v[] entries unset.
150 sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]);
// The first four numbers are the address bytes, stored one int each.
151 memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
// Port = high byte (v[4]) * 256 + low byte (v[5]); `<<` binds tighter than `&`.
152 ftp->pasv_port = (v[4]<<8&0xff00) + v[5];
/*
 * Open the FTP data connection to the endpoint stored in ftp->pasv_ip and
 * ftp->pasv_port, placing the resulting socket in ftp->fd.
 *
 * Returns -1 when socket_connect() fails. A zero pasv_port is taken to mean
 * that kftp_pasv_prep() has not run, and is reported on stderr.
 */
157 static int kftp_pasv_connect(knetFile *ftp)
159 char host[80], port[10];
160 if (ftp->pasv_port == 0) {
161 fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
// Render the endpoint as dotted-quad text and a decimal port for socket_connect().
164 sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
165 sprintf(port, "%d", ftp->pasv_port);
166 ftp->fd = socket_connect(host, port);
167 if (ftp->fd == -1) return -1;
/*
 * Open the FTP control connection to ftp->host:ftp->port, log in as
 * "anonymous" and select binary transfer mode (TYPE I).
 *
 * Returns -1 when the control socket cannot be connected.
 * NOTE(review): the replies to the greeting, USER, PASS and TYPE exchanges
 * are read but their codes are not inspected.
 */
171 int kftp_connect(knetFile *ftp)
173 ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
174 if (ftp->ctrl_fd == -1) return -1;
// Read the reply the server sends on connect, before any command is issued.
175 kftp_get_response(ftp);
176 kftp_send_cmd(ftp, "USER anonymous\r\n", 1);
177 kftp_send_cmd(ftp, "PASS kftp@\r\n", 1);
178 kftp_send_cmd(ftp, "TYPE I\r\n", 1);
/*
 * Close the current FTP control connection if one is open, then establish a
 * fresh one. Returns whatever kftp_connect() returns.
 */
182 int kftp_reconnect(knetFile *ftp)
184 if (ftp->ctrl_fd != -1) {
185 netclose(ftp->ctrl_fd);
189 return kftp_connect(ftp);
/*
 * Build a knetFile from an "ftp://host/path" URL.
 *
 * fn   - URL; must start with "ftp://" and contain a '/' after the host part.
 * mode - open mode; a 'c' anywhere in it sets ->no_reconnect.
 *
 * Returns 0 when fn is not an acceptable FTP URL.
 * The returned object owns heap copies of the host, the port and the
 * prebuilt RETR command.
 */
192 // initialize ->type, ->host and ->retr
193 knetFile *kftp_parse_url(const char *fn, const char *mode)
198 if (strstr(fn, "ftp://") != fn) return 0;
// Skip the 6-character scheme prefix and scan to the first '/' (start of path).
199 for (p = (char*)fn + 6; *p && *p != '/'; ++p);
200 if (*p != '/') return 0;
// NOTE(review): the calloc()/strdup() results in this function are used
// without a null check.
202 fp = calloc(1, sizeof(knetFile));
203 fp->type = KNF_TYPE_FTP;
// The port is always "21"; a port given in the URL is not parsed out.
205 fp->port = strdup("21");
// l is presumably the host length (p - fn - 6) - confirm. The extra
// zero-filled byte keeps the strncpy() result NUL-terminated.
206 fp->host = calloc(l + 1, 1);
207 if (strchr(mode, 'c')) fp->no_reconnect = 1;
208 strncpy(fp->host, fn + 6, l);
// +8 = "RETR " (5) + "\r\n" (2) + terminating NUL (1).
209 fp->retr = calloc(strlen(p) + 8, 1);
210 sprintf(fp->retr, "RETR %s\r\n", p);
211 fp->seek_offset = -1;
/*
 * Start an FTP transfer of the file described by fp: send REST with
 * fp->offset, send the prebuilt RETR command, open the data connection and
 * read the server's reply.
 */
214 // place ->fd at offset off
215 int kftp_connect_file(knetFile *fp)
// With no_reconnect set, a reply is read from the control connection first
// (presumably one left pending by the previous transfer - confirm).
220 if (fp->no_reconnect) kftp_get_response(fp);
// REST sets the byte offset at which the next RETR starts.
225 sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
226 kftp_send_cmd(fp, tmp, 1);
// RETR is sent without waiting for its reply (is_get = 0); the reply is read
// below, after the data connection has been opened.
228 kftp_send_cmd(fp, fp->retr, 0);
// NOTE(review): the kftp_pasv_connect() result is not checked here.
229 kftp_pasv_connect(fp);
230 ret = kftp_get_response(fp);
232 fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
241 /**************************
242 * HTTP specific routines *
243 **************************/
/*
 * Build a knetFile from an "http://host[:port]/path" URL, honouring the
 * http_proxy environment variable.
 *
 * fn - URL; must start with "http://", otherwise 0 is returned.
 *
 * Fields filled in: ->http_host (host part of the URL, used for the Host
 * header), ->host and ->port (endpoint to connect to), ->path (request
 * target), ->type, ->fd, ->ctrl_fd and ->seek_offset.
 */
245 knetFile *khttp_parse_url(const char *fn, const char *mode)
250 if (strstr(fn, "http://") != fn) return 0;
// Skip the 7-character scheme prefix; p ends on the first '/' or on the NUL.
252 for (p = (char*)fn + 7; *p && *p != '/'; ++p);
// NOTE(review): allocation results in this function are used unchecked.
254 fp = calloc(1, sizeof(knetFile));
255 fp->http_host = calloc(l + 1, 1);
256 strncpy(fp->http_host, fn + 7, l);
257 fp->http_host[l] = 0;
// Split "host:port" in place; afterwards q points at the port text, or at the
// terminating NUL when no port was given.
258 for (q = fp->http_host; *q && *q != ':'; ++q);
259 if (*q == ':') *q++ = 0;
261 proxy = getenv("http_proxy");
262 // set ->host, ->port and ->path
// Direct connection: connect to the URL's own host; port defaults to "80",
// path defaults to "/".
264 fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
265 fp->port = strdup(*q? q : "80");
266 fp->path = strdup(*p? p : "/");
// Proxy connection: connect to the proxy (with an optional "http://" prefix
// removed) and request the full URL.
// NOTE(review): only ':' is split off, so anything after the port in the
// proxy string (e.g. a trailing '/') ends up in ->port.
268 fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
269 for (q = fp->host; *q && *q != ':'; ++q);
270 if (*q == ':') *q++ = 0;
271 fp->port = strdup(*q? q : "80");
272 fp->path = strdup(fn);
274 fp->type = KNF_TYPE_HTTP;
// No socket is open yet.
275 fp->ctrl_fd = fp->fd = -1;
276 fp->seek_offset = -1;
/*
 * (Re)open the HTTP connection for fp and position the stream at fp->offset.
 *
 * Sends an HTTP/1.0 GET with a "Range: bytes=<offset>-" header, reads the
 * response header one byte at a time and parses the status code. If the
 * server answers 200 to a non-zero offset, the first fp->offset bytes of the
 * body are read and discarded. Codes other than 200 and 206 are reported on
 * stderr.
 */
280 int khttp_connect_file(knetFile *fp)
284 if (fp->fd != -1) netclose(fp->fd);
// NOTE(review): confirm that a failed connect (-1) is handled before the
// descriptor is passed to netwrite() below.
285 fp->fd = socket_connect(fp->host, fp->port);
286 buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
// Assemble the request; l tracks the number of bytes written into buf.
287 l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
289 l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
290 l += sprintf(buf + l, "\r\n");
291 netwrite(fp->fd, buf, l);
// NOTE(review): confirm the header read is bounded by the 64KB buffer size.
293 while (netread(fp->fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency
// The header ends at the first empty line, i.e. the byte sequence CR LF CR LF.
294 if (buf[l] == '\n' && l >= 3)
295 if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
299 if (l < 14) { // premature header
// Offset 8 skips the 8-character protocol version at the start of the status line.
304 ret = strtol(buf + 8, &p, 0); // HTTP return code
305 if (ret == 200 && fp->offset) { // 200 (complete result); then skip beginning of the file
306 off_t rest = fp->offset;
// Discard in chunks of at most 64KB, reusing buf as scratch space. This `l`
// shadows the request/header length used above.
308 off_t l = rest < 0x10000? rest : 0x10000;
// NOTE(review): if my_netread() returns 0 (e.g. the peer closed early), rest
// does not shrink - confirm the enclosing loop cannot spin forever.
309 rest -= my_netread(fp->fd, buf, l);
311 } else if (ret != 206 && ret != 200) {
313 fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
323 /********************
325 ********************/
/*
 * Open fn for reading and return a knetFile handle.
 *
 * fn   - "ftp://..." URL, "http://..." URL, or (anything else) a local path.
 * mode - must start with 'r'; it is also forwarded to the URL parsers.
 *
 * Returns 0 when the URL parser rejects fn.
 */
327 knetFile *knet_open(const char *fn, const char *mode)
330 if (mode[0] != 'r') {
// NOTE(review): the message tag says "kftp_open" although this is knet_open.
331 fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
// FTP: parse the URL, log in on the control connection, then start the transfer.
334 if (strstr(fn, "ftp://") == fn) {
335 fp = kftp_parse_url(fn, mode);
336 if (fp == 0) return 0;
337 if (kftp_connect(fp) == -1) {
341 kftp_connect_file(fp);
// HTTP: parse the URL and issue the request.
342 } else if (strstr(fn, "http://") == fn) {
343 fp = khttp_parse_url(fn, mode);
344 if (fp == 0) return 0;
345 khttp_connect_file(fp);
346 } else { // local file
// Two alternative open() calls, with and without O_BINARY (presumably
// selected by a platform conditional - confirm).
348 int fd = open(fn, O_RDONLY | O_BINARY);
350 int fd = open(fn, O_RDONLY);
356 fp = (knetFile*)calloc(1, sizeof(knetFile));
357 fp->type = KNF_TYPE_LOCAL;
// A handle whose descriptor is still -1 means the open/connect step failed.
361 if (fp && fp->fd == -1) {
/*
 * Create a knetFile of type KNF_TYPE_LOCAL for an already-open descriptor.
 *
 * fd   - descriptor supplied by the caller.
 * mode - open mode string.
 */
368 knetFile *knet_dopen(int fd, const char *mode)
// NOTE(review): the calloc() result is dereferenced on the next line without
// a null check.
370 knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
371 fp->type = KNF_TYPE_LOCAL;
/*
 * Read up to len bytes from fp into buf.
 *
 * Returns 0 immediately when fp has no open descriptor (fd == -1).
 * For FTP and HTTP handles that are not ready (is_ready == 0) the transfer is
 * (re)started first. Local files are read with read(); remote ones with
 * my_netread().
 */
376 off_t knet_read(knetFile *fp, void *buf, off_t len)
379 if (fp->fd == -1) return 0;
380 if (fp->type == KNF_TYPE_FTP) {
381 if (fp->is_ready == 0) {
// Unless reconnects are disabled, rebuild the control connection before
// restarting the transfer.
382 if (!fp->no_reconnect) kftp_reconnect(fp);
383 kftp_connect_file(fp);
385 } else if (fp->type == KNF_TYPE_HTTP) {
386 if (fp->is_ready == 0)
387 khttp_connect_file(fp);
389 if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
// rest: bytes still wanted; curr: result of the latest read().
390 off_t rest = len, curr;
392 curr = read(fp->fd, buf + l, rest);
// NOTE(review): only 0 (end of file) is tested; a -1 error result would be
// added to l and subtracted from rest below.
393 if (curr == 0) break;
394 l += curr; rest -= curr;
396 } else l = my_netread(fp->fd, buf, len);
/*
 * Reposition fp.
 *
 * off, whence - as for lseek(). For FTP/HTTP handles only SEEK_SET is
 *               accepted; other values are reported on stderr.
 *
 * Returns 0 without doing anything when asked to SEEK_SET to the current
 * offset.
 */
401 int knet_seek(knetFile *fp, off_t off, int whence)
// Seeking to the position we are already at is a no-op.
403 if (whence == SEEK_SET && off == fp->offset) return 0;
// Local files delegate to lseek().
404 if (fp->type == KNF_TYPE_LOCAL) {
405 off_t offset = lseek(fp->fd, off, whence);
412 } else if (fp->type == KNF_TYPE_FTP || fp->type == KNF_TYPE_HTTP) {
413 if (whence != SEEK_SET) { // FIXME: we can surely allow SEEK_CUR and SEEK_END in future
414 fprintf(stderr, "[knet_seek] only SEEK_SET is supported for FTP/HTTP. Offset is unchanged.\n");
/*
 * Close a knetFile: shut the control and data descriptors and free the
 * strings owned by the handle. A null fp is accepted and returns 0.
 */
424 int knet_close(knetFile *fp)
426 if (fp == 0) return 0;
427 if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
// Local descriptors are closed with close(), sockets with netclose().
429 if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
430 else netclose(fp->fd);
// Fields unused by a given handle type are freed as well; this is safe when
// they are null, since free(NULL) does nothing.
432 free(fp->host); free(fp->port);
433 free(fp->response); free(fp->retr); // FTP specific
434 free(fp->path); free(fp->http_host); // HTTP specific
448 buf = calloc(0x100000, 1);
450 fp = knet_open("knetfile.c", "r");
451 knet_seek(fp, 1000, SEEK_SET);
452 } else if (type == 1) { // NCBI FTP, large file
453 fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
454 knet_seek(fp, 2500000000ll, SEEK_SET);
455 l = knet_read(fp, buf, 255);
456 } else if (type == 2) {
457 fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
458 knet_seek(fp, 1000, SEEK_SET);
459 } else if (type == 3) {
460 fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
461 knet_seek(fp, 1000, SEEK_SET);
462 } else if (type == 4) {
463 fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
464 knet_read(fp, buf, 10000);
465 knet_seek(fp, 20000, SEEK_SET);
466 knet_seek(fp, 10000, SEEK_SET);
467 l = knet_read(fp, buf+10000, 10000000) + 10000;
469 if (type != 4 && type != 1) {
470 knet_read(fp, buf, 255);
473 } else write(fileno(stdout), buf, l);