+/* The MIT License
+
+ Copyright (c) 2008 Genome Research Ltd (GRL).
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3@sanger.ac.uk> */
+
+/* I will probably not do socket programming in the next few years, and
+ therefore I have decided to heavily annotate this file, for Linux and
+ Windows alike. -lh3 */
+
#include <time.h>
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
-#ifdef _WIN32
-#include <winsock.h>
-#else
+#ifndef _WIN32
#include <netdb.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include "knetfile.h"
+/* In winsock.h, the type of a socket is SOCKET, which is: "typedef
+ * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
+ * integer -1. In knetfile.c, I use "int" for socket type
+ * throughout. This should be improved to avoid confusion.
+ *
+ * In Linux/Mac, recv() and read() do almost the same thing. You can see
+ * in the header file that netread() is simply an alias of read(). In
+ * Windows, however, they are different and using recv() is mandatory.
+ */
+
+/* This function tests if the file descriptor is ready for reading (or
+ * for writing if is_read==0). */
static int socket_wait(int fd, int is_read)
{
fd_set fds, *fdr = 0, *fdw = 0;
if (is_read) fdr = &fds;
else fdw = &fds;
ret = select(fd+1, fdr, fdw, 0, &tv);
+#ifndef _WIN32
if (ret == -1) perror("select");
+#else
+ if (ret == 0)
+ fprintf(stderr, "select time-out\n");
+ else if (ret == SOCKET_ERROR)
+ fprintf(stderr, "select: %d\n", WSAGetLastError());
+#endif
return ret;
}
#ifndef _WIN32
+/* This function does not work with Windows due to the lack of
+ * getaddrinfo() in winsock. It is adapted from an example in "Beej's
+ * Guide to Network Programming" (http://beej.us/guide/bgnet/). */
static int socket_connect(const char *host, const char *port)
{
#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)
memset(&hints, 0, sizeof(struct addrinfo));
hints.ai_family = AF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
+ /* In Unix/Mac, getaddrinfo() is the most convenient way to get
+ * server information. */
if (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");
if ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");
+ /* The following two setsockopt() calls are used by ftplib
+ * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
+ * are necessary. */
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");
if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("setsockopt");
if (connect(fd, res->ai_addr, res->ai_addrlen) != 0) __err_connect("connect");
return fd;
}
#else
+/* MinGW's printf has problems with "%lld", so 64-bit integers are
+ * formatted by hand. */
+
+/* Write the decimal representation of x into buf and return buf.
+ * buf must have room for at least 21 bytes: up to 19 digits, an
+ * optional leading '-', and the terminating NUL. */
+char *int64tostr(char *buf, int64_t x)
+{
+    /* Convert through the unsigned magnitude so that negative values
+     * (including the most negative one) still yield valid digits. */
+    unsigned long long mag = x < 0 ? 0ULL - (unsigned long long)x
+                                   : (unsigned long long)x;
+    int len = 0, lo, hi;
+
+    /* Emit the digits least-significant first; zero still emits "0". */
+    do {
+        buf[len++] = (char)('0' + mag % 10);
+        mag /= 10;
+    } while (mag);
+    if (x < 0) buf[len++] = '-';
+    buf[len] = 0;
+
+    /* The string was built backwards; reverse it in place. */
+    for (lo = 0, hi = len - 1; lo < hi; ++lo, --hi) {
+        char c = buf[lo];
+        buf[lo] = buf[hi];
+        buf[hi] = c;
+    }
+    return buf;
+}
+
+/* Parse the run of decimal digits at the start of buf and return its
+ * value. Parsing stops at the first non-digit character (for example a
+ * trailing CR/LF or space), so an empty or non-numeric string yields 0.
+ * No sign handling and no overflow check are performed. */
+int64_t strtoint64(const char *buf)
+{
+    int64_t x = 0;
+
+    /* Only fold in real digits; anything else ends the number. */
+    while (*buf >= '0' && *buf <= '9') {
+        x = x * 10 + (*buf - '0');
+        ++buf;
+    }
+    return x;
+}
+/* In windows, the first thing is to establish the TCP connection. */
int knet_win32_init()
{
WSADATA wsaData;
{
WSACleanup();
}
+/* A slightly modified version of the following function also works on
+ * Mac (and presumably Linux). However, this function is not stable on
+ * my Mac. It sometimes works fine but sometimes does not. Therefore for
+ * non-Windows OS, I do not use this one. */
static SOCKET socket_connect(const char *host, const char *port)
{
-#define __err_connect(func) do { perror(func); return -1; } while (0)
+#define __err_connect(func) \
+ do { \
+ fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
+ return -1; \
+ } while (0)
int on = 1;
SOCKET fd;
static off_t my_netread(int fd, void *buf, off_t len)
{
off_t rest = len, curr, l = 0;
+ /* recv() and read() may not read the required length of data with
+ * one call. They have to be called repeatedly. */
while (rest) {
if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
curr = netread(fd, buf + l, rest);
+ /* According to the glibc manual, section 13.2, a zero returned
+ * value indicates end-of-file (EOF), which should mean that
+ * read() will not return zero if EOF has not been met but data
+ * are not immediately available. */
if (curr == 0) break;
l += curr; rest -= curr;
}
static int kftp_get_response(knetFile *ftp)
{
+#ifndef _WIN32
unsigned char c;
+#else
+ char c;
+#endif
int n = 0;
char *p;
if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
ftp->ctrl_fd = -1;
}
netclose(ftp->fd);
+ ftp->fd = -1;
return kftp_connect(ftp);
}
-// initialize ->type, ->host and ->retr
+// initialize ->type, ->host, ->retr and ->size
knetFile *kftp_parse_url(const char *fn, const char *mode)
{
knetFile *fp;
fp = calloc(1, sizeof(knetFile));
fp->type = KNF_TYPE_FTP;
fp->fd = -1;
+ /* the Linux/Mac version of socket_connect() also recognizes a port
+ * like "ftp", but the Windows version does not. */
fp->port = strdup("21");
fp->host = calloc(l + 1, 1);
if (strchr(mode, 'c')) fp->no_reconnect = 1;
strncpy(fp->host, fn + 6, l);
fp->retr = calloc(strlen(p) + 8, 1);
sprintf(fp->retr, "RETR %s\r\n", p);
- fp->seek_offset = -1;
+ fp->size_cmd = calloc(strlen(p) + 8, 1);
+ sprintf(fp->size_cmd, "SIZE %s\r\n", p);
+ fp->seek_offset = 0;
return fp;
}
// place ->fd at offset off
int kftp_connect_file(knetFile *fp)
{
int ret;
+ long long file_size;
if (fp->fd != -1) {
netclose(fp->fd);
if (fp->no_reconnect) kftp_get_response(fp);
}
kftp_pasv_prep(fp);
- if (fp->offset) {
+ kftp_send_cmd(fp, fp->size_cmd, 1);
+#ifndef _WIN32
+ if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
+ {
+ fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
+ return -1;
+ }
+#else
+ const char *p = fp->response;
+ while (*p != ' ') ++p;
+ while (*p < '0' || *p > '9') ++p;
+ file_size = strtoint64(p);
+#endif
+ fp->file_size = file_size;
+ if (fp->offset>=0) {
char tmp[32];
+#ifndef _WIN32
sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
+#else
+ strcpy(tmp, "REST ");
+ int64tostr(tmp + 5, fp->offset);
+ strcat(tmp, "\r\n");
+#endif
kftp_send_cmd(fp, tmp, 1);
}
kftp_send_cmd(fp, fp->retr, 0);
return 0;
}
+
/**************************
* HTTP specific routines *
**************************/
}
fp->type = KNF_TYPE_HTTP;
fp->ctrl_fd = fp->fd = -1;
- fp->seek_offset = -1;
+ fp->seek_offset = 0;
return fp;
}
fp->fd = socket_connect(fp->host, fp->port);
buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
- if (fp->offset)
- l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
+ l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
l += sprintf(buf + l, "\r\n");
netwrite(fp->fd, buf, l);
l = 0;
return -1;
}
ret = strtol(buf + 8, &p, 0); // HTTP return code
- if (ret == 200 && fp->offset) { // 200 (complete result); then skip beginning of the file
+ if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
off_t rest = fp->offset;
while (rest) {
off_t l = rest < 0x10000? rest : 0x10000;
khttp_connect_file(fp);
} else { // local file
#ifdef _WIN32
+ /* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
+ * be undefined on some systems, although it is defined on my
+ * Mac and the Linux I have tested on. */
int fd = open(fn, O_RDONLY | O_BINARY);
#else
int fd = open(fn, O_RDONLY);
return l;
}
-int knet_seek(knetFile *fp, off_t off, int whence)
+off_t knet_seek(knetFile *fp, int64_t off, int whence)
{
if (whence == SEEK_SET && off == fp->offset) return 0;
if (fp->type == KNF_TYPE_LOCAL) {
+ /* Be aware that lseek() returns the offset after seeking,
+ * while fseek() returns zero on success. */
off_t offset = lseek(fp->fd, off, whence);
if (offset == -1) {
- perror("lseek");
+ // Be silent, it is OK for knet_seek to fail when the file is streamed
+ // fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
return -1;
}
fp->offset = offset;
return 0;
- } else if (fp->type == KNF_TYPE_FTP || fp->type == KNF_TYPE_HTTP) {
- if (whence != SEEK_SET) { // FIXME: we can surely allow SEEK_CUR and SEEK_END in future
- fprintf(stderr, "[knet_seek] only SEEK_SET is supported for FTP/HTTP. Offset is unchanged.\n");
+ }
+ else if (fp->type == KNF_TYPE_FTP)
+ {
+ if (whence==SEEK_CUR)
+ fp->offset += off;
+ else if (whence==SEEK_SET)
+ fp->offset = off;
+ else if ( whence==SEEK_END)
+ fp->offset = fp->file_size+off;
+ fp->is_ready = 0;
+ return 0;
+ }
+ else if (fp->type == KNF_TYPE_HTTP)
+ {
+ if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
+ fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
+ errno = ESPIPE;
return -1;
}
- fp->offset = off;
+ if (whence==SEEK_CUR)
+ fp->offset += off;
+ else if (whence==SEEK_SET)
+ fp->offset = off;
fp->is_ready = 0;
return 0;
}
+ errno = EINVAL;
+ fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
return -1;
}
if (fp == 0) return 0;
if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
if (fp->fd != -1) {
+ /* On Linux/Mac, netclose() is an alias of close(), but on
+ * Windows, it is an alias of closesocket(). */
if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
else netclose(fp->fd);
}