From 1a1defe361c4f567e9bbc4ef4e3d55caba24f43a Mon Sep 17 00:00:00 2001 From: Don Armstrong Date: Mon, 21 Oct 2013 15:03:13 -0700 Subject: [PATCH] Imported Upstream version 0.49 --- nscd.c | 2610 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2610 insertions(+) create mode 100644 nscd.c diff --git a/nscd.c b/nscd.c new file mode 100644 index 0000000..23f5851 --- /dev/null +++ b/nscd.c @@ -0,0 +1,2610 @@ +/* This file is part of unscd, a complete nscd replacement. + * Copyright (C) 2007-2012 Denys Vlasenko. Licensed under the GPL version 2. + */ + +/* unscd is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * unscd is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You can download the GNU General Public License from the GNU website + * at http://www.gnu.org/ or write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ + +/* +Build instructions: + +gcc -Wall -Wunused-parameter -Os -o nscd nscd.c + +gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common + -Os -o nscd nscd.c + +Description: + +nscd problems are not exactly unheard of. Over the years, there were +quite a bit of bugs in it. This leads people to invent babysitters +which restart crashed/hung nscd. This is ugly. + +After looking at nscd source in glibc I arrived to the conclusion +that its design is contributing to this significantly. Even if nscd's +code is 100.00% perfect and bug-free, it can still suffer from bugs +in libraries it calls. + +As designed, it's a multithreaded program which calls NSS libraries. +These libraries are not part of libc, they may be provided +by third-party projects (samba, ldap, you name it). + +Thus nscd cannot be sure that libraries it calls do not have memory +or file descriptor leaks and other bugs. + +Since nscd is multithreaded program with single shared cache, +any resource leak in any NSS library has cumulative effect. +Even if a NSS library leaks a file descriptor 0.01% of the time, +this will make nscd crash or hang after some time. + +Of course bugs in NSS .so modules should be fixed, but meanwhile +I do want nscd which does not crash or lock up. + +So I went ahead and wrote a replacement. + +It is a single-threaded server process which offloads all NSS +lookups to worker children (not threads, but fully independent +processes). Cache hits are handled by parent. Only cache misses +start worker children. This design is immune against +resource leaks and hangs in NSS libraries. + +It is also many times smaller. + +Currently (v0.36) it emulates glibc nscd pretty closely +(handles same command line flags and config file), and is moderately tested. + +Please note that as of 2008-08 it is not in wide use (yet?). +If you have trouble compiling it, see an incompatibility with +"standard" one or experience hangs/crashes, please report it to +vda.linux@googlemail.com + +***********************************************************************/ + +/* Make struct ucred appear in sys/socket.h */ +#define _GNU_SOURCE 1 +/* For all good things */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +/* For INT_MAX */ +#include +/* For inet_ntoa (for debug build only) */ +#include + +/* + * 0.21 add SEGV reporting to worker + * 0.22 don't do freeaddrinfo() in GETAI worker, it's crashy + * 0.23 add parameter parsing + * 0.24 add conf file parsing, not using results yet + * 0.25 used some of conf file settings (not tested) + * 0.26 almost all conf file settings are wired up + * 0.27 a bit more of almost all conf file settings are wired up + * 0.28 optimized cache aging + * 0.29 implemented invalidate and shutdown options + * 0.30 fixed buglet (sizeof(ptr) != sizeof(array)) + * 0.31 reduced client_info by one member + * 0.32 fix nttl/size defaults; simpler check for worker child in main() + * 0.33 tweak includes so that it builds on my new machine (64-bit userspace); + * do not die on unknown service name, just warn + * ("services" is a new service we don't support) + * 0.34 create /var/run/nscd/nscd.pid pidfile like glibc nscd 2.8 does; + * delay setuid'ing itself to server-user after log and pidfile are open + * 0.35 readlink /proc/self/exe and use result if execing /proc/self/exe fails + * 0.36 excercise extreme paranoia handling server-user option; + * a little bit more verbose logging: + * L_DEBUG2 log level added, use debug-level 7 to get it + * 0.37 users reported over-zealous "detected change in /etc/passwd", + * apparently stat() returns random garbage in unused padding + * on some systems. Made the check less paranoid. + * 0.38 log POLLHUP better + * 0.39 log answers to client better, log getpwnam in the worker, + * pass debug level value down to worker. + * 0.40 fix handling of shutdown and invalidate requests; + * fix bug with answer written in several pieces + * 0.40.1 set hints.ai_socktype = SOCK_STREAM in GETAI request + * 0.41 eliminate double caching of two near-simultaneous identical requests - + * EXPERIMENTAL + * 0.42 execute /proc/self/exe by link name first (better comm field) + * 0.43 fix off-by-one error in setgroups + * 0.44 make -d[ddd] bump up debug - easier to explain to users + * how to produce detailed log (no nscd.conf tweaking) + * 0.45 Fix out-of-bounds array access and log/pid file permissions - + * thanks to Sebastian Krahmer (krahmer AT suse.de) + * 0.46 fix a case when we forgot to remove a future entry on worker failure + * 0.47 fix nscd without -d to not bump debug level + * 0.48 fix for changes in __nss_disable_nscd API in glibc-2.15 + * 0.49 minor tweaks to messages + */ +#define PROGRAM_VERSION "0.49" + +#define DEBUG_BUILD 1 + + +/* +** Generic helpers +*/ + +#define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0]))) + +#define NORETURN __attribute__ ((__noreturn__)) + + +#ifdef MY_CPU_HATES_CHARS +typedef int smallint; +#else +typedef signed char smallint; +#endif + + +enum { + L_INFO = (1 << 0), + L_DEBUG = ((1 << 1) * DEBUG_BUILD), + L_DEBUG2 = ((1 << 2) * DEBUG_BUILD), + L_DUMP = ((1 << 3) * DEBUG_BUILD), + L_ALL = 0xf, + D_DAEMON = (1 << 6), + D_STAMP = (1 << 5), +}; + +static smallint debug = D_DAEMON; + +static void verror(const char *s, va_list p, const char *strerr) +{ + char msgbuf[1024]; + int sz, rem, strerr_len; + struct timeval tv; + + sz = 0; + if (debug & D_STAMP) { + gettimeofday(&tv, NULL); + sz = sprintf(msgbuf, "%02u:%02u:%02u.%05u ", + (unsigned)((tv.tv_sec / (60*60)) % 24), + (unsigned)((tv.tv_sec / 60) % 60), + (unsigned)(tv.tv_sec % 60), + (unsigned)(tv.tv_usec / 10)); + } + rem = sizeof(msgbuf) - sz; + sz += vsnprintf(msgbuf + sz, rem, s, p); + rem = sizeof(msgbuf) - sz; /* can be negative after this! */ + + if (strerr) { + strerr_len = strlen(strerr); + if (rem >= strerr_len + 4) { /* ": STRERR\n\0" */ + msgbuf[sz++] = ':'; + msgbuf[sz++] = ' '; + strcpy(msgbuf + sz, strerr); + sz += strerr_len; + } + } + if (rem >= 2) { + msgbuf[sz++] = '\n'; + msgbuf[sz] = '\0'; + } + fflush(NULL); + fputs(msgbuf, stderr); +} + +static void error(const char *msg, ...) +{ + va_list p; + va_start(p, msg); + verror(msg, p, NULL); + va_end(p); +} + +static void error_and_die(const char *msg, ...) NORETURN; +static void error_and_die(const char *msg, ...) +{ + va_list p; + va_start(p, msg); + verror(msg, p, NULL); + va_end(p); + _exit(1); +} + +static void perror_and_die(const char *msg, ...) NORETURN; +static void perror_and_die(const char *msg, ...) +{ + va_list p; + va_start(p, msg); + /* Guard against ": Success" */ + verror(msg, p, errno ? strerror(errno) : NULL); + va_end(p); + _exit(1); +} + +static void nscd_log(int mask, const char *msg, ...) +{ + if (debug & mask) { + va_list p; + va_start(p, msg); + verror(msg, p, NULL); + va_end(p); + } +} + +#define log(lvl, ...) do { if (lvl) nscd_log(lvl, __VA_ARGS__); } while (0) + +#if DEBUG_BUILD +static void dump(const void *ptr, int len) +{ + char text[18]; + const unsigned char *buf; + char *p; + + if (!(debug & L_DUMP)) + return; + + buf = ptr; + while (len > 0) { + int chunk = ((len >= 16) ? 16 : len); + fprintf(stderr, + "%02x %02x %02x %02x %02x %02x %02x %02x " + "%02x %02x %02x %02x %02x %02x %02x %02x " + (16-chunk) * 5, + buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], + buf[8], buf[9],buf[10],buf[11],buf[12],buf[13],buf[14],buf[15] + ); + fprintf(stderr, "%*s", (16-chunk) * 3, ""); + len -= chunk; + p = text; + do { + unsigned char c = *buf++; + *p++ = (c >= 32 && c < 127 ? c : '.'); + } while (--chunk); + *p++ = '\n'; + *p = '\0'; + fputs(text, stderr); + } +} +#else +void dump(const void *ptr, int len); +#endif + +#define hex_dump(p,n) do { if (L_DUMP) dump(p,n); } while (0) + +static int xopen3(const char *pathname, int flags, int mode) +{ + int fd = open(pathname, flags, mode); + if (fd < 0) + perror_and_die("open"); + return fd; +} + +static void xpipe(int *fds) +{ + if (pipe(fds) < 0) + perror_and_die("pipe"); +} + +static void xexecve(const char *filename, char **argv, char **envp) NORETURN; +static void xexecve(const char *filename, char **argv, char **envp) +{ + execve(filename, argv, envp); + perror_and_die("cannot re-exec %s", filename); +} + +static void ndelay_on(int fd) +{ + int fl = fcntl(fd, F_GETFL); + if (fl < 0) + perror_and_die("F_GETFL"); + if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0) + perror_and_die("setting O_NONBLOCK"); +} + +static void close_on_exec(int fd) +{ + if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) + perror_and_die("setting FD_CLOEXEC"); +} + +static unsigned monotonic_ms(void) +{ + struct timespec ts; + if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts)) + perror_and_die("clock_gettime(MONOTONIC)"); + return ts.tv_sec * 1000 + ts.tv_nsec / 1000000; +} + +static unsigned strsize(const char *str) +{ + return strlen(str) + 1; +} + +static unsigned strsize_aligned4(const char *str) +{ + return (strlen(str) + 1 + 3) & (~3); +} + +static ssize_t safe_read(int fd, void *buf, size_t count) +{ + ssize_t n; + do { + n = read(fd, buf, count); + } while (n < 0 && errno == EINTR); + return n; +} + +static ssize_t full_read(int fd, void *buf, size_t len) +{ + ssize_t cc; + ssize_t total; + total = 0; + while (len) { + cc = safe_read(fd, buf, len); + if (cc < 0) + return cc; /* read() returns -1 on failure. */ + if (cc == 0) + break; + buf = ((char *)buf) + cc; + total += cc; + len -= cc; + } + return total; +} + +/* unused +static void xsafe_read(int fd, void *buf, size_t len) +{ + if (len != safe_read(fd, buf, len)) + perror_and_die("short read"); +} +static void xfull_read(int fd, void *buf, size_t len) +{ + if (len != full_read(fd, buf, len)) + perror_and_die("short read"); +} +*/ + +static ssize_t safe_write(int fd, const void *buf, size_t count) +{ + ssize_t n; + do { + n = write(fd, buf, count); + } while (n < 0 && errno == EINTR); + return n; +} + +static ssize_t full_write(int fd, const void *buf, size_t len) +{ + ssize_t cc; + ssize_t total; + + total = 0; + while (len) { + cc = safe_write(fd, buf, len); + if (cc < 0) + return cc; /* write() returns -1 on failure. */ + total += cc; + buf = ((const char *)buf) + cc; + len -= cc; + } + return total; +} + +static void xsafe_write(int fd, const void *buf, size_t count) +{ + if (count != safe_write(fd, buf, count)) + perror_and_die("short write of %ld bytes", (long)count); +} +static void xfull_write(int fd, const void *buf, size_t count) +{ + if (count != full_write(fd, buf, count)) + perror_and_die("short write of %ld bytes", (long)count); +} + +static void xmovefd(int from_fd, int to_fd) +{ + if (from_fd != to_fd) { + if (dup2(from_fd, to_fd) < 0) + perror_and_die("dup2"); + close(from_fd); + } +} + +static unsigned getnum(const char *str) +{ + if (str[0] >= '0' && str[0] <= '9') { + char *p; + unsigned long l = strtoul(str, &p, 10); + /* must not overflow int even after x1000 */ + if (!*p && l <= INT_MAX / 1000) + return l; + } + error_and_die("malformed or too big number '%s'", str); +}; + +static char *skip_whitespace(const char *s) +{ + /* NB: isspace('\0') returns 0 */ + while (isspace(*s)) ++s; + return (char *) s; +} + +static char *skip_non_whitespace(const char *s) +{ + while (*s && !isspace(*s)) ++s; + return (char *) s; +} + +static void *xmalloc(unsigned sz) +{ + void *p = malloc(sz); + if (!p) + error_and_die("out of memory"); + return p; +} + +static void *xzalloc(unsigned sz) +{ + void *p = xmalloc(sz); + memset(p, 0, sz); + return p; +} + +static void *xrealloc(void *p, unsigned size) +{ + p = realloc(p, size); + if (!p) + error_and_die("out of memory"); + return p; +} + +static const char *xstrdup(const char *str) +{ + const char *p = strdup(str); + if (!p) + error_and_die("out of memory"); + return p; +} + + +/* +** Config data +*/ + +enum { + SRV_PASSWD, + SRV_GROUP, + SRV_HOSTS, +}; + +static const char srv_name[3][7] = { + "passwd", + "group", + "hosts" +}; + +static struct { + const char *logfile; + const char *user; + smallint srv_enable[3]; + smallint check_files[3]; + unsigned pttl[3]; + unsigned nttl[3]; + unsigned size[3]; +} config = { + /* We try to closely mimic glibc nscd */ + .logfile = NULL, /* default is to not have a log file */ + .user = NULL, + .srv_enable = { 0, 0, 0 }, + .check_files = { 1, 1, 1 }, + .pttl = { 3600, 3600, 3600 }, + .nttl = { 20, 60, 20 }, + /* huh, what is the default cache size in glibc nscd? */ + .size = { 256 * 8 / 3, 256 * 8 / 3, 256 * 8 / 3 }, +}; + +static const char default_conffile[] = "/etc/nscd.conf"; +static const char *self_exe_points_to = "/proc/self/exe"; + + +/* +** Clients, workers machinery +*/ + +/* Header common to all requests */ +#define USER_REQ_STRUCT \ + uint32_t version; /* Version number of the daemon interface */ \ + uint32_t type; /* Service requested */ \ + uint32_t key_len; /* Key length */ + +typedef struct user_req_header { + USER_REQ_STRUCT +} user_req_header; + +enum { + NSCD_VERSION = 2, + MAX_USER_REQ_SIZE = 1024, + USER_HDR_SIZE = sizeof(user_req_header), + /* DNS queries time out after 20 seconds, + * we will allow for a bit more */ + WORKER_TIMEOUT_SEC = 30, + CLIENT_TIMEOUT_MS = 100, + SMALL_POLL_TIMEOUT_MS = 200, +}; + +typedef struct user_req { + union { + struct { /* as came from client */ + USER_REQ_STRUCT + }; + struct { /* when stored in cache, overlaps .version */ + unsigned refcount:8; + /* (timestamp24 * 256) == timestamp in ms */ + unsigned timestamp24:24; + }; + }; + char reqbuf[MAX_USER_REQ_SIZE - USER_HDR_SIZE]; +} user_req; + +/* Compile-time check for correct size */ +struct BUG_wrong_user_req_size { + char BUG_wrong_user_req_size[sizeof(user_req) == MAX_USER_REQ_SIZE ? 1 : -1]; +}; + +enum { + GETPWBYNAME, + GETPWBYUID, + GETGRBYNAME, + GETGRBYGID, + GETHOSTBYNAME, + GETHOSTBYNAMEv6, + GETHOSTBYADDR, + GETHOSTBYADDRv6, + SHUTDOWN, /* Shut the server down */ + GETSTAT, /* Get the server statistic */ + INVALIDATE, /* Invalidate one special cache */ + GETFDPW, + GETFDGR, + GETFDHST, + GETAI, + INITGROUPS, + GETSERVBYNAME, + GETSERVBYPORT, + GETFDSERV, + LASTREQ +}; +#if DEBUG_BUILD +static const char *const typestr[] = { + "GETPWBYNAME", /* done */ + "GETPWBYUID", /* done */ + "GETGRBYNAME", /* done */ + "GETGRBYGID", /* done */ + "GETHOSTBYNAME", /* done */ + "GETHOSTBYNAMEv6", /* done */ + "GETHOSTBYADDR", /* done */ + "GETHOSTBYADDRv6", /* done */ + "SHUTDOWN", /* done */ + "GETSTAT", /* info? */ + "INVALIDATE", /* done */ + /* won't do: nscd passes a name of shmem segment + * which client can map and "see" the db */ + "GETFDPW", + "GETFDGR", /* won't do */ + "GETFDHST", /* won't do */ + "GETAI", /* done */ + "INITGROUPS", /* done */ + "GETSERVBYNAME", /* prio 3 (no caching?) */ + "GETSERVBYPORT", /* prio 3 (no caching?) */ + "GETFDSERV" /* won't do */ +}; +#else +extern const char *const typestr[]; +#endif +static const smallint type_to_srv[] = { + [GETPWBYNAME ] = SRV_PASSWD, + [GETPWBYUID ] = SRV_PASSWD, + [GETGRBYNAME ] = SRV_GROUP, + [GETGRBYGID ] = SRV_GROUP, + [GETHOSTBYNAME ] = SRV_HOSTS, + [GETHOSTBYNAMEv6 ] = SRV_HOSTS, + [GETHOSTBYADDR ] = SRV_HOSTS, + [GETHOSTBYADDRv6 ] = SRV_HOSTS, + [GETAI ] = SRV_HOSTS, + [INITGROUPS ] = SRV_GROUP, +}; + +static int unsupported_ureq_type(unsigned type) +{ + if (type == GETAI) return 0; + if (type == INITGROUPS) return 0; + if (type == GETSTAT) return 1; + if (type > INVALIDATE) return 1; + return 0; +} + + +typedef struct client_info { + /* if client_fd != 0, we are waiting for the reply from worker + * on pfd[i].fd, and client_fd is saved client's fd + * (we need to put it back into pfd[i].fd later) */ + int client_fd; + unsigned bytecnt; /* bytes read from client */ + unsigned bufidx; /* buffer# in global client_buf[] */ + unsigned started_ms; + unsigned respos; /* response */ + user_req *resptr; /* response */ + user_req **cache_pp; /* cache entry address */ + user_req *ureq; /* request (points to client_buf[x]) */ +} client_info; + +static unsigned g_now_ms; +static int min_closed = INT_MAX; +static int cnt_closed = 0; +static int num_clients = 2; /* two listening sockets are "clients" too */ + +/* We read up to max_reqnum requests in parallel */ +static unsigned max_reqnum = 14; +static int next_buf; +static char (*client_buf)[MAX_USER_REQ_SIZE]; +static char *busy_cbuf; +static struct pollfd *pfd; +static client_info *cinfo; + +/* Request, response and cache data structures: + * + * cache[] (defined later): + * cacheline_t cache[cache_size] array, or in other words, + * user_req* cache[cache_size][8] array. + * Every client request is hashed, hash value determines which cache[x] + * will have the response stored in one of its 8 elements. + * Cache entries have this format: request, then padding to 32 bits, + * then the response. + * Addresses in cache[x][y] may be NULL or: + * (&client_buf[z]) & 1: the cache miss is in progress ("future entry"): + * "the data is not in the cache (yet), wait for it to appear" + * (&client_buf[z]) & 3: the cache miss is in progress and other clients + * also want the same data ("shared future entry") + * else (non-NULL but low two bits are 0): cached data in malloc'ed block + * + * Each of these is a [max_reqnum] sized array: + * pfd[i] - given to poll() to wait for requests and replies. + * .fd: first two pfd[i]: listening Unix domain sockets, else + * .fd: open fd to a client, for reading client's request, or + * .fd: open fd to a worker, to send request and get response back + * cinfo[i] - auxiliary client data for pfd[i] + * .client_fd: open fd to a client, in case we already had read its + * request and got a cache miss, and created a worker or + * wait for another client's worker. + * Otherwise, it's 0 and client's fd is in pfd[i].fd + * .bufidx: index in client_buf[] we store client's request in + * .bytecnt: size of the request + * .started_ms: used to time out unresponsive clients + * .respos: + * .resptr: + * .cache_pp: &cache[x][y] where the response is, or will be stored. + * .ureq: + * When a client has received its reply (or otherwise closed (timeout etc)), + * corresponding pfd[i] and cinfo[i] are removed by shifting [i+1], [i+2] etc + * elements down, so that both arrays never have free holes. + * [num_clients] is always the first free element. + * + * Each of these also is a [max_reqnum] sized array, but indexes + * do not correspond directly to pfd[i] and cinfo[i]: + * client_buf[n][MAX_USER_REQ_SIZE] - buffers we read client requests into + * busy_cbuf[n] - bool flags marking busy client_buf[] + */ +/* Possible reductions: + * fd, bufidx - uint8_t + * started_ms -> uint16_t started_s + * ureq - eliminate (derivable from bufidx?) + */ + +/* Are special bits 0? is it a true cached entry? */ +#define CACHED_ENTRY(p) ( ((long)(p) & 3) == 0 ) +/* Are special bits 11? is it a shared future cache entry? */ +#define CACHE_SHARED(p) ( ((long)(p) & 3) == 3 ) +/* Return a ptr with special bits cleared (used for accessing data) */ +#define CACHE_PTR(p) ( (void*) ((long)(p) & ~(long)3) ) +/* Return a ptr with special bits set to x1: make future cache entry ptr */ +#define MAKE_FUTURE_PTR(p) ( (void*) ((long)(p) | 1) ) +/* Modify ptr, set special bits to 11: shared future cache entry */ +#define MARK_PTR_SHARED(pp) ( *(long*)(pp) |= 3 ) + +static inline unsigned ureq_size(const user_req *ureq) +{ + return sizeof(user_req_header) + ureq->key_len; +} + +static unsigned cache_age(const user_req *ureq) +{ + if (!CACHED_ENTRY(ureq)) + return 0; + return (uint32_t) (g_now_ms - (ureq->timestamp24 << 8)); +} + +static void set_cache_timestamp(user_req *ureq) +{ + ureq->timestamp24 = g_now_ms >> 8; +} + +static int alloc_buf_no(void) +{ + int n = next_buf; + do { + int cur = next_buf; + next_buf = (next_buf + 1) % max_reqnum; + if (!busy_cbuf[cur]) { + busy_cbuf[cur] = 1; + return cur; + } + } while (next_buf != n); + error_and_die("no free bufs?!"); +} + +static inline void *bufno2buf(int i) +{ + return client_buf[i]; +} + +static void close_client(unsigned i) +{ + log(L_DEBUG, "closing client %u (fd %u,%u)", i, pfd[i].fd, cinfo[i].client_fd); + /* Paranoia. We had nasty bugs where client was closed twice. */ + if (pfd[i].fd == 0) //// + return; + close(pfd[i].fd); + if (cinfo[i].client_fd && cinfo[i].client_fd != pfd[i].fd) + close(cinfo[i].client_fd); + pfd[i].fd = 0; /* flag as unused (coalescing needs this) */ + busy_cbuf[cinfo[i].bufidx] = 0; + cnt_closed++; + if (i < min_closed) + min_closed = i; +} + + +/* +** nscd API <-> C API conversion +*/ + +typedef struct response_header { + uint32_t version_or_size; + int32_t found; + char body[0]; +} response_header; + +typedef struct initgr_response_header { + uint32_t version_or_size; + int32_t found; + int32_t ngrps; + /* code assumes gid_t == int32, let's check that */ + int32_t gid[sizeof(gid_t) == sizeof(int32_t) ? 0 : -1]; + /* char user_str[as_needed]; */ +} initgr_response_header; + +static initgr_response_header *obtain_initgroups(const char *username) +{ + struct initgr_response_header *resp; + struct passwd *pw; + enum { MAGIC_OFFSET = sizeof(*resp) / sizeof(int32_t) }; + unsigned sz; + int ngroups; + + pw = getpwnam(username); + if (!pw) { + resp = xzalloc(8); + resp->version_or_size = sizeof(*resp); + /*resp->found = 0;*/ + /*resp->ngrps = 0;*/ + goto ret; + } + + /* getgrouplist may be very expensive, it's much better to allocate + * a bit more than to run getgrouplist twice */ + ngroups = 128; + resp = NULL; + do { + sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups; + resp = xrealloc(resp, sz); + } while (getgrouplist(username, pw->pw_gid, (gid_t*) &resp->gid, &ngroups) == -1); + log(L_DEBUG, "ngroups=%d", ngroups); + + sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups; + /* resp = xrealloc(resp, sz); - why bother */ + resp->version_or_size = sz; + resp->found = 1; + resp->ngrps = ngroups; + ret: + return resp; +} + +typedef struct pw_response_header { + uint32_t version_or_size; + int32_t found; + int32_t pw_name_len; + int32_t pw_passwd_len; + int32_t pw_uid; + int32_t pw_gid; + int32_t pw_gecos_len; + int32_t pw_dir_len; + int32_t pw_shell_len; + /* char pw_name[pw_name_len]; */ + /* char pw_passwd[pw_passwd_len]; */ + /* char pw_gecos[pw_gecos_len]; */ + /* char pw_dir[pw_dir_len]; */ + /* char pw_shell[pw_shell_len]; */ +} pw_response_header; + +static pw_response_header *marshal_passwd(struct passwd *pw) +{ + char *p; + pw_response_header *resp; + unsigned pw_name_len; + unsigned pw_passwd_len; + unsigned pw_gecos_len; + unsigned pw_dir_len; + unsigned pw_shell_len; + unsigned sz = sizeof(*resp); + if (pw) { + sz += (pw_name_len = strsize(pw->pw_name)); + sz += (pw_passwd_len = strsize(pw->pw_passwd)); + sz += (pw_gecos_len = strsize(pw->pw_gecos)); + sz += (pw_dir_len = strsize(pw->pw_dir)); + sz += (pw_shell_len = strsize(pw->pw_shell)); + } + resp = xzalloc(sz); + resp->version_or_size = sz; + if (!pw) { + /*resp->found = 0;*/ + goto ret; + } + resp->found = 1; + resp->pw_name_len = pw_name_len; + resp->pw_passwd_len = pw_passwd_len; + resp->pw_uid = pw->pw_uid; + resp->pw_gid = pw->pw_gid; + resp->pw_gecos_len = pw_gecos_len; + resp->pw_dir_len = pw_dir_len; + resp->pw_shell_len = pw_shell_len; + p = (char*)(resp + 1); + strcpy(p, pw->pw_name); p += pw_name_len; + strcpy(p, pw->pw_passwd); p += pw_passwd_len; + strcpy(p, pw->pw_gecos); p += pw_gecos_len; + strcpy(p, pw->pw_dir); p += pw_dir_len; + strcpy(p, pw->pw_shell); p += pw_shell_len; + log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp); + ret: + return resp; +} + +typedef struct gr_response_header { + uint32_t version_or_size; + int32_t found; + int32_t gr_name_len; /* strlen(gr->gr_name) + 1; */ + int32_t gr_passwd_len; /* strlen(gr->gr_passwd) + 1; */ + int32_t gr_gid; /* gr->gr_gid */ + int32_t gr_mem_cnt; /* while (gr->gr_mem[gr_mem_cnt]) ++gr_mem_cnt; */ + /* int32_t gr_mem_len[gr_mem_cnt]; */ + /* char gr_name[gr_name_len]; */ + /* char gr_passwd[gr_passwd_len]; */ + /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */ + /* char gr_gid_str[as_needed]; - huh? */ + /* char orig_key[as_needed]; - needed?? I don't do this ATM... */ +/* + glibc adds gr_gid_str, but client doesn't get/use it: + writev(3, [{"\2\0\0\0\2\0\0\0\5\0\0\0", 12}, {"root\0", 5}], 2) = 17 + poll([{fd=3, events=POLLIN|POLLERR|POLLHUP, revents=POLLIN}], 1, 5000) = 1 + read(3, "\2\0\0\0\1\0\0\0\10\0\0\0\4\0\0\0\0\0\0\0\0\0\0\0", 24) = 24 + readv(3, [{"", 0}, {"root\0\0\0\0\0\0\0\0", 12}], 2) = 12 + read(3, NULL, 0) = 0 +*/ +} gr_response_header; + +static gr_response_header *marshal_group(struct group *gr) +{ + char *p; + gr_response_header *resp; + unsigned gr_mem_cnt; + unsigned sz = sizeof(*resp); + if (gr) { + sz += strsize(gr->gr_name); + sz += strsize(gr->gr_passwd); + gr_mem_cnt = 0; + while (gr->gr_mem[gr_mem_cnt]) { + sz += strsize(gr->gr_mem[gr_mem_cnt]); + gr_mem_cnt++; + } + /* for int32_t gr_mem_len[gr_mem_cnt]; */ + sz += gr_mem_cnt * sizeof(int32_t); + } + resp = xzalloc(sz); + resp->version_or_size = sz; + if (!gr) { + /*resp->found = 0;*/ + goto ret; + } + resp->found = 1; + resp->gr_name_len = strsize(gr->gr_name); + resp->gr_passwd_len = strsize(gr->gr_passwd); + resp->gr_gid = gr->gr_gid; + resp->gr_mem_cnt = gr_mem_cnt; + p = (char*)(resp + 1); +/* int32_t gr_mem_len[gr_mem_cnt]; */ + gr_mem_cnt = 0; + while (gr->gr_mem[gr_mem_cnt]) { + *(uint32_t*)p = strsize(gr->gr_mem[gr_mem_cnt]); + p += 4; + gr_mem_cnt++; + } +/* char gr_name[gr_name_len]; */ + strcpy(p, gr->gr_name); + p += strsize(gr->gr_name); +/* char gr_passwd[gr_passwd_len]; */ + strcpy(p, gr->gr_passwd); + p += strsize(gr->gr_passwd); +/* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */ + gr_mem_cnt = 0; + while (gr->gr_mem[gr_mem_cnt]) { + strcpy(p, gr->gr_mem[gr_mem_cnt]); + p += strsize(gr->gr_mem[gr_mem_cnt]); + gr_mem_cnt++; + } + log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp); + ret: + return resp; +} + +typedef struct hst_response_header { + uint32_t version_or_size; + int32_t found; + int32_t h_name_len; + int32_t h_aliases_cnt; + int32_t h_addrtype; /* AF_INET or AF_INET6 */ + int32_t h_length; /* 4 or 16 */ + int32_t h_addr_list_cnt; + int32_t error; + /* char h_name[h_name_len]; - we pad it to 4 bytes */ + /* uint32_t h_aliases_len[h_aliases_cnt]; */ + /* char h_addr_list[h_addr_list_cnt][h_length]; - every one is the same size [h_length] (4 or 16) */ + /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */ +} hst_response_header; + +static hst_response_header *marshal_hostent(struct hostent *h) +{ + char *p; + hst_response_header *resp; + unsigned h_name_len; + unsigned h_aliases_cnt; + unsigned h_addr_list_cnt; + unsigned sz = sizeof(*resp); + if (h) { +/* char h_name[h_name_len] */ + sz += h_name_len = strsize_aligned4(h->h_name); + h_addr_list_cnt = 0; + while (h->h_addr_list[h_addr_list_cnt]) { + h_addr_list_cnt++; + } +/* char h_addr_list[h_addr_list_cnt][h_length] */ + sz += h_addr_list_cnt * h->h_length; + h_aliases_cnt = 0; + while (h->h_aliases[h_aliases_cnt]) { +/* char h_aliases[h_aliases_cnt][h_aliases_len[i]] */ + sz += strsize(h->h_aliases[h_aliases_cnt]); + h_aliases_cnt++; + } +/* uint32_t h_aliases_len[h_aliases_cnt] */ + sz += h_aliases_cnt * 4; + } + resp = xzalloc(sz); + resp->version_or_size = sz; + if (!h) { + /*resp->found = 0;*/ + resp->error = HOST_NOT_FOUND; + goto ret; + } + resp->found = 1; + resp->h_name_len = h_name_len; + resp->h_aliases_cnt = h_aliases_cnt; + resp->h_addrtype = h->h_addrtype; + resp->h_length = h->h_length; + resp->h_addr_list_cnt = h_addr_list_cnt; + /*resp->error = 0;*/ + p = (char*)(resp + 1); +/* char h_name[h_name_len]; */ + strcpy(p, h->h_name); + p += h_name_len; +/* uint32_t h_aliases_len[h_aliases_cnt]; */ + h_aliases_cnt = 0; + while (h->h_aliases[h_aliases_cnt]) { + *(uint32_t*)p = strsize(h->h_aliases[h_aliases_cnt]); + p += 4; + h_aliases_cnt++; + } +/* char h_addr_list[h_addr_list_cnt][h_length]; */ + h_addr_list_cnt = 0; + while (h->h_addr_list[h_addr_list_cnt]) { + memcpy(p, h->h_addr_list[h_addr_list_cnt], h->h_length); + p += h->h_length; + h_addr_list_cnt++; + } +/* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */ + h_aliases_cnt = 0; + while (h->h_aliases[h_aliases_cnt]) { + strcpy(p, h->h_aliases[h_aliases_cnt]); + p += strsize(h->h_aliases[h_aliases_cnt]); + h_aliases_cnt++; + } + log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp); + ret: + return resp; +} + +/* Reply to addrinfo query */ +typedef struct ai_response_header { + uint32_t version_or_size; + int32_t found; + int32_t naddrs; + int32_t addrslen; + int32_t canonlen; + int32_t error; + /* char ai_addr[naddrs][4 or 16]; - addrslen bytes in total */ + /* char ai_family[naddrs]; - AF_INET[6] each (determines ai_addr[i] length) */ + /* char ai_canonname[canonlen]; */ +} ai_response_header; + +static ai_response_header *obtain_addrinfo(const char *hostname) +{ + struct addrinfo hints; + struct addrinfo *ai; + struct addrinfo *ap; + ai_response_header *resp; + char *p, *family; + int err; + unsigned sz; + unsigned naddrs = 0; + unsigned addrslen = 0; + unsigned canonlen = 0; + + memset(&hints, 0, sizeof(hints)); + hints.ai_flags = AI_CANONNAME; + /* kills dups (one for each possible SOCK_xxx) */ + /* this matches glibc behavior */ + hints.ai_socktype = SOCK_STREAM; + ai = NULL; /* on failure getaddrinfo may leave it as-is */ + err = getaddrinfo(hostname, NULL, &hints, &ai); + + sz = sizeof(*resp); + if (!err) { + if (ai->ai_canonname) + sz += canonlen = strsize(ai->ai_canonname); + ap = ai; + do { + naddrs++; + addrslen += (ap->ai_family == AF_INET ? 4 : 16); + ap = ap->ai_next; + } while (ap); + sz += naddrs + addrslen; + } + resp = xzalloc(sz); + resp->version_or_size = sz; + resp->error = err; + if (err) { + /*resp->found = 0;*/ + goto ret; + } + resp->found = 1; + resp->naddrs = naddrs; + resp->addrslen = addrslen; + resp->canonlen = canonlen; + p = (char*)(resp + 1); + family = p + addrslen; + ap = ai; + do { +/* char ai_family[naddrs]; */ + *family++ = ap->ai_family; +/* char ai_addr[naddrs][4 or 16]; */ + if (ap->ai_family == AF_INET) { + memcpy(p, &(((struct sockaddr_in*)(ap->ai_addr))->sin_addr), 4); + p += 4; + } else { + memcpy(p, &(((struct sockaddr_in6*)(ap->ai_addr))->sin6_addr), 16); + p += 16; + } + ap = ap->ai_next; + } while (ap); +/* char ai_canonname[canonlen]; */ + if (ai->ai_canonname) + strcpy(family, ai->ai_canonname); + log(L_DEBUG, "sz:%u realsz:%u", sz, family + strsize(ai->ai_canonname) - (char*)resp); + ret: + /* glibc 2.3.6 segfaults here sometimes + * (maybe my mistake, fixed by "ai = NULL;" above). + * Since we are in worker and are going to exit anyway, why bother? */ + /*freeaddrinfo(ai);*/ + return resp; +} + + +/* +** Cache management +*/ + +/* one 8-element "cacheline" */ +typedef user_req *cacheline_t[8]; +static unsigned cache_size; +/* Points to cacheline_t cache[cache_size] array, or in other words, + * points to user_req* cache[cache_size][8] array */ +static cacheline_t *cache; +static unsigned cached_cnt; +static unsigned cache_access_cnt = 1; /* prevent division by zero */ +static unsigned cache_hit_cnt = 1; +static unsigned last_age_time; +static unsigned aging_interval_ms; +static unsigned min_aging_interval_ms; + +static response_header *ureq_response(user_req *ureq) +{ + /* Skip query part, find answer part + * (answer is 32-bit aligned) */ + return (void*) ((char*)ureq + ((ureq_size(ureq) + 3) & ~3)); +} + +/* This hash is supposed to be good for short textual data */ +static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash) +{ + uint8_t *key = p; + do { + hash = (32 * hash + hash) ^ *key++; + } while (--sz); + return hash; +} + +static void free_refcounted_ureq(user_req **ureqp) +{ + user_req *ureq = *ureqp; + + if (!CACHED_ENTRY(ureq)) + return; + + if (ureq->refcount) { + ureq->refcount--; + } else { + log(L_DEBUG2, "refcount == 0, free(%p)", ureq); + free(ureq); + } + *ureqp = NULL; +} + +static user_req **lookup_in_cache(user_req *ureq) +{ + user_req **cacheline; + int free_cache; + unsigned hash; + unsigned i; + unsigned ureq_sz = ureq_size(ureq); + + /* prevent overflow and division by zero */ + cache_access_cnt++; + if ((int)cache_access_cnt < 0) { + cache_access_cnt = (cache_access_cnt >> 1) + 1; + cache_hit_cnt = (cache_hit_cnt >> 1) + 1; + } + + hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type); + log(L_DEBUG2, "hash:%08x", hash); + hash = hash % cache_size; + cacheline = cache[hash]; + + free_cache = -1; + for (i = 0; i < 8; i++) { + user_req *cached = CACHE_PTR(cacheline[i]); + if (!cached) { + if (free_cache == -1) + free_cache = i; + continue; + } + /* ureq->version is always 2 and is reused in cache + * for other purposes, we need to skip it here */ + if (memcmp(&ureq->type, &cached->type, ureq_sz - offsetof(user_req, type)) == 0) { + log(L_DEBUG, "found in cache[%u][%u]", hash, i); + cache_hit_cnt++; + return &cacheline[i]; + } + } + + if (free_cache >= 0) { + cached_cnt++; + i = free_cache; + log(L_DEBUG, "not found, using free cache[%u][%u]", hash, i); + goto ret; + } + + unsigned oldest_idx = 0; + unsigned oldest_age = 0; + for (i = 0; i < 8; i++) { + unsigned age = cache_age(cacheline[i]); + if (age > oldest_age) { + oldest_age = age; + oldest_idx = i; + } + } + if (oldest_age == 0) { + /* All entries in cacheline are "future" entries! + * This is very unlikely, but we must still work correctly. + * We call this "fake cache entry". + * The data will be "cached" only for the duration + * of this client's request lifetime. + */ + log(L_DEBUG, "not found, and cache[%u] is full: using fake cache entry", hash); + return NULL; + } + i = oldest_idx; + log(L_DEBUG, "not found, freeing and reusing cache[%u][%u] (age %u)", hash, i, oldest_age); + free_refcounted_ureq(&cacheline[i]); + + ret: + cacheline[i] = MAKE_FUTURE_PTR(ureq); + return &cacheline[i]; +} + +static void age_cache(unsigned free_all, int srv) +{ + user_req **cp = *cache; + int i; + unsigned sv = cached_cnt; + + log(L_DEBUG, "aging cache, srv:%d, free_all:%u", srv, free_all); + if (srv == -1 || free_all) + aging_interval_ms = INT_MAX; + i = cache_size * 8; + do { + user_req *cached = *cp; + if (CACHED_ENTRY(cached) && cached != NULL) { + int csrv = type_to_srv[cached->type]; + if (srv == -1 || srv == csrv) { + if (free_all) { + cached_cnt--; + free_refcounted_ureq(cp); + } else { + unsigned age = cache_age(cached); + response_header *resp = ureq_response(cached); + unsigned ttl = (resp->found ? config.pttl : config.nttl)[csrv]; + if (age >= ttl) { + log(L_DEBUG2, "freeing: age %u positive %d ttl %u", age, resp->found, ttl); + cached_cnt--; + free_refcounted_ureq(cp); + } else if (srv == -1) { + ttl -= age; + if (aging_interval_ms > ttl) + aging_interval_ms = ttl; + } + } + } + } + cp++; + } while (--i); + log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt); + log(L_DEBUG2, "aging interval now %u ms", aging_interval_ms); +} + + +/* +** Worker child +*/ + +/* Spawns a worker and feeds it with user query on stdin */ +/* Returns stdout fd of the worker, in blocking mode */ +static int create_and_feed_worker(user_req *ureq) +{ + pid_t pid; + struct { + int rd; + int wr; + } to_child, to_parent; + + /* NB: these pipe fds are in blocking mode and non-CLOEXECed */ + xpipe(&to_child.rd); + xpipe(&to_parent.rd); + + pid = vfork(); + if (pid < 0) /* error */ + perror_and_die("vfork"); + if (!pid) { /* child */ + char param[sizeof(int)*3 + 2]; + char *argv[3]; + + close(to_child.wr); + close(to_parent.rd); + xmovefd(to_child.rd, 0); + xmovefd(to_parent.wr, 1); + sprintf(param, "%u", debug); + argv[0] = (char*) "worker_nscd"; + argv[1] = param; + argv[2] = NULL; + /* Re-exec ourself, cleaning up all allocated memory. + * fds in parent are marked CLOEXEC and will be closed too + * (modulo bugs) */ + /* Try link name first: it's better to have comm field + * of "nscd" than "exe" (pgrep reported to fail to find us + * by name when comm field contains "exe") */ + execve(self_exe_points_to, argv, argv+2); + xexecve("/proc/self/exe", argv, argv+2); + } + + /* parent */ + close(to_child.rd); + close(to_parent.wr); + /* We do not expect child to block for any noticeably long time, + * and also we expect write to be one-piece one: + * ureq size is <= 1k and pipes are guaranteed to accept + * at least PIPE_BUF at once */ + xsafe_write(to_child.wr, ureq, ureq_size(ureq)); + + close(to_child.wr); + close_on_exec(to_parent.rd); + return to_parent.rd; +} + +static user_req *worker_ureq; + +#if DEBUG_BUILD +static const char *req_str(unsigned type, const char *buf) +{ + if (type == GETHOSTBYADDR) { + struct in_addr in; + in.s_addr = *((uint32_t*)buf); + return inet_ntoa(in); + } + if (type == GETHOSTBYADDRv6) { + return "IPv6"; + } + return buf; +} +#else +const char *req_str(unsigned type, const char *buf); +#endif + +static void worker_signal_handler(int sig) +{ +#if DEBUG_BUILD + log(L_INFO, "worker:%d got sig:%d while handling req " + "type:%d(%s) key_len:%d '%s'", + getpid(), sig, + worker_ureq->type, typestr[worker_ureq->type], + worker_ureq->key_len, + req_str(worker_ureq->type, worker_ureq->reqbuf) + ); +#else + log(L_INFO, "worker:%d got sig:%d while handling req " + "type:%d key_len:%d", + getpid(), sig, + worker_ureq->type, worker_ureq->key_len); +#endif + _exit(0); +} + +static void worker(const char *param) NORETURN; +static void worker(const char *param) +{ + user_req ureq; + void *resp; + + debug = atoi(param); + + worker_ureq = &ureq; /* for signal handler */ + + /* Make sure we won't hang, but rather die */ + if (WORKER_TIMEOUT_SEC) + alarm(WORKER_TIMEOUT_SEC); + + /* NB: fds 0, 1 are in blocking mode */ + + /* We block here (for a short time) */ + /* Due to ureq size < PIPE_BUF read is atomic */ + /* No error or size checking: we trust the parent */ + safe_read(0, &ureq, sizeof(ureq)); + + signal(SIGSEGV, worker_signal_handler); + signal(SIGBUS, worker_signal_handler); + signal(SIGILL, worker_signal_handler); + signal(SIGFPE, worker_signal_handler); + signal(SIGABRT, worker_signal_handler); +#ifdef SIGSTKFLT + signal(SIGSTKFLT, worker_signal_handler); +#endif + + if (ureq.type == GETHOSTBYNAME + || ureq.type == GETHOSTBYNAMEv6 + ) { + resp = marshal_hostent( + ureq.type == GETHOSTBYNAME + ? gethostbyname(ureq.reqbuf) + : gethostbyname2(ureq.reqbuf, AF_INET6) + ); + } else if (ureq.type == GETHOSTBYADDR + || ureq.type == GETHOSTBYADDRv6 + ) { + resp = marshal_hostent(gethostbyaddr(ureq.reqbuf, ureq.key_len, + (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6) + )); + } else if (ureq.type == GETPWBYNAME) { + struct passwd *pw; + log(L_DEBUG2, "getpwnam('%s')", ureq.reqbuf); + pw = getpwnam(ureq.reqbuf); + log(L_DEBUG2, "getpwnam result:%p", pw); + resp = marshal_passwd(pw); + } else if (ureq.type == GETPWBYUID) { + resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf))); + } else if (ureq.type == GETGRBYNAME) { + struct group *gr = getgrnam(ureq.reqbuf); + resp = marshal_group(gr); + } else if (ureq.type == GETGRBYGID) { + struct group *gr = getgrgid(atoi(ureq.reqbuf)); + resp = marshal_group(gr); + } else if (ureq.type == GETAI) { + resp = obtain_addrinfo(ureq.reqbuf); + } else /*if (ureq.type == INITGROUPS)*/ { + resp = obtain_initgroups(ureq.reqbuf); + } + + if (!((response_header*)resp)->found) { + /* Parent knows about this special case */ + xfull_write(1, resp, 8); + } else { + /* Responses can be big (getgrnam("guest") on a big user db), + * we cannot rely on them being atomic. full_write loops + * if needed */ + xfull_write(1, resp, ((response_header*)resp)->version_or_size); + } + _exit(0); +} + + +/* +** Main loop +*/ + +static const char checked_filenames[][sizeof("/etc/passwd")] = { + [SRV_PASSWD] = "/etc/passwd", /* "/etc/shadow"? */ + [SRV_GROUP] = "/etc/group", + [SRV_HOSTS] = "/etc/hosts", /* "/etc/resolv.conf" "/etc/nsswitch.conf"? */ +}; + +static long checked_status[ARRAY_SIZE(checked_filenames)]; + +static void check_files(int srv) +{ + struct stat tsb; + const char *file = checked_filenames[srv]; + long v; + + memset(&tsb, 0, sizeof(tsb)); + stat(file, &tsb); /* ignore errors */ + /* Comparing struct stat's was giving false positives. + * Extracting only those fields which are interesting: */ + v = (long)tsb.st_mtime ^ (long)tsb.st_size ^ (long)tsb.st_ino; /* ^ (long)tsb.st_dev ? */ + + if (v != checked_status[srv]) { + checked_status[srv] = v; + log(L_INFO, "detected change in %s", file); + age_cache(/*free_all:*/ 1, srv); + } +} + +/* Returns 1 if we immediately have the answer */ +static int handle_client(int i) +{ + int srv; + user_req *ureq = cinfo[i].ureq; + user_req **cache_pp; + user_req *ureq_and_resp; + +#if DEBUG_BUILD + log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'", + ureq->version, ureq->type, + ureq->type < ARRAY_SIZE(typestr) ? typestr[ureq->type] : "?", + ureq->key_len, req_str(ureq->type, ureq->reqbuf)); +#endif + + if (ureq->version != NSCD_VERSION) { + log(L_INFO, "wrong version"); + close_client(i); + return 0; + } + if (ureq->key_len > sizeof(ureq->reqbuf)) { + log(L_INFO, "bogus key_len %u - ignoring", ureq->key_len); + close_client(i); + return 0; + } + if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) { + log(L_INFO, "read %d, need to read %d", + cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len); + return 0; /* more to read */ + } + if (cinfo[i].bytecnt > USER_HDR_SIZE + ureq->key_len) { + log(L_INFO, "read overflow: %u > %u", + (int)cinfo[i].bytecnt, (int)(USER_HDR_SIZE + ureq->key_len)); + close_client(i); + return 0; + } + if (unsupported_ureq_type(ureq->type)) { + /* We don't know this request. Just close the connection. + * (glibc client interprets this like "not supported by this nscd") + * Happens very often, thus DEBUG, not INFO */ + log(L_DEBUG, "unsupported query, dropping"); + close_client(i); + return 0; + } + srv = type_to_srv[ureq->type]; + if (!config.srv_enable[srv]) { + log(L_INFO, "service %d is disabled, dropping", srv); + close_client(i); + return 0; + } + + hex_dump(cinfo[i].ureq, cinfo[i].bytecnt); + + if (ureq->type == SHUTDOWN + || ureq->type == INVALIDATE + ) { +#ifdef SO_PEERCRED + struct ucred caller; + socklen_t optlen = sizeof(caller); + if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) { + log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno)); + close_client(i); + return 0; + } + if (caller.uid != 0) { + log(L_INFO, "special request from non-root - ignoring"); + close_client(i); + return 0; + } +#endif + if (ureq->type == SHUTDOWN) { + log(L_INFO, "got shutdown request, exiting"); + exit(0); + } + if (!ureq->key_len || ureq->reqbuf[ureq->key_len - 1]) { + log(L_INFO, "malformed invalidate request - ignoring"); + close_client(i); + return 0; + } + log(L_INFO, "got invalidate request, flushing cache"); + /* Frees entire cache. TODO: replace -1 with service (in ureq->reqbuf) */ + age_cache(/*free_all:*/ 1, -1); + close_client(i); + return 0; + } + + if (ureq->type != GETHOSTBYADDR + && ureq->type != GETHOSTBYADDRv6 + ) { + if (ureq->key_len && ureq->reqbuf[ureq->key_len - 1] != '\0') { + log(L_INFO, "badly terminated buffer"); + close_client(i); + return 0; + } + } + + if (config.check_files[srv]) { + check_files(srv); + } + + cache_pp = lookup_in_cache(ureq); + ureq_and_resp = cache_pp ? *cache_pp : NULL; + + if (ureq_and_resp) { + if (CACHED_ENTRY(ureq_and_resp)) { + /* Found. Save ptr to response into cinfo and return */ + response_header *resp = ureq_response(ureq_and_resp); + unsigned sz = resp->version_or_size; + + log(L_DEBUG, "sz:%u", sz); + hex_dump(resp, sz); + ureq_and_resp->refcount++; /* cache shouldn't free it under us! */ + pfd[i].events = POLLOUT; /* we want to write out */ + cinfo[i].resptr = ureq_and_resp; + /*cinfo[i].respos = 0; - already is */ + /* prevent future matches with anything */ + cinfo[i].cache_pp = (void *) 1; + return 1; /* "ready to write data out to client" */ + } + + /* Not found. Remember a pointer where it will appear */ + cinfo[i].cache_pp = cache_pp; + + /* If it does not point to our own ureq buffer... */ + if (CACHE_PTR(ureq_and_resp) != ureq) { + /* We are not the first client who wants this */ + log(L_DEBUG, "another request is in progress (%p), waiting for its result", ureq_and_resp); + MARK_PTR_SHARED(cache_pp); /* "please inform us when it's ready" */ + /* "we do not wait for client anymore" */ + cinfo[i].client_fd = pfd[i].fd; + /* Don't wait on fd. Worker response will unblock us */ + pfd[i].events = 0; + return 0; + } + /* else: lookup_in_cache inserted (ureq & 1) into *cache_pp: + * we are the first client to miss on this ureq. */ + } + + /* Start worker thread */ + log(L_DEBUG, "stored %p in cache, starting a worker", ureq_and_resp); + /* Now we will wait on worker's fd, not client's! */ + cinfo[i].client_fd = pfd[i].fd; + pfd[i].fd = create_and_feed_worker(ureq); + return 0; +} + +static void prepare_for_writeout(unsigned i, user_req *cached) +{ + log(L_DEBUG2, "client %u: data is ready at %p", i, cached); + + if (cinfo[i].client_fd) { + pfd[i].fd = cinfo[i].client_fd; + cinfo[i].client_fd = 0; /* "we don't wait for worker reply" */ + } + pfd[i].events = POLLOUT; + + /* Writeout position etc */ + cinfo[i].resptr = cached; + /*cinfo[i].respos = 0; - already is */ + /* if worker took some time to get info (e.g. DNS query), + * prevent client timeout from triggering at once */ + cinfo[i].started_ms = g_now_ms; +} + +/* Worker seems to be ready to write the response. + * When we return, response is fully read and stored in cache, + * worker's fd is closed, pfd[i] and cinfo[i] are updated. */ +static void handle_worker_response(int i) +{ + struct { /* struct response_header + small body */ + uint32_t version_or_size; + int32_t found; + char body[256 - 8]; + } sz_and_found; + user_req *cached; + user_req *ureq; + response_header *resp; + unsigned sz, resp_sz; + unsigned ureq_sz_aligned; + + cached = NULL; + ureq = cinfo[i].ureq; + ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq; + + sz = full_read(pfd[i].fd, &sz_and_found, sizeof(sz_and_found)); + if (sz < 8) { + /* worker was killed? */ + log(L_DEBUG, "worker gave short reply:%u < 8", sz); + goto err; + } + + resp_sz = sz_and_found.version_or_size; + if (resp_sz < sz || resp_sz > 0x0fffffff) { /* 256 mb */ + error("BUG: bad size from worker:%u", resp_sz); + goto err; + } + + /* Create new block of cached info */ + cached = xzalloc(ureq_sz_aligned + resp_sz); + log(L_DEBUG2, "xzalloc(%u):%p sz:%u resp_sz:%u found:%u", + ureq_sz_aligned + resp_sz, cached, + sz, resp_sz, + (int)sz_and_found.found + ); + resp = (void*) (((char*) cached) + ureq_sz_aligned); + memcpy(cached, ureq, ureq_size(ureq)); + memcpy(resp, &sz_and_found, sz); + if (sz_and_found.found && resp_sz > sz) { + /* We need to read data only if it's found + * (otherwise worker sends only 8 bytes). + * + * Replies can be big (getgrnam("guest") on a big user db), + * we cannot rely on them being atomic. However, we know + * that worker _always_ gives reply in one full_write(), + * so we loop and read it all + * (looping is implemented inside full_read()) + */ + if (full_read(pfd[i].fd, ((char*) resp) + sz, resp_sz - sz) != resp_sz - sz) { + /* worker was killed? */ + log(L_DEBUG, "worker gave short reply, free(%p)", cached); + err: + free(cached); + cached = NULL; + goto wo; + } + } + set_cache_timestamp(cached); + hex_dump(resp, resp_sz); + + wo: + close(pfd[i].fd); + + /* Save in cache */ + unsigned ref = 0; + user_req **cache_pp = cinfo[i].cache_pp; + if (cache_pp != NULL) { /* if not a fake entry */ + ureq = *cache_pp; + *cache_pp = cached; + if (CACHE_SHARED(ureq)) { + /* Other clients wait for this response too, + * wake them (and us) up and set refcount = no_of_clients */ + unsigned j; + + for (j = 2; j < num_clients; j++) { + if (cinfo[j].cache_pp == cache_pp) { + /* This client uses the same cache entry */ + ref++; + /* prevent future matches with anything */ + cinfo[j].cache_pp = (void *) 1; + prepare_for_writeout(j, cached); + } + } + goto ret; + } + /* prevent future matches with anything */ + cinfo[i].cache_pp = (void *) 1; + ref = 1; + } + + prepare_for_writeout(i, cached); +ret: + /* cache shouldn't free it under us! */ + if (cached) + cached->refcount = ref; + aging_interval_ms = min_aging_interval_ms; +} + +static void main_loop(void) +{ + /* 1/2 of smallest negative TTL */ + min_aging_interval_ms = config.nttl[0]; + if (min_aging_interval_ms > config.nttl[1]) min_aging_interval_ms = config.nttl[1]; + if (min_aging_interval_ms > config.nttl[2]) min_aging_interval_ms = config.nttl[2]; + min_aging_interval_ms = (min_aging_interval_ms / 2) | 1; + aging_interval_ms = min_aging_interval_ms; + + while (1) { + int i, j; + int r; + + r = SMALL_POLL_TIMEOUT_MS; + if (num_clients <= 2 && !cached_cnt) + r = -1; /* infinite */ + else if (num_clients < max_reqnum) + r = aging_interval_ms; +#if 0 /* Debug: leak detector */ + { + static unsigned long long cnt; + static unsigned long low_malloc = -1L; + static unsigned long low_sbrk = -1L; + void *p = malloc(540); /* should not be too small */ + void *s = sbrk(0); + free(p); + if ((unsigned long)p < low_malloc) + low_malloc = (unsigned long)p; + if ((unsigned long)s < low_sbrk) + low_sbrk = (unsigned long)s; + log(L_INFO, "poll %llu (%d ms). clients:%u cached:%u %u/%u malloc:%p (%lu), sbrk:%p (%lu)", + cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt, + p, (unsigned long)p - low_malloc, + s, (unsigned long)s - low_sbrk); + cnt++; + } +#else + log(L_DEBUG, "poll %d ms. clients:%u cached:%u hit ratio:%u/%u", + r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt); +#endif + + r = poll(pfd, num_clients, r); + log(L_DEBUG2, "poll returns %d", r); + if (r < 0) { + if (errno != EINTR) + perror_and_die("poll"); + continue; + } + + /* Everything between polls never sleeps. + * There is no blocking I/O (except when we talk to worker thread + * which is guaranteed to not block us for long) */ + + g_now_ms = monotonic_ms(); + if (r == 0) + goto skip_fd_checks; + + for (i = 0; i < 2; i++) { + int cfd; + if (!pfd[i].revents) + continue; + /* pfd[i].revents = 0; - not needed */ + cfd = accept(pfd[i].fd, NULL, NULL); + if (cfd < 0) { + /* odd... poll() says we can accept but accept failed? */ + log(L_DEBUG2, "accept failed with %s", strerror(errno)); + continue; + } + ndelay_on(cfd); + close_on_exec(cfd); + /* x[num_clients] is next free element, taking it */ + log(L_DEBUG2, "new client %d, fd %d", num_clients, cfd); + pfd[num_clients].fd = cfd; + pfd[num_clients].events = POLLIN; + /* this will make us do read() in next for() loop: */ + pfd[num_clients].revents = POLLIN; + memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients])); + /* cinfo[num_clients].bytecnt = 0; - done */ + cinfo[num_clients].started_ms = g_now_ms; + cinfo[num_clients].bufidx = alloc_buf_no(); + cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx); + num_clients++; + if (num_clients >= max_reqnum) { + /* stop accepting new connects for now */ + pfd[0].events = pfd[0].revents = 0; + pfd[1].events = pfd[1].revents = 0; + } + } + for (; i < num_clients; i++) { + if (!pfd[i].revents) + continue; + log(L_DEBUG2, "pfd[%d].revents:0x%x", i, pfd[i].revents); + /* pfd[i].revents = 0; - not needed */ + + /* "Write out result" case */ + if (pfd[i].revents == POLLOUT) { + response_header *resp; + uint32_t resp_sz; + if (!cinfo[i].resptr) { + /* corner case: worker gave bad response earlier */ + close_client(i); + continue; + } + write_out: + resp = ureq_response(cinfo[i].resptr); + resp_sz = resp->version_or_size; + resp->version_or_size = NSCD_VERSION; + errno = 0; + r = safe_write(pfd[i].fd, ((char*) resp) + cinfo[i].respos, resp_sz - cinfo[i].respos); + resp->version_or_size = resp_sz; + + if (r < 0 && errno == EAGAIN) + continue; + if (r <= 0) { /* client isn't there anymore */ + log(L_DEBUG, "client %d is gone (write returned:%d err:%s)", + i, r, errno ? strerror(errno) : "-"); + write_out_is_done: + if (cinfo[i].cache_pp == NULL) { + log(L_DEBUG, "client %d: freeing fake cache entry %p", i, cinfo[i].resptr); + free(cinfo[i].resptr); + } else { + /* Most of the time, it is not freed here, + * only refcounted--. Freeing happens + * if it was deleted from cache[] but retained + * for writeout. */ + free_refcounted_ureq(&cinfo[i].resptr); + } + close_client(i); + continue; + } + cinfo[i].respos += r; + if (cinfo[i].respos >= resp_sz) { + /* We wrote everything */ + /* No point in trying to get next request, it won't come. + * glibc 2.4 client closes its end after each request, + * without testing for EOF from server. strace: + * ... + * read(3, "www.google.com\0\0", 16) = 16 + * close(3) = 0 + */ + log(L_DEBUG, "client %u: sent answer %u bytes", i, cinfo[i].respos); + goto write_out_is_done; + } + } + + /* "Read reply from worker" case. Worker may be + * already dead, revents may contain other bits too */ + if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) { + log(L_DEBUG, "reading response for client %u", i); + handle_worker_response(i); + /* We can immediately try to write a response + * to client */ + goto write_out; + } + + /* POLLHUP means pfd[i].fd is closed by peer. + * POLLHUP+POLLOUT is seen when we switch for writeout + * and see that pfd[i].fd is closed by peer. */ + if ((pfd[i].revents & ~POLLOUT) == POLLHUP) { + int is_client = (cinfo[i].client_fd == 0 || cinfo[i].client_fd == pfd[i].fd); + log(L_INFO, "%s %u disappeared (got POLLHUP on fd %d)", + is_client ? "client" : "worker", + i, + pfd[i].fd + ); + if (is_client) + close_client(i); + else { + /* Read worker output anyway, error handling + * in that function deals with short read. + * Simply closing client is wrong: it leaks + * shared future entries. */ + handle_worker_response(i); + } + continue; + } + + /* All strange and unexpected cases */ + if (pfd[i].revents != POLLIN) { + /* Not just "can read", but some other bits are there */ + log(L_INFO, "client %u revents is strange:%x", i, pfd[i].revents); + close_client(i); + continue; + } + + /* "Read request from client" case */ + r = safe_read(pfd[i].fd, (char*)(cinfo[i].ureq) + cinfo[i].bytecnt, MAX_USER_REQ_SIZE - cinfo[i].bytecnt); + if (r < 0) { + log(L_DEBUG2, "error reading from client: %s", strerror(errno)); + if (errno == EAGAIN) + continue; + close_client(i); + continue; + } + if (r == 0) { + log(L_INFO, "premature EOF from client, dropping"); + close_client(i); + continue; + } + cinfo[i].bytecnt += r; + if (cinfo[i].bytecnt >= sizeof(user_req_header)) { + if (handle_client(i)) { + /* Response is found in cache! */ + goto write_out; + } + } + } /* for each client[2..num_clients-1] */ + + skip_fd_checks: + /* Age cache */ + if ((g_now_ms - last_age_time) >= aging_interval_ms) { + last_age_time = g_now_ms; + age_cache(/*free_all:*/ 0, -1); + } + + /* Close timed out client connections */ + for (i = 2; i < num_clients; i++) { + if (pfd[i].fd != 0 /* not closed yet? */ //// + && cinfo[i].client_fd == 0 /* do we still wait for client, not worker? */ + && (g_now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS + ) { + log(L_INFO, "timed out waiting for client %u (%u ms), dropping", + i, (unsigned)(g_now_ms - cinfo[i].started_ms)); + close_client(i); + } + } + + if (!cnt_closed) + continue; + + /* We closed at least one client, coalesce pfd[], cinfo[] */ + if (min_closed + cnt_closed >= num_clients) { + /* clients [min_closed..num_clients-1] are all closed */ + /* log(L_DEBUG, "taking shortcut"); - almost always happens */ + goto coalesce_done; + } + j = min_closed; + i = min_closed + 1; + while (i < num_clients) { + while (1) { + if (pfd[i].fd) + break; + if (++i >= num_clients) + goto coalesce_done; + } + pfd[j] = pfd[i]; + cinfo[j++] = cinfo[i++]; + } + + coalesce_done: + num_clients -= cnt_closed; + log(L_DEBUG, "removing %d closed clients. clients:%d", cnt_closed, num_clients); + min_closed = INT_MAX; + cnt_closed = 0; + /* start accepting new connects */ + pfd[0].events = POLLIN; + pfd[1].events = POLLIN; + } /* while (1) */ +} + + +/* +** Initialization +*/ + +#define NSCD_PIDFILE "/var/run/nscd/nscd.pid" +#define NSCD_DIR "/var/run/nscd" +#define NSCD_SOCKET "/var/run/nscd/socket" +#define NSCD_SOCKET_OLD "/var/run/.nscd_socket" + +static smallint wrote_pidfile; + +static void cleanup_on_signal(int sig) +{ + if (wrote_pidfile) + unlink(NSCD_PIDFILE); + unlink(NSCD_SOCKET_OLD); + unlink(NSCD_SOCKET); + exit(0); +} + +static void write_pid(void) +{ + FILE *pid = fopen(NSCD_PIDFILE, "w"); + if (!pid) + return; + fprintf(pid, "%d\n", getpid()); + fclose(pid); + wrote_pidfile = 1; +} + +/* Open a listening nscd server socket */ +static int open_socket(const char *name) +{ + struct sockaddr_un sun; + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock < 0) + perror_and_die("cannot create unix domain socket"); + ndelay_on(sock); + close_on_exec(sock); + sun.sun_family = AF_UNIX; + strcpy(sun.sun_path, name); + unlink(name); + if (bind(sock, (struct sockaddr *) &sun, sizeof(sun)) < 0) + perror_and_die("bind(%s)", name); + if (chmod(name, 0666) < 0) + perror_and_die("chmod(%s)", name); + if (listen(sock, (max_reqnum/8) | 1) < 0) + perror_and_die("listen"); + return sock; +} + +static const struct option longopt[] = { + /* name, has_arg, int *flag, int val */ + { "debug" , no_argument , NULL, 'd' }, + { "config-file", required_argument, NULL, 'f' }, + { "invalidate" , required_argument, NULL, 'i' }, + { "shutdown" , no_argument , NULL, 'K' }, + { "nthreads" , required_argument, NULL, 't' }, + { "version" , no_argument , NULL, 'V' }, + { "help" , no_argument , NULL, '?' }, + { "usage" , no_argument , NULL, '?' }, + /* just exit(0). TODO: "test" connect? */ + { "statistic" , no_argument , NULL, 'g' }, + { "secure" , no_argument , NULL, 'S' }, /* ? */ + { } +}; + +static const char *const help[] = { + "Do not daemonize; log to stderr (-dd: more verbosity)", + "File to read configuration from", + "Invalidate cache", + "Shut the server down", + "Serve N requests in parallel", + "Version", +}; + +static void print_help_and_die(void) +{ + const struct option *opt = longopt; + const char *const *h = help; + + puts("Usage: nscd [OPTION...]\n" + "Name Service Cache Daemon\n"); + do { + printf("\t" "-%c,--%-11s %s\n", opt->val, opt->name, *h); + h++; + opt++; + } while (opt->val != '?'); + exit(1); +} + +static char *skip_service(int *srv, const char *s) +{ + if (strcmp("passwd", s) == 0) { + *srv = SRV_PASSWD; + s++; + } else if (strcmp("group", s) == 0) { + *srv = SRV_GROUP; + } else if (strcmp("hosts", s) == 0) { + *srv = SRV_HOSTS; + } else { + return NULL; + } + return skip_whitespace(s + 6); +} + +static void handle_null(const char *str, int srv) {} + +static void handle_logfile(const char *str, int srv) +{ + config.logfile = xstrdup(str); +} + +static void handle_debuglvl(const char *str, int srv) +{ + debug |= (uint8_t) getnum(str); +} + +static void handle_threads(const char *str, int srv) +{ + unsigned n = getnum(str); + if (max_reqnum < n) + max_reqnum = n; +} + +static void handle_user(const char *str, int srv) +{ + config.user = xstrdup(str); +} + +static void handle_enable(const char *str, int srv) +{ + config.srv_enable[srv] = ((str[0] | 0x20) == 'y'); +} + +static void handle_pttl(const char *str, int srv) +{ + config.pttl[srv] = getnum(str); +} + +static void handle_nttl(const char *str, int srv) +{ + config.nttl[srv] = getnum(str); +} + +static void handle_size(const char *str, int srv) +{ + config.size[srv] = getnum(str); +} + +static void handle_chfiles(const char *str, int srv) +{ + config.check_files[srv] = ((str[0] | 0x20) == 'y'); +} + +static void parse_conffile(const char *conffile, int warn) +{ + static const struct confword { + const char *str; + void (*handler)(const char *, int); + } conf_words[] = { + { "_" "logfile" , handle_logfile }, + { "_" "debug-level" , handle_debuglvl }, + { "_" "threads" , handle_threads }, + { "_" "max-threads" , handle_threads }, + { "_" "server-user" , handle_user }, + /* ignore: any user can stat */ + { "_" "stat-user" , handle_null }, + { "_" "paranoia" , handle_null }, /* ? */ + /* ignore: design goal is to never crash/hang */ + { "_" "reload-count" , handle_null }, + { "_" "restart-interval" , handle_null }, + { "S" "enable-cache" , handle_enable }, + { "S" "positive-time-to-live" , handle_pttl }, + { "S" "negative-time-to-live" , handle_nttl }, + { "S" "suggested-size" , handle_size }, + { "S" "check-files" , handle_chfiles }, + { "S" "persistent" , handle_null }, /* ? */ + { "S" "shared" , handle_null }, /* ? */ + { "S" "auto-propagate" , handle_null }, /* ? */ + { } + }; + + char buf[128]; + FILE *file = fopen(conffile, "r"); + int lineno = 0; + + if (!file) { + if (conffile != default_conffile) + perror_and_die("cannot open %s", conffile); + return; + } + + while (fgets(buf, sizeof(buf), file) != NULL) { + const struct confword *word; + char *p; + int len = strlen(buf); + + lineno++; + if (len) { + if (buf[len-1] != '\n') { + if (len >= sizeof(buf) - 1) + error_and_die("%s:%d: line is too long", conffile, lineno); + len++; /* last line, not terminated by '\n' */ + } + buf[len-1] = '\0'; + } + p = strchr(buf, '#'); + if (p) + *p = '\0'; + + p = skip_whitespace(buf); + if (!*p) + continue; + *skip_non_whitespace(p) = '\0'; + word = conf_words; + while (1) { + if (strcmp(word->str + 1, p) == 0) { + int srv = 0; + p = skip_whitespace(p + strlen(p) + 1); + *skip_non_whitespace(p) = '\0'; + if (word->str[0] == 'S') { + char *p2 = skip_service(&srv, p); + if (!p2) { + if (warn) + error("%s:%d: ignoring unknown service name '%s'", conffile, lineno, p); + break; + } + p = p2; + *skip_non_whitespace(p) = '\0'; + } + word->handler(p, srv); + break; + } + word++; + if (!word->str) { + if (warn) + error("%s:%d: ignoring unknown directive '%s'", conffile, lineno, p); + break; + } + } + } + fclose(file); +} + + +/* "XX,XX[,XX]..." -> gid_t[] */ +static gid_t* env_U_to_uid_and_gids(const char *str, int *sizep) +{ + const char *sp; + gid_t *ug, *gp; + int ng; + + sp = str; + ng = 1; + while (*sp) + if (*sp++ == ',') + ng++; + ug = xmalloc(ng * sizeof(ug[0])); + + ng = 0; + gp = ug; + sp = str; + errno = 0; + while (1) { + ng++; + *gp++ = strtoul(sp, (char**)&sp, 16); + if (errno || (*sp != ',' && *sp != '\0')) + error_and_die("internal error"); + if (*sp == '\0') + break; + sp++; + } + + *sizep = ng; + return ug; +} + + +static char* user_to_env_U(const char *user) +{ + int ng; + char *ug_str, *sp; + gid_t *ug, *gp; + struct passwd *pw; + + pw = getpwnam(user); + if (!pw) + perror_and_die("user '%s' is not known", user); + + ng = 64; + /* 0th cell will be used for uid */ + ug = xmalloc((1 + ng) * sizeof(ug[0])); + if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) { + ug = xrealloc(ug, (1 + ng) * sizeof(ug[0])); + if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) + perror_and_die("can't get groups of user '%s'", user); + } + ng++; + ug[0] = pw->pw_uid; + + /* How much do we need for "-Uxx,xx[,xx]..." string? */ + ug_str = xmalloc((sizeof(unsigned long)+1)*2 * ng + 3); + gp = ug; + sp = ug_str; + *sp++ = 'U'; + *sp++ = '='; + do { + sp += sprintf(sp, "%lx,", (unsigned long)(*gp++)); + } while (--ng); + sp[-1] = '\0'; + + free(ug); + return ug_str; +} + + +/* not static - don't inline me, compiler! */ +void readlink_self_exe(void); +void readlink_self_exe(void) +{ + char buf[PATH_MAX + 1]; + ssize_t sz = readlink("/proc/self/exe", buf, sizeof(buf) - 1); + if (sz < 0) + perror_and_die("readlink %s failed", "/proc/self/exe"); + buf[sz] = 0; + self_exe_points_to = xstrdup(buf); +} + + +static void special_op(const char *arg) NORETURN; +static void special_op(const char *arg) +{ + static const user_req_header ureq = { NSCD_VERSION, SHUTDOWN, 0 }; + + struct sockaddr_un addr; + int sock; + + sock = socket(PF_UNIX, SOCK_STREAM, 0); + if (sock < 0) + error_and_die("cannot create AF_UNIX socket"); + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, NSCD_SOCKET); + if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0) + error_and_die("cannot connect to %s", NSCD_SOCKET); + + if (!arg) { /* shutdown */ + xfull_write(sock, &ureq, sizeof(ureq)); + printf("sent shutdown request, exiting\n"); + } else { /* invalidate */ + size_t arg_len = strlen(arg) + 1; + struct { + user_req_header req; + char arg[arg_len]; + } reqdata; + reqdata.req.version = NSCD_VERSION; + reqdata.req.type = INVALIDATE; + reqdata.req.key_len = arg_len; + memcpy(reqdata.arg, arg, arg_len); + xfull_write(sock, &reqdata, arg_len + sizeof(ureq)); + printf("sent invalidate(%s) request, exiting\n", arg); + } + exit(0); +} + + +/* Callback for glibc-2.15 */ +struct traced_file; +static void do_nothing(size_t dbidx, struct traced_file *finfo) +{ + /* nscd from glibc-2.15 does something like this: + if (!dbs[dbidx].enabled || !dbs[dbidx].check_file) + return; + add_file_to_watch_list(finfo->fname); + */ +} + +/* This internal glibc function is called to disable trying to contact nscd. + * We _are_ nscd, so we need to do the lookups, and not recurse. + * Until 2.14, this function was taking no parameters. + * In 2.15, it takes a function pointer from hell. + */ +void __nss_disable_nscd(void (*hell)(size_t, struct traced_file*)); + + +int main(int argc, char **argv) +{ + int n; + unsigned opt_d_cnt; + const char *env_U; + const char *conffile; + + /* make sure we don't get recursive calls */ + __nss_disable_nscd(do_nothing); + + if (argv[0][0] == 'w') /* "worker_nscd" */ + worker(argv[1]); + + setlinebuf(stdout); + setlinebuf(stderr); + + /* Make sure stdio is not closed */ + n = xopen3("/dev/null", O_RDWR, 0); + while (n < 2) + n = dup(n); + /* Close unexpected open file descriptors */ + n |= 0xff; /* start from at least fd# 255 */ + do { + close(n--); + } while (n > 2); + + /* For idiotic kernels which disallow "exec /proc/self/exe" */ + readlink_self_exe(); + + conffile = default_conffile; + opt_d_cnt = 0; + while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) { + switch (n) { + case 'd': + opt_d_cnt++; + debug &= ~D_DAEMON; + break; + case 'f': + conffile = optarg; + break; + case 'i': + /* invalidate */ + special_op(optarg); /* exits */ + case 'K': + /* shutdown server */ + special_op(NULL); /* exits */ + case 'V': + puts("unscd - nscd which does not hang, v."PROGRAM_VERSION); + exit(0); + case 'g': + exit(0); + case 't': + /* N threads */ + max_reqnum = getnum(optarg); + break; + case 'S': + /* secure (?) */ + break; + default: + print_help_and_die(); + } + } + /* Multiple -d can bump debug regardless of nscd.conf: + * no -d or -d: 0, -dd: 1, + * -ddd: 3, -dddd: 7, -ddddd: 15 + */ + if (opt_d_cnt != 0) + debug |= (((1U << opt_d_cnt) >> 1) - 1) & L_ALL; + + env_U = getenv("U"); + /* Avoid duplicate warnings if $U exists */ + parse_conffile(conffile, /* warn? */ (env_U == NULL)); + + /* I have a user report of (broken?) ldap nss library + * opening and never closing a socket to a ldap server, + * even across fork() and exec(). This messes up + * worker child's operations for the reporter. + * + * This strenghtens my belief that nscd _must not_ trust + * nss libs to be written correctly. + * + * Here, we need to jump through the hoops to guard against + * such problems. If config file has server-user setting, we need + * to setgroups + setuid. For that, we need to get uid and gid vector. + * And that means possibly using buggy nss libs. + * We will do it here, but then we will re-exec, passing uid+gids + * in an environment variable. + */ + if (!env_U && config.user) { + /* user_to_env_U() does getpwnam and getgrouplist */ + if (putenv(user_to_env_U(config.user))) + error_and_die("out of memory"); + /* fds leaked by nss will be closed by execed copy */ + execv(self_exe_points_to, argv); + xexecve("/proc/self/exe", argv, environ); + } + + /* Allocate dynamically sized stuff */ + max_reqnum += 2; /* account for 2 first "fake" clients */ + if (max_reqnum < 8) max_reqnum = 8; /* sanitize */ + /* Since refcount is a byte, can't serve more than 255-2 clients + * at once. The rest will block in connect() */ + if (max_reqnum > 0xff) max_reqnum = 0xff; + client_buf = xzalloc(max_reqnum * sizeof(client_buf[0])); + busy_cbuf = xzalloc(max_reqnum * sizeof(busy_cbuf[0])); + pfd = xzalloc(max_reqnum * sizeof(pfd[0])); + cinfo = xzalloc(max_reqnum * sizeof(cinfo[0])); + + cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8; + if (cache_size < 8) cache_size = 8; /* 8*8 = 64 entries min */ + if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */ + cache_size |= 1; /* force it to be odd */ + cache = xzalloc(cache_size * sizeof(cache[0])); + + /* Register cleanup hooks */ + signal(SIGINT, cleanup_on_signal); + signal(SIGTERM, cleanup_on_signal); + /* Don't die if a client closes a socket on us */ + signal(SIGPIPE, SIG_IGN); + /* Avoid creating zombies */ + signal(SIGCHLD, SIG_IGN); +#if !DEBUG_BUILD + /* Ensure workers don't have SIGALRM ignored */ + signal(SIGALRM, SIG_DFL); +#endif + + if (mkdir(NSCD_DIR, 0755) == 0) { + /* prevent bad mode of NSCD_DIR if umask is e.g. 077 */ + chmod(NSCD_DIR, 0755); + } + pfd[0].fd = open_socket(NSCD_SOCKET); + pfd[1].fd = open_socket(NSCD_SOCKET_OLD); + pfd[0].events = POLLIN; + pfd[1].events = POLLIN; + + if (debug & D_DAEMON) { + daemon(/*nochdir*/ 1, /*noclose*/ 0); + if (config.logfile) { + /* nochdir=1: relative paths still work as expected */ + xmovefd(xopen3(config.logfile, O_WRONLY|O_CREAT|O_TRUNC, 0666), 2); + debug |= D_STAMP; + } else { + debug = 0; /* why bother? it's /dev/null'ed anyway */ + } + chdir("/"); /* compat */ + write_pid(); + setsid(); + /* ignore job control signals */ + signal(SIGTTOU, SIG_IGN); + signal(SIGTTIN, SIG_IGN); + signal(SIGTSTP, SIG_IGN); + } + + log(L_ALL, "unscd v" PROGRAM_VERSION ", debug level 0x%x", debug & L_ALL); + log(L_DEBUG, "max %u requests in parallel", max_reqnum - 2); + log(L_DEBUG, "cache size %u x 8 entries", cache_size); + + if (env_U) { + int size; + gid_t *ug = env_U_to_uid_and_gids(env_U, &size); + if (size > 1) + if (setgroups(size - 1, &ug[1]) || setgid(ug[1])) + perror_and_die("cannot set groups for user '%s'", config.user); + if (size > 0) + if (setuid(ug[0])) + perror_and_die("cannot set uid to %u", (unsigned)(ug[0])); + free(ug); + } + + for (n = 0; n < 3; n++) { + log(L_DEBUG, "%s cache enabled:%u pttl:%u nttl:%u", + srv_name[n], + config.srv_enable[n], + config.pttl[n], + config.nttl[n]); + config.pttl[n] *= 1000; + config.nttl[n] *= 1000; + } + + main_loop(); + + return 0; +} -- 2.39.2