1 /* This file is part of unscd, a complete nscd replacement.
2 * Copyright (C) 2007-2012 Denys Vlasenko. Licensed under the GPL version 2.
5 /* unscd is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
9 * unscd is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You can download the GNU General Public License from the GNU website
15 * at http://www.gnu.org/ or write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
21 gcc -Wall -Wunused-parameter -Os -o nscd nscd.c
23 gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common
28 nscd problems are not exactly unheard of. Over the years, there were
29 quite a few bugs in it. This leads people to invent babysitters
30 which restart crashed/hung nscd. This is ugly.
32 After looking at nscd source in glibc I arrived at the conclusion
33 that its design is contributing to this significantly. Even if nscd's
34 code is 100.00% perfect and bug-free, it can still suffer from bugs
35 in libraries it calls.
37 As designed, it's a multithreaded program which calls NSS libraries.
38 These libraries are not part of libc, they may be provided
39 by third-party projects (samba, ldap, you name it).
41 Thus nscd cannot be sure that libraries it calls do not have memory
42 or file descriptor leaks and other bugs.
44 Since nscd is a multithreaded program with a single shared cache,
45 any resource leak in any NSS library has cumulative effect.
46 Even if an NSS library leaks a file descriptor 0.01% of the time,
47 this will make nscd crash or hang after some time.
49 Of course bugs in NSS .so modules should be fixed, but meanwhile
50 I do want nscd which does not crash or lock up.
52 So I went ahead and wrote a replacement.
54 It is a single-threaded server process which offloads all NSS
55 lookups to worker children (not threads, but fully independent
56 processes). Cache hits are handled by parent. Only cache misses
57 start worker children. This design is immune against
58 resource leaks and hangs in NSS libraries.
60 It is also many times smaller.
62 Currently (v0.36) it emulates glibc nscd pretty closely
63 (handles same command line flags and config file), and is moderately tested.
65 Please note that as of 2008-08 it is not in wide use (yet?).
66 If you have trouble compiling it, see an incompatibility with
67 "standard" one or experience hangs/crashes, please report it to
68 vda.linux@googlemail.com
70 ***********************************************************************/
72 /* Make struct ucred appear in sys/socket.h */
74 /* For all good things */
91 #include <sys/socket.h>
93 #include <sys/types.h>
99 /* For inet_ntoa (for debug build only) */
100 #include <arpa/inet.h>
103 * 0.21 add SEGV reporting to worker
104 * 0.22 don't do freeaddrinfo() in GETAI worker, it's crashy
105 * 0.23 add parameter parsing
106 * 0.24 add conf file parsing, not using results yet
107 * 0.25 used some of conf file settings (not tested)
108 * 0.26 almost all conf file settings are wired up
109 * 0.27 a bit more of almost all conf file settings are wired up
110 * 0.28 optimized cache aging
111 * 0.29 implemented invalidate and shutdown options
112 * 0.30 fixed buglet (sizeof(ptr) != sizeof(array))
113 * 0.31 reduced client_info by one member
114 * 0.32 fix nttl/size defaults; simpler check for worker child in main()
115 * 0.33 tweak includes so that it builds on my new machine (64-bit userspace);
116 * do not die on unknown service name, just warn
117 * ("services" is a new service we don't support)
118 * 0.34 create /var/run/nscd/nscd.pid pidfile like glibc nscd 2.8 does;
119 * delay setuid'ing itself to server-user after log and pidfile are open
120 * 0.35 readlink /proc/self/exe and use result if execing /proc/self/exe fails
121 * 0.36 exercise extreme paranoia handling server-user option;
122 * a little bit more verbose logging:
123 * L_DEBUG2 log level added, use debug-level 7 to get it
124 * 0.37 users reported over-zealous "detected change in /etc/passwd",
125 * apparently stat() returns random garbage in unused padding
126 * on some systems. Made the check less paranoid.
127 * 0.38 log POLLHUP better
128 * 0.39 log answers to client better, log getpwnam in the worker,
129 * pass debug level value down to worker.
130 * 0.40 fix handling of shutdown and invalidate requests;
131 * fix bug with answer written in several pieces
132 * 0.40.1 set hints.ai_socktype = SOCK_STREAM in GETAI request
133 * 0.41 eliminate double caching of two near-simultaneous identical requests -
135 * 0.42 execute /proc/self/exe by link name first (better comm field)
136 * 0.43 fix off-by-one error in setgroups
137 * 0.44 make -d[ddd] bump up debug - easier to explain to users
138 * how to produce detailed log (no nscd.conf tweaking)
139 * 0.45 Fix out-of-bounds array access and log/pid file permissions -
140 * thanks to Sebastian Krahmer (krahmer AT suse.de)
141 * 0.46 fix a case when we forgot to remove a future entry on worker failure
142 * 0.47 fix nscd without -d to not bump debug level
143 * 0.48 fix for changes in __nss_disable_nscd API in glibc-2.15
144 * 0.49 minor tweaks to messages
145 * 0.50 add more files to watch for changes
146 * 0.51 fix a case where we forget to refcount-- the cached entry
147 * 0.52 make free_refcounted_ureq() tolerant to pointers to NULLs
149 #define PROGRAM_VERSION "0.52"
151 #define DEBUG_BUILD 1
158 #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
160 #define NORETURN __attribute__ ((__noreturn__))
/* Compact integer type used for flags and small values.
 * It is a signed char by default; define MY_CPU_HATES_CHARS at build
 * time to make it a plain int instead. */
#ifdef MY_CPU_HATES_CHARS
typedef int smallint;
#else
typedef signed char smallint;
#endif
172 L_DEBUG = ((1 << 1) * DEBUG_BUILD),
173 L_DEBUG2 = ((1 << 2) * DEBUG_BUILD),
174 L_DUMP = ((1 << 3) * DEBUG_BUILD),
180 static smallint debug = D_DAEMON;
182 static void verror(const char *s, va_list p, const char *strerr)
185 int sz, rem, strerr_len;
189 if (debug & D_STAMP) {
190 gettimeofday(&tv, NULL);
191 sz = sprintf(msgbuf, "%02u:%02u:%02u.%05u ",
192 (unsigned)((tv.tv_sec / (60*60)) % 24),
193 (unsigned)((tv.tv_sec / 60) % 60),
194 (unsigned)(tv.tv_sec % 60),
195 (unsigned)(tv.tv_usec / 10));
197 rem = sizeof(msgbuf) - sz;
198 sz += vsnprintf(msgbuf + sz, rem, s, p);
199 rem = sizeof(msgbuf) - sz; /* can be negative after this! */
202 strerr_len = strlen(strerr);
203 if (rem >= strerr_len + 4) { /* ": STRERR\n\0" */
206 strcpy(msgbuf + sz, strerr);
215 fputs(msgbuf, stderr);
/* Print a printf-style message to stderr via verror(), with no
 * errno text appended. Does not terminate the process. */
static void error(const char *msg, ...)
{
	va_list p;

	va_start(p, msg);
	verror(msg, p, NULL);
	va_end(p);
}
226 static void error_and_die(const char *msg, ...) NORETURN;
227 static void error_and_die(const char *msg, ...)
231 verror(msg, p, NULL);
236 static void perror_and_die(const char *msg, ...) NORETURN;
237 static void perror_and_die(const char *msg, ...)
241 /* Guard against "<error message>: Success" */
242 verror(msg, p, errno ? strerror(errno) : NULL);
247 static void nscd_log(int mask, const char *msg, ...)
252 verror(msg, p, NULL);
257 #define log(lvl, ...) do { if (lvl) nscd_log(lvl, __VA_ARGS__); } while (0)
#if DEBUG_BUILD
/* Hex + ASCII dump of len bytes starting at ptr, written to stderr,
 * 16 bytes per row. A short final row is padded with spaces so the
 * ASCII column stays aligned. Does nothing unless L_DUMP is set in debug.
 * Only the bytes that belong to the current row are read. */
static void dump(const void *ptr, int len)
{
	char text[18]; /* 16 printable chars + '\n' + '\0' */
	const unsigned char *buf = ptr;

	if (!(debug & L_DUMP))
		return;

	while (len > 0) {
		int chunk = ((len >= 16) ? 16 : len);
		int i;

		for (i = 0; i < chunk; i++) {
			unsigned char c = buf[i];

			fprintf(stderr, "%02x ", c);
			text[i] = (c >= 32 && c < 127 ? c : '.');
		}
		/* pad the hex column of a short row */
		fprintf(stderr, "%*s", (16-chunk) * 3, "");
		text[chunk] = '\n';
		text[chunk + 1] = '\0';
		fputs(text, stderr);
		buf += chunk;
		len -= chunk;
	}
}
#else
void dump(const void *ptr, int len);
#endif
294 #define hex_dump(p,n) do { if (L_DUMP) dump(p,n); } while (0)
/* open() wrapper: returns the new file descriptor, or prints the
 * errno text and terminates the process if the open fails. */
static int xopen3(const char *pathname, int flags, int mode)
{
	int fd = open(pathname, flags, mode);

	if (fd < 0)
		perror_and_die("open");
	return fd;
}
/* pipe() wrapper: fills fds[0] (read end) and fds[1] (write end),
 * or terminates the process if the pipe cannot be created. */
static void xpipe(int *fds)
{
	if (pipe(fds) < 0)
		perror_and_die("pipe");
}
310 static void xexecve(const char *filename, char **argv, char **envp) NORETURN;
311 static void xexecve(const char *filename, char **argv, char **envp)
313 execve(filename, argv, envp);
314 perror_and_die("cannot re-exec %s", filename);
/* Switch fd to non-blocking mode by adding O_NONBLOCK to its file
 * status flags. Terminates the process if either fcntl() call fails. */
static void ndelay_on(int fd)
{
	int fl = fcntl(fd, F_GETFL);

	if (fl < 0)
		perror_and_die("F_GETFL");
	if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0)
		perror_and_die("setting O_NONBLOCK");
}
/* Mark fd close-on-exec (F_SETFD with FD_CLOEXEC).
 * Terminates the process if fcntl() fails. */
static void close_on_exec(int fd)
{
	if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0)
		perror_and_die("setting FD_CLOEXEC");
}
332 static unsigned monotonic_ms(void)
335 if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts))
336 perror_and_die("clock_gettime(MONOTONIC)");
337 return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
/* Storage size of a C string: its length plus the terminating NUL. */
static unsigned strsize(const char *str)
{
	return strlen(str) + 1;
}
/* Storage size of a C string (length + terminating NUL), rounded up
 * to the next multiple of 4 bytes. */
static unsigned strsize_aligned4(const char *str)
{
	return (strlen(str) + 1 + 3) & (~3);
}
/* read() that transparently restarts when interrupted by a signal
 * (EINTR). Returns what read() returns: bytes read, 0 on EOF,
 * or -1 with errno set for any other error. */
static ssize_t safe_read(int fd, void *buf, size_t count)
{
	ssize_t n;

	do {
		n = read(fd, buf, count);
	} while (n < 0 && errno == EINTR);
	return n;
}
/* Read until len bytes have been stored in buf or EOF is reached,
 * issuing as many safe_read() calls as needed.
 * Returns the number of bytes read (less than len only on EOF),
 * or -1 as soon as any read fails. */
static ssize_t full_read(int fd, void *buf, size_t len)
{
	ssize_t cc;
	ssize_t total;

	total = 0;
	while (len) {
		cc = safe_read(fd, buf, len);
		if (cc < 0)
			return cc; /* read() returns -1 on failure. */
		if (cc == 0)
			break; /* EOF */
		buf = ((char *)buf) + cc;
		total += cc;
		len -= cc;
	}
	return total;
}
378 static void xsafe_read(int fd, void *buf, size_t len)
380 if (len != safe_read(fd, buf, len))
381 perror_and_die("short read");
383 static void xfull_read(int fd, void *buf, size_t len)
385 if (len != full_read(fd, buf, len))
386 perror_and_die("short read");
/* write() that transparently restarts when interrupted by a signal
 * (EINTR). Returns what write() returns: bytes written (possibly
 * fewer than count), or -1 with errno set for any other error. */
static ssize_t safe_write(int fd, const void *buf, size_t count)
{
	ssize_t n;

	do {
		n = write(fd, buf, count);
	} while (n < 0 && errno == EINTR);
	return n;
}
/* Write all len bytes of buf to fd, issuing as many safe_write()
 * calls as needed to cope with partial writes.
 * Returns len on success, or -1 as soon as any write fails. */
static ssize_t full_write(int fd, const void *buf, size_t len)
{
	ssize_t cc;
	ssize_t total;

	total = 0;
	while (len) {
		cc = safe_write(fd, buf, len);
		if (cc < 0)
			return cc; /* write() returns -1 on failure. */
		total += cc;
		buf = ((const char *)buf) + cc;
		len -= cc;
	}
	return total;
}
416 static void xsafe_write(int fd, const void *buf, size_t count)
418 if (count != safe_write(fd, buf, count))
419 perror_and_die("short write of %ld bytes", (long)count);
/* Write exactly count bytes from buf to fd using full_write();
 * report the error and terminate the process if that is not possible. */
static void xfull_write(int fd, const void *buf, size_t count)
{
	ssize_t written = full_write(fd, buf, count);

	/* written is -1 on error, otherwise the byte count */
	if (written < 0 || (size_t)written != count)
		perror_and_die("short write of %ld bytes", (long)count);
}
/* Make to_fd refer to the file currently open as from_fd, then close
 * from_fd. Does nothing when both numbers are already equal.
 * Terminates the process if dup2() fails. */
static void xmovefd(int from_fd, int to_fd)
{
	if (from_fd != to_fd) {
		if (dup2(from_fd, to_fd) < 0)
			perror_and_die("dup2");
		close(from_fd);
	}
}
/* Parse str as a non-negative decimal number.
 * The string must start with a digit, contain nothing after the number,
 * and the value must not exceed INT_MAX / 1000 (so that a later x1000
 * still fits in an int). Anything else terminates the process
 * with an error message. */
static unsigned getnum(const char *str)
{
	if (str[0] >= '0' && str[0] <= '9') {
		char *p;
		unsigned long l = strtoul(str, &p, 10);
		/* must not overflow int even after x1000 */
		if (!*p && l <= INT_MAX / 1000)
			return l;
	}
	error_and_die("malformed or too big number '%s'", str);
}
/* Return a pointer to the first character of s that is not whitespace
 * (possibly the terminating NUL). */
static char *skip_whitespace(const char *s)
{
	/* NB: isspace('\0') returns 0.
	 * The cast keeps bytes >= 0x80 from being passed to isspace()
	 * as negative values, which is undefined. */
	while (isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
/* Return a pointer to the first whitespace character in s, or to the
 * terminating NUL if s contains no whitespace. */
static char *skip_non_whitespace(const char *s)
{
	/* Cast: isspace() is undefined for negative char values */
	while (*s && !isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
/* malloc() wrapper: returns sz bytes of uninitialized memory, or
 * terminates the process with "out of memory" if malloc() returns NULL. */
static void *xmalloc(unsigned sz)
{
	void *p = malloc(sz);

	if (!p)
		error_and_die("out of memory");
	return p;
}
/* Like xmalloc(), but the returned memory is zero-filled. */
static void *xzalloc(unsigned sz)
{
	void *p = xmalloc(sz);

	memset(p, 0, sz);
	return p;
}
/* realloc() wrapper: resizes p to size bytes and returns the (possibly
 * moved) block, or terminates the process with "out of memory"
 * if realloc() returns NULL. */
static void *xrealloc(void *p, unsigned size)
{
	p = realloc(p, size);
	if (!p)
		error_and_die("out of memory");
	return p;
}
/* strdup() wrapper: returns a heap-allocated copy of str, or terminates
 * the process with "out of memory" if the copy cannot be allocated. */
static const char *xstrdup(const char *str)
{
	const char *p = strdup(str);

	if (!p)
		error_and_die("out of memory");
	return p;
}
503 static const char srv_name[3][7] = {
512 smallint srv_enable[3];
513 smallint check_files[3];
518 /* We try to closely mimic glibc nscd */
519 .logfile = NULL, /* default is to not have a log file */
521 .srv_enable = { 0, 0, 0 },
522 .check_files = { 1, 1, 1 },
523 .pttl = { 3600, 3600, 3600 },
524 .nttl = { 20, 60, 20 },
525 /* huh, what is the default cache size in glibc nscd? */
526 .size = { 256 * 8 / 3, 256 * 8 / 3, 256 * 8 / 3 },
529 static const char default_conffile[] = "/etc/nscd.conf";
530 static const char *self_exe_points_to = "/proc/self/exe";
534 ** Clients, workers machinery
537 /* Header common to all requests */
538 #define USER_REQ_STRUCT \
539 uint32_t version; /* Version number of the daemon interface */ \
540 uint32_t type; /* Service requested */ \
541 uint32_t key_len; /* Key length */
543 typedef struct user_req_header {
549 MAX_USER_REQ_SIZE = 1024,
550 USER_HDR_SIZE = sizeof(user_req_header),
551 /* DNS queries time out after 20 seconds,
552 * we will allow for a bit more */
553 WORKER_TIMEOUT_SEC = 30,
554 CLIENT_TIMEOUT_MS = 100,
555 SMALL_POLL_TIMEOUT_MS = 200,
558 typedef struct user_req {
560 struct { /* as came from client */
563 struct { /* when stored in cache, overlaps .version */
565 /* (timestamp24 * 256) == timestamp in ms */
566 unsigned timestamp24:24;
569 char reqbuf[MAX_USER_REQ_SIZE - USER_HDR_SIZE];
572 /* Compile-time check for correct size */
573 struct BUG_wrong_user_req_size {
574 char BUG_wrong_user_req_size[sizeof(user_req) == MAX_USER_REQ_SIZE ? 1 : -1];
586 SHUTDOWN, /* Shut the server down */
587 GETSTAT, /* Get the server statistic */
588 INVALIDATE, /* Invalidate one special cache */
600 static const char *const typestr[] = {
601 "GETPWBYNAME", /* done */
602 "GETPWBYUID", /* done */
603 "GETGRBYNAME", /* done */
604 "GETGRBYGID", /* done */
605 "GETHOSTBYNAME", /* done */
606 "GETHOSTBYNAMEv6", /* done */
607 "GETHOSTBYADDR", /* done */
608 "GETHOSTBYADDRv6", /* done */
609 "SHUTDOWN", /* done */
610 "GETSTAT", /* info? */
611 "INVALIDATE", /* done */
612 /* won't do: nscd passes a name of shmem segment
613 * which client can map and "see" the db */
615 "GETFDGR", /* won't do */
616 "GETFDHST", /* won't do */
618 "INITGROUPS", /* done */
619 "GETSERVBYNAME", /* prio 3 (no caching?) */
620 "GETSERVBYPORT", /* prio 3 (no caching?) */
621 "GETFDSERV" /* won't do */
624 extern const char *const typestr[];
626 static const smallint type_to_srv[] = {
627 [GETPWBYNAME ] = SRV_PASSWD,
628 [GETPWBYUID ] = SRV_PASSWD,
629 [GETGRBYNAME ] = SRV_GROUP,
630 [GETGRBYGID ] = SRV_GROUP,
631 [GETHOSTBYNAME ] = SRV_HOSTS,
632 [GETHOSTBYNAMEv6 ] = SRV_HOSTS,
633 [GETHOSTBYADDR ] = SRV_HOSTS,
634 [GETHOSTBYADDRv6 ] = SRV_HOSTS,
635 [GETAI ] = SRV_HOSTS,
636 [INITGROUPS ] = SRV_GROUP,
639 static int unsupported_ureq_type(unsigned type)
641 if (type == GETAI) return 0;
642 if (type == INITGROUPS) return 0;
643 if (type == GETSTAT) return 1;
644 if (type > INVALIDATE) return 1;
649 typedef struct client_info {
650 /* if client_fd != 0, we are waiting for the reply from worker
651 * on pfd[i].fd, and client_fd is saved client's fd
652 * (we need to put it back into pfd[i].fd later) */
654 unsigned bytecnt; /* bytes read from client */
655 unsigned bufidx; /* buffer# in global client_buf[] */
657 unsigned respos; /* response */
658 user_req *resptr; /* response */
659 user_req **cache_pp; /* cache entry address */
660 user_req *ureq; /* request (points to client_buf[x]) */
663 static unsigned g_now_ms;
664 static int min_closed = INT_MAX;
665 static int cnt_closed = 0;
666 static int num_clients = 2; /* two listening sockets are "clients" too */
668 /* We read up to max_reqnum requests in parallel */
669 static unsigned max_reqnum = 14;
671 /* To be allocated at init to become client_buf[max_reqnum][MAX_USER_REQ_SIZE].
672 * Note: it is a pointer to [MAX_USER_REQ_SIZE] arrays,
673 * not [MAX_USER_REQ_SIZE] array of pointers.
675 static char (*client_buf)[MAX_USER_REQ_SIZE];
676 static char *busy_cbuf;
677 static struct pollfd *pfd;
678 static client_info *cinfo;
680 /* Request, response and cache data structures:
682 * cache[] (defined later):
683 * cacheline_t cache[cache_size] array, or in other words,
684 * user_req* cache[cache_size][8] array.
685 * Every client request is hashed, hash value determines which cache[x]
686 * will have the response stored in one of its 8 elements.
687 * Cache entries have this format: request, then padding to 32 bits,
689 * Addresses in cache[x][y] may be NULL or:
690 * (&client_buf[z]) & 1: the cache miss is in progress ("future entry"):
691 * "the data is not in the cache (yet), wait for it to appear"
692 * (&client_buf[z]) & 3: the cache miss is in progress and other clients
693 * also want the same data ("shared future entry")
694 * else (non-NULL but low two bits are 0): cached data in malloc'ed block
696 * Each of these is a [max_reqnum] sized array:
697 * pfd[i] - given to poll() to wait for requests and replies.
698 * .fd: first two pfd[i]: listening Unix domain sockets, else
699 * .fd: open fd to a client, for reading client's request, or
700 * .fd: open fd to a worker, to send request and get response back
701 * cinfo[i] - auxiliary client data for pfd[i]
702 * .client_fd: open fd to a client, in case we already had read its
703 * request and got a cache miss, and created a worker or
704 * wait for another client's worker.
705 * Otherwise, it's 0 and client's fd is in pfd[i].fd
706 * .bufidx: index in client_buf[] we store client's request in
707 * .ureq: = client_buf[bufidx]
708 * .bytecnt: size of the request
709 * .started_ms: used to time out unresponsive clients
710 * .resptr: initially NULL. Later, same as cache[x][y] pointer to a cached
711 * response, or (a rare case) a "fake cache" entry:
712 * all cache[hash(request)][0..7] blocks were found busy,
713 * the result won't be cached.
714 * .respos: "write-out to client" offset
715 * .cache_pp: initially NULL. Later, &cache[x][y] where the response is,
716 * or will be stored. Remains NULL if "fake cache" entry is in use
718 * When a client has received its reply (or otherwise closed (timeout etc)),
719 * corresponding pfd[i] and cinfo[i] are removed by shifting [i+1], [i+2] etc
720 * elements down, so that both arrays never have free holes.
721 * [num_clients] is always the first free element.
723 * Each of these also is a [max_reqnum] sized array, but indexes
724 * do not correspond directly to pfd[i] and cinfo[i]:
725 * client_buf[n][MAX_USER_REQ_SIZE] - buffers we read client requests into
726 * busy_cbuf[n] - bool flags marking busy client_buf[]
728 /* Possible reductions:
729 * fd, bufidx - uint8_t
730 * started_ms -> uint16_t started_s
731 * ureq - eliminate (derivable from bufidx?)
734 /* Are special bits 0? is it a true cached entry? */
735 #define CACHED_ENTRY(p) ( ((long)(p) & 3) == 0 )
736 /* Are special bits 11? is it a shared future cache entry? */
737 #define CACHE_SHARED(p) ( ((long)(p) & 3) == 3 )
738 /* Return a ptr with special bits cleared (used for accessing data) */
739 #define CACHE_PTR(p) ( (void*) ((long)(p) & ~(long)3) )
740 /* Return a ptr with special bits set to x1: make future cache entry ptr */
741 #define MAKE_FUTURE_PTR(p) ( (void*) ((long)(p) | 1) )
742 /* Modify ptr, set special bits to 11: shared future cache entry */
743 #define MARK_PTR_SHARED(pp) ( *(long*)(pp) |= 3 )
745 static inline unsigned ureq_size(const user_req *ureq)
747 return sizeof(user_req_header) + ureq->key_len;
750 static unsigned cache_age(const user_req *ureq)
752 if (!CACHED_ENTRY(ureq))
754 return (uint32_t) (g_now_ms - (ureq->timestamp24 << 8));
757 static void set_cache_timestamp(user_req *ureq)
759 ureq->timestamp24 = g_now_ms >> 8;
762 static int alloc_buf_no(void)
767 next_buf = (next_buf + 1) % max_reqnum;
768 if (!busy_cbuf[cur]) {
772 } while (next_buf != n);
773 error_and_die("no free bufs?!");
776 static inline void *bufno2buf(int i)
778 return client_buf[i];
781 static void free_refcounted_ureq(user_req **ureqp);
783 static void close_client(unsigned i)
785 log(L_DEBUG, "closing client %u (fd %u,%u)", i, pfd[i].fd, cinfo[i].client_fd);
786 /* Paranoia. We had nasty bugs where client was closed twice. */
791 if (cinfo[i].client_fd && cinfo[i].client_fd != pfd[i].fd)
792 close(cinfo[i].client_fd);
793 pfd[i].fd = 0; /* flag as unused (coalescing needs this) */
794 busy_cbuf[cinfo[i].bufidx] = 0;
796 if (cinfo[i].cache_pp == NULL) {
797 user_req *resptr = cinfo[i].resptr;
799 log(L_DEBUG, "client %u: freeing fake cache entry %p", i, resptr);
803 /* Most of the time, it is not freed here,
804 * only refcounted--. Freeing happens
805 * if it was deleted from cache[] but retained
808 free_refcounted_ureq(&cinfo[i].resptr);
818 ** nscd API <-> C API conversion
821 typedef struct response_header {
822 uint32_t version_or_size;
827 typedef struct initgr_response_header {
828 uint32_t version_or_size;
831 /* code assumes gid_t == int32, let's check that */
832 int32_t gid[sizeof(gid_t) == sizeof(int32_t) ? 0 : -1];
833 /* char user_str[as_needed]; */
834 } initgr_response_header;
836 static initgr_response_header *obtain_initgroups(const char *username)
838 struct initgr_response_header *resp;
840 enum { MAGIC_OFFSET = sizeof(*resp) / sizeof(int32_t) };
844 pw = getpwnam(username);
847 resp->version_or_size = sizeof(*resp);
853 /* getgrouplist may be very expensive, it's much better to allocate
854 * a bit more than to run getgrouplist twice */
858 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
859 resp = xrealloc(resp, sz);
860 } while (getgrouplist(username, pw->pw_gid, (gid_t*) &resp->gid, &ngroups) == -1);
861 log(L_DEBUG, "ngroups=%d", ngroups);
863 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
864 /* resp = xrealloc(resp, sz); - why bother */
865 resp->version_or_size = sz;
867 resp->ngrps = ngroups;
872 typedef struct pw_response_header {
873 uint32_t version_or_size;
876 int32_t pw_passwd_len;
879 int32_t pw_gecos_len;
881 int32_t pw_shell_len;
882 /* char pw_name[pw_name_len]; */
883 /* char pw_passwd[pw_passwd_len]; */
884 /* char pw_gecos[pw_gecos_len]; */
885 /* char pw_dir[pw_dir_len]; */
886 /* char pw_shell[pw_shell_len]; */
887 } pw_response_header;
889 static pw_response_header *marshal_passwd(struct passwd *pw)
892 pw_response_header *resp;
893 unsigned pw_name_len;
894 unsigned pw_passwd_len;
895 unsigned pw_gecos_len;
897 unsigned pw_shell_len;
898 unsigned sz = sizeof(*resp);
900 sz += (pw_name_len = strsize(pw->pw_name));
901 sz += (pw_passwd_len = strsize(pw->pw_passwd));
902 sz += (pw_gecos_len = strsize(pw->pw_gecos));
903 sz += (pw_dir_len = strsize(pw->pw_dir));
904 sz += (pw_shell_len = strsize(pw->pw_shell));
907 resp->version_or_size = sz;
913 resp->pw_name_len = pw_name_len;
914 resp->pw_passwd_len = pw_passwd_len;
915 resp->pw_uid = pw->pw_uid;
916 resp->pw_gid = pw->pw_gid;
917 resp->pw_gecos_len = pw_gecos_len;
918 resp->pw_dir_len = pw_dir_len;
919 resp->pw_shell_len = pw_shell_len;
920 p = (char*)(resp + 1);
921 strcpy(p, pw->pw_name); p += pw_name_len;
922 strcpy(p, pw->pw_passwd); p += pw_passwd_len;
923 strcpy(p, pw->pw_gecos); p += pw_gecos_len;
924 strcpy(p, pw->pw_dir); p += pw_dir_len;
925 strcpy(p, pw->pw_shell); p += pw_shell_len;
926 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
931 typedef struct gr_response_header {
932 uint32_t version_or_size;
934 int32_t gr_name_len; /* strlen(gr->gr_name) + 1; */
935 int32_t gr_passwd_len; /* strlen(gr->gr_passwd) + 1; */
936 int32_t gr_gid; /* gr->gr_gid */
937 int32_t gr_mem_cnt; /* while (gr->gr_mem[gr_mem_cnt]) ++gr_mem_cnt; */
938 /* int32_t gr_mem_len[gr_mem_cnt]; */
939 /* char gr_name[gr_name_len]; */
940 /* char gr_passwd[gr_passwd_len]; */
941 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
942 /* char gr_gid_str[as_needed]; - huh? */
943 /* char orig_key[as_needed]; - needed?? I don't do this ATM... */
945 glibc adds gr_gid_str, but client doesn't get/use it:
946 writev(3, [{"\2\0\0\0\2\0\0\0\5\0\0\0", 12}, {"root\0", 5}], 2) = 17
947 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP, revents=POLLIN}], 1, 5000) = 1
948 read(3, "\2\0\0\0\1\0\0\0\10\0\0\0\4\0\0\0\0\0\0\0\0\0\0\0", 24) = 24
949 readv(3, [{"", 0}, {"root\0\0\0\0\0\0\0\0", 12}], 2) = 12
952 } gr_response_header;
954 static gr_response_header *marshal_group(struct group *gr)
957 gr_response_header *resp;
959 unsigned sz = sizeof(*resp);
961 sz += strsize(gr->gr_name);
962 sz += strsize(gr->gr_passwd);
964 while (gr->gr_mem[gr_mem_cnt]) {
965 sz += strsize(gr->gr_mem[gr_mem_cnt]);
968 /* for int32_t gr_mem_len[gr_mem_cnt]; */
969 sz += gr_mem_cnt * sizeof(int32_t);
972 resp->version_or_size = sz;
978 resp->gr_name_len = strsize(gr->gr_name);
979 resp->gr_passwd_len = strsize(gr->gr_passwd);
980 resp->gr_gid = gr->gr_gid;
981 resp->gr_mem_cnt = gr_mem_cnt;
982 p = (char*)(resp + 1);
983 /* int32_t gr_mem_len[gr_mem_cnt]; */
985 while (gr->gr_mem[gr_mem_cnt]) {
986 *(uint32_t*)p = strsize(gr->gr_mem[gr_mem_cnt]);
990 /* char gr_name[gr_name_len]; */
991 strcpy(p, gr->gr_name);
992 p += strsize(gr->gr_name);
993 /* char gr_passwd[gr_passwd_len]; */
994 strcpy(p, gr->gr_passwd);
995 p += strsize(gr->gr_passwd);
996 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
998 while (gr->gr_mem[gr_mem_cnt]) {
999 strcpy(p, gr->gr_mem[gr_mem_cnt]);
1000 p += strsize(gr->gr_mem[gr_mem_cnt]);
1003 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
1008 typedef struct hst_response_header {
1009 uint32_t version_or_size;
1012 int32_t h_aliases_cnt;
1013 int32_t h_addrtype; /* AF_INET or AF_INET6 */
1014 int32_t h_length; /* 4 or 16 */
1015 int32_t h_addr_list_cnt;
1017 /* char h_name[h_name_len]; - we pad it to 4 bytes */
1018 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1019 /* char h_addr_list[h_addr_list_cnt][h_length]; - every one is the same size [h_length] (4 or 16) */
1020 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1021 } hst_response_header;
1023 static hst_response_header *marshal_hostent(struct hostent *h)
1026 hst_response_header *resp;
1027 unsigned h_name_len;
1028 unsigned h_aliases_cnt;
1029 unsigned h_addr_list_cnt;
1030 unsigned sz = sizeof(*resp);
1032 /* char h_name[h_name_len] */
1033 sz += h_name_len = strsize_aligned4(h->h_name);
1034 h_addr_list_cnt = 0;
1035 while (h->h_addr_list[h_addr_list_cnt]) {
1038 /* char h_addr_list[h_addr_list_cnt][h_length] */
1039 sz += h_addr_list_cnt * h->h_length;
1041 while (h->h_aliases[h_aliases_cnt]) {
1042 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]] */
1043 sz += strsize(h->h_aliases[h_aliases_cnt]);
1046 /* uint32_t h_aliases_len[h_aliases_cnt] */
1047 sz += h_aliases_cnt * 4;
1050 resp->version_or_size = sz;
1052 /*resp->found = 0;*/
1053 resp->error = HOST_NOT_FOUND;
1057 resp->h_name_len = h_name_len;
1058 resp->h_aliases_cnt = h_aliases_cnt;
1059 resp->h_addrtype = h->h_addrtype;
1060 resp->h_length = h->h_length;
1061 resp->h_addr_list_cnt = h_addr_list_cnt;
1062 /*resp->error = 0;*/
1063 p = (char*)(resp + 1);
1064 /* char h_name[h_name_len]; */
1065 strcpy(p, h->h_name);
1067 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1069 while (h->h_aliases[h_aliases_cnt]) {
1070 *(uint32_t*)p = strsize(h->h_aliases[h_aliases_cnt]);
1074 /* char h_addr_list[h_addr_list_cnt][h_length]; */
1075 h_addr_list_cnt = 0;
1076 while (h->h_addr_list[h_addr_list_cnt]) {
1077 memcpy(p, h->h_addr_list[h_addr_list_cnt], h->h_length);
1081 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1083 while (h->h_aliases[h_aliases_cnt]) {
1084 strcpy(p, h->h_aliases[h_aliases_cnt]);
1085 p += strsize(h->h_aliases[h_aliases_cnt]);
1088 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
1093 /* Reply to addrinfo query */
1094 typedef struct ai_response_header {
1095 uint32_t version_or_size;
1101 /* char ai_addr[naddrs][4 or 16]; - addrslen bytes in total */
1102 /* char ai_family[naddrs]; - AF_INET[6] each (determines ai_addr[i] length) */
1103 /* char ai_canonname[canonlen]; */
1104 } ai_response_header;
1106 static ai_response_header *obtain_addrinfo(const char *hostname)
1108 struct addrinfo hints;
1109 struct addrinfo *ai;
1110 struct addrinfo *ap;
1111 ai_response_header *resp;
1115 unsigned naddrs = 0;
1116 unsigned addrslen = 0;
1117 unsigned canonlen = 0;
1119 memset(&hints, 0, sizeof(hints));
1120 hints.ai_flags = AI_CANONNAME;
1121 /* kills dups (one for each possible SOCK_xxx) */
1122 /* this matches glibc behavior */
1123 hints.ai_socktype = SOCK_STREAM;
1124 ai = NULL; /* on failure getaddrinfo may leave it as-is */
1125 err = getaddrinfo(hostname, NULL, &hints, &ai);
1129 if (ai->ai_canonname)
1130 sz += canonlen = strsize(ai->ai_canonname);
1134 addrslen += (ap->ai_family == AF_INET ? 4 : 16);
1137 sz += naddrs + addrslen;
1140 resp->version_or_size = sz;
1143 /*resp->found = 0;*/
1147 resp->naddrs = naddrs;
1148 resp->addrslen = addrslen;
1149 resp->canonlen = canonlen;
1150 p = (char*)(resp + 1);
1151 family = p + addrslen;
1154 /* char ai_family[naddrs]; */
1155 *family++ = ap->ai_family;
1156 /* char ai_addr[naddrs][4 or 16]; */
1157 if (ap->ai_family == AF_INET) {
1158 memcpy(p, &(((struct sockaddr_in*)(ap->ai_addr))->sin_addr), 4);
1161 memcpy(p, &(((struct sockaddr_in6*)(ap->ai_addr))->sin6_addr), 16);
1166 /* char ai_canonname[canonlen]; */
1167 if (ai->ai_canonname)
1168 strcpy(family, ai->ai_canonname);
1169 log(L_DEBUG, "sz:%u realsz:%u", sz, family + strsize(ai->ai_canonname) - (char*)resp);
1171 /* glibc 2.3.6 segfaults here sometimes
1172 * (maybe my mistake, fixed by "ai = NULL;" above).
1173 * Since we are in worker and are going to exit anyway, why bother? */
1174 /*freeaddrinfo(ai);*/
1183 /* one 8-element "cacheline" */
1184 typedef user_req *cacheline_t[8];
1185 static unsigned cache_size;
1186 /* Points to cacheline_t cache[cache_size] array, or in other words,
1187 * points to user_req* cache[cache_size][8] array */
1188 static cacheline_t *cache;
1189 static unsigned cached_cnt;
1190 static unsigned cache_access_cnt = 1; /* prevent division by zero */
1191 static unsigned cache_hit_cnt = 1;
1192 static unsigned last_age_time;
1193 static unsigned aging_interval_ms;
1194 static unsigned min_aging_interval_ms;
1196 static response_header *ureq_response(user_req *ureq)
1198 /* Skip query part, find answer part
1199 * (answer is 32-bit aligned) */
1200 return (void*) ((char*)ureq + ((ureq_size(ureq) + 3) & ~3));
1203 /* This hash is supposed to be good for short textual data */
/* Bernstein-style hash (hash = hash * 33 ^ byte), good for short text.
 *   p    - data to hash
 *   sz   - number of bytes at p
 *   hash - initial value (seed)
 * Returns the updated hash; with sz == 0 the seed is returned unchanged
 * and p is not read. */
static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash)
{
	const uint8_t *key = p;

	while (sz--)
		hash = (32 * hash + hash) ^ *key++;
	return hash;
}
1213 static void free_refcounted_ureq(user_req **ureqp)
1215 user_req *ureq = *ureqp;
1217 /* (when exactly can this happen?) */
1221 if (!CACHED_ENTRY(ureq))
1224 if (ureq->refcount) {
1226 log(L_DEBUG2, "--%p.refcount=%u", ureq, ureq->refcount);
1228 log(L_DEBUG2, "%p.refcount=0, freeing", ureq);
1234 static user_req **lookup_in_cache(user_req *ureq)
1236 user_req **cacheline;
1240 unsigned ureq_sz = ureq_size(ureq);
1242 /* prevent overflow and division by zero */
1244 if ((int)cache_access_cnt < 0) {
1245 cache_access_cnt = (cache_access_cnt >> 1) + 1;
1246 cache_hit_cnt = (cache_hit_cnt >> 1) + 1;
1249 hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type);
1250 log(L_DEBUG2, "hash:%08x", hash);
1251 hash = hash % cache_size;
1252 cacheline = cache[hash];
1255 for (i = 0; i < 8; i++) {
1256 user_req *cached = CACHE_PTR(cacheline[i]);
1258 if (free_cache == -1)
1262 /* ureq->version is always 2 and is reused in cache
1263 * for other purposes, we need to skip it here */
1264 if (memcmp(&ureq->type, &cached->type, ureq_sz - offsetof(user_req, type)) == 0) {
1265 log(L_DEBUG, "found in cache[%u][%u]", hash, i);
1267 return &cacheline[i];
1271 if (free_cache >= 0) {
1274 log(L_DEBUG, "not found, using free cache[%u][%u]", hash, i);
1278 unsigned oldest_idx = 0;
1279 unsigned oldest_age = 0;
1280 for (i = 0; i < 8; i++) {
1281 unsigned age = cache_age(cacheline[i]);
1282 if (age > oldest_age) {
1287 if (oldest_age == 0) {
1288 /* All entries in cacheline are "future" entries!
1289 * This is very unlikely, but we must still work correctly.
1290 * We call this "fake cache entry".
1291 * The data will be "cached" only for the duration
1292 * of this client's request lifetime.
1294 log(L_DEBUG, "not found, and cache[%u] is full: using fake cache entry", hash);
1298 log(L_DEBUG, "not found, freeing and reusing cache[%u][%u] (age %u)", hash, i, oldest_age);
1299 free_refcounted_ureq(&cacheline[i]);
1302 cacheline[i] = MAKE_FUTURE_PTR(ureq);
1303 return &cacheline[i];
/* Walk the cache and release entries.
 * free_all: callers pass 1 to flush (see check_files() and the
 *           INVALIDATE handling in handle_client()); presumably this
 *           frees matching entries regardless of age - the test on
 *           free_all inside the loop is not visible here, confirm.
 * srv:      service index whose entries are considered, or -1 for
 *           all services.
 * When srv == -1 or free_all is set, aging_interval_ms is first reset
 * to INT_MAX; in the srv == -1 case it is then lowered whenever it
 * exceeds the ttl value of an entry that is kept.
 * Finally logs the number of freed entries (sv - cached_cnt) and the
 * number remaining.
 */
1306 static void age_cache(unsigned free_all, int srv)
1308 user_req **cp = *cache;
/* Remember the entry count so that the number freed can be reported */
1310 unsigned sv = cached_cnt;
1312 log(L_DEBUG, "aging cache, srv:%d, free_all:%u", srv, free_all);
1313 if (srv == -1 || free_all)
1314 aging_interval_ms = INT_MAX;
1317 user_req *cached = *cp;
1318 if (CACHED_ENTRY(cached) && cached != NULL) {
/* Map the request type to the service it belongs to */
1319 int csrv = type_to_srv[cached->type];
1320 if (srv == -1 || srv == csrv) {
1323 free_refcounted_ureq(cp);
1325 unsigned age = cache_age(cached);
1326 response_header *resp = ureq_response(cached);
/* Positive answers use config.pttl, negative ones config.nttl */
1327 unsigned ttl = (resp->found ? config.pttl : config.nttl)[csrv];
1329 log(L_DEBUG2, "freeing: age %u positive %d ttl %u", age, resp->found, ttl);
1331 free_refcounted_ureq(cp);
1332 } else if (srv == -1) {
1334 if (aging_interval_ms > ttl)
1335 aging_interval_ms = ttl;
1342 log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt);
1343 log(L_DEBUG2, "aging interval now %u ms", aging_interval_ms);
/* Parent side of a lookup: creates two pipes (to_child, to_parent) and
 * starts a child process (the error message names vfork). The child
 * moves to_child.rd to fd 0 and to_parent.wr to fd 1, formats the
 * current debug value into param and re-executes this program with
 * argv[0] set to "worker_nscd". The parent writes the whole request
 * (ureq_size(ureq) bytes) to the child, marks to_parent.rd
 * close-on-exec and returns it.
 */
1351 /* Spawns a worker and feeds it with user query on stdin */
1352 /* Returns stdout fd of the worker, in blocking mode */
1353 static int create_and_feed_worker(user_req *ureq)
1359 } to_child, to_parent;
1361 /* NB: these pipe fds are in blocking mode and non-CLOEXECed */
1362 xpipe(&to_child.rd);
1363 xpipe(&to_parent.rd);
1366 if (pid < 0) /* error */
1367 perror_and_die("vfork");
1368 if (!pid) { /* child */
/* Room for the decimal form of an int-sized value plus terminator */
1369 char param[sizeof(int)*3 + 2];
1373 close(to_parent.rd);
1374 xmovefd(to_child.rd, 0);
1375 xmovefd(to_parent.wr, 1);
1376 sprintf(param, "%u", debug);
1377 argv[0] = (char*) "worker_nscd";
1380 /* Re-exec ourself, cleaning up all allocated memory.
1381 * fds in parent are marked CLOEXEC and will be closed too
1383 /* Try link name first: it's better to have comm field
1384 * of "nscd" than "exe" (pgrep reported to fail to find us
1385 * by name when comm field contains "exe") */
/* argv+2 is passed as the environment pointer. If the first execve
 * returns (i.e. it failed), fall back to /proc/self/exe. */
1386 execve(self_exe_points_to, argv, argv+2);
1387 xexecve("/proc/self/exe", argv, argv+2);
/* Parent: the write end of to_parent belongs to the child only */
1392 close(to_parent.wr);
1393 /* We do not expect child to block for any noticeably long time,
1394 * and also we expect write to be one-piece one:
1395 * ureq size is <= 1k and pipes are guaranteed to accept
1396 * at least PIPE_BUF at once */
1397 xsafe_write(to_child.wr, ureq, ureq_size(ureq));
1400 close_on_exec(to_parent.rd);
1401 return to_parent.rd;
/* Request currently being handled by the worker process: set in worker()
 * and read by worker_signal_handler() when composing its log message. */
1404 static user_req *worker_ureq;
/* Returns a printable form of a request key, for log messages.
 * type: request type; buf: the request's key bytes.
 * For GETHOSTBYADDR, buf is read as a 32-bit IPv4 address and formatted
 * with inet_ntoa(), which returns a pointer to a static buffer.
 * NOTE(review): the results for GETHOSTBYADDRv6 and for all other types
 * are not visible in this excerpt.
 */
1407 static const char *req_str(unsigned type, const char *buf)
1409 if (type == GETHOSTBYADDR) {
1411 in.s_addr = *((uint32_t*)buf);
1412 return inet_ntoa(in);
1414 if (type == GETHOSTBYADDRv6) {
/* NOTE(review): plain (non-static) declaration of the same name; presumably
 * the alternative arm of a preprocessor conditional - confirm. */
1420 const char *req_str(unsigned type, const char *buf);
/* Signal handler installed by worker() for fatal signals.
 * Logs a "worker:%d got sig:%d while handling req ..." message built
 * from worker_ureq. Two variants of the message are visible: one with
 * the type name and the key text (via req_str()), and one with only the
 * numeric type and key_len.
 * NOTE(review): what selects between the variants, and what happens
 * after logging, is not visible here.
 */
1423 static void worker_signal_handler(int sig)
1426 log(L_INFO, "worker:%d got sig:%d while handling req "
1427 "type:%d(%s) key_len:%d '%s'",
1429 worker_ureq->type, typestr[worker_ureq->type],
1430 worker_ureq->key_len,
1431 req_str(worker_ureq->type, worker_ureq->reqbuf)
1434 log(L_INFO, "worker:%d got sig:%d while handling req "
1435 "type:%d key_len:%d",
1437 worker_ureq->type, worker_ureq->key_len);
/* Body of the worker process (declared NORETURN).
 * param: debug level as a decimal string, parsed with atoi().
 * Arms an alarm when WORKER_TIMEOUT_SEC is nonzero, reads one user_req
 * from fd 0, installs worker_signal_handler for the fatal signals, runs
 * the lookup selected by ureq.type and writes the marshalled response
 * to fd 1: only the first 8 bytes when the response says "not found",
 * otherwise version_or_size bytes.
 */
1442 static void worker(const char *param) NORETURN;
1443 static void worker(const char *param)
1448 debug = atoi(param);
1450 worker_ureq = &ureq; /* for signal handler */
1452 /* Make sure we won't hang, but rather die */
1453 if (WORKER_TIMEOUT_SEC)
1454 alarm(WORKER_TIMEOUT_SEC);
1456 /* NB: fds 0, 1 are in blocking mode */
1458 /* We block here (for a short time) */
1459 /* Due to ureq size < PIPE_BUF read is atomic */
1460 /* No error or size checking: we trust the parent */
1461 safe_read(0, &ureq, sizeof(ureq));
/* Report (via worker_signal_handler) if a lookup crashes the worker */
1463 signal(SIGSEGV, worker_signal_handler);
1464 signal(SIGBUS, worker_signal_handler);
1465 signal(SIGILL, worker_signal_handler);
1466 signal(SIGFPE, worker_signal_handler);
1467 signal(SIGABRT, worker_signal_handler);
1469 signal(SIGSTKFLT, worker_signal_handler);
/* Dispatch on request type; each branch yields a marshalled response */
1472 if (ureq.type == GETHOSTBYNAME
1473 || ureq.type == GETHOSTBYNAMEv6
1475 resp = marshal_hostent(
1476 ureq.type == GETHOSTBYNAME
1477 ? gethostbyname(ureq.reqbuf)
1478 : gethostbyname2(ureq.reqbuf, AF_INET6)
1480 } else if (ureq.type == GETHOSTBYADDR
1481 || ureq.type == GETHOSTBYADDRv6
/* For by-address lookups the key is the raw address, key_len its length */
1483 resp = marshal_hostent(gethostbyaddr(ureq.reqbuf, ureq.key_len,
1484 (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6)
1486 } else if (ureq.type == GETPWBYNAME) {
1488 log(L_DEBUG2, "getpwnam('%s')", ureq.reqbuf);
1489 pw = getpwnam(ureq.reqbuf);
1490 log(L_DEBUG2, "getpwnam result:%p", pw);
1491 resp = marshal_passwd(pw);
1492 } else if (ureq.type == GETPWBYUID) {
/* The uid arrives as a decimal string in reqbuf */
1493 resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf)));
1494 } else if (ureq.type == GETGRBYNAME) {
1495 struct group *gr = getgrnam(ureq.reqbuf);
1496 resp = marshal_group(gr);
1497 } else if (ureq.type == GETGRBYGID) {
/* The gid arrives as a decimal string in reqbuf */
1498 struct group *gr = getgrgid(atoi(ureq.reqbuf));
1499 resp = marshal_group(gr);
1500 } else if (ureq.type == GETAI) {
1501 resp = obtain_addrinfo(ureq.reqbuf);
1502 } else /*if (ureq.type == INITGROUPS)*/ {
1503 resp = obtain_initgroups(ureq.reqbuf);
1506 if (!((response_header*)resp)->found) {
1507 /* Parent knows about this special case */
1508 xfull_write(1, resp, 8);
1510 /* Responses can be big (getgrnam("guest") on a big user db),
1511 * we cannot rely on them being atomic. full_write loops
1513 xfull_write(1, resp, ((response_header*)resp)->version_or_size);
/* Per-service lists of files that check_files() watches; a detected
 * change makes check_files() flush that service's cache entries.
 * Each element is a sequence of NUL-terminated names, which
 * check_files() walks by advancing strlen(file) + 1 at a time.
 */
1523 static const char *const checked_filenames[] = {
1524 /* Note: compiler adds another \0 byte at the end of each array element,
1525 * so there are TWO \0's there.
1527 [SRV_PASSWD] = "/etc/passwd\0" "/etc/passwd.cache\0" "/etc/shadow\0",
1528 [SRV_GROUP] = "/etc/group\0" "/etc/group.cache\0",
1529 [SRV_HOSTS] = "/etc/hosts\0" "/etc/hosts.cache\0" "/etc/resolv.conf\0" "/etc/nsswitch.conf\0",
1530 /* ("foo.cache" files are maintained by libnss-cache) */
/* Last value computed by check_files() for each service */
1533 static long checked_status[ARRAY_SIZE(checked_filenames)];
/* Detect changes in the files listed for service srv.
 * Each file is stat()ed (errors are ignored; tsb is zeroed first, so a
 * failed stat contributes zeros), and st_mtime, st_size and st_ino are
 * folded into v with XOR. If v differs from checked_status[srv], the
 * new value is stored, the change is logged and the service's cache
 * entries are flushed with age_cache(free_all=1, srv).
 */
1535 static void check_files(int srv)
1538 const char *file = checked_filenames[srv];
1543 memset(&tsb, 0, sizeof(tsb));
1544 stat(file, &tsb); /* ignore errors */
1545 /* Comparing struct stat's was giving false positives.
1546 * Extracting only those fields which are interesting:
1548 v ^= (long)tsb.st_mtime ^ (long)tsb.st_size ^ (long)tsb.st_ino; /* ^ (long)tsb.st_dev ? */
/* Step over this name and its terminating NUL to reach the next one */
1549 file += strlen(file) + 1;
1552 if (v != checked_status[srv]) {
1553 checked_status[srv] = v;
1554 log(L_INFO, "detected change in files related to service %d", srv);
1555 age_cache(/*free_all:*/ 1, srv);
/* Validate and dispatch the request accumulated in cinfo[i].ureq.
 * Checks, in order: protocol version, key_len bound, whether the whole
 * request has arrived (returns 0 to read more), excess bytes, supported
 * type, and whether the service is enabled. SHUTDOWN and INVALIDATE are
 * honoured only when SO_PEERCRED reports uid 0. Other requests (except
 * by-address host lookups) must carry a NUL-terminated key.
 * Then the cache is consulted:
 *  - cached entry: its refcount is bumped, the slot switches to
 *    POLLOUT and 1 is returned;
 *  - lookup already in progress for another client: the cache slot is
 *    marked shared and the client's fd is saved in cinfo[i].client_fd;
 *  - otherwise a worker is started and pfd[i].fd becomes the worker's
 *    output fd.
 * NOTE(review): the return statements of the rejection paths are not
 * visible in this excerpt.
 */
1559 /* Returns 1 if we immediately have the answer */
1560 static int handle_client(int i)
1563 user_req *ureq = cinfo[i].ureq;
1564 user_req **cache_pp;
1565 user_req *ureq_and_resp;
1568 log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'",
1569 ureq->version, ureq->type,
1570 ureq->type < ARRAY_SIZE(typestr) ? typestr[ureq->type] : "?",
1571 ureq->key_len, req_str(ureq->type, ureq->reqbuf));
1574 if (ureq->version != NSCD_VERSION) {
1575 log(L_INFO, "wrong version");
1579 if (ureq->key_len > sizeof(ureq->reqbuf)) {
1580 log(L_INFO, "bogus key_len %u - ignoring", ureq->key_len);
/* A complete request is exactly USER_HDR_SIZE + key_len bytes */
1584 if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) {
1585 log(L_INFO, "read %d, need to read %d",
1586 cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len);
1587 return 0; /* more to read */
1589 if (cinfo[i].bytecnt > USER_HDR_SIZE + ureq->key_len) {
1590 log(L_INFO, "read overflow: %u > %u",
1591 (int)cinfo[i].bytecnt, (int)(USER_HDR_SIZE + ureq->key_len));
1595 if (unsupported_ureq_type(ureq->type)) {
1596 /* We don't know this request. Just close the connection.
1597 * (glibc client interprets this like "not supported by this nscd")
1598 * Happens very often, thus DEBUG, not INFO */
1599 log(L_DEBUG, "unsupported query, dropping");
1603 srv = type_to_srv[ureq->type];
1604 if (!config.srv_enable[srv]) {
1605 log(L_INFO, "service %d is disabled, dropping", srv);
1610 hex_dump(cinfo[i].ureq, cinfo[i].bytecnt);
/* Administrative requests: only accepted from a peer with uid 0 */
1612 if (ureq->type == SHUTDOWN
1613 || ureq->type == INVALIDATE
1616 struct ucred caller;
1617 socklen_t optlen = sizeof(caller);
1618 if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) {
1619 log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno));
1623 if (caller.uid != 0) {
1624 log(L_INFO, "special request from non-root - ignoring");
1629 if (ureq->type == SHUTDOWN) {
1630 log(L_INFO, "got shutdown request, exiting");
/* An INVALIDATE key must be non-empty and end with a NUL byte */
1633 if (!ureq->key_len || ureq->reqbuf[ureq->key_len - 1]) {
1634 log(L_INFO, "malformed invalidate request - ignoring");
1638 log(L_INFO, "got invalidate request, flushing cache");
1639 /* Frees entire cache. TODO: replace -1 with service (in ureq->reqbuf) */
1640 age_cache(/*free_all:*/ 1, -1);
/* By-address keys are binary; every other key must be NUL-terminated */
1645 if (ureq->type != GETHOSTBYADDR
1646 && ureq->type != GETHOSTBYADDRv6
1648 if (ureq->key_len && ureq->reqbuf[ureq->key_len - 1] != '\0') {
1649 log(L_INFO, "badly terminated buffer");
1655 if (config.check_files[srv]) {
1659 cache_pp = lookup_in_cache(ureq);
1660 ureq_and_resp = cache_pp ? *cache_pp : NULL;
1662 if (ureq_and_resp) {
1663 if (CACHED_ENTRY(ureq_and_resp)) {
1664 /* Found. Save ptr to response into cinfo and return */
1665 response_header *resp = ureq_response(ureq_and_resp);
1666 unsigned sz = resp->version_or_size;
1668 log(L_DEBUG, "sz:%u", sz);
1670 /* cache shouldn't free it under us! */
1671 if (++ureq_and_resp->refcount == 0) {
1672 error_and_die("BUG! ++%p.refcount rolled over to 0, exiting", ureq_and_resp);
1674 log(L_DEBUG2, "++%p.refcount=%u", ureq_and_resp, ureq_and_resp->refcount);
1675 pfd[i].events = POLLOUT; /* we want to write out */
1676 cinfo[i].resptr = ureq_and_resp;
1677 /*cinfo[i].respos = 0; - already is */
1678 /* prevent future matches with anything */
1679 cinfo[i].cache_pp = (void *) 1;
1680 return 1; /* "ready to write data out to client" */
1683 /* Not found. Remember a pointer where it will appear */
1684 cinfo[i].cache_pp = cache_pp;
1686 /* If it does not point to our own ureq buffer... */
1687 if (CACHE_PTR(ureq_and_resp) != ureq) {
1688 /* We are not the first client who wants this */
1689 log(L_DEBUG, "another request is in progress (%p), waiting for its result", ureq_and_resp);
1690 MARK_PTR_SHARED(cache_pp); /* "please inform us when it's ready" */
1691 /* "we do not wait for client anymore" */
1692 cinfo[i].client_fd = pfd[i].fd;
1693 /* Don't wait on fd. Worker response will unblock us */
1697 /* else: lookup_in_cache inserted (ureq & 1) into *cache_pp:
1698 * we are the first client to miss on this ureq. */
1701 /* Start worker thread */
1702 log(L_DEBUG, "stored %p in cache, starting a worker", ureq_and_resp);
1703 /* Now we will wait on worker's fd, not client's! */
1704 cinfo[i].client_fd = pfd[i].fd;
1705 pfd[i].fd = create_and_feed_worker(ureq);
/* Switch client slot i into the "write the response out" state.
 * i:      index into pfd[] / cinfo[].
 * cached: request+response block to send.
 * If cinfo[i].client_fd is set, the client's fd is restored into
 * pfd[i].fd and client_fd is cleared. The slot then polls for POLLOUT,
 * resptr is pointed at cached, and the client's timeout is restarted
 * from g_now_ms.
 */
1709 static void prepare_for_writeout(unsigned i, user_req *cached)
1711 log(L_DEBUG2, "client %u: data is ready at %p", i, cached);
1713 if (cinfo[i].client_fd) {
1714 pfd[i].fd = cinfo[i].client_fd;
1715 cinfo[i].client_fd = 0; /* "we don't wait for worker reply" */
1717 pfd[i].events = POLLOUT;
1719 /* Writeout position etc */
1720 cinfo[i].resptr = cached;
1721 /*cinfo[i].respos = 0; - already is */
1722 /* if worker took some time to get info (e.g. DNS query),
1723 * prevent client timeout from triggering at once */
1724 cinfo[i].started_ms = g_now_ms;
/* Steps visible below: read the fixed-size header from the worker's fd,
 * sanity-check the advertised size (at least the bytes already read, at
 * most 0x0fffffff), allocate one zeroed block for request + response,
 * copy the request and the header into it, read the remainder when the
 * entry was found, timestamp the block, hand it to every client waiting
 * on the same cache slot (prepare_for_writeout) and store the reference
 * count. Finally aging_interval_ms is reset to min_aging_interval_ms.
 */
1727 /* Worker seems to be ready to write the response.
1728 * When we return, response is fully read and stored in cache,
1729 * worker's fd is closed, pfd[i] and cinfo[i] are updated. */
1730 static void handle_worker_response(int i)
1732 struct { /* struct response_header + small body */
1733 uint32_t version_or_size;
1739 response_header *resp;
1740 unsigned sz, resp_sz;
1741 unsigned ureq_sz_aligned;
1744 ureq = cinfo[i].ureq;
/* Offset of the response within a combined request+response block */
1745 ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq;
1747 sz = full_read(pfd[i].fd, &sz_and_found, sizeof(sz_and_found));
1749 /* worker was killed? */
1750 log(L_DEBUG, "worker gave short reply:%u < 8", sz);
1754 resp_sz = sz_and_found.version_or_size;
1755 if (resp_sz < sz || resp_sz > 0x0fffffff) { /* 256 mb */
1756 error("BUG: bad size from worker:%u", resp_sz);
1760 /* Create new block of cached info */
1761 cached = xzalloc(ureq_sz_aligned + resp_sz);
1762 log(L_DEBUG2, "xzalloc(%u):%p sz:%u resp_sz:%u found:%u",
1763 ureq_sz_aligned + resp_sz, cached,
1765 (int)sz_and_found.found
1767 resp = (void*) (((char*) cached) + ureq_sz_aligned);
1768 memcpy(cached, ureq, ureq_size(ureq));
1769 memcpy(resp, &sz_and_found, sz);
1770 if (sz_and_found.found && resp_sz > sz) {
1771 /* We need to read data only if it's found
1772 * (otherwise worker sends only 8 bytes).
1774 * Replies can be big (getgrnam("guest") on a big user db),
1775 * we cannot rely on them being atomic. However, we know
1776 * that worker _always_ gives reply in one full_write(),
1777 * so we loop and read it all
1778 * (looping is implemented inside full_read())
1780 if (full_read(pfd[i].fd, ((char*) resp) + sz, resp_sz - sz) != resp_sz - sz) {
1781 /* worker was killed? */
1782 log(L_DEBUG, "worker gave short reply, free(%p)", cached);
1789 set_cache_timestamp(cached);
1790 hex_dump(resp, resp_sz);
1797 user_req **cache_pp = cinfo[i].cache_pp;
1798 if (cache_pp != NULL) { /* if not a fake entry */
1801 if (CACHE_SHARED(ureq)) {
1802 /* Other clients wait for this response too,
1803 * wake them (and us) up and set refcount = no_of_clients */
/* Slots 0 and 1 are the listening sockets, so clients start at 2 */
1806 for (j = 2; j < num_clients; j++) {
1807 if (cinfo[j].cache_pp == cache_pp) {
1808 /* This client uses the same cache entry */
1810 /* prevent future matches with anything */
1811 cinfo[j].cache_pp = (void *) 1;
1812 prepare_for_writeout(j, cached);
1817 /* prevent future matches with anything */
1818 cinfo[i].cache_pp = (void *) 1;
1822 prepare_for_writeout(i, cached);
1824 /* cache shouldn't free it under us! */
1826 cached->refcount = ref;
1827 log(L_DEBUG2, "%p.refcount=%u", cached, ref);
1829 aging_interval_ms = min_aging_interval_ms;
/* The server's poll() loop.
 * pfd[0] and pfd[1] are the listening sockets; pfd[2..num_clients-1]
 * are client connections (or, while a lookup is pending, the worker's
 * output fd). Each iteration picks a poll timeout, polls, accepts new
 * clients, and then for every ready slot either writes out a response,
 * reads a worker's reply, handles a hangup, or reads (more of) the
 * client's request. Afterwards the cache is aged once
 * aging_interval_ms has elapsed, timed-out clients are dropped, and
 * closed slots are compacted out of pfd[] and cinfo[].
 */
1832 static void main_loop(void)
1834 /* 1/2 of smallest negative TTL */
1835 min_aging_interval_ms = config.nttl[0];
1836 if (min_aging_interval_ms > config.nttl[1]) min_aging_interval_ms = config.nttl[1];
1837 if (min_aging_interval_ms > config.nttl[2]) min_aging_interval_ms = config.nttl[2];
/* "| 1" keeps the interval nonzero */
1838 min_aging_interval_ms = (min_aging_interval_ms / 2) | 1;
1839 aging_interval_ms = min_aging_interval_ms;
/* Choose the poll timeout: short by default, infinite when there are no
 * clients and nothing is cached, the aging interval while there is
 * still room for more clients */
1845 r = SMALL_POLL_TIMEOUT_MS;
1846 if (num_clients <= 2 && !cached_cnt)
1847 r = -1; /* infinite */
1848 else if (num_clients < max_reqnum)
1849 r = aging_interval_ms;
1850 #if 0 /* Debug: leak detector */
1852 static unsigned long long cnt;
1853 static unsigned long low_malloc = -1L;
1854 static unsigned long low_sbrk = -1L;
1855 void *p = malloc(540); /* should not be too small */
1858 if ((unsigned long)p < low_malloc)
1859 low_malloc = (unsigned long)p;
1860 if ((unsigned long)s < low_sbrk)
1861 low_sbrk = (unsigned long)s;
1862 log(L_INFO, "poll %llu (%d ms). clients:%u cached:%u %u/%u malloc:%p (%lu), sbrk:%p (%lu)",
1863 cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt,
1864 p, (unsigned long)p - low_malloc,
1865 s, (unsigned long)s - low_sbrk);
1869 log(L_DEBUG, "poll %d ms. clients:%u cached:%u hit ratio:%u/%u",
1870 r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt);
1873 r = poll(pfd, num_clients, r);
1874 log(L_DEBUG2, "poll returns %d", r);
1877 perror_and_die("poll");
1881 /* Everything between polls never sleeps.
1882 * There is no blocking I/O (except when we talk to worker thread
1883 * which is guaranteed to not block us for long) */
1885 g_now_ms = monotonic_ms();
1887 goto skip_fd_checks;
/* Slots 0 and 1: the two listening sockets */
1889 for (i = 0; i < 2; i++) {
1891 if (!pfd[i].revents)
1893 /* pfd[i].revents = 0; - not needed */
1894 cfd = accept(pfd[i].fd, NULL, NULL);
1896 /* odd... poll() says we can accept but accept failed? */
1897 log(L_DEBUG2, "accept failed with %s", strerror(errno));
1902 /* x[num_clients] is next free element, taking it */
1903 log(L_DEBUG2, "new client %d, fd %d", num_clients, cfd);
1904 pfd[num_clients].fd = cfd;
1905 pfd[num_clients].events = POLLIN;
1906 /* this will make us do read() in next for() loop: */
1907 pfd[num_clients].revents = POLLIN;
1908 memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients]));
1909 /* cinfo[num_clients].bytecnt = 0; - done */
1910 cinfo[num_clients].started_ms = g_now_ms;
1911 cinfo[num_clients].bufidx = alloc_buf_no();
1912 cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx);
1914 if (num_clients >= max_reqnum) {
1915 /* stop accepting new connects for now */
1916 pfd[0].events = pfd[0].revents = 0;
1917 pfd[1].events = pfd[1].revents = 0;
/* i continues from 2: the remaining slots are clients */
1920 for (; i < num_clients; i++) {
1921 if (!pfd[i].revents)
1923 log(L_DEBUG2, "pfd[%d].revents:0x%x", i, pfd[i].revents);
1924 /* pfd[i].revents = 0; - not needed */
1926 /* "Write out result" case */
1927 if (pfd[i].revents == POLLOUT) {
1928 response_header *resp;
1930 if (!cinfo[i].resptr) {
1931 /* corner case: worker gave bad response earlier */
1936 resp = ureq_response(cinfo[i].resptr);
/* version_or_size holds the size while cached; the version is put in
 * its place for the duration of the write and the size restored after */
1937 resp_sz = resp->version_or_size;
1938 resp->version_or_size = NSCD_VERSION;
1940 r = safe_write(pfd[i].fd, ((char*) resp) + cinfo[i].respos, resp_sz - cinfo[i].respos);
1941 resp->version_or_size = resp_sz;
1943 if (r < 0 && errno == EAGAIN) {
1944 log(L_DEBUG, "client %u: EAGAIN on write", i);
1947 if (r <= 0) { /* client isn't there anymore */
1948 log(L_DEBUG, "client %u is gone (write returned:%d err:%s)",
1949 i, r, errno ? strerror(errno) : "-");
1953 cinfo[i].respos += r;
1954 if (cinfo[i].respos >= resp_sz) {
1955 /* We wrote everything */
1956 /* No point in trying to get next request, it won't come.
1957 * glibc 2.4 client closes its end after each request,
1958 * without testing for EOF from server. strace:
1960 * read(3, "www.google.com\0\0", 16) = 16
1963 log(L_DEBUG, "client %u: sent answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1967 log(L_DEBUG, "client %u: sent partial answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1971 /* "Read reply from worker" case. Worker may be
1972 * already dead, revents may contain other bits too
1974 if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) {
1975 log(L_DEBUG, "reading response for client %u", i);
1976 handle_worker_response(i);
1977 /* We can immediately try to write a response
1982 /* POLLHUP means pfd[i].fd is closed by peer.
1983 * POLLHUP+POLLOUT[+POLLERR] is seen when we are writing out
1984 * and see that pfd[i].fd is closed by peer (for example,
1985 * it happens when client's result buffer is too small
1986 * to receive a huge GETGRBYNAME result).
1988 if ((pfd[i].revents & ~(POLLOUT+POLLERR)) == POLLHUP) {
1989 int is_client = (cinfo[i].client_fd == 0 || cinfo[i].client_fd == pfd[i].fd);
1990 log(L_INFO, "%s %u disappeared (got POLLHUP on fd %d)",
1991 is_client ? "client" : "worker",
1998 /* Read worker output anyway, error handling
1999 * in that function deals with short read.
2000 * Simply closing client is wrong: it leaks
2001 * shared future entries. */
2002 handle_worker_response(i);
2007 /* All strange and unexpected cases */
2008 if (pfd[i].revents != POLLIN) {
2009 /* Not just "can read", but some other bits are there */
2010 log(L_INFO, "client %u revents is strange:0x%x", i, pfd[i].revents);
2015 /* "Read request from client" case */
2016 r = safe_read(pfd[i].fd, (char*)(cinfo[i].ureq) + cinfo[i].bytecnt, MAX_USER_REQ_SIZE - cinfo[i].bytecnt);
2018 log(L_DEBUG2, "error reading from client: %s", strerror(errno));
2019 if (errno == EAGAIN)
2025 log(L_INFO, "premature EOF from client, dropping");
2029 cinfo[i].bytecnt += r;
/* Only try to interpret the request once a full header has arrived */
2030 if (cinfo[i].bytecnt >= sizeof(user_req_header)) {
2031 if (handle_client(i)) {
2032 /* Response is found in cache! */
2036 } /* for each client[2..num_clients-1] */
/* Periodic aging of the whole cache */
2040 if ((g_now_ms - last_age_time) >= aging_interval_ms) {
2041 last_age_time = g_now_ms;
2042 age_cache(/*free_all:*/ 0, -1);
2045 /* Close timed out client connections */
2046 for (i = 2; i < num_clients; i++) {
2047 if (pfd[i].fd != 0 /* not closed yet? */
2048 && cinfo[i].client_fd == 0 /* do we still wait for client, not worker? */
2049 && (g_now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS
2051 log(L_INFO, "timed out waiting for client %u (%u ms), dropping",
2052 i, (unsigned)(g_now_ms - cinfo[i].started_ms));
2060 /* We closed at least one client, coalesce pfd[], cinfo[] */
2061 if (min_closed + cnt_closed >= num_clients) {
2062 /* clients [min_closed..num_clients-1] are all closed */
2063 /* log(L_DEBUG, "taking shortcut"); - almost always happens */
2068 while (i < num_clients) {
2072 if (++i >= num_clients)
2076 cinfo[j++] = cinfo[i++];
2080 num_clients -= cnt_closed;
2081 log(L_DEBUG, "removing %d closed clients. clients:%d", cnt_closed, num_clients);
2082 min_closed = INT_MAX;
2084 /* start accepting new connects */
2085 pfd[0].events = POLLIN;
2086 pfd[1].events = POLLIN;
/* Filesystem locations used by the daemon: the pid file, the runtime
 * directory, and the two listening socket paths (main() opens both
 * NSCD_SOCKET and NSCD_SOCKET_OLD). */
2095 #define NSCD_PIDFILE "/var/run/nscd/nscd.pid"
2096 #define NSCD_DIR "/var/run/nscd"
2097 #define NSCD_SOCKET "/var/run/nscd/socket"
2098 #define NSCD_SOCKET_OLD "/var/run/.nscd_socket"
/* NOTE(review): presumably records that the pid file has been written;
 * its uses are not visible in this excerpt - confirm. */
2100 static smallint wrote_pidfile;
/* Handler registered by main() for SIGINT and SIGTERM: removes the pid
 * file and both socket paths.
 * NOTE(review): any condition guarding the pid file removal, and how the
 * process terminates afterwards, is not visible here. */
2102 static void cleanup_on_signal(int sig)
2105 unlink(NSCD_PIDFILE);
2106 unlink(NSCD_SOCKET_OLD);
2107 unlink(NSCD_SOCKET);
/* Writes the current process id, followed by a newline, to NSCD_PIDFILE
 * (opened with mode "w", which truncates any previous content). */
2111 static void write_pid(void)
2113 FILE *pid = fopen(NSCD_PIDFILE, "w");
2116 fprintf(pid, "%d\n", getpid());
/* name: filesystem path to bind the socket to.
 * Creates an AF_UNIX stream socket, marks it close-on-exec, binds it to
 * name, sets the path's mode to 0666 and starts listening with a
 * backlog of (max_reqnum/8) | 1. Every failure is fatal.
 * NOTE(review): name is copied into sun_path with strcpy(); the callers
 * visible in this file pass only the short NSCD_SOCKET* constants.
 */
2121 /* Open a listening nscd server socket */
2122 static int open_socket(const char *name)
2124 struct sockaddr_un sun;
2125 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
2127 perror_and_die("cannot create unix domain socket");
2129 close_on_exec(sock);
2130 sun.sun_family = AF_UNIX;
2131 strcpy(sun.sun_path, name);
2133 if (bind(sock, (struct sockaddr *) &sun, sizeof(sun)) < 0)
2134 perror_and_die("bind(%s)", name);
/* Clients of any user must be able to connect */
2135 if (chmod(name, 0666) < 0)
2136 perror_and_die("chmod(%s)", name);
2137 if (listen(sock, (max_reqnum/8) | 1) < 0)
2138 perror_and_die("listen");
/* Long option table passed to getopt_long() in main(); each entry maps a
 * long name to the short option character given in its last field. */
2142 static const struct option longopt[] = {
2143 /* name, has_arg, int *flag, int val */
2144 { "debug" , no_argument , NULL, 'd' },
2145 { "config-file", required_argument, NULL, 'f' },
2146 { "invalidate" , required_argument, NULL, 'i' },
2147 { "shutdown" , no_argument , NULL, 'K' },
2148 { "nthreads" , required_argument, NULL, 't' },
2149 { "version" , no_argument , NULL, 'V' },
2150 { "help" , no_argument , NULL, '?' },
2151 { "usage" , no_argument , NULL, '?' },
2152 /* just exit(0). TODO: "test" connect? */
2153 { "statistic" , no_argument , NULL, 'g' },
2154 { "secure" , no_argument , NULL, 'S' }, /* ? */
/* Help texts printed by print_help_and_die() next to the entries of
 * longopt[]; presumably kept in the same order as that table (the
 * pointer increments are not visible here - confirm). */
2158 static const char *const help[] = {
2159 "Do not daemonize; log to stderr (-dd: more verbosity)",
2160 "File to read configuration from",
2162 "Shut the server down",
2163 "Serve N requests in parallel",
/* Prints the usage banner, then one line per option (short character,
 * long name, help text), starting at the first entries of longopt[] and
 * help[] and stopping when an entry whose val is '?' is reached.
 * NOTE(review): the loop head, the pointer increments and the exit call
 * are not visible in this excerpt.
 */
2167 static void print_help_and_die(void)
2169 const struct option *opt = longopt;
2170 const char *const *h = help;
2172 puts("Usage: nscd [OPTION...]\n"
2173 "Name Service Cache Daemon\n");
/* %-11s pads the long option name so that the help texts line up */
2175 printf("\t" "-%c,--%-11s %s\n", opt->val, opt->name, *h);
2178 } while (opt->val != '?');
/* Recognizes a service name at s: "passwd", "group" or "hosts".
 * The comparison is an exact strcmp(), so s must be the name alone
 * (parse_conffile() NUL-terminates the word before calling).
 * srv presumably receives the matching service index - the assignments
 * are not visible here, confirm.
 * Returns skip_whitespace() applied past the name.
 * NOTE(review): the outcome for an unknown name is not visible here;
 * parse_conffile() reports "unknown service name" after the call.
 */
2182 static char *skip_service(int *srv, const char *s)
2184 if (strcmp("passwd", s) == 0) {
2187 } else if (strcmp("group", s) == 0) {
2189 } else if (strcmp("hosts", s) == 0) {
2194 return skip_whitespace(s + 6);
/* Handler for configuration directives that are accepted but ignored:
 * both arguments are unused and the body is empty. */
2197 static void handle_null(const char *str, int srv) {}
/* "logfile" directive: stores a heap copy of the value in config.logfile.
 * srv is unused. */
2199 static void handle_logfile(const char *str, int srv)
2201 config.logfile = xstrdup(str);
/* "debug-level" directive: ORs the low 8 bits of the parsed number into
 * the global debug mask (existing bits are kept). srv is unused. */
2204 static void handle_debuglvl(const char *str, int srv)
2206 debug |= (uint8_t) getnum(str);
/* "threads" / "max-threads" directives: parses the value with getnum().
 * NOTE(review): how n is applied is not visible in this excerpt. */
2209 static void handle_threads(const char *str, int srv)
2211 unsigned n = getnum(str);
/* "server-user" directive: stores a heap copy of the user name in
 * config.user. srv is unused. */
2216 static void handle_user(const char *str, int srv)
2218 config.user = xstrdup(str);
/* "enable-cache" directive: enables service srv when the value starts
 * with 'y' or 'Y' (OR-ing 0x20 folds the upper-case letter onto the
 * lower-case one), disables it otherwise. */
2221 static void handle_enable(const char *str, int srv)
2223 config.srv_enable[srv] = ((str[0] | 0x20) == 'y');
/* "positive-time-to-live" directive: stores the parsed number for
 * service srv. main() later multiplies the stored value by 1000. */
2226 static void handle_pttl(const char *str, int srv)
2228 config.pttl[srv] = getnum(str);
/* "negative-time-to-live" directive: stores the parsed number for
 * service srv. main() later multiplies the stored value by 1000. */
2231 static void handle_nttl(const char *str, int srv)
2233 config.nttl[srv] = getnum(str);
/* "suggested-size" directive: stores the parsed number for service srv.
 * main() derives cache_size from the sum of the three services' sizes. */
2236 static void handle_size(const char *str, int srv)
2238 config.size[srv] = getnum(str);
/* "check-files" directive: turns file change checking for service srv on
 * when the value starts with 'y' or 'Y', off otherwise. */
2241 static void handle_chfiles(const char *str, int srv)
2243 config.check_files[srv] = ((str[0] | 0x20) == 'y');
/* Parse the configuration file.
 * conffile: path to read; failure to open it is fatal unless conffile
 *           is the default_conffile pointer.
 * warn:     NOTE(review): its use is not visible in this excerpt;
 *           main() passes (env_U == NULL) to avoid duplicate warnings.
 * Lines are read with fgets(); a line that fills the buffer without a
 * newline is a fatal "line is too long" error. The first word of each
 * line is matched against the table below. The first character of each
 * table string is a flag and is skipped when comparing (word->str + 1):
 * 'S' entries take a service name (parsed by skip_service()) before the
 * value, "_" entries take the value directly. The matching handler is
 * called with the value and the service index. Unknown service names
 * and unknown directives are reported and ignored.
 */
2246 static void parse_conffile(const char *conffile, int warn)
2248 static const struct confword {
2250 void (*handler)(const char *, int);
2252 { "_" "logfile" , handle_logfile },
2253 { "_" "debug-level" , handle_debuglvl },
2254 { "_" "threads" , handle_threads },
2255 { "_" "max-threads" , handle_threads },
2256 { "_" "server-user" , handle_user },
2257 /* ignore: any user can stat */
2258 { "_" "stat-user" , handle_null },
2259 { "_" "paranoia" , handle_null }, /* ? */
2260 /* ignore: design goal is to never crash/hang */
2261 { "_" "reload-count" , handle_null },
2262 { "_" "restart-interval" , handle_null },
2263 { "S" "enable-cache" , handle_enable },
2264 { "S" "positive-time-to-live" , handle_pttl },
2265 { "S" "negative-time-to-live" , handle_nttl },
2266 { "S" "suggested-size" , handle_size },
2267 { "S" "check-files" , handle_chfiles },
2268 { "S" "persistent" , handle_null }, /* ? */
2269 { "S" "shared" , handle_null }, /* ? */
2270 { "S" "auto-propagate" , handle_null }, /* ? */
2275 FILE *file = fopen(conffile, "r");
/* Pointer comparison: only an explicitly given file is required to exist */
2279 if (conffile != default_conffile)
2280 perror_and_die("cannot open %s", conffile);
2284 while (fgets(buf, sizeof(buf), file) != NULL) {
2285 const struct confword *word;
2287 int len = strlen(buf);
2291 if (buf[len-1] != '\n') {
2292 if (len >= sizeof(buf) - 1)
2293 error_and_die("%s:%d: line is too long", conffile, lineno);
2294 len++; /* last line, not terminated by '\n' */
/* Locate the start of a trailing comment, if any */
2298 p = strchr(buf, '#');
2302 p = skip_whitespace(buf);
/* Terminate the directive word in place */
2305 *skip_non_whitespace(p) = '\0';
2308 if (strcmp(word->str + 1, p) == 0) {
/* Step past the word and its terminator to the next token */
2310 p = skip_whitespace(p + strlen(p) + 1);
2311 *skip_non_whitespace(p) = '\0';
2312 if (word->str[0] == 'S') {
2313 char *p2 = skip_service(&srv, p);
2316 error("%s:%d: ignoring unknown service name '%s'", conffile, lineno, p);
2320 *skip_non_whitespace(p) = '\0';
2322 word->handler(p, srv);
2328 error("%s:%d: ignoring unknown directive '%s'", conffile, lineno, p);
/* Parses str, a comma-separated list of hexadecimal numbers, into a
 * newly allocated gid_t array (strtoul() with base 16). A conversion
 * error, or a character other than ',' or end of string after a number,
 * is fatal ("internal error").
 * NOTE(review): what is stored through sizep is not visible here; main()
 * uses it as the element count, with the uid in cell 0 and the groups
 * from cell 1 on.
 */
2337 /* "XX,XX[,XX]..." -> gid_t[] */
2338 static gid_t* env_U_to_uid_and_gids(const char *str, int *sizep)
2349 ug = xmalloc(ng * sizeof(ug[0]));
/* strtoul() advances sp to the first character after the number */
2357 *gp++ = strtoul(sp, (char**)&sp, 16);
2358 if (errno || (*sp != ',' && *sp != '\0'))
2359 error_and_die("internal error");
/* Builds the string that main() hands to putenv() for the given user:
 * looks the user up with getpwnam() (fatal if unknown), obtains the
 * group list with getgrouplist(), retrying once with the buffer resized
 * to the count it reported, and formats the ids as comma-separated hex.
 * Cell 0 of ug is reserved for the uid; the groups start at ug[1].
 * NOTE(review): the initial value of ng and the final assembly of the
 * string are not visible in this excerpt.
 */
2370 static char* user_to_env_U(const char *user)
2377 pw = getpwnam(user);
2379 perror_and_die("user '%s' is not known", user);
2382 /* 0th cell will be used for uid */
2383 ug = xmalloc((1 + ng) * sizeof(ug[0]));
/* On "buffer too small", getgrouplist() updates ng to the needed count */
2384 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) {
2385 ug = xrealloc(ug, (1 + ng) * sizeof(ug[0]));
2386 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0)
2387 perror_and_die("can't get groups of user '%s'", user);
2392 /* How much do we need for "-Uxx,xx[,xx]..." string? */
2393 ug_str = xmalloc((sizeof(unsigned long)+1)*2 * ng + 3);
2399 sp += sprintf(sp, "%lx,", (unsigned long)(*gp++));
/* Resolves the /proc/self/exe symlink and stores a heap copy of its
 * target in self_exe_points_to; a readlink() failure is fatal.
 * NOTE(review): readlink() does not NUL-terminate its output; the store
 * of the terminator into buf[sz] is not visible in this excerpt -
 * confirm it exists before the xstrdup() call.
 */
2408 /* not static - don't inline me, compiler! */
2409 void readlink_self_exe(void);
2410 void readlink_self_exe(void)
2412 char buf[PATH_MAX + 1];
/* One byte of buf is left spare (sizeof(buf) - 1 is passed as the size) */
2413 ssize_t sz = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
2415 perror_and_die("readlink %s failed", "/proc/self/exe");
2417 self_exe_points_to = xstrdup(buf);
/* Client-side helper used by main() for the shutdown and invalidate
 * command-line operations (declared NORETURN).
 * arg: NULL to send a SHUTDOWN request; otherwise the text sent as the
 *      key of an INVALIDATE request, including its terminating NUL.
 * Connects to NSCD_SOCKET, writes the request and prints a confirmation.
 * NOTE(review): the size of reqdata.arg is not visible here, so the
 * bound on strlen(arg) cannot be checked from this excerpt.
 */
2421 static void special_op(const char *arg) NORETURN;
2422 static void special_op(const char *arg)
2424 static const user_req_header ureq = { NSCD_VERSION, SHUTDOWN, 0 };
2426 struct sockaddr_un addr;
2429 sock = socket(PF_UNIX, SOCK_STREAM, 0);
2431 error_and_die("cannot create AF_UNIX socket");
2433 addr.sun_family = AF_UNIX;
2434 strcpy(addr.sun_path, NSCD_SOCKET);
2435 if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
2436 error_and_die("cannot connect to %s", NSCD_SOCKET);
2438 if (!arg) { /* shutdown */
2439 xfull_write(sock, &ureq, sizeof(ureq));
2440 printf("sent shutdown request, exiting\n");
2441 } else { /* invalidate */
/* key_len counts the terminating NUL as well */
2442 size_t arg_len = strlen(arg) + 1;
2444 user_req_header req;
2447 reqdata.req.version = NSCD_VERSION;
2448 reqdata.req.type = INVALIDATE;
2449 reqdata.req.key_len = arg_len;
2450 memcpy(reqdata.arg, arg, arg_len);
/* sizeof(ureq) is the header size: ureq is a user_req_header too */
2451 xfull_write(sock, &reqdata, arg_len + sizeof(ureq));
2452 printf("sent invalidate(%s) request, exiting\n", arg);
/* Callback that main() hands to __nss_disable_nscd(). No statements are
 * visible in its body in this excerpt, only the explanatory comment. */
2458 /* Callback for glibc-2.15 */
2460 static void do_nothing(size_t dbidx, struct traced_file *finfo)
2462 /* nscd from glibc-2.15 does something like this:
2463 if (!dbs[dbidx].enabled || !dbs[dbidx].check_file)
2465 add_file_to_watch_list(finfo->fname);
/* Declaration of a glibc-internal entry point that this file does not
 * define; main() calls it before anything else, passing do_nothing. */
2469 /* This internal glibc function is called to disable trying to contact nscd.
2470 * We _are_ nscd, so we need to do the lookups, and not recurse.
2471 * Until 2.14, this function was taking no parameters.
2472 * In 2.15, it takes a function pointer from hell.
2474 void __nss_disable_nscd(void (*hell)(size_t, struct traced_file*));
2477 int main(int argc, char **argv)
2482 const char *conffile;
2484 /* make sure we don't get recursive calls */
2485 __nss_disable_nscd(do_nothing);
2487 if (argv[0][0] == 'w') /* "worker_nscd" */
2493 /* Make sure stdio is not closed */
2494 n = xopen3("/dev/null", O_RDWR, 0);
2497 /* Close unexpected open file descriptors */
2498 n |= 0xff; /* start from at least fd# 255 */
2503 /* For idiotic kernels which disallow "exec /proc/self/exe" */
2504 readlink_self_exe();
2506 conffile = default_conffile;
2508 while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) {
2519 special_op(optarg); /* exits */
2521 /* shutdown server */
2522 special_op(NULL); /* exits */
2524 puts("unscd - nscd which does not hang, v."PROGRAM_VERSION);
2530 max_reqnum = getnum(optarg);
2536 print_help_and_die();
2539 /* Multiple -d can bump debug regardless of nscd.conf:
2540 * no -d or -d: 0, -dd: 1,
2541 * -ddd: 3, -dddd: 7, -ddddd: 15
2544 debug |= (((1U << opt_d_cnt) >> 1) - 1) & L_ALL;
2546 env_U = getenv("U");
2547 /* Avoid duplicate warnings if $U exists */
2548 parse_conffile(conffile, /* warn? */ (env_U == NULL));
2550 /* I have a user report of (broken?) ldap nss library
2551 * opening and never closing a socket to a ldap server,
2552 * even across fork() and exec(). This messes up
2553 * worker child's operations for the reporter.
2555 * This strengthens my belief that nscd _must not_ trust
2556 * nss libs to be written correctly.
2558 * Here, we need to jump through the hoops to guard against
2559 * such problems. If config file has server-user setting, we need
2560 * to setgroups + setuid. For that, we need to get uid and gid vector.
2561 * And that means possibly using buggy nss libs.
2562 * We will do it here, but then we will re-exec, passing uid+gids
2563 * in an environment variable.
2565 if (!env_U && config.user) {
2566 /* user_to_env_U() does getpwnam and getgrouplist */
2567 if (putenv(user_to_env_U(config.user)))
2568 error_and_die("out of memory");
2569 /* fds leaked by nss will be closed by execed copy */
2570 execv(self_exe_points_to, argv);
2571 xexecve("/proc/self/exe", argv, environ);
2574 /* Allocate dynamically sized stuff */
2575 max_reqnum += 2; /* account for 2 first "fake" clients */
2576 if (max_reqnum < 8) max_reqnum = 8; /* sanitize */
2577 /* Since refcount is a byte, can't serve more than 255-2 clients
2578 * at once. The rest will block in connect() */
2579 if (max_reqnum > 0xff) max_reqnum = 0xff;
2580 client_buf = xzalloc(max_reqnum * sizeof(client_buf[0]));
2581 busy_cbuf = xzalloc(max_reqnum * sizeof(busy_cbuf[0]));
2582 pfd = xzalloc(max_reqnum * sizeof(pfd[0]));
2583 cinfo = xzalloc(max_reqnum * sizeof(cinfo[0]));
2585 cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8;
2586 if (cache_size < 8) cache_size = 8; /* 8*8 = 64 entries min */
2587 if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */
2588 cache_size |= 1; /* force it to be odd */
2589 cache = xzalloc(cache_size * sizeof(cache[0]));
2591 /* Register cleanup hooks */
2592 signal(SIGINT, cleanup_on_signal);
2593 signal(SIGTERM, cleanup_on_signal);
2594 /* Don't die if a client closes a socket on us */
2595 signal(SIGPIPE, SIG_IGN);
2596 /* Avoid creating zombies */
2597 signal(SIGCHLD, SIG_IGN);
2599 /* Ensure workers don't have SIGALRM ignored */
2600 signal(SIGALRM, SIG_DFL);
2603 if (mkdir(NSCD_DIR, 0755) == 0) {
2604 /* prevent bad mode of NSCD_DIR if umask is e.g. 077 */
2605 chmod(NSCD_DIR, 0755);
2607 pfd[0].fd = open_socket(NSCD_SOCKET);
2608 pfd[1].fd = open_socket(NSCD_SOCKET_OLD);
2609 pfd[0].events = POLLIN;
2610 pfd[1].events = POLLIN;
2612 if (debug & D_DAEMON) {
2613 daemon(/*nochdir*/ 1, /*noclose*/ 0);
2614 if (config.logfile) {
2615 /* nochdir=1: relative paths still work as expected */
2616 xmovefd(xopen3(config.logfile, O_WRONLY|O_CREAT|O_TRUNC, 0666), 2);
2619 debug = 0; /* why bother? it's /dev/null'ed anyway */
2621 chdir("/"); /* compat */
2624 /* ignore job control signals */
2625 signal(SIGTTOU, SIG_IGN);
2626 signal(SIGTTIN, SIG_IGN);
2627 signal(SIGTSTP, SIG_IGN);
2630 log(L_ALL, "unscd v" PROGRAM_VERSION ", debug level 0x%x", debug & L_ALL);
2631 log(L_DEBUG, "max %u requests in parallel", max_reqnum - 2);
2632 log(L_DEBUG, "cache size %u x 8 entries", cache_size);
2636 gid_t *ug = env_U_to_uid_and_gids(env_U, &size);
2638 if (setgroups(size - 1, &ug[1]) || setgid(ug[1]))
2639 perror_and_die("cannot set groups for user '%s'", config.user);
2642 perror_and_die("cannot set uid to %u", (unsigned)(ug[0]));
2646 for (n = 0; n < 3; n++) {
2647 log(L_DEBUG, "%s cache enabled:%u pttl:%u nttl:%u",
2649 config.srv_enable[n],
2652 config.pttl[n] *= 1000;
2653 config.nttl[n] *= 1000;