1 /* This file is part of unscd, a complete nscd replacement.
2 * Copyright (C) 2007-2012 Denys Vlasenko. Licensed under the GPL version 2.
5 /* unscd is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
9 * unscd is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You can download the GNU General Public License from the GNU website
15 * at http://www.gnu.org/ or write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
21 gcc -Wall -Wunused-parameter -Os -o nscd nscd.c
23 gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common
28 nscd problems are not exactly unheard of. Over the years, there were
29 quite a few bugs in it. This leads people to invent babysitters
30 which restart crashed/hung nscd. This is ugly.
32 After looking at nscd source in glibc I arrived at the conclusion
33 that its design is contributing to this significantly. Even if nscd's
34 code is 100.00% perfect and bug-free, it can still suffer from bugs
35 in libraries it calls.
37 As designed, it's a multithreaded program which calls NSS libraries.
38 These libraries are not part of libc, they may be provided
39 by third-party projects (samba, ldap, you name it).
41 Thus nscd cannot be sure that libraries it calls do not have memory
42 or file descriptor leaks and other bugs.
44 Since nscd is a multithreaded program with a single shared cache,
45 any resource leak in any NSS library has cumulative effect.
46 Even if a NSS library leaks a file descriptor 0.01% of the time,
47 this will make nscd crash or hang after some time.
49 Of course bugs in NSS .so modules should be fixed, but meanwhile
50 I do want nscd which does not crash or lock up.
52 So I went ahead and wrote a replacement.
54 It is a single-threaded server process which offloads all NSS
55 lookups to worker children (not threads, but fully independent
56 processes). Cache hits are handled by parent. Only cache misses
57 start worker children. This design is immune against
58 resource leaks and hangs in NSS libraries.
60 It is also many times smaller.
62 Currently (v0.36) it emulates glibc nscd pretty closely
63 (handles same command line flags and config file), and is moderately tested.
65 Please note that as of 2008-08 it is not in wide use (yet?).
66 If you have trouble compiling it, see an incompatibility with
67 "standard" one or experience hangs/crashes, please report it to
68 vda.linux@googlemail.com
70 ***********************************************************************/
72 /* Make struct ucred appear in sys/socket.h */
74 /* For all good things */
91 #include <sys/socket.h>
93 #include <sys/types.h>
99 /* For inet_ntoa (for debug build only) */
100 #include <arpa/inet.h>
103 * 0.21 add SEGV reporting to worker
104 * 0.22 don't do freeaddrinfo() in GETAI worker, it's crashy
105 * 0.23 add parameter parsing
106 * 0.24 add conf file parsing, not using results yet
107 * 0.25 used some of conf file settings (not tested)
108 * 0.26 almost all conf file settings are wired up
109 * 0.27 a bit more of almost all conf file settings are wired up
110 * 0.28 optimized cache aging
111 * 0.29 implemented invalidate and shutdown options
112 * 0.30 fixed buglet (sizeof(ptr) != sizeof(array))
113 * 0.31 reduced client_info by one member
114 * 0.32 fix nttl/size defaults; simpler check for worker child in main()
115 * 0.33 tweak includes so that it builds on my new machine (64-bit userspace);
116 * do not die on unknown service name, just warn
117 * ("services" is a new service we don't support)
118 * 0.34 create /var/run/nscd/nscd.pid pidfile like glibc nscd 2.8 does;
119 * delay setuid'ing itself to server-user after log and pidfile are open
120 * 0.35 readlink /proc/self/exe and use result if execing /proc/self/exe fails
121 * 0.36 exercise extreme paranoia handling server-user option;
122 * a little bit more verbose logging:
123 * L_DEBUG2 log level added, use debug-level 7 to get it
124 * 0.37 users reported over-zealous "detected change in /etc/passwd",
125 * apparently stat() returns random garbage in unused padding
126 * on some systems. Made the check less paranoid.
127 * 0.38 log POLLHUP better
128 * 0.39 log answers to client better, log getpwnam in the worker,
129 * pass debug level value down to worker.
130 * 0.40 fix handling of shutdown and invalidate requests;
131 * fix bug with answer written in several pieces
132 * 0.40.1 set hints.ai_socktype = SOCK_STREAM in GETAI request
133 * 0.41 eliminate double caching of two near-simultaneous identical requests -
135 * 0.42 execute /proc/self/exe by link name first (better comm field)
136 * 0.43 fix off-by-one error in setgroups
137 * 0.44 make -d[ddd] bump up debug - easier to explain to users
138 * how to produce detailed log (no nscd.conf tweaking)
139 * 0.45 Fix out-of-bounds array access and log/pid file permissions -
140 * thanks to Sebastian Krahmer (krahmer AT suse.de)
141 * 0.46 fix a case when we forgot to remove a future entry on worker failure
142 * 0.47 fix nscd without -d to not bump debug level
143 * 0.48 fix for changes in __nss_disable_nscd API in glibc-2.15
144 * 0.49 minor tweaks to messages
145 * 0.50 add more files to watch for changes
146 * 0.51 fix a case where we forget to refcount-- the cached entry
147 * 0.52 make free_refcounted_ureq() tolerant to pointers to NULLs
148 * 0.53 fix INVALIDATE and SHUTDOWN requests being ignored
149 * 0.54 clang warning fix for "str" + OFFSET trick and variable struct field
151 #define PROGRAM_VERSION "0.54"
153 #define DEBUG_BUILD 1
160 #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
162 #define NORETURN __attribute__ ((__noreturn__))
165 #ifdef MY_CPU_HATES_CHARS
166 typedef int smallint;
168 typedef signed char smallint;
174 L_DEBUG = ((1 << 1) * DEBUG_BUILD),
175 L_DEBUG2 = ((1 << 2) * DEBUG_BUILD),
176 L_DUMP = ((1 << 3) * DEBUG_BUILD),
182 static smallint debug = D_DAEMON;
184 static void verror(const char *s, va_list p, const char *strerr)
187 int sz, rem, strerr_len;
191 if (debug & D_STAMP) {
192 gettimeofday(&tv, NULL);
193 sz = sprintf(msgbuf, "%02u:%02u:%02u.%05u ",
194 (unsigned)((tv.tv_sec / (60*60)) % 24),
195 (unsigned)((tv.tv_sec / 60) % 60),
196 (unsigned)(tv.tv_sec % 60),
197 (unsigned)(tv.tv_usec / 10));
199 rem = sizeof(msgbuf) - sz;
200 sz += vsnprintf(msgbuf + sz, rem, s, p);
201 rem = sizeof(msgbuf) - sz; /* can be negative after this! */
204 strerr_len = strlen(strerr);
205 if (rem >= strerr_len + 4) { /* ": STRERR\n\0" */
208 strcpy(msgbuf + sz, strerr);
217 fputs(msgbuf, stderr);
220 static void error(const char *msg, ...)
224 verror(msg, p, NULL);
228 static void error_and_die(const char *msg, ...) NORETURN;
229 static void error_and_die(const char *msg, ...)
233 verror(msg, p, NULL);
238 static void perror_and_die(const char *msg, ...) NORETURN;
239 static void perror_and_die(const char *msg, ...)
243 /* Guard against "<error message>: Success" */
244 verror(msg, p, errno ? strerror(errno) : NULL);
249 static void nscd_log(int mask, const char *msg, ...)
254 verror(msg, p, NULL);
259 #define log(lvl, ...) do { if (lvl) nscd_log(lvl, __VA_ARGS__); } while (0)
262 static void dump(const void *ptr, int len)
265 const unsigned char *buf;
268 if (!(debug & L_DUMP))
273 int chunk = ((len >= 16) ? 16 : len);
275 "%02x %02x %02x %02x %02x %02x %02x %02x "
276 "%02x %02x %02x %02x %02x %02x %02x %02x ";
277 fmt += (16-chunk) * 5;
279 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
280 buf[8], buf[9],buf[10],buf[11],buf[12],buf[13],buf[14],buf[15]
282 fprintf(stderr, "%*s", (16-chunk) * 3, "");
286 unsigned char c = *buf++;
287 *p++ = (c >= 32 && c < 127 ? c : '.');
295 void dump(const void *ptr, int len);
298 #define hex_dump(p,n) do { if (L_DUMP) dump(p,n); } while (0)
300 static int xopen3(const char *pathname, int flags, int mode)
302 int fd = open(pathname, flags, mode);
304 perror_and_die("open");
308 static void xpipe(int *fds)
311 perror_and_die("pipe");
314 static void xexecve(const char *filename, char **argv, char **envp) NORETURN;
315 static void xexecve(const char *filename, char **argv, char **envp)
317 execve(filename, argv, envp);
318 perror_and_die("cannot re-exec %s", filename);
321 static void ndelay_on(int fd)
323 int fl = fcntl(fd, F_GETFL);
325 perror_and_die("F_GETFL");
326 if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0)
327 perror_and_die("setting O_NONBLOCK");
330 static void close_on_exec(int fd)
332 if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0)
333 perror_and_die("setting FD_CLOEXEC");
336 static unsigned monotonic_ms(void)
339 if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts))
340 perror_and_die("clock_gettime(MONOTONIC)");
341 return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
/* Number of bytes occupied by str, counting its terminating NUL. */
static unsigned strsize(const char *str)
{
	size_t bytes_with_nul = strlen(str) + 1;

	return bytes_with_nul;
}
/* Like strsize(), but the byte count (string plus NUL) is rounded up
 * to the next multiple of 4. */
static unsigned strsize_aligned4(const char *str)
{
	size_t bytes_with_nul = strlen(str) + 1;
	size_t rounded_up = (bytes_with_nul + 3) & ~(size_t)3;

	return rounded_up;
}
354 static ssize_t safe_read(int fd, void *buf, size_t count)
358 n = read(fd, buf, count);
359 } while (n < 0 && errno == EINTR);
363 static ssize_t full_read(int fd, void *buf, size_t len)
369 cc = safe_read(fd, buf, len);
371 return cc; /* read() returns -1 on failure. */
374 buf = ((char *)buf) + cc;
/* Call safe_read() once and die with "short read" unless it
 * delivered exactly len bytes. */
static void xsafe_read(int fd, void *buf, size_t len)
{
	ssize_t got = safe_read(fd, buf, len);

	/* a negative result converts to a huge size_t and never matches len */
	if ((size_t)got != len)
		perror_and_die("short read");
}
/* Call full_read() and die with "short read" unless it
 * delivered exactly len bytes. */
static void xfull_read(int fd, void *buf, size_t len)
{
	ssize_t got = full_read(fd, buf, len);

	/* a negative result converts to a huge size_t and never matches len */
	if ((size_t)got != len)
		perror_and_die("short read");
}
394 static ssize_t safe_write(int fd, const void *buf, size_t count)
398 n = write(fd, buf, count);
399 } while (n < 0 && errno == EINTR);
403 static ssize_t full_write(int fd, const void *buf, size_t len)
410 cc = safe_write(fd, buf, len);
412 return cc; /* write() returns -1 on failure. */
414 buf = ((const char *)buf) + cc;
/* Call safe_write() once and die unless it accepted exactly
 * count bytes. */
static void xsafe_write(int fd, const void *buf, size_t count)
{
	ssize_t written = safe_write(fd, buf, count);

	/* a negative result converts to a huge size_t and never matches count */
	if ((size_t)written != count)
		perror_and_die("short write of %ld bytes", (long)count);
}
/* Call full_write() and die unless it accepted exactly
 * count bytes. */
static void xfull_write(int fd, const void *buf, size_t count)
{
	ssize_t written = full_write(fd, buf, count);

	/* a negative result converts to a huge size_t and never matches count */
	if ((size_t)written != count)
		perror_and_die("short write of %ld bytes", (long)count);
}
431 static void xmovefd(int from_fd, int to_fd)
433 if (from_fd != to_fd) {
434 if (dup2(from_fd, to_fd) < 0)
435 perror_and_die("dup2");
440 static unsigned getnum(const char *str)
442 if (str[0] >= '0' && str[0] <= '9') {
444 unsigned long l = strtoul(str, &p, 10);
445 /* must not overflow int even after x1000 */
446 if (!*p && l <= INT_MAX / 1000)
449 error_and_die("malformed or too big number '%s'", str);
/* Return a pointer to the first non-whitespace character of s
 * (possibly its terminating NUL). Constness is dropped on return
 * so the result can be used with writable buffers. */
static char *skip_whitespace(const char *s)
{
	/* NB: isspace('\0') returns 0, so the scan stops at the terminator.
	 * The cast matters: passing a negative plain char (bytes >= 0x80
	 * where char is signed) to isspace() is undefined behavior. */
	while (isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
/* Return a pointer to the first whitespace character of s, or to its
 * terminating NUL if there is none. Constness is dropped on return
 * so the result can be used with writable buffers. */
static char *skip_non_whitespace(const char *s)
{
	/* The cast matters: passing a negative plain char (bytes >= 0x80
	 * where char is signed) to isspace() is undefined behavior. */
	while (*s != '\0' && !isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
465 static void *xmalloc(unsigned sz)
467 void *p = malloc(sz);
469 error_and_die("out of memory");
473 static void *xzalloc(unsigned sz)
475 void *p = xmalloc(sz);
480 static void *xrealloc(void *p, unsigned size)
482 p = realloc(p, size);
484 error_and_die("out of memory");
488 static const char *xstrdup(const char *str)
490 const char *p = strdup(str);
492 error_and_die("out of memory");
507 static const char srv_name[3][7] = {
516 smallint srv_enable[3];
517 smallint check_files[3];
522 /* We try to closely mimic glibc nscd */
523 .logfile = NULL, /* default is to not have a log file */
525 .srv_enable = { 0, 0, 0 },
526 .check_files = { 1, 1, 1 },
527 .pttl = { 3600, 3600, 3600 },
528 .nttl = { 20, 60, 20 },
529 /* huh, what is the default cache size in glibc nscd? */
530 .size = { 256 * 8 / 3, 256 * 8 / 3, 256 * 8 / 3 },
533 static const char default_conffile[] = "/etc/nscd.conf";
534 static const char *self_exe_points_to = "/proc/self/exe";
538 ** Clients, workers machinery
541 /* Header common to all requests */
542 #define USER_REQ_STRUCT \
543 uint32_t version; /* Version number of the daemon interface */ \
544 uint32_t type; /* Service requested */ \
545 uint32_t key_len; /* Key length */
547 typedef struct user_req_header {
553 MAX_USER_REQ_SIZE = 1024,
554 USER_HDR_SIZE = sizeof(user_req_header),
555 /* DNS queries time out after 20 seconds,
556 * we will allow for a bit more */
557 WORKER_TIMEOUT_SEC = 30,
558 CLIENT_TIMEOUT_MS = 100,
559 SMALL_POLL_TIMEOUT_MS = 200,
562 typedef struct user_req {
564 struct { /* as came from client */
567 struct { /* when stored in cache, overlaps .version */
569 /* (timestamp24 * 256) == timestamp in ms */
570 unsigned timestamp24:24;
573 char reqbuf[MAX_USER_REQ_SIZE - USER_HDR_SIZE];
576 /* Compile-time check for correct size */
577 struct BUG_wrong_user_req_size {
578 char BUG_wrong_user_req_size[sizeof(user_req) == MAX_USER_REQ_SIZE ? 1 : -1];
590 SHUTDOWN, /* Shut the server down */
591 GETSTAT, /* Get the server statistic */
592 INVALIDATE, /* Invalidate one special cache */
604 static const char *const typestr[] = {
605 "GETPWBYNAME", /* done */
606 "GETPWBYUID", /* done */
607 "GETGRBYNAME", /* done */
608 "GETGRBYGID", /* done */
609 "GETHOSTBYNAME", /* done */
610 "GETHOSTBYNAMEv6", /* done */
611 "GETHOSTBYADDR", /* done */
612 "GETHOSTBYADDRv6", /* done */
613 "SHUTDOWN", /* done */
614 "GETSTAT", /* info? */
615 "INVALIDATE", /* done */
616 /* won't do: nscd passes a name of shmem segment
617 * which client can map and "see" the db */
619 "GETFDGR", /* won't do */
620 "GETFDHST", /* won't do */
622 "INITGROUPS", /* done */
623 "GETSERVBYNAME", /* prio 3 (no caching?) */
624 "GETSERVBYPORT", /* prio 3 (no caching?) */
625 "GETFDSERV" /* won't do */
628 extern const char *const typestr[];
630 static const smallint type_to_srv[] = {
631 [GETPWBYNAME ] = SRV_PASSWD,
632 [GETPWBYUID ] = SRV_PASSWD,
633 [GETGRBYNAME ] = SRV_GROUP,
634 [GETGRBYGID ] = SRV_GROUP,
635 [GETHOSTBYNAME ] = SRV_HOSTS,
636 [GETHOSTBYNAMEv6 ] = SRV_HOSTS,
637 [GETHOSTBYADDR ] = SRV_HOSTS,
638 [GETHOSTBYADDRv6 ] = SRV_HOSTS,
639 [GETAI ] = SRV_HOSTS,
640 [INITGROUPS ] = SRV_GROUP,
643 static int unsupported_ureq_type(unsigned type)
645 if (type == GETAI) return 0;
646 if (type == INITGROUPS) return 0;
647 if (type == GETSTAT) return 1;
648 if (type > INVALIDATE) return 1;
653 typedef struct client_info {
654 /* if client_fd != 0, we are waiting for the reply from worker
655 * on pfd[i].fd, and client_fd is saved client's fd
656 * (we need to put it back into pfd[i].fd later) */
658 unsigned bytecnt; /* bytes read from client */
659 unsigned bufidx; /* buffer# in global client_buf[] */
661 unsigned respos; /* response */
662 user_req *resptr; /* response */
663 user_req **cache_pp; /* cache entry address */
664 user_req *ureq; /* request (points to client_buf[x]) */
667 static unsigned g_now_ms;
668 static int min_closed = INT_MAX;
669 static int cnt_closed = 0;
670 static int num_clients = 2; /* two listening sockets are "clients" too */
672 /* We read up to max_reqnum requests in parallel */
673 static unsigned max_reqnum = 14;
675 /* To be allocated at init to become client_buf[max_reqnum][MAX_USER_REQ_SIZE].
676 * Note: it is a pointer to [MAX_USER_REQ_SIZE] arrays,
677 * not [MAX_USER_REQ_SIZE] array of pointers.
679 static char (*client_buf)[MAX_USER_REQ_SIZE];
680 static char *busy_cbuf;
681 static struct pollfd *pfd;
682 static client_info *cinfo;
684 /* Request, response and cache data structures:
686 * cache[] (defined later):
687 * cacheline_t cache[cache_size] array, or in other words,
688 * user_req* cache[cache_size][8] array.
689 * Every client request is hashed, hash value determines which cache[x]
690 * will have the response stored in one of its 8 elements.
691 * Cache entries have this format: request, then padding to 32 bits,
693 * Addresses in cache[x][y] may be NULL or:
694 * (&client_buf[z]) & 1: the cache miss is in progress ("future entry"):
695 * "the data is not in the cache (yet), wait for it to appear"
696 * (&client_buf[z]) & 3: the cache miss is in progress and other clients
697 * also want the same data ("shared future entry")
698 * else (non-NULL but low two bits are 0): cached data in malloc'ed block
700 * Each of these is a [max_reqnum] sized array:
701 * pfd[i] - given to poll() to wait for requests and replies.
702 * .fd: first two pfd[i]: listening Unix domain sockets, else
703 * .fd: open fd to a client, for reading client's request, or
704 * .fd: open fd to a worker, to send request and get response back
705 * cinfo[i] - auxiliary client data for pfd[i]
706 * .client_fd: open fd to a client, in case we already had read its
707 * request and got a cache miss, and created a worker or
708 * wait for another client's worker.
709 * Otherwise, it's 0 and client's fd is in pfd[i].fd
710 * .bufidx: index in client_buf[] we store client's request in
711 * .ureq: = client_buf[bufidx]
712 * .bytecnt: size of the request
713 * .started_ms: used to time out unresponsive clients
714 * .resptr: initially NULL. Later, same as cache[x][y] pointer to a cached
715 * response, or (a rare case) a "fake cache" entry:
716 * all cache[hash(request)][0..7] blocks were found busy,
717 * the result won't be cached.
718 * .respos: "write-out to client" offset
719 * .cache_pp: initially NULL. Later, &cache[x][y] where the response is,
720 * or will be stored. Remains NULL if "fake cache" entry is in use
722 * When a client has received its reply (or otherwise closed (timeout etc)),
723 * corresponding pfd[i] and cinfo[i] are removed by shifting [i+1], [i+2] etc
724 * elements down, so that both arrays never have free holes.
725 * [num_clients] is always the first free element.
727 * Each of these also is a [max_reqnum] sized array, but indexes
728 * do not correspond directly to pfd[i] and cinfo[i]:
729 * client_buf[n][MAX_USER_REQ_SIZE] - buffers we read client requests into
730 * busy_cbuf[n] - bool flags marking busy client_buf[]
732 /* Possible reductions:
733 * fd, bufidx - uint8_t
734 * started_ms -> uint16_t started_s
735 * ureq - eliminate (derivable from bufidx?)
/* Are special bits 0? is it a true cached entry? */
#define CACHED_ENTRY(p)     ( ((long)(p) & 3) == 0 )
/* Are special bits 11? is it a shared future cache entry? */
#define CACHE_SHARED(p)     ( ((long)(p) & 3) == 3 )
/* Return a ptr with special bits cleared (used for accessing data) */
#define CACHE_PTR(p)        ( (void*) ((long)(p) & ~(long)3) )
/* Return a ptr with special bits set to x1: make future cache entry ptr */
#define MAKE_FUTURE_PTR(p)  ( (void*) ((long)(p) | 1) )
/* Modify ptr, set special bits to 11: shared future cache entry.
 * pp must point to a pointer object. The slot is read and written through
 * its own pointer type: modifying it via a "long *" lvalue would break
 * the strict aliasing rule. NB: pp is evaluated twice. */
#define MARK_PTR_SHARED(pp) ( *(pp) = (void*) ((long)*(pp) | 3) )
749 static inline unsigned ureq_size(const user_req *ureq)
751 return sizeof(user_req_header) + ureq->key_len;
754 static unsigned cache_age(const user_req *ureq)
756 if (!CACHED_ENTRY(ureq))
758 return (uint32_t) (g_now_ms - (ureq->timestamp24 << 8));
761 static void set_cache_timestamp(user_req *ureq)
763 ureq->timestamp24 = g_now_ms >> 8;
766 static int alloc_buf_no(void)
771 next_buf = (next_buf + 1) % max_reqnum;
772 if (!busy_cbuf[cur]) {
776 } while (next_buf != n);
777 error_and_die("no free bufs?!");
780 static inline void *bufno2buf(int i)
782 return client_buf[i];
785 static void free_refcounted_ureq(user_req **ureqp);
787 static void close_client(unsigned i)
789 log(L_DEBUG, "closing client %u (fd %u,%u)", i, pfd[i].fd, cinfo[i].client_fd);
790 /* Paranoia. We had nasty bugs where client was closed twice. */
795 if (cinfo[i].client_fd && cinfo[i].client_fd != pfd[i].fd)
796 close(cinfo[i].client_fd);
797 pfd[i].fd = 0; /* flag as unused (coalescing needs this) */
798 busy_cbuf[cinfo[i].bufidx] = 0;
800 if (cinfo[i].cache_pp == NULL) {
801 user_req *resptr = cinfo[i].resptr;
803 log(L_DEBUG, "client %u: freeing fake cache entry %p", i, resptr);
807 /* Most of the time, it is not freed here,
808 * only refcounted--. Freeing happens
809 * if it was deleted from cache[] but retained
812 free_refcounted_ureq(&cinfo[i].resptr);
822 ** nscd API <-> C API conversion
825 typedef struct response_header {
826 uint32_t version_or_size;
831 typedef struct initgr_response_header {
832 uint32_t version_or_size;
835 /* code assumes gid_t == int32, let's check that */
836 int32_t gid[sizeof(gid_t) == sizeof(int32_t) ? 0 : -1];
837 /* char user_str[as_needed]; */
838 } initgr_response_header;
840 static initgr_response_header *obtain_initgroups(const char *username)
842 struct initgr_response_header *resp;
844 enum { MAGIC_OFFSET = sizeof(*resp) / sizeof(int32_t) };
848 pw = getpwnam(username);
851 resp->version_or_size = sizeof(*resp);
857 /* getgrouplist may be very expensive, it's much better to allocate
858 * a bit more than to run getgrouplist twice */
862 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
863 resp = xrealloc(resp, sz);
864 } while (getgrouplist(username, pw->pw_gid, (gid_t*) &resp->gid, &ngroups) == -1);
865 log(L_DEBUG, "ngroups=%d", ngroups);
867 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
868 /* resp = xrealloc(resp, sz); - why bother */
869 resp->version_or_size = sz;
871 resp->ngrps = ngroups;
876 typedef struct pw_response_header {
877 uint32_t version_or_size;
880 int32_t pw_passwd_len;
883 int32_t pw_gecos_len;
885 int32_t pw_shell_len;
886 /* char pw_name[pw_name_len]; */
887 /* char pw_passwd[pw_passwd_len]; */
888 /* char pw_gecos[pw_gecos_len]; */
889 /* char pw_dir[pw_dir_len]; */
890 /* char pw_shell[pw_shell_len]; */
891 } pw_response_header;
893 static pw_response_header *marshal_passwd(struct passwd *pw)
896 pw_response_header *resp;
897 unsigned pw_name_len;
898 unsigned pw_passwd_len;
899 unsigned pw_gecos_len;
901 unsigned pw_shell_len;
902 unsigned sz = sizeof(*resp);
904 sz += (pw_name_len = strsize(pw->pw_name));
905 sz += (pw_passwd_len = strsize(pw->pw_passwd));
906 sz += (pw_gecos_len = strsize(pw->pw_gecos));
907 sz += (pw_dir_len = strsize(pw->pw_dir));
908 sz += (pw_shell_len = strsize(pw->pw_shell));
911 resp->version_or_size = sz;
917 resp->pw_name_len = pw_name_len;
918 resp->pw_passwd_len = pw_passwd_len;
919 resp->pw_uid = pw->pw_uid;
920 resp->pw_gid = pw->pw_gid;
921 resp->pw_gecos_len = pw_gecos_len;
922 resp->pw_dir_len = pw_dir_len;
923 resp->pw_shell_len = pw_shell_len;
924 p = (char*)(resp + 1);
925 strcpy(p, pw->pw_name); p += pw_name_len;
926 strcpy(p, pw->pw_passwd); p += pw_passwd_len;
927 strcpy(p, pw->pw_gecos); p += pw_gecos_len;
928 strcpy(p, pw->pw_dir); p += pw_dir_len;
929 strcpy(p, pw->pw_shell); p += pw_shell_len;
930 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
935 typedef struct gr_response_header {
936 uint32_t version_or_size;
938 int32_t gr_name_len; /* strlen(gr->gr_name) + 1; */
939 int32_t gr_passwd_len; /* strlen(gr->gr_passwd) + 1; */
940 int32_t gr_gid; /* gr->gr_gid */
941 int32_t gr_mem_cnt; /* while (gr->gr_mem[gr_mem_cnt]) ++gr_mem_cnt; */
942 /* int32_t gr_mem_len[gr_mem_cnt]; */
943 /* char gr_name[gr_name_len]; */
944 /* char gr_passwd[gr_passwd_len]; */
945 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
946 /* char gr_gid_str[as_needed]; - huh? */
947 /* char orig_key[as_needed]; - needed?? I don't do this ATM... */
949 glibc adds gr_gid_str, but client doesn't get/use it:
950 writev(3, [{"\2\0\0\0\2\0\0\0\5\0\0\0", 12}, {"root\0", 5}], 2) = 17
951 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP, revents=POLLIN}], 1, 5000) = 1
952 read(3, "\2\0\0\0\1\0\0\0\10\0\0\0\4\0\0\0\0\0\0\0\0\0\0\0", 24) = 24
953 readv(3, [{"", 0}, {"root\0\0\0\0\0\0\0\0", 12}], 2) = 12
956 } gr_response_header;
958 static gr_response_header *marshal_group(struct group *gr)
961 gr_response_header *resp;
963 unsigned sz = sizeof(*resp);
965 sz += strsize(gr->gr_name);
966 sz += strsize(gr->gr_passwd);
968 while (gr->gr_mem[gr_mem_cnt]) {
969 sz += strsize(gr->gr_mem[gr_mem_cnt]);
972 /* for int32_t gr_mem_len[gr_mem_cnt]; */
973 sz += gr_mem_cnt * sizeof(int32_t);
976 resp->version_or_size = sz;
982 resp->gr_name_len = strsize(gr->gr_name);
983 resp->gr_passwd_len = strsize(gr->gr_passwd);
984 resp->gr_gid = gr->gr_gid;
985 resp->gr_mem_cnt = gr_mem_cnt;
986 p = (char*)(resp + 1);
987 /* int32_t gr_mem_len[gr_mem_cnt]; */
989 while (gr->gr_mem[gr_mem_cnt]) {
990 *(uint32_t*)p = strsize(gr->gr_mem[gr_mem_cnt]);
994 /* char gr_name[gr_name_len]; */
995 strcpy(p, gr->gr_name);
996 p += strsize(gr->gr_name);
997 /* char gr_passwd[gr_passwd_len]; */
998 strcpy(p, gr->gr_passwd);
999 p += strsize(gr->gr_passwd);
1000 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
1002 while (gr->gr_mem[gr_mem_cnt]) {
1003 strcpy(p, gr->gr_mem[gr_mem_cnt]);
1004 p += strsize(gr->gr_mem[gr_mem_cnt]);
1007 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
1012 typedef struct hst_response_header {
1013 uint32_t version_or_size;
1016 int32_t h_aliases_cnt;
1017 int32_t h_addrtype; /* AF_INET or AF_INET6 */
1018 int32_t h_length; /* 4 or 16 */
1019 int32_t h_addr_list_cnt;
1021 /* char h_name[h_name_len]; - we pad it to 4 bytes */
1022 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1023 /* char h_addr_list[h_addr_list_cnt][h_length]; - every one is the same size [h_length] (4 or 16) */
1024 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1025 } hst_response_header;
1027 static hst_response_header *marshal_hostent(struct hostent *h)
1030 hst_response_header *resp;
1031 unsigned h_name_len;
1032 unsigned h_aliases_cnt;
1033 unsigned h_addr_list_cnt;
1034 unsigned sz = sizeof(*resp);
1036 /* char h_name[h_name_len] */
1037 sz += h_name_len = strsize_aligned4(h->h_name);
1038 h_addr_list_cnt = 0;
1039 while (h->h_addr_list[h_addr_list_cnt]) {
1042 /* char h_addr_list[h_addr_list_cnt][h_length] */
1043 sz += h_addr_list_cnt * h->h_length;
1045 while (h->h_aliases[h_aliases_cnt]) {
1046 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]] */
1047 sz += strsize(h->h_aliases[h_aliases_cnt]);
1050 /* uint32_t h_aliases_len[h_aliases_cnt] */
1051 sz += h_aliases_cnt * 4;
1054 resp->version_or_size = sz;
1056 /*resp->found = 0;*/
1057 resp->error = HOST_NOT_FOUND;
1061 resp->h_name_len = h_name_len;
1062 resp->h_aliases_cnt = h_aliases_cnt;
1063 resp->h_addrtype = h->h_addrtype;
1064 resp->h_length = h->h_length;
1065 resp->h_addr_list_cnt = h_addr_list_cnt;
1066 /*resp->error = 0;*/
1067 p = (char*)(resp + 1);
1068 /* char h_name[h_name_len]; */
1069 strcpy(p, h->h_name);
1071 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1073 while (h->h_aliases[h_aliases_cnt]) {
1074 *(uint32_t*)p = strsize(h->h_aliases[h_aliases_cnt]);
1078 /* char h_addr_list[h_addr_list_cnt][h_length]; */
1079 h_addr_list_cnt = 0;
1080 while (h->h_addr_list[h_addr_list_cnt]) {
1081 memcpy(p, h->h_addr_list[h_addr_list_cnt], h->h_length);
1085 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1087 while (h->h_aliases[h_aliases_cnt]) {
1088 strcpy(p, h->h_aliases[h_aliases_cnt]);
1089 p += strsize(h->h_aliases[h_aliases_cnt]);
1092 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
1097 /* Reply to addrinfo query */
1098 typedef struct ai_response_header {
1099 uint32_t version_or_size;
1105 /* char ai_addr[naddrs][4 or 16]; - addrslen bytes in total */
1106 /* char ai_family[naddrs]; - AF_INET[6] each (determines ai_addr[i] length) */
1107 /* char ai_canonname[canonlen]; */
1108 } ai_response_header;
1110 static ai_response_header *obtain_addrinfo(const char *hostname)
1112 struct addrinfo hints;
1113 struct addrinfo *ai;
1114 struct addrinfo *ap;
1115 ai_response_header *resp;
1119 unsigned naddrs = 0;
1120 unsigned addrslen = 0;
1121 unsigned canonlen = 0;
1123 memset(&hints, 0, sizeof(hints));
1124 hints.ai_flags = AI_CANONNAME;
1125 /* kills dups (one for each possible SOCK_xxx) */
1126 /* this matches glibc behavior */
1127 hints.ai_socktype = SOCK_STREAM;
1128 ai = NULL; /* on failure getaddrinfo may leave it as-is */
1129 err = getaddrinfo(hostname, NULL, &hints, &ai);
1133 if (ai->ai_canonname)
1134 sz += canonlen = strsize(ai->ai_canonname);
1138 addrslen += (ap->ai_family == AF_INET ? 4 : 16);
1141 sz += naddrs + addrslen;
1144 resp->version_or_size = sz;
1147 /*resp->found = 0;*/
1151 resp->naddrs = naddrs;
1152 resp->addrslen = addrslen;
1153 resp->canonlen = canonlen;
1154 p = (char*)(resp + 1);
1155 family = p + addrslen;
1158 /* char ai_family[naddrs]; */
1159 *family++ = ap->ai_family;
1160 /* char ai_addr[naddrs][4 or 16]; */
1161 if (ap->ai_family == AF_INET) {
1162 memcpy(p, &(((struct sockaddr_in*)(ap->ai_addr))->sin_addr), 4);
1165 memcpy(p, &(((struct sockaddr_in6*)(ap->ai_addr))->sin6_addr), 16);
1170 /* char ai_canonname[canonlen]; */
1171 if (ai->ai_canonname)
1172 strcpy(family, ai->ai_canonname);
1173 log(L_DEBUG, "sz:%u realsz:%u", sz, family + strsize(ai->ai_canonname) - (char*)resp);
1175 /* glibc 2.3.6 segfaults here sometimes
1176 * (maybe my mistake, fixed by "ai = NULL;" above).
1177 * Since we are in worker and are going to exit anyway, why bother? */
1178 /*freeaddrinfo(ai);*/
1187 /* one 8-element "cacheline" */
1188 typedef user_req *cacheline_t[8];
1189 static unsigned cache_size;
1190 /* Points to cacheline_t cache[cache_size] array, or in other words,
1191 * points to user_req* cache[cache_size][8] array */
1192 static cacheline_t *cache;
1193 static unsigned cached_cnt;
1194 static unsigned cache_access_cnt = 1; /* prevent division by zero */
1195 static unsigned cache_hit_cnt = 1;
1196 static unsigned last_age_time;
1197 static unsigned aging_interval_ms;
1198 static unsigned min_aging_interval_ms;
1200 static response_header *ureq_response(user_req *ureq)
1202 /* Skip query part, find answer part
1203 * (answer is 32-bit aligned) */
1204 return (void*) ((char*)ureq + ((ureq_size(ureq) + 3) & ~3));
1207 /* This hash is supposed to be good for short textual data */
1208 static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash)
1212 hash = (32 * hash + hash) ^ *key++;
1217 static void free_refcounted_ureq(user_req **ureqp)
1219 user_req *ureq = *ureqp;
1221 /* (when exactly can this happen?) */
1225 if (!CACHED_ENTRY(ureq))
1228 if (ureq->refcount) {
1230 log(L_DEBUG2, "--%p.refcount=%u", ureq, ureq->refcount);
1232 log(L_DEBUG2, "%p.refcount=0, freeing", ureq);
/* Finds the cache slot for ureq.
 * The request bytes from key_len onward are hashed with bernstein_hash()
 * (seeded with the request type); hash % cache_size selects a cacheline,
 * whose 8 slots are compared with ureq starting at the 'type' field.
 * On a hit, returns the address of the matching slot.
 * On a miss, a free slot or the slot with the largest cache_age() is set
 * to MAKE_FUTURE_PTR(ureq) and its address is returned.
 * NOTE(review): for the "fake cache entry" case the return value is not
 * visible here; callers test the result for NULL - confirm. */
1238 static user_req **lookup_in_cache(user_req *ureq)
1240 user_req **cacheline;
1244 unsigned ureq_sz = ureq_size(ureq);
1246 /* prevent overflow and division by zero */
1248 if ((int)cache_access_cnt < 0) {
1249 cache_access_cnt = (cache_access_cnt >> 1) + 1;
1250 cache_hit_cnt = (cache_hit_cnt >> 1) + 1;
1253 hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type);
1254 log(L_DEBUG2, "hash:%08x", hash);
1255 hash = hash % cache_size;
1256 cacheline = cache[hash];
/* scan the 8 slots of the selected cacheline */
1259 for (i = 0; i < 8; i++) {
1260 user_req *cached = CACHE_PTR(cacheline[i]);
1262 if (free_cache == -1)
1266 /* ureq->version is always 2 and is reused in cache
1267 * for other purposes, we need to skip it here */
1268 if (memcmp(&ureq->type, &cached->type, ureq_sz - offsetof(user_req, type)) == 0) {
1269 log(L_DEBUG, "found in cache[%u][%u]", hash, i);
1271 return &cacheline[i];
/* miss: use a free slot if one was noted during the scan */
1275 if (free_cache >= 0) {
1278 log(L_DEBUG, "not found, using free cache[%u][%u]", hash, i);
/* no free slot: look for the entry with the largest age */
1282 unsigned oldest_idx = 0;
1283 unsigned oldest_age = 0;
1284 for (i = 0; i < 8; i++) {
1285 unsigned age = cache_age(cacheline[i]);
1286 if (age > oldest_age) {
1291 if (oldest_age == 0) {
1292 /* All entries in cacheline are "future" entries!
1293 * This is very unlikely, but we must still work correctly.
1294 * We call this "fake cache entry".
1295 * The data will be "cached" only for the duration
1296 * of this client's request lifetime.
1298 log(L_DEBUG, "not found, and cache[%u] is full: using fake cache entry", hash);
1302 log(L_DEBUG, "not found, freeing and reusing cache[%u][%u] (age %u)", hash, i, oldest_age);
1303 free_refcounted_ureq(&cacheline[i]);
1306 cacheline[i] = MAKE_FUTURE_PTR(ureq);
1307 return &cacheline[i];
/* Walks the cache and frees entries.
 * free_all: callers in this file pass 1 to flush (check_files(), the
 *           INVALIDATE request) and 0 for periodic aging (main_loop()).
 * srv:      service index to restrict the walk to, or -1 for all services.
 * For each entry, the limit is the positive or negative TTL of its service
 * (config.pttl / config.nttl, selected by resp->found).
 * When srv == -1 or free_all is set, aging_interval_ms is first reset to
 * INT_MAX; in the srv == -1 branch it is then lowered whenever the ttl
 * value of a surviving entry is smaller.
 * Logs the number of entries freed and remaining. */
1310 static void age_cache(unsigned free_all, int srv)
1312 user_req **cp = *cache;
/* remember the entry count on entry, for the "freed" figure logged below */
1314 unsigned sv = cached_cnt;
1316 log(L_DEBUG, "aging cache, srv:%d, free_all:%u", srv, free_all);
1317 if (srv == -1 || free_all)
1318 aging_interval_ms = INT_MAX;
1321 user_req *cached = *cp;
1322 if (CACHED_ENTRY(cached) && cached != NULL) {
1323 int csrv = type_to_srv[cached->type];
1324 if (srv == -1 || srv == csrv) {
1327 free_refcounted_ureq(cp);
1329 unsigned age = cache_age(cached);
1330 response_header *resp = ureq_response(cached);
1331 unsigned ttl = (resp->found ? config.pttl : config.nttl)[csrv];
1333 log(L_DEBUG2, "freeing: age %u positive %d ttl %u", age, resp->found, ttl);
1335 free_refcounted_ureq(cp);
1336 } else if (srv == -1) {
1338 if (aging_interval_ms > ttl)
1339 aging_interval_ms = ttl;
1346 log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt);
1347 log(L_DEBUG2, "aging interval now %u ms", aging_interval_ms);
1355 /* Spawns a worker and feeds it with user query on stdin */
1356 /* Returns stdout fd of the worker, in blocking mode */
/* Two pipes are created: to_child carries the request, to_parent carries
 * the reply. The child re-executes this program with argv[0] set to
 * "worker_nscd"; main() recognizes a worker by argv[0] starting with 'w'. */
1357 static int create_and_feed_worker(user_req *ureq)
1363 } to_child, to_parent;
1365 /* NB: these pipe fds are in blocking mode and non-CLOEXECed */
1366 xpipe(&to_child.rd);
1367 xpipe(&to_parent.rd);
1370 if (pid < 0) /* error */
1371 perror_and_die("vfork");
1372 if (!pid) { /* child */
/* room for an unsigned int printed in decimal, plus terminator */
1373 char param[sizeof(int)*3 + 2];
1377 close(to_parent.rd);
/* child: request pipe becomes fd 0, reply pipe becomes fd 1 */
1378 xmovefd(to_child.rd, 0);
1379 xmovefd(to_parent.wr, 1);
/* the debug mask is formatted as decimal; worker() parses it with atoi() */
1380 sprintf(param, "%u", debug);
1381 argv[0] = (char*) "worker_nscd";
1384 /* Re-exec ourself, cleaning up all allocated memory.
1385 * fds in parent are marked CLOEXEC and will be closed too
1387 /* Try link name first: it's better to have comm field
1388 * of "nscd" than "exe" (pgrep reported to fail to find us
1389 * by name when comm field contains "exe") */
1390 execve(self_exe_points_to, argv, argv+2);
1391 xexecve("/proc/self/exe", argv, argv+2);
/* parent: the write end of the reply pipe belongs to the child */
1396 close(to_parent.wr);
1397 /* We do not expect child to block for any noticeably long time,
1398 * and also we expect write to be one-piece one:
1399 * ureq size is <= 1k and pipes are guaranteed to accept
1400 * at least PIPE_BUF at once */
1401 xsafe_write(to_child.wr, ureq, ureq_size(ureq));
/* mark the reply fd close-on-exec before handing it to the caller */
1404 close_on_exec(to_parent.rd);
1405 return to_parent.rd;
/* Request currently being served by the worker process. Set in worker()
 * so that worker_signal_handler() can report it. */
1408 static user_req *worker_ureq;
/* Returns a printable form of a request key for log messages.
 * For GETHOSTBYADDR the first 4 bytes of buf are taken as an IPv4 address
 * and formatted with inet_ntoa().
 * NOTE(review): the key is read through a uint32_t* cast of a char buffer;
 * unless reqbuf is known to be suitably aligned, memcpy() into a uint32_t
 * would be the safer way to load it.
 * NOTE(review): a second, non-static prototype follows the definition;
 * presumably the two are alternatives under a preprocessor conditional -
 * confirm. */
1411 static const char *req_str(unsigned type, const char *buf)
1413 if (type == GETHOSTBYADDR) {
1415 in.s_addr = *((uint32_t*)buf);
1416 return inet_ntoa(in);
1418 if (type == GETHOSTBYADDRv6) {
1424 const char *req_str(unsigned type, const char *buf);
/* Signal handler installed by worker() for fatal signals.
 * Logs which request (type, key_len and, in the first variant, the key
 * text via req_str()) was being handled, using worker_ureq.
 * NOTE(review): two alternative log() calls are visible; presumably one is
 * selected at compile time - confirm. */
1427 static void worker_signal_handler(int sig)
1430 log(L_INFO, "worker:%d got sig:%d while handling req "
1431 "type:%d(%s) key_len:%d '%s'",
1433 worker_ureq->type, typestr[worker_ureq->type],
1434 worker_ureq->key_len,
1435 req_str(worker_ureq->type, worker_ureq->reqbuf)
1438 log(L_INFO, "worker:%d got sig:%d while handling req "
1439 "type:%d key_len:%d",
1441 worker_ureq->type, worker_ureq->key_len);
1446 static void worker(const char *param) NORETURN;
1447 static void worker(const char *param)
1452 debug = atoi(param);
1454 worker_ureq = &ureq; /* for signal handler */
1456 /* Make sure we won't hang, but rather die */
1457 if (WORKER_TIMEOUT_SEC)
1458 alarm(WORKER_TIMEOUT_SEC);
1460 /* NB: fds 0, 1 are in blocking mode */
1462 /* We block here (for a short time) */
1463 /* Due to ureq size < PIPE_BUF read is atomic */
1464 /* No error or size checking: we trust the parent */
1465 safe_read(0, &ureq, sizeof(ureq));
1467 signal(SIGSEGV, worker_signal_handler);
1468 signal(SIGBUS, worker_signal_handler);
1469 signal(SIGILL, worker_signal_handler);
1470 signal(SIGFPE, worker_signal_handler);
1471 signal(SIGABRT, worker_signal_handler);
1473 signal(SIGSTKFLT, worker_signal_handler);
1476 if (ureq.type == GETHOSTBYNAME
1477 || ureq.type == GETHOSTBYNAMEv6
1479 resp = marshal_hostent(
1480 ureq.type == GETHOSTBYNAME
1481 ? gethostbyname(ureq.reqbuf)
1482 : gethostbyname2(ureq.reqbuf, AF_INET6)
1484 } else if (ureq.type == GETHOSTBYADDR
1485 || ureq.type == GETHOSTBYADDRv6
1487 resp = marshal_hostent(gethostbyaddr(ureq.reqbuf, ureq.key_len,
1488 (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6)
1490 } else if (ureq.type == GETPWBYNAME) {
1492 log(L_DEBUG2, "getpwnam('%s')", ureq.reqbuf);
1493 pw = getpwnam(ureq.reqbuf);
1494 log(L_DEBUG2, "getpwnam result:%p", pw);
1495 resp = marshal_passwd(pw);
1496 } else if (ureq.type == GETPWBYUID) {
1497 resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf)));
1498 } else if (ureq.type == GETGRBYNAME) {
1499 struct group *gr = getgrnam(ureq.reqbuf);
1500 resp = marshal_group(gr);
1501 } else if (ureq.type == GETGRBYGID) {
1502 struct group *gr = getgrgid(atoi(ureq.reqbuf));
1503 resp = marshal_group(gr);
1504 } else if (ureq.type == GETAI) {
1505 resp = obtain_addrinfo(ureq.reqbuf);
1506 } else /*if (ureq.type == INITGROUPS)*/ {
1507 resp = obtain_initgroups(ureq.reqbuf);
1510 if (!((response_header*)resp)->found) {
1511 /* Parent knows about this special case */
1512 xfull_write(1, resp, 8);
1514 /* Responses can be big (getgrnam("guest") on a big user db),
1515 * we cannot rely on them being atomic. full_write loops
1517 xfull_write(1, resp, ((response_header*)resp)->version_or_size);
/* Per-service lists of files whose changes should invalidate the cache.
 * Each array element packs several NUL-separated names; check_files()
 * walks them by advancing strlen() + 1 bytes at a time.
 * checked_status[] (declared right after the table) holds the last
 * fingerprint check_files() computed for each service. */
1527 static const char *const checked_filenames[] = {
1528 /* Note: compiler adds another \0 byte at the end of each array element,
1529 * so there are TWO \0's there.
1531 [SRV_PASSWD] = "/etc/passwd\0" "/etc/passwd.cache\0" "/etc/shadow\0",
1532 [SRV_GROUP] = "/etc/group\0" "/etc/group.cache\0",
1533 [SRV_HOSTS] = "/etc/hosts\0" "/etc/hosts.cache\0" "/etc/resolv.conf\0" "/etc/nsswitch.conf\0",
1534 /* ("foo.cache" files are maintained by libnss-cache) */
1537 static long checked_status[ARRAY_SIZE(checked_filenames)];
/* Detects changes in the files listed for service srv.
 * For every file name in checked_filenames[srv], the st_mtime, st_size and
 * st_ino fields from stat() are XOR-ed into one long. The stat buffer is
 * zeroed first and stat() errors are ignored, so a file that cannot be
 * stat'ed contributes 0.
 * If the result differs from checked_status[srv], the new value is stored
 * and the cache is flushed for that service via age_cache(1, srv). */
1539 static void check_files(int srv)
1542 const char *file = checked_filenames[srv];
1547 memset(&tsb, 0, sizeof(tsb));
1548 stat(file, &tsb); /* ignore errors */
1549 /* Comparing struct stat's was giving false positives.
1550 * Extracting only those fields which are interesting:
1552 v ^= (long)tsb.st_mtime ^ (long)tsb.st_size ^ (long)tsb.st_ino; /* ^ (long)tsb.st_dev ? */
/* step over this name and its terminating NUL to reach the next one */
1553 file += strlen(file) + 1;
1556 if (v != checked_status[srv]) {
1557 checked_status[srv] = v;
1558 log(L_INFO, "detected change in files related to service %d", srv);
1559 age_cache(/*free_all:*/ 1, srv);
/* Validates and dispatches the request received so far from client i.
 * Checks, in order: protocol version, key_len against the size of reqbuf,
 * whether the complete request has arrived (returns 0 to keep reading if
 * not), whether the request type is supported, the privileged
 * SHUTDOWN/INVALIDATE requests (peer uid obtained with SO_PEERCRED must be
 * 0), whether the service is enabled, and NUL termination of the key for
 * everything except the by-address lookups.
 * Then the cache is consulted: a hit takes a reference on the entry and
 * arms POLLOUT for this client; on a miss the client either waits for a
 * request already in progress or a worker is started and pfd[i].fd is
 * switched to the worker's reply fd. */
1563 /* Returns 1 if we immediately have the answer */
1564 static int handle_client(int i)
1567 user_req *ureq = cinfo[i].ureq;
1568 user_req **cache_pp;
1569 user_req *ureq_and_resp;
1572 log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'",
1573 ureq->version, ureq->type,
1574 ureq->type < ARRAY_SIZE(typestr) ? typestr[ureq->type] : "?",
1575 ureq->key_len, req_str(ureq->type, ureq->reqbuf));
1578 if (ureq->version != NSCD_VERSION) {
1579 log(L_INFO, "wrong version");
1583 if (ureq->key_len > sizeof(ureq->reqbuf)) {
1584 log(L_INFO, "bogus key_len %u - ignoring", ureq->key_len);
1588 if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) {
1589 log(L_INFO, "read %d, need to read %d",
1590 cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len);
1591 return 0; /* more to read */
1593 if (cinfo[i].bytecnt > USER_HDR_SIZE + ureq->key_len) {
1594 log(L_INFO, "read overflow: %u > %u",
1595 (int)cinfo[i].bytecnt, (int)(USER_HDR_SIZE + ureq->key_len));
1599 if (unsupported_ureq_type(ureq->type)) {
1600 /* We don't know this request. Just close the connection.
1601 * (glibc client interprets this like "not supported by this nscd")
1602 * Happens very often, thus DEBUG, not INFO */
1603 log(L_DEBUG, "unsupported query, dropping");
1608 hex_dump(cinfo[i].ureq, cinfo[i].bytecnt);
/* privileged requests: only honoured when the peer's uid is 0 */
1610 if (ureq->type == SHUTDOWN
1611 || ureq->type == INVALIDATE
1614 struct ucred caller;
1615 socklen_t optlen = sizeof(caller);
1616 if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) {
1617 log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno));
1621 if (caller.uid != 0) {
1622 log(L_INFO, "special request from non-root - ignoring");
1627 if (ureq->type == SHUTDOWN) {
1628 log(L_INFO, "got shutdown request, exiting");
/* an INVALIDATE key must be non-empty and end with a NUL byte */
1631 if (!ureq->key_len || ureq->reqbuf[ureq->key_len - 1]) {
1632 log(L_INFO, "malformed invalidate request - ignoring");
1636 log(L_INFO, "got invalidate request, flushing cache");
1637 /* Frees entire cache. TODO: replace -1 with service (in ureq->reqbuf) */
1638 age_cache(/*free_all:*/ 1, -1);
1643 srv = type_to_srv[ureq->type];
1644 if (!config.srv_enable[srv]) {
1645 log(L_INFO, "service %d is disabled, dropping", srv);
/* by-address keys are binary; every other key must end with a NUL byte */
1650 if (ureq->type != GETHOSTBYADDR
1651 && ureq->type != GETHOSTBYADDRv6
1653 if (ureq->key_len && ureq->reqbuf[ureq->key_len - 1] != '\0') {
1654 log(L_INFO, "badly terminated buffer");
1660 if (config.check_files[srv]) {
1664 cache_pp = lookup_in_cache(ureq);
1665 ureq_and_resp = cache_pp ? *cache_pp : NULL;
1667 if (ureq_and_resp) {
1668 if (CACHED_ENTRY(ureq_and_resp)) {
1669 /* Found. Save ptr to response into cinfo and return */
1670 response_header *resp = ureq_response(ureq_and_resp);
1671 unsigned sz = resp->version_or_size;
1673 log(L_DEBUG, "sz:%u", sz);
1675 /* cache shouldn't free it under us! */
1676 if (++ureq_and_resp->refcount == 0) {
1677 error_and_die("BUG! ++%p.refcount rolled over to 0, exiting", ureq_and_resp);
1679 log(L_DEBUG2, "++%p.refcount=%u", ureq_and_resp, ureq_and_resp->refcount);
1680 pfd[i].events = POLLOUT; /* we want to write out */
1681 cinfo[i].resptr = ureq_and_resp;
1682 /*cinfo[i].respos = 0; - already is */
1683 /* prevent future matches with anything */
1684 cinfo[i].cache_pp = (void *) 1;
1685 return 1; /* "ready to write data out to client" */
1688 /* Not found. Remember a pointer where it will appear */
1689 cinfo[i].cache_pp = cache_pp;
1691 /* If it does not point to our own ureq buffer... */
1692 if (CACHE_PTR(ureq_and_resp) != ureq) {
1693 /* We are not the first client who wants this */
1694 log(L_DEBUG, "another request is in progress (%p), waiting for its result", ureq_and_resp);
1695 MARK_PTR_SHARED(cache_pp); /* "please inform us when it's ready" */
1696 /* "we do not wait for client anymore" */
1697 cinfo[i].client_fd = pfd[i].fd;
1698 /* Don't wait on fd. Worker response will unblock us */
1702 /* else: lookup_in_cache inserted (ureq & 1) into *cache_pp:
1703 * we are the first client to miss on this ureq. */
1706 /* Start worker thread */
1707 log(L_DEBUG, "stored %p in cache, starting a worker", ureq_and_resp);
1708 /* Now we will wait on worker's fd, not client's! */
1709 cinfo[i].client_fd = pfd[i].fd;
1710 pfd[i].fd = create_and_feed_worker(ureq);
/* Switches client i to the "write the response" state.
 * If the client's own fd was parked in cinfo[i].client_fd (while waiting
 * for a worker), it is moved back into pfd[i].fd and client_fd is cleared.
 * POLLOUT is requested, resptr is pointed at the cached entry, and the
 * client's start time is reset to the current time.
 *
 * i:      index into pfd[] / cinfo[]
 * cached: entry whose response is to be written out */
1714 static void prepare_for_writeout(unsigned i, user_req *cached)
1716 log(L_DEBUG2, "client %u: data is ready at %p", i, cached);
1718 if (cinfo[i].client_fd) {
1719 pfd[i].fd = cinfo[i].client_fd;
1720 cinfo[i].client_fd = 0; /* "we don't wait for worker reply" */
1722 pfd[i].events = POLLOUT;
1724 /* Writeout position etc */
1725 cinfo[i].resptr = cached;
1726 /*cinfo[i].respos = 0; - already is */
1727 /* if worker took some time to get info (e.g. DNS query),
1728 * prevent client timeout from triggering at once */
1729 cinfo[i].started_ms = g_now_ms;
1732 /* Worker seems to be ready to write the response.
1733 * When we return, response is fully read and stored in cache,
1734 * worker's fd is closed, pfd[i] and cinfo[i] are updated. */
/* Steps visible below: read the fixed-size head of the reply, validate the
 * size it announces, allocate one block holding the request followed by the
 * response, read the rest of the response if the entry was found, timestamp
 * the block, then hand it to this client and to every other client waiting
 * on the same cache slot, setting the refcount accordingly. */
1735 static void handle_worker_response(int i)
1737 struct { /* struct response_header + small body */
1738 uint32_t version_or_size;
1744 response_header *resp;
1745 unsigned sz, resp_sz;
1746 unsigned ureq_sz_aligned;
1749 ureq = cinfo[i].ureq;
/* offset at which ureq_response() places the response after the request */
1750 ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq;
1752 sz = full_read(pfd[i].fd, &sz_and_found, sizeof(sz_and_found));
1754 /* worker was killed? */
1755 log(L_DEBUG, "worker gave short reply:%u < 8", sz);
/* the first field of a worker reply is the total size of the response */
1759 resp_sz = sz_and_found.version_or_size;
1760 if (resp_sz < sz || resp_sz > 0x0fffffff) { /* 256 mb */
1761 error("BUG: bad size from worker:%u", resp_sz);
1765 /* Create new block of cached info */
1766 cached = xzalloc(ureq_sz_aligned + resp_sz);
1767 log(L_DEBUG2, "xzalloc(%u):%p sz:%u resp_sz:%u found:%u",
1768 ureq_sz_aligned + resp_sz, cached,
1770 (int)sz_and_found.found
1772 resp = (void*) (((char*) cached) + ureq_sz_aligned);
/* copy the request, then the part of the response already read */
1773 memcpy(cached, ureq, ureq_size(ureq));
1774 memcpy(resp, &sz_and_found, sz);
1775 if (sz_and_found.found && resp_sz > sz) {
1776 /* We need to read data only if it's found
1777 * (otherwise worker sends only 8 bytes).
1779 * Replies can be big (getgrnam("guest") on a big user db),
1780 * we cannot rely on them being atomic. However, we know
1781 * that worker _always_ gives reply in one full_write(),
1782 * so we loop and read it all
1783 * (looping is implemented inside full_read())
1785 if (full_read(pfd[i].fd, ((char*) resp) + sz, resp_sz - sz) != resp_sz - sz) {
1786 /* worker was killed? */
1787 log(L_DEBUG, "worker gave short reply, free(%p)", cached);
1794 set_cache_timestamp(cached);
1795 hex_dump(resp, resp_sz);
1802 user_req **cache_pp = cinfo[i].cache_pp;
1803 if (cache_pp != NULL) { /* if not a fake entry */
1806 if (CACHE_SHARED(ureq)) {
1807 /* Other clients wait for this response too,
1808 * wake them (and us) up and set refcount = no_of_clients */
/* indexes 0 and 1 are the listening sockets, clients start at 2 */
1811 for (j = 2; j < num_clients; j++) {
1812 if (cinfo[j].cache_pp == cache_pp) {
1813 /* This client uses the same cache entry */
1815 /* prevent future matches with anything */
1816 cinfo[j].cache_pp = (void *) 1;
1817 prepare_for_writeout(j, cached);
1822 /* prevent future matches with anything */
1823 cinfo[i].cache_pp = (void *) 1;
1827 prepare_for_writeout(i, cached);
1829 /* cache shouldn't free it under us! */
1831 cached->refcount = ref;
1832 log(L_DEBUG2, "%p.refcount=%u", cached, ref);
/* a new entry exists: go back to the shortest aging interval */
1834 aging_interval_ms = min_aging_interval_ms;
/* The daemon's event loop.
 * pfd[0] and pfd[1] are the two listening sockets; pfd[2..num_clients-1]
 * are client connections (or, while a lookup is in progress, the reply fd
 * of the worker serving that client).
 * Each iteration: choose a poll() timeout, poll, accept new clients, then
 * for every ready fd either write out a response, read a worker reply,
 * react to a hangup, or read more of a client request. Afterwards the
 * cache is aged when aging_interval_ms has elapsed, clients idle for longer
 * than CLIENT_TIMEOUT_MS are dropped, and pfd[] / cinfo[] are compacted if
 * any client was closed. */
1837 static void main_loop(void)
1839 /* 1/2 of smallest negative TTL */
1840 min_aging_interval_ms = config.nttl[0];
1841 if (min_aging_interval_ms > config.nttl[1]) min_aging_interval_ms = config.nttl[1];
1842 if (min_aging_interval_ms > config.nttl[2]) min_aging_interval_ms = config.nttl[2];
/* "| 1" keeps the interval non-zero */
1843 min_aging_interval_ms = (min_aging_interval_ms / 2) | 1;
1844 aging_interval_ms = min_aging_interval_ms;
/* poll timeout: infinite when there are no clients and nothing cached,
 * aging interval while below the client limit, else the short default */
1850 r = SMALL_POLL_TIMEOUT_MS;
1851 if (num_clients <= 2 && !cached_cnt)
1852 r = -1; /* infinite */
1853 else if (num_clients < max_reqnum)
1854 r = aging_interval_ms;
1855 #if 0 /* Debug: leak detector */
1857 static unsigned long long cnt;
1858 static unsigned long low_malloc = -1L;
1859 static unsigned long low_sbrk = -1L;
1860 void *p = malloc(540); /* should not be too small */
1863 if ((unsigned long)p < low_malloc)
1864 low_malloc = (unsigned long)p;
1865 if ((unsigned long)s < low_sbrk)
1866 low_sbrk = (unsigned long)s;
1867 log(L_INFO, "poll %llu (%d ms). clients:%u cached:%u %u/%u malloc:%p (%lu), sbrk:%p (%lu)",
1868 cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt,
1869 p, (unsigned long)p - low_malloc,
1870 s, (unsigned long)s - low_sbrk);
1874 log(L_DEBUG, "poll %d ms. clients:%u cached:%u hit ratio:%u/%u",
1875 r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt);
1878 r = poll(pfd, num_clients, r);
1879 log(L_DEBUG2, "poll returns %d", r);
1882 perror_and_die("poll");
1886 /* Everything between polls never sleeps.
1887 * There is no blocking I/O (except when we talk to worker thread
1888 * which is guaranteed to not block us for long) */
1890 g_now_ms = monotonic_ms();
1892 goto skip_fd_checks;
/* the two listening sockets: accept at most one new client from each */
1894 for (i = 0; i < 2; i++) {
1896 if (!pfd[i].revents)
1898 /* pfd[i].revents = 0; - not needed */
1899 cfd = accept(pfd[i].fd, NULL, NULL);
1901 /* odd... poll() says we can accept but accept failed? */
1902 log(L_DEBUG2, "accept failed with %s", strerror(errno));
1907 /* x[num_clients] is next free element, taking it */
1908 log(L_DEBUG2, "new client %d, fd %d", num_clients, cfd);
1909 pfd[num_clients].fd = cfd;
1910 pfd[num_clients].events = POLLIN;
1911 /* this will make us do read() in next for() loop: */
1912 pfd[num_clients].revents = POLLIN;
1913 memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients]));
1914 /* cinfo[num_clients].bytecnt = 0; - done */
1915 cinfo[num_clients].started_ms = g_now_ms;
1916 cinfo[num_clients].bufidx = alloc_buf_no();
1917 cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx);
1919 if (num_clients >= max_reqnum) {
1920 /* stop accepting new connects for now */
1921 pfd[0].events = pfd[0].revents = 0;
1922 pfd[1].events = pfd[1].revents = 0;
/* i continues from 2 here: the client and worker fds */
1925 for (; i < num_clients; i++) {
1926 if (!pfd[i].revents)
1928 log(L_DEBUG2, "pfd[%d].revents:0x%x", i, pfd[i].revents);
1929 /* pfd[i].revents = 0; - not needed */
1931 /* "Write out result" case */
1932 if (pfd[i].revents == POLLOUT) {
1933 response_header *resp;
1935 if (!cinfo[i].resptr) {
1936 /* corner case: worker gave bad response earlier */
1941 resp = ureq_response(cinfo[i].resptr);
/* the cached copy stores the size in this field; the wire format wants
 * the protocol version, so swap it in for the write and restore after */
1942 resp_sz = resp->version_or_size;
1943 resp->version_or_size = NSCD_VERSION;
1945 r = safe_write(pfd[i].fd, ((char*) resp) + cinfo[i].respos, resp_sz - cinfo[i].respos);
1946 resp->version_or_size = resp_sz;
1948 if (r < 0 && errno == EAGAIN) {
1949 log(L_DEBUG, "client %u: EAGAIN on write", i);
1952 if (r <= 0) { /* client isn't there anymore */
1953 log(L_DEBUG, "client %u is gone (write returned:%d err:%s)",
1954 i, r, errno ? strerror(errno) : "-");
1958 cinfo[i].respos += r;
1959 if (cinfo[i].respos >= resp_sz) {
1960 /* We wrote everything */
1961 /* No point in trying to get next request, it won't come.
1962 * glibc 2.4 client closes its end after each request,
1963 * without testing for EOF from server. strace:
1965 * read(3, "www.google.com\0\0", 16) = 16
1968 log(L_DEBUG, "client %u: sent answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1972 log(L_DEBUG, "client %u: sent partial answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1976 /* "Read reply from worker" case. Worker may be
1977 * already dead, revents may contain other bits too
1979 if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) {
1980 log(L_DEBUG, "reading response for client %u", i);
1981 handle_worker_response(i);
1982 /* We can immediately try to write a response
1987 /* POLLHUP means pfd[i].fd is closed by peer.
1988 * POLLHUP+POLLOUT[+POLLERR] is seen when we writing out
1989 * and see that pfd[i].fd is closed by peer (for example,
1990 * it happens when client's result buffer is too small
1991 * to receive a huge GETGRBYNAME result).
1993 if ((pfd[i].revents & ~(POLLOUT+POLLERR)) == POLLHUP) {
1994 int is_client = (cinfo[i].client_fd == 0 || cinfo[i].client_fd == pfd[i].fd);
1995 log(L_INFO, "%s %u disappeared (got POLLHUP on fd %d)",
1996 is_client ? "client" : "worker",
2003 /* Read worker output anyway, error handling
2004 * in that function deals with short read.
2005 * Simply closing client is wrong: it leaks
2006 * shared future entries. */
2007 handle_worker_response(i);
2012 /* All strange and unexpected cases */
2013 if (pfd[i].revents != POLLIN) {
2014 /* Not just "can read", but some other bits are there */
2015 log(L_INFO, "client %u revents is strange:0x%x", i, pfd[i].revents);
2020 /* "Read request from client" case */
2021 r = safe_read(pfd[i].fd, (char*)(cinfo[i].ureq) + cinfo[i].bytecnt, MAX_USER_REQ_SIZE - cinfo[i].bytecnt);
2023 log(L_DEBUG2, "error reading from client: %s", strerror(errno));
2024 if (errno == EAGAIN)
2030 log(L_INFO, "premature EOF from client, dropping");
2034 cinfo[i].bytecnt += r;
/* only try to interpret the request once a full header has arrived */
2035 if (cinfo[i].bytecnt >= sizeof(user_req_header)) {
2036 if (handle_client(i)) {
2037 /* Response is found in cache! */
2041 } /* for each client[2..num_clients-1] */
/* periodic aging of the whole cache */
2045 if ((g_now_ms - last_age_time) >= aging_interval_ms) {
2046 last_age_time = g_now_ms;
2047 age_cache(/*free_all:*/ 0, -1);
2050 /* Close timed out client connections */
2051 for (i = 2; i < num_clients; i++) {
2052 if (pfd[i].fd != 0 /* not closed yet? */
2053 && cinfo[i].client_fd == 0 /* do we still wait for client, not worker? */
2054 && (g_now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS
2056 log(L_INFO, "timed out waiting for client %u (%u ms), dropping",
2057 i, (unsigned)(g_now_ms - cinfo[i].started_ms));
2065 /* We closed at least one client, coalesce pfd[], cinfo[] */
2066 if (min_closed + cnt_closed >= num_clients) {
2067 /* clients [min_closed..num_clients-1] are all closed */
2068 /* log(L_DEBUG, "taking shortcut"); - almost always happens */
2073 while (i < num_clients) {
2077 if (++i >= num_clients)
2081 cinfo[j++] = cinfo[i++];
2085 num_clients -= cnt_closed;
2086 log(L_DEBUG, "removing %d closed clients. clients:%d", cnt_closed, num_clients);
2087 min_closed = INT_MAX;
2089 /* start accepting new connects */
2090 pfd[0].events = POLLIN;
2091 pfd[1].events = POLLIN;
/* Filesystem locations used by the daemon: pid file, its directory, and
 * the two listening socket paths (main() opens both). */
2100 #define NSCD_PIDFILE "/var/run/nscd/nscd.pid"
2101 #define NSCD_DIR "/var/run/nscd"
2102 #define NSCD_SOCKET "/var/run/nscd/socket"
2103 #define NSCD_SOCKET_OLD "/var/run/.nscd_socket"
/* NOTE(review): presumably set once write_pid() has created the pid file,
 * so cleanup only removes a file we wrote - confirm. */
2105 static smallint wrote_pidfile;
/* Handler registered in main() for SIGINT and SIGTERM: removes the pid
 * file and both socket paths.
 * NOTE(review): the parameter sig is not used by the visible statements. */
2107 static void cleanup_on_signal(int sig)
2110 unlink(NSCD_PIDFILE);
2111 unlink(NSCD_SOCKET_OLD);
2112 unlink(NSCD_SOCKET);
/* Writes our process id, followed by a newline, to NSCD_PIDFILE
 * (the file is opened for writing, truncating any previous content). */
2116 static void write_pid(void)
2118 FILE *pid = fopen(NSCD_PIDFILE, "w");
2121 fprintf(pid, "%d\n", getpid());
2126 /* Open a listening nscd server socket */
/* name: filesystem path of the AF_UNIX stream socket to create.
 * The socket is marked close-on-exec, bound to name, made accessible with
 * mode 0666, and put into listening state. Any failure is fatal.
 * NOTE(review): strcpy() into sun_path is unbounded; the callers in this
 * file pass the short NSCD_SOCKET / NSCD_SOCKET_OLD constants. */
2127 static int open_socket(const char *name)
2129 struct sockaddr_un sun;
2130 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
2132 perror_and_die("cannot create unix domain socket");
2134 close_on_exec(sock);
2135 sun.sun_family = AF_UNIX;
2136 strcpy(sun.sun_path, name);
2138 if (bind(sock, (struct sockaddr *) &sun, sizeof(sun)) < 0)
2139 perror_and_die("bind(%s)", name);
2140 if (chmod(name, 0666) < 0)
2141 perror_and_die("chmod(%s)", name);
/* backlog scales with the request limit; "| 1" keeps it at least 1 */
2142 if (listen(sock, (max_reqnum/8) | 1) < 0)
2143 perror_and_die("listen");
/* Long command line options, passed to getopt_long() in main().
 * The last field is the value getopt_long() returns for the option.
 * print_help_and_die() walks this table together with help[]. */
2147 static const struct option longopt[] = {
2148 /* name, has_arg, int *flag, int val */
2149 { "debug" , no_argument , NULL, 'd' },
2150 { "config-file", required_argument, NULL, 'f' },
2151 { "invalidate" , required_argument, NULL, 'i' },
2152 { "shutdown" , no_argument , NULL, 'K' },
2153 { "nthreads" , required_argument, NULL, 't' },
2154 { "version" , no_argument , NULL, 'V' },
2155 { "help" , no_argument , NULL, '?' },
2156 { "usage" , no_argument , NULL, '?' },
2157 /* just exit(0). TODO: "test" connect? */
2158 { "statistic" , no_argument , NULL, 'g' },
2159 { "secure" , no_argument , NULL, 'S' }, /* ? */
/* One help text per option, printed by print_help_and_die() next to the
 * corresponding longopt[] entry.
 * NOTE(review): entries are presumably in the same order as longopt[] -
 * confirm when adding options. */
2163 static const char *const help[] = {
2164 "Do not daemonize; log to stderr (-dd: more verbosity)",
2165 "File to read configuration from",
2167 "Shut the server down",
2168 "Serve N requests in parallel",
/* Prints the usage banner and then one line per option, taking the short
 * letter and long name from longopt[] and the description from help[].
 * The loop stops at the first longopt[] entry whose value is '?'. */
2172 static void print_help_and_die(void)
2174 const struct option *opt = longopt;
2175 const char *const *h = help;
2177 puts("Usage: nscd [OPTION...]\n"
2178 "Name Service Cache Daemon\n");
2180 printf("\t" "-%c,--%-11s %s\n", opt->val, opt->name, *h);
2183 } while (opt->val != '?');
/* Recognizes a service name at s: "passwd", "group" or "hosts" (the whole
 * NUL-terminated word must match).
 * On a match, returns the position after the name with whitespace skipped.
 * NOTE(review): presumably stores the service index in *srv and returns
 * NULL for an unknown name (parse_conffile() reports an unknown service
 * based on the result) - confirm.
 * NOTE(review): the final offset is a fixed 6, which equals
 * strlen("passwd"); confirm how the two 5-character names are handled. */
2187 static char *skip_service(int *srv, const char *s)
2189 if (strcmp("passwd", s) == 0) {
2192 } else if (strcmp("group", s) == 0) {
2194 } else if (strcmp("hosts", s) == 0) {
2199 return skip_whitespace(s + 6);
/* Config handler for directives that are accepted but ignored. */
2202 static void handle_null(const char *str, int srv) {}
/* Config handler for "logfile": stores a copy of the path in config.logfile. */
2204 static void handle_logfile(const char *str, int srv)
2206 config.logfile = xstrdup(str);
/* Config handler for "debug-level": ORs the low 8 bits of the parsed
 * number into the global debug mask. */
2209 static void handle_debuglvl(const char *str, int srv)
2211 debug |= (uint8_t) getnum(str);
/* Config handler for "threads" / "max-threads": parses the number.
 * NOTE(review): what is done with n is not visible here; presumably it
 * adjusts max_reqnum - confirm. */
2214 static void handle_threads(const char *str, int srv)
2216 unsigned n = getnum(str);
/* Config handler for "server-user": stores a copy of the user name in
 * config.user (main() uses it to decide whether to drop privileges). */
2221 static void handle_user(const char *str, int srv)
2223 config.user = xstrdup(str);
/* Config handler for "enable-cache": the service is enabled iff the value
 * starts with 'y' or 'Y' (OR-ing 0x20 folds the ASCII letter to lowercase). */
2226 static void handle_enable(const char *str, int srv)
2228 config.srv_enable[srv] = ((str[0] | 0x20) == 'y');
/* Config handler for "positive-time-to-live": stores the parsed number for
 * service srv. main() later multiplies the stored value by 1000. */
2231 static void handle_pttl(const char *str, int srv)
2233 config.pttl[srv] = getnum(str);
/* Config handler for "negative-time-to-live": stores the parsed number for
 * service srv. main() later multiplies the stored value by 1000. */
2236 static void handle_nttl(const char *str, int srv)
2238 config.nttl[srv] = getnum(str);
/* Config handler for "suggested-size": stores the parsed number for
 * service srv. main() sums the three sizes to derive cache_size. */
2241 static void handle_size(const char *str, int srv)
2243 config.size[srv] = getnum(str);
/* Config handler for "check-files": file checking for service srv is on
 * iff the value starts with 'y' or 'Y'. */
2246 static void handle_chfiles(const char *str, int srv)
2248 config.check_files[srv] = ((str[0] | 0x20) == 'y');
/* Reads the configuration file conffile, line by line.
 * The first word of a line is looked up in the table of known directives
 * below. Each table string carries a one-character prefix: 'S' marks
 * directives whose first argument is a service name (parsed with
 * skip_service()), '_' is used for the others; the comparison skips the
 * prefix. The matching handler is called with the value string and the
 * service index.
 * Failure to open the file is fatal unless conffile is the default path.
 * Over-long lines are fatal; unknown directives and unknown service names
 * are reported and ignored.
 * NOTE(review): the use of the warn parameter is not visible here; main()
 * passes non-zero when warnings should be printed - confirm. */
2251 static void parse_conffile(const char *conffile, int warn)
2253 static const struct confword {
2255 void (*handler)(const char *, int);
2257 { "_" "logfile" , handle_logfile },
2258 { "_" "debug-level" , handle_debuglvl },
2259 { "_" "threads" , handle_threads },
2260 { "_" "max-threads" , handle_threads },
2261 { "_" "server-user" , handle_user },
2262 /* ignore: any user can stat */
2263 { "_" "stat-user" , handle_null },
2264 { "_" "paranoia" , handle_null }, /* ? */
2265 /* ignore: design goal is to never crash/hang */
2266 { "_" "reload-count" , handle_null },
2267 { "_" "restart-interval" , handle_null },
2268 { "S" "enable-cache" , handle_enable },
2269 { "S" "positive-time-to-live" , handle_pttl },
2270 { "S" "negative-time-to-live" , handle_nttl },
2271 { "S" "suggested-size" , handle_size },
2272 { "S" "check-files" , handle_chfiles },
2273 { "S" "persistent" , handle_null }, /* ? */
2274 { "S" "shared" , handle_null }, /* ? */
2275 { "S" "auto-propagate" , handle_null }, /* ? */
2280 FILE *file = fopen(conffile, "r");
/* pointer comparison: true only for an explicitly given file name */
2284 if (conffile != default_conffile)
2285 perror_and_die("cannot open %s", conffile);
2289 while (fgets(buf, sizeof(buf), file) != NULL) {
2290 const struct confword *word;
2292 int len = strlen(buf);
/* no trailing newline: either the buffer was filled (line too long)
 * or this is the unterminated last line of the file */
2296 if (buf[len-1] != '\n') {
2297 if (len >= sizeof(buf) - 1)
2298 error_and_die("%s:%d: line is too long", conffile, lineno);
2299 len++; /* last line, not terminated by '\n' */
/* locate the start of a '#' comment, if any */
2303 p = strchr(buf, '#');
2307 p = skip_whitespace(buf);
/* terminate the first word in place */
2310 *skip_non_whitespace(p) = '\0';
2313 if (strcmp(word->str + 1, p) == 0) {
/* step over the directive word and its NUL to reach the next word */
2315 p = skip_whitespace(p + strlen(p) + 1);
2316 *skip_non_whitespace(p) = '\0';
2317 if (word->str[0] == 'S') {
2318 char *p2 = skip_service(&srv, p);
2321 error("%s:%d: ignoring unknown service name '%s'", conffile, lineno, p);
2325 *skip_non_whitespace(p) = '\0';
2327 word->handler(p, srv);
2333 error("%s:%d: ignoring unknown directive '%s'", conffile, lineno, p);
2342 /* "XX,XX[,XX]..." -> gid_t[] */
/* Parses comma-separated hexadecimal numbers from str into a newly
 * xmalloc'ed array. main() treats element 0 as the uid and the remaining
 * elements as gids. A conversion error, or a character other than ',' or
 * the terminating NUL after a number, is fatal ("internal error").
 * NOTE(review): *sizep presumably receives the number of elements
 * (main() passes size - 1 to setgroups()) - confirm. */
2343 static gid_t* env_U_to_uid_and_gids(const char *str, int *sizep)
2354 ug = xmalloc(ng * sizeof(ug[0]));
/* strtoul() also advances sp past the digits it consumed */
2362 *gp++ = strtoul(sp, (char**)&sp, 16);
2363 if (errno || (*sp != ',' && *sp != '\0'))
2364 error_and_die("internal error");
/* Builds the string that main() hands to putenv() to pass the uid and
 * group list of 'user' to the re-executed daemon.
 * The user is looked up with getpwnam() (fatal if unknown); the groups are
 * collected with getgrouplist(), retried once after resizing the buffer to
 * the count it reported back (fatal if the retry fails too). The ids are
 * then printed as comma-separated hexadecimal numbers.
 * NOTE(review): the exact prefix of the produced string is not visible
 * here; main() reads it back with getenv("U") - confirm. */
2375 static char* user_to_env_U(const char *user)
2382 pw = getpwnam(user);
2384 perror_and_die("user '%s' is not known", user);
2387 /* 0th cell will be used for uid */
2388 ug = xmalloc((1 + ng) * sizeof(ug[0]));
2389 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) {
2390 ug = xrealloc(ug, (1 + ng) * sizeof(ug[0]));
2391 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0)
2392 perror_and_die("can't get groups of user '%s'", user);
2397 /* How much do we need for "-Uxx,xx[,xx]..." string? */
/* per id: up to 2 hex digits per byte of unsigned long, plus slack */
2398 ug_str = xmalloc((sizeof(unsigned long)+1)*2 * ng + 3);
2404 sp += sprintf(sp, "%lx,", (unsigned long)(*gp++));
2413 /* not static - don't inline me, compiler! */
/* Resolves the /proc/self/exe symlink and stores a heap copy of the target
 * path in self_exe_points_to; failure of readlink() is fatal.
 * One byte of buf is held back from readlink(), which does not append a
 * terminating NUL itself.
 * NOTE(review): confirm buf is NUL-terminated at sz before xstrdup(). */
2414 void readlink_self_exe(void);
2415 void readlink_self_exe(void)
2417 char buf[PATH_MAX + 1];
2418 ssize_t sz = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
2420 perror_and_die("readlink %s failed", "/proc/self/exe");
2422 self_exe_points_to = xstrdup(buf);
/* Client mode, used for the shutdown and invalidate command line options:
 * connects to the running daemon at NSCD_SOCKET and sends one request.
 *
 * arg: NULL to send a SHUTDOWN request (header only, key_len 0);
 *      otherwise the key of an INVALIDATE request, sent including its
 *      terminating NUL (the daemon rejects keys without one).
 * Does not return (declared NORETURN). */
2426 static void special_op(const char *arg) NORETURN;
2427 static void special_op(const char *arg)
2429 static const user_req_header ureq = { NSCD_VERSION, SHUTDOWN, 0 };
2431 struct sockaddr_un addr;
2434 sock = socket(PF_UNIX, SOCK_STREAM, 0);
2436 error_and_die("cannot create AF_UNIX socket");
2438 addr.sun_family = AF_UNIX;
2439 strcpy(addr.sun_path, NSCD_SOCKET);
2440 if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
2441 error_and_die("cannot connect to %s", NSCD_SOCKET);
2443 if (!arg) { /* shutdown */
2444 xfull_write(sock, &ureq, sizeof(ureq));
2445 printf("sent shutdown request, exiting\n");
2446 } else { /* invalidate */
2447 size_t arg_len = strlen(arg) + 1;
/* header immediately followed by the key bytes */
2448 char buf[sizeof(user_req_header) + arg_len];
2449 user_req_header *req = (void*) buf;
2451 req->version = NSCD_VERSION;
2452 req->type = INVALIDATE;
2453 req->key_len = arg_len;
/* req + 1 is the first byte after the header */
2454 memcpy(req + 1, arg, arg_len);
2455 xfull_write(sock, req, sizeof(*req) + arg_len);
2456 printf("sent invalidate(%s) request, exiting\n", arg);
2462 /* Callback for glibc-2.15 */
/* Passed to __nss_disable_nscd() at the start of main(). No statements of
 * its own are visible: the body consists of the explanatory comment. */
2464 static void do_nothing(size_t dbidx, struct traced_file *finfo)
2466 /* nscd from glibc-2.15 does something like this:
2467 if (!dbs[dbidx].enabled || !dbs[dbidx].check_file)
2469 add_file_to_watch_list(finfo->fname);
/* Prototype written out by hand in this file; main() calls the function
 * first thing, with do_nothing as the callback.
 * NOTE(review): presumably no public glibc header declares it - confirm. */
2473 /* This internal glibc function is called to disable trying to contact nscd.
2474 * We _are_ nscd, so we need to do the lookups, and not recurse.
2475 * Until 2.14, this function was taking no parameters.
2476 * In 2.15, it takes a function pointer from hell.
2478 void __nss_disable_nscd(void (*hell)(size_t, struct traced_file*));
2481 int main(int argc, char **argv)
2486 const char *conffile;
2488 /* make sure we don't get recursive calls */
2489 __nss_disable_nscd(do_nothing);
2491 if (argv[0][0] == 'w') /* "worker_nscd" */
2497 /* Make sure stdio is not closed */
2498 n = xopen3("/dev/null", O_RDWR, 0);
2501 /* Close unexpected open file descriptors */
2502 n |= 0xff; /* start from at least fd# 255 */
2507 /* For idiotic kernels which disallow "exec /proc/self/exe" */
2508 readlink_self_exe();
2510 conffile = default_conffile;
2512 while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) {
2523 special_op(optarg); /* exits */
2525 /* shutdown server */
2526 special_op(NULL); /* exits */
2528 puts("unscd - nscd which does not hang, v."PROGRAM_VERSION);
2534 max_reqnum = getnum(optarg);
2540 print_help_and_die();
2543 /* Multiple -d can bump debug regardless of nscd.conf:
2544 * no -d or -d: 0, -dd: 1,
2545 * -ddd: 3, -dddd: 7, -ddddd: 15
2548 debug |= (((1U << opt_d_cnt) >> 1) - 1) & L_ALL;
2550 env_U = getenv("U");
2551 /* Avoid duplicate warnings if $U exists */
2552 parse_conffile(conffile, /* warn? */ (env_U == NULL));
2554 /* I have a user report of (broken?) ldap nss library
2555 * opening and never closing a socket to a ldap server,
2556 * even across fork() and exec(). This messes up
2557 * worker child's operations for the reporter.
2559 * This strengthens my belief that nscd _must not_
2560 * nss libs to be written correctly.
2562 * Here, we need to jump through the hoops to guard against
2563 * such problems. If config file has server-user setting, we need
2564 * to setgroups + setuid. For that, we need to get uid and gid vector.
2565 * And that means possibly using buggy nss libs.
2566 * We will do it here, but then we will re-exec, passing uid+gids
2567 * in an environment variable.
2569 if (!env_U && config.user) {
2570 /* user_to_env_U() does getpwnam and getgrouplist */
2571 if (putenv(user_to_env_U(config.user)))
2572 error_and_die("out of memory");
2573 /* fds leaked by nss will be closed by execed copy */
2574 execv(self_exe_points_to, argv);
2575 xexecve("/proc/self/exe", argv, environ);
2578 /* Allocate dynamically sized stuff */
2579 max_reqnum += 2; /* account for 2 first "fake" clients */
2580 if (max_reqnum < 8) max_reqnum = 8; /* sanitize */
2581 /* Since refcount is a byte, can't serve more than 255-2 clients
2582 * at once. The rest will block in connect() */
2583 if (max_reqnum > 0xff) max_reqnum = 0xff;
2584 client_buf = xzalloc(max_reqnum * sizeof(client_buf[0]));
2585 busy_cbuf = xzalloc(max_reqnum * sizeof(busy_cbuf[0]));
2586 pfd = xzalloc(max_reqnum * sizeof(pfd[0]));
2587 cinfo = xzalloc(max_reqnum * sizeof(cinfo[0]));
2589 cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8;
2590 if (cache_size < 8) cache_size = 8; /* 8*8 = 64 entries min */
2591 if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */
2592 cache_size |= 1; /* force it to be odd */
2593 cache = xzalloc(cache_size * sizeof(cache[0]));
2595 /* Register cleanup hooks */
2596 signal(SIGINT, cleanup_on_signal);
2597 signal(SIGTERM, cleanup_on_signal);
2598 /* Don't die if a client closes a socket on us */
2599 signal(SIGPIPE, SIG_IGN);
2600 /* Avoid creating zombies */
2601 signal(SIGCHLD, SIG_IGN);
2603 /* Ensure workers don't have SIGALRM ignored */
2604 signal(SIGALRM, SIG_DFL);
2607 if (mkdir(NSCD_DIR, 0755) == 0) {
2608 /* prevent bad mode of NSCD_DIR if umask is e.g. 077 */
2609 chmod(NSCD_DIR, 0755);
2611 pfd[0].fd = open_socket(NSCD_SOCKET);
2612 pfd[1].fd = open_socket(NSCD_SOCKET_OLD);
2613 pfd[0].events = POLLIN;
2614 pfd[1].events = POLLIN;
2616 if (debug & D_DAEMON) {
2617 daemon(/*nochdir*/ 1, /*noclose*/ 0);
2618 if (config.logfile) {
2619 /* nochdir=1: relative paths still work as expected */
2620 xmovefd(xopen3(config.logfile, O_WRONLY|O_CREAT|O_TRUNC, 0666), 2);
2623 debug = 0; /* why bother? it's /dev/null'ed anyway */
2625 chdir("/"); /* compat */
2628 /* ignore job control signals */
2629 signal(SIGTTOU, SIG_IGN);
2630 signal(SIGTTIN, SIG_IGN);
2631 signal(SIGTSTP, SIG_IGN);
2634 log(L_ALL, "unscd v" PROGRAM_VERSION ", debug level 0x%x", debug & L_ALL);
2635 log(L_DEBUG, "max %u requests in parallel", max_reqnum - 2);
2636 log(L_DEBUG, "cache size %u x 8 entries", cache_size);
2640 gid_t *ug = env_U_to_uid_and_gids(env_U, &size);
2642 if (setgroups(size - 1, &ug[1]) || setgid(ug[1]))
2643 perror_and_die("cannot set groups for user '%s'", config.user);
2646 perror_and_die("cannot set uid to %u", (unsigned)(ug[0]));
2650 for (n = 0; n < 3; n++) {
2651 log(L_DEBUG, "%s cache enabled:%u pttl:%u nttl:%u",
2653 config.srv_enable[n],
2656 config.pttl[n] *= 1000;
2657 config.nttl[n] *= 1000;