1 /* This file is part of unscd, a complete nscd replacement.
2 * Copyright (C) 2007-2012 Denys Vlasenko. Licensed under the GPL version 2.
5 /* unscd is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
9 * unscd is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You can download the GNU General Public License from the GNU website
15 * at http://www.gnu.org/ or write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
21 gcc -Wall -Wunused-parameter -Os -o nscd nscd.c
23 gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common
28 nscd problems are not exactly unheard of. Over the years, there were
29 quite a few bugs in it. This leads people to invent babysitters
30 which restart crashed/hung nscd. This is ugly.
32 After looking at nscd source in glibc I arrived at the conclusion
33 that its design is contributing to this significantly. Even if nscd's
34 code is 100.00% perfect and bug-free, it can still suffer from bugs
35 in libraries it calls.
37 As designed, it's a multithreaded program which calls NSS libraries.
38 These libraries are not part of libc, they may be provided
39 by third-party projects (samba, ldap, you name it).
41 Thus nscd cannot be sure that libraries it calls do not have memory
42 or file descriptor leaks and other bugs.
44 Since nscd is a multithreaded program with a single shared cache,
45 any resource leak in any NSS library has cumulative effect.
46 Even if an NSS library leaks a file descriptor 0.01% of the time,
47 this will make nscd crash or hang after some time.
49 Of course bugs in NSS .so modules should be fixed, but meanwhile
50 I do want nscd which does not crash or lock up.
52 So I went ahead and wrote a replacement.
54 It is a single-threaded server process which offloads all NSS
55 lookups to worker children (not threads, but fully independent
56 processes). Cache hits are handled by parent. Only cache misses
57 start worker children. This design is immune against
58 resource leaks and hangs in NSS libraries.
60 It is also many times smaller.
62 Currently (v0.36) it emulates glibc nscd pretty closely
63 (handles same command line flags and config file), and is moderately tested.
65 Please note that as of 2008-08 it is not in wide use (yet?).
66 If you have trouble compiling it, see an incompatibility with
67 the "standard" one, or experience hangs/crashes, please report it to
68 vda.linux@googlemail.com
70 ***********************************************************************/
72 /* Make struct ucred appear in sys/socket.h */
74 /* For all good things */
91 #include <sys/socket.h>
93 #include <sys/types.h>
99 /* For inet_ntoa (for debug build only) */
100 #include <arpa/inet.h>
103 * 0.21 add SEGV reporting to worker
104 * 0.22 don't do freeaddrinfo() in GETAI worker, it's crashy
105 * 0.23 add parameter parsing
106 * 0.24 add conf file parsing, not using results yet
107 * 0.25 used some of conf file settings (not tested)
108 * 0.26 almost all conf file settings are wired up
109 * 0.27 a bit more of almost all conf file settings are wired up
110 * 0.28 optimized cache aging
111 * 0.29 implemented invalidate and shutdown options
112 * 0.30 fixed buglet (sizeof(ptr) != sizeof(array))
113 * 0.31 reduced client_info by one member
114 * 0.32 fix nttl/size defaults; simpler check for worker child in main()
115 * 0.33 tweak includes so that it builds on my new machine (64-bit userspace);
116 * do not die on unknown service name, just warn
117 * ("services" is a new service we don't support)
118 * 0.34 create /var/run/nscd/nscd.pid pidfile like glibc nscd 2.8 does;
119 * delay setuid'ing itself to server-user after log and pidfile are open
120 * 0.35 readlink /proc/self/exe and use result if execing /proc/self/exe fails
121 * 0.36 exercise extreme paranoia handling server-user option;
122 * a little bit more verbose logging:
123 * L_DEBUG2 log level added, use debug-level 7 to get it
124 * 0.37 users reported over-zealous "detected change in /etc/passwd",
125 * apparently stat() returns random garbage in unused padding
126 * on some systems. Made the check less paranoid.
127 * 0.38 log POLLHUP better
128 * 0.39 log answers to client better, log getpwnam in the worker,
129 * pass debug level value down to worker.
130 * 0.40 fix handling of shutdown and invalidate requests;
131 * fix bug with answer written in several pieces
132 * 0.40.1 set hints.ai_socktype = SOCK_STREAM in GETAI request
133 * 0.41 eliminate double caching of two near-simultaneous identical requests -
135 * 0.42 execute /proc/self/exe by link name first (better comm field)
136 * 0.43 fix off-by-one error in setgroups
137 * 0.44 make -d[ddd] bump up debug - easier to explain to users
138 * how to produce detailed log (no nscd.conf tweaking)
139 * 0.45 Fix out-of-bounds array access and log/pid file permissions -
140 * thanks to Sebastian Krahmer (krahmer AT suse.de)
141 * 0.46 fix a case when we forgot to remove a future entry on worker failure
142 * 0.47 fix nscd without -d to not bump debug level
143 * 0.48 fix for changes in __nss_disable_nscd API in glibc-2.15
144 * 0.49 minor tweaks to messages
145 * 0.50 add more files to watch for changes
146 * 0.51 fix a case where we forget to refcount-- the cached entry
147 * 0.52 make free_refcounted_ureq() tolerant to pointers to NULLs
148 * 0.53 fix INVALIDATE and SHUTDOWN requests being ignored
150 #define PROGRAM_VERSION "0.53"
152 #define DEBUG_BUILD 1
159 #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0])))
161 #define NORETURN __attribute__ ((__noreturn__))
164 #ifdef MY_CPU_HATES_CHARS
165 typedef int smallint;
167 typedef signed char smallint;
173 L_DEBUG = ((1 << 1) * DEBUG_BUILD),
174 L_DEBUG2 = ((1 << 2) * DEBUG_BUILD),
175 L_DUMP = ((1 << 3) * DEBUG_BUILD),
181 static smallint debug = D_DAEMON;
/* Common backend of the logging helpers: builds one message in a local
 * buffer (msgbuf) and emits it on stderr with fputs().
 *   s, p   - printf-style format string and its argument list
 *   strerr - optional error description; it is copied after the message
 *            only when the remaining space check below succeeds
 * NOTE(review): several short lines of this function are not visible in
 * this view (declarations of msgbuf/tv, the NULL check on strerr, etc.). */
183 static void verror(const char *s, va_list p, const char *strerr)
186 int sz, rem, strerr_len;
/* With D_STAMP set, prefix the line with "HH:MM:SS.nnnnn " taken from
 * gettimeofday(); hours are modulo 24 and the last field is tv_usec / 10 */
190 if (debug & D_STAMP) {
191 gettimeofday(&tv, NULL);
192 sz = sprintf(msgbuf, "%02u:%02u:%02u.%05u ",
193 (unsigned)((tv.tv_sec / (60*60)) % 24),
194 (unsigned)((tv.tv_sec / 60) % 60),
195 (unsigned)(tv.tv_sec % 60),
196 (unsigned)(tv.tv_usec / 10));
/* rem = space left in msgbuf after the optional prefix */
198 rem = sizeof(msgbuf) - sz;
/* vsnprintf() reports the length the full output would have had, so sz
 * may end up larger than the buffer when the message was truncated */
199 sz += vsnprintf(msgbuf + sz, rem, s, p);
200 rem = sizeof(msgbuf) - sz; /* can be negative after this! */
203 strerr_len = strlen(strerr);
204 if (rem >= strerr_len + 4) { /* ": STRERR\n\0" */
207 strcpy(msgbuf + sz, strerr);
216 fputs(msgbuf, stderr);
/* Log a printf-style message through verror(), with no error-string suffix. */
219 static void error(const char *msg, ...)
223 verror(msg, p, NULL);
/* Log a printf-style message through verror() (no error-string suffix).
 * Declared NORETURN: it does not return to the caller.
 * NOTE(review): the terminating call itself is not visible in this view. */
227 static void error_and_die(const char *msg, ...) NORETURN;
228 static void error_and_die(const char *msg, ...)
232 verror(msg, p, NULL);
/* Like error_and_die(), but also passes strerror(errno) to verror() so the
 * system error text can be appended. When errno is 0 no text is passed.
 * Declared NORETURN: it does not return to the caller. */
237 static void perror_and_die(const char *msg, ...) NORETURN;
238 static void perror_and_die(const char *msg, ...)
242 /* Guard against "<error message>: Success" */
243 verror(msg, p, errno ? strerror(errno) : NULL);
/* Log a printf-style message through verror(); mask is the level (L_xxx bit)
 * of this message.
 * NOTE(review): the test of mask against the global debug setting is
 * presumably on a line not visible in this view - confirm. */
248 static void nscd_log(int mask, const char *msg, ...)
253 verror(msg, p, NULL);
/* Call nscd_log() only when lvl is non-zero. The L_DEBUG, L_DEBUG2 and L_DUMP
 * levels are multiplied by DEBUG_BUILD, so they are the constant 0 (and the
 * call is skipped) when DEBUG_BUILD is 0. */
258 #define log(lvl, ...) do { if (lvl) nscd_log(lvl, __VA_ARGS__); } while (0)
/* Hex dump of len bytes at ptr to stderr, up to 16 bytes per output row,
 * followed by the same bytes as text. Does nothing useful unless the
 * L_DUMP bit is set in debug (see the check below).
 * NOTE(review): several short lines (loop headers, returns, braces) are not
 * visible in this view. */
261 static void dump(const void *ptr, int len)
264 const unsigned char *buf;
267 if (!(debug & L_DUMP))
/* chunk = number of bytes shown on this row */
272 int chunk = ((len >= 16) ? 16 : len);
/* Each "%02x " specifier is 5 characters long: advancing the format string
 * by (16-chunk)*5 drops the specifiers that are not needed for a short row.
 * NOTE(review): all sixteen buf[] arguments are still evaluated, so a short
 * final row reads bytes beyond the first chunk ones - confirm the buffer
 * passed in is always large enough. */
274 "%02x %02x %02x %02x %02x %02x %02x %02x "
275 "%02x %02x %02x %02x %02x %02x %02x %02x " + (16-chunk) * 5,
276 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
277 buf[8], buf[9],buf[10],buf[11],buf[12],buf[13],buf[14],buf[15]
/* Pad a short row with 3 spaces per missing byte */
279 fprintf(stderr, "%*s", (16-chunk) * 3, "");
/* Text column: bytes in 32..126 are shown as-is, everything else as '.' */
283 unsigned char c = *buf++;
284 *p++ = (c >= 32 && c < 127 ? c : '.');
292 void dump(const void *ptr, int len);
/* Call dump(p, n) only when L_DUMP is non-zero; L_DUMP is the constant 0
 * when DEBUG_BUILD is 0. */
295 #define hex_dump(p,n) do { if (L_DUMP) dump(p,n); } while (0)
/* open() wrapper that reports failure with perror_and_die("open").
 * NOTE(review): the failure test and the return of fd are on lines not
 * visible in this view. */
297 static int xopen3(const char *pathname, int flags, int mode)
299 int fd = open(pathname, flags, mode);
301 perror_and_die("open");
/* pipe() wrapper that reports failure with perror_and_die("pipe").
 * fds receives the descriptor pair.
 * NOTE(review): the pipe() call itself is on a line not visible here. */
305 static void xpipe(int *fds)
308 perror_and_die("pipe");
/* execve() wrapper that never returns: execve() only comes back on failure,
 * in which case the error is reported and the process dies. */
311 static void xexecve(const char *filename, char **argv, char **envp) NORETURN;
312 static void xexecve(const char *filename, char **argv, char **envp)
314 execve(filename, argv, envp);
315 perror_and_die("cannot re-exec %s", filename);
/* Switch fd to non-blocking mode: read the current file status flags and
 * set them again with O_NONBLOCK added. Dies on fcntl() failure. */
318 static void ndelay_on(int fd)
320 int fl = fcntl(fd, F_GETFL);
322 perror_and_die("F_GETFL");
323 if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0)
324 perror_and_die("setting O_NONBLOCK");
/* Set the descriptor flags of fd to FD_CLOEXEC. Dies on fcntl() failure. */
327 static void close_on_exec(int fd)
329 if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0)
330 perror_and_die("setting FD_CLOEXEC");
/* Return the CLOCK_MONOTONIC time in milliseconds, obtained with a direct
 * clock_gettime syscall. The result is converted to unsigned, so only the
 * low bits of the millisecond count are kept. Dies if the syscall fails. */
333 static unsigned monotonic_ms(void)
336 if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts))
337 perror_and_die("clock_gettime(MONOTONIC)");
338 return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
/* Number of bytes str occupies including its terminating NUL. */
341 static unsigned strsize(const char *str)
343 return strlen(str) + 1;
/* Like strsize(), but rounded up to the next multiple of 4. */
346 static unsigned strsize_aligned4(const char *str)
348 return (strlen(str) + 1 + 3) & (~3);
/* read() that is retried for as long as it fails with EINTR. */
351 static ssize_t safe_read(int fd, void *buf, size_t count)
355 n = read(fd, buf, count);
356 } while (n < 0 && errno == EINTR);
/* Read from fd using repeated safe_read() calls, advancing buf by the
 * amount read each time. A failing read is returned to the caller.
 * NOTE(review): the loop condition, EOF handling and the final return
 * value are on lines not visible in this view. */
360 static ssize_t full_read(int fd, void *buf, size_t len)
366 cc = safe_read(fd, buf, len);
368 return cc; /* read() returns -1 on failure. */
371 buf = ((char *)buf) + cc;
/* Single safe_read() that must deliver exactly len bytes; anything else
 * (error or partial read) is fatal. */
379 static void xsafe_read(int fd, void *buf, size_t len)
381 if (len != safe_read(fd, buf, len))
382 perror_and_die("short read");
/* full_read() that must deliver exactly len bytes; anything else is fatal. */
384 static void xfull_read(int fd, void *buf, size_t len)
386 if (len != full_read(fd, buf, len))
387 perror_and_die("short read");
/* write() that is retried for as long as it fails with EINTR. */
391 static ssize_t safe_write(int fd, const void *buf, size_t count)
395 n = write(fd, buf, count);
396 } while (n < 0 && errno == EINTR);
/* Write to fd using repeated safe_write() calls, advancing buf by the
 * amount written each time. A failing write is returned to the caller.
 * NOTE(review): the loop condition and the final return value are on lines
 * not visible in this view. */
400 static ssize_t full_write(int fd, const void *buf, size_t len)
407 cc = safe_write(fd, buf, len);
409 return cc; /* write() returns -1 on failure. */
411 buf = ((const char *)buf) + cc;
/* Single safe_write() that must accept exactly count bytes; anything else
 * (error or partial write) is fatal. */
417 static void xsafe_write(int fd, const void *buf, size_t count)
419 if (count != safe_write(fd, buf, count))
420 perror_and_die("short write of %ld bytes", (long)count);
/* full_write() that must accept exactly count bytes; anything else is fatal. */
422 static void xfull_write(int fd, const void *buf, size_t count)
424 if (count != full_write(fd, buf, count))
425 perror_and_die("short write of %ld bytes", (long)count);
/* Make to_fd refer to the same open file as from_fd using dup2(); nothing
 * is done when both numbers are already equal. Dies if dup2() fails.
 * NOTE(review): whether from_fd is closed afterwards is on a line not
 * visible in this view. */
428 static void xmovefd(int from_fd, int to_fd)
430 if (from_fd != to_fd) {
431 if (dup2(from_fd, to_fd) < 0)
432 perror_and_die("dup2");
/* Parse a non-negative decimal number from str.
 * Accepted only if str starts with a digit, has no trailing characters
 * after the number, and the value is at most INT_MAX / 1000; otherwise the
 * program dies with an error message.
 * NOTE(review): the line returning the parsed value is not visible here. */
437 static unsigned getnum(const char *str)
439 if (str[0] >= '0' && str[0] <= '9') {
441 unsigned long l = strtoul(str, &p, 10);
442 /* must not overflow int even after x1000 */
443 if (!*p && l <= INT_MAX / 1000)
446 error_and_die("malformed or too big number '%s'", str);
/* Return a pointer to the first character of s that is not whitespace
 * (possibly the terminating NUL). The result points into s itself. */
static char *skip_whitespace(const char *s)
{
	/* NB: isspace('\0') returns 0.
	 * The cast matters: passing a negative plain char (any byte >= 0x80
	 * where char is signed) to isspace() is undefined behavior. */
	while (isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
/* Return a pointer to the first whitespace character of s, or to its
 * terminating NUL if there is none. The result points into s itself. */
static char *skip_non_whitespace(const char *s)
{
	/* Cast to unsigned char: isspace() on a negative plain char
	 * (bytes >= 0x80 where char is signed) is undefined behavior. */
	while (*s && !isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
/* malloc() wrapper that dies with "out of memory" instead of returning
 * failure to the caller.
 * NOTE(review): the NULL test and the return are on lines not visible here. */
462 static void *xmalloc(unsigned sz)
464 void *p = malloc(sz);
466 error_and_die("out of memory");
/* Allocate sz bytes with xmalloc().
 * NOTE(review): the name suggests the block is zero-filled before being
 * returned, but those lines are not visible in this view - confirm. */
470 static void *xzalloc(unsigned sz)
472 void *p = xmalloc(sz);
/* realloc() wrapper that dies with "out of memory" instead of returning
 * failure to the caller.
 * NOTE(review): the NULL test and the return are on lines not visible here. */
477 static void *xrealloc(void *p, unsigned size)
479 p = realloc(p, size);
481 error_and_die("out of memory");
/* strdup() wrapper that dies with "out of memory" instead of returning
 * failure to the caller.
 * NOTE(review): the NULL test and the return are on lines not visible here. */
485 static const char *xstrdup(const char *str)
487 const char *p = strdup(str);
489 error_and_die("out of memory");
504 static const char srv_name[3][7] = {
513 smallint srv_enable[3];
514 smallint check_files[3];
519 /* We try to closely mimic glibc nscd */
520 .logfile = NULL, /* default is to not have a log file */
522 .srv_enable = { 0, 0, 0 },
523 .check_files = { 1, 1, 1 },
524 .pttl = { 3600, 3600, 3600 },
525 .nttl = { 20, 60, 20 },
526 /* huh, what is the default cache size in glibc nscd? */
527 .size = { 256 * 8 / 3, 256 * 8 / 3, 256 * 8 / 3 },
530 static const char default_conffile[] = "/etc/nscd.conf";
531 static const char *self_exe_points_to = "/proc/self/exe";
535 ** Clients, workers machinery
538 /* Header common to all requests */
539 #define USER_REQ_STRUCT \
540 uint32_t version; /* Version number of the daemon interface */ \
541 uint32_t type; /* Service requested */ \
542 uint32_t key_len; /* Key length */
544 typedef struct user_req_header {
550 MAX_USER_REQ_SIZE = 1024,
551 USER_HDR_SIZE = sizeof(user_req_header),
552 /* DNS queries time out after 20 seconds,
553 * we will allow for a bit more */
554 WORKER_TIMEOUT_SEC = 30,
555 CLIENT_TIMEOUT_MS = 100,
556 SMALL_POLL_TIMEOUT_MS = 200,
559 typedef struct user_req {
561 struct { /* as came from client */
564 struct { /* when stored in cache, overlaps .version */
566 /* (timestamp24 * 256) == timestamp in ms */
567 unsigned timestamp24:24;
570 char reqbuf[MAX_USER_REQ_SIZE - USER_HDR_SIZE];
573 /* Compile-time check for correct size */
574 struct BUG_wrong_user_req_size {
575 char BUG_wrong_user_req_size[sizeof(user_req) == MAX_USER_REQ_SIZE ? 1 : -1];
587 SHUTDOWN, /* Shut the server down */
588 GETSTAT, /* Get the server statistic */
589 INVALIDATE, /* Invalidate one special cache */
601 static const char *const typestr[] = {
602 "GETPWBYNAME", /* done */
603 "GETPWBYUID", /* done */
604 "GETGRBYNAME", /* done */
605 "GETGRBYGID", /* done */
606 "GETHOSTBYNAME", /* done */
607 "GETHOSTBYNAMEv6", /* done */
608 "GETHOSTBYADDR", /* done */
609 "GETHOSTBYADDRv6", /* done */
610 "SHUTDOWN", /* done */
611 "GETSTAT", /* info? */
612 "INVALIDATE", /* done */
613 /* won't do: nscd passes a name of shmem segment
614 * which client can map and "see" the db */
616 "GETFDGR", /* won't do */
617 "GETFDHST", /* won't do */
619 "INITGROUPS", /* done */
620 "GETSERVBYNAME", /* prio 3 (no caching?) */
621 "GETSERVBYPORT", /* prio 3 (no caching?) */
622 "GETFDSERV" /* won't do */
625 extern const char *const typestr[];
627 static const smallint type_to_srv[] = {
628 [GETPWBYNAME ] = SRV_PASSWD,
629 [GETPWBYUID ] = SRV_PASSWD,
630 [GETGRBYNAME ] = SRV_GROUP,
631 [GETGRBYGID ] = SRV_GROUP,
632 [GETHOSTBYNAME ] = SRV_HOSTS,
633 [GETHOSTBYNAMEv6 ] = SRV_HOSTS,
634 [GETHOSTBYADDR ] = SRV_HOSTS,
635 [GETHOSTBYADDRv6 ] = SRV_HOSTS,
636 [GETAI ] = SRV_HOSTS,
637 [INITGROUPS ] = SRV_GROUP,
/* Tell whether a request type is one this server does not handle.
 * Returns 0 for GETAI and INITGROUPS, 1 for GETSTAT and for any type
 * numerically above INVALIDATE.
 * NOTE(review): the final return for the remaining types is on a line not
 * visible in this view (presumably 0 - confirm). */
640 static int unsupported_ureq_type(unsigned type)
642 if (type == GETAI) return 0;
643 if (type == INITGROUPS) return 0;
644 if (type == GETSTAT) return 1;
645 if (type > INVALIDATE) return 1;
650 typedef struct client_info {
651 /* if client_fd != 0, we are waiting for the reply from worker
652 * on pfd[i].fd, and client_fd is saved client's fd
653 * (we need to put it back into pfd[i].fd later) */
655 unsigned bytecnt; /* bytes read from client */
656 unsigned bufidx; /* buffer# in global client_buf[] */
658 unsigned respos; /* response */
659 user_req *resptr; /* response */
660 user_req **cache_pp; /* cache entry address */
661 user_req *ureq; /* request (points to client_buf[x]) */
664 static unsigned g_now_ms;
665 static int min_closed = INT_MAX;
666 static int cnt_closed = 0;
667 static int num_clients = 2; /* two listening sockets are "clients" too */
669 /* We read up to max_reqnum requests in parallel */
670 static unsigned max_reqnum = 14;
672 /* To be allocated at init to become client_buf[max_reqnum][MAX_USER_REQ_SIZE].
673 * Note: it is a pointer to [MAX_USER_REQ_SIZE] arrays,
674 * not [MAX_USER_REQ_SIZE] array of pointers.
676 static char (*client_buf)[MAX_USER_REQ_SIZE];
677 static char *busy_cbuf;
678 static struct pollfd *pfd;
679 static client_info *cinfo;
681 /* Request, response and cache data structures:
683 * cache[] (defined later):
684 * cacheline_t cache[cache_size] array, or in other words,
685 * user_req* cache[cache_size][8] array.
686 * Every client request is hashed, hash value determines which cache[x]
687 * will have the response stored in one of its 8 elements.
688 * Cache entries have this format: request, then padding to 32 bits,
690 * Addresses in cache[x][y] may be NULL or:
691 * (&client_buf[z]) & 1: the cache miss is in progress ("future entry"):
692 * "the data is not in the cache (yet), wait for it to appear"
693 * (&client_buf[z]) & 3: the cache miss is in progress and other clients
694 * also want the same data ("shared future entry")
695 * else (non-NULL but low two bits are 0): cached data in malloc'ed block
697 * Each of these is a [max_reqnum] sized array:
698 * pfd[i] - given to poll() to wait for requests and replies.
699 * .fd: first two pfd[i]: listening Unix domain sockets, else
700 * .fd: open fd to a client, for reading client's request, or
701 * .fd: open fd to a worker, to send request and get response back
702 * cinfo[i] - auxiliary client data for pfd[i]
703 * .client_fd: open fd to a client, in case we already had read its
704 * request and got a cache miss, and created a worker or
705 * wait for another client's worker.
706 * Otherwise, it's 0 and client's fd is in pfd[i].fd
707 * .bufidx: index in client_buf[] we store client's request in
708 * .ureq: = client_buf[bufidx]
709 * .bytecnt: size of the request
710 * .started_ms: used to time out unresponsive clients
711 * .resptr: initially NULL. Later, same as cache[x][y] pointer to a cached
712 * response, or (a rare case) a "fake cache" entry:
713 * all cache[hash(request)][0..7] blocks were found busy,
714 * the result won't be cached.
715 * .respos: "write-out to client" offset
716 * .cache_pp: initially NULL. Later, &cache[x][y] where the response is,
717 * or will be stored. Remains NULL if "fake cache" entry is in use
719 * When a client has received its reply (or otherwise closed (timeout etc)),
720 * corresponding pfd[i] and cinfo[i] are removed by shifting [i+1], [i+2] etc
721 * elements down, so that both arrays never have free holes.
722 * [num_clients] is always the first free element.
724 * Each of these also is a [max_reqnum] sized array, but indexes
725 * do not correspond directly to pfd[i] and cinfo[i]:
726 * client_buf[n][MAX_USER_REQ_SIZE] - buffers we read client requests into
727 * busy_cbuf[n] - bool flags marking busy client_buf[]
729 /* Possible reductions:
730 * fd, bufidx - uint8_t
731 * started_ms -> uint16_t started_s
732 * ureq - eliminate (derivable from bufidx?)
735 /* Are special bits 0? is it a true cached entry? */
736 #define CACHED_ENTRY(p) ( ((long)(p) & 3) == 0 )
737 /* Are special bits 11? is it a shared future cache entry? */
738 #define CACHE_SHARED(p) ( ((long)(p) & 3) == 3 )
739 /* Return a ptr with special bits cleared (used for accessing data) */
740 #define CACHE_PTR(p) ( (void*) ((long)(p) & ~(long)3) )
741 /* Return a ptr with special bits set to x1: make future cache entry ptr */
742 #define MAKE_FUTURE_PTR(p) ( (void*) ((long)(p) | 1) )
743 /* Modify ptr, set special bits to 11: shared future cache entry */
744 #define MARK_PTR_SHARED(pp) ( *(long*)(pp) |= 3 )
/* Size of a request in bytes: the fixed header plus key_len bytes of key. */
746 static inline unsigned ureq_size(const user_req *ureq)
748 return sizeof(user_req_header) + ureq->key_len;
751 static unsigned cache_age(const user_req *ureq)
753 if (!CACHED_ENTRY(ureq))
755 return (uint32_t) (g_now_ms - (ureq->timestamp24 << 8));
/* Stamp a cache entry with the current time: g_now_ms divided by 256 is
 * stored in the 24-bit timestamp24 field (higher bits are dropped). */
758 static void set_cache_timestamp(user_req *ureq)
760 ureq->timestamp24 = g_now_ms >> 8;
/* Find a client_buf[] slot whose busy_cbuf[] flag is clear, scanning the
 * slots circularly (next_buf advances modulo max_reqnum) until the scan
 * returns to its starting point. Dies with "no free bufs?!" if every slot
 * is busy.
 * NOTE(review): the declarations, the marking of the slot as busy and the
 * returned value are on lines not visible in this view. */
763 static int alloc_buf_no(void)
768 next_buf = (next_buf + 1) % max_reqnum;
769 if (!busy_cbuf[cur]) {
773 } while (next_buf != n);
774 error_and_die("no free bufs?!");
/* Translate a buffer number into the address of client_buf[i]. */
777 static inline void *bufno2buf(int i)
779 return client_buf[i];
782 static void free_refcounted_ureq(user_req **ureqp);
/* Tear down client slot i: close the saved client fd when it differs from
 * pfd[i].fd, mark pfd[i] as unused (fd = 0), release the client's request
 * buffer flag, and drop the client's reference to its response.
 * A response with no cache slot (cache_pp == NULL) is a "fake cache entry"
 * owned by this client only; otherwise the response is shared and goes
 * through free_refcounted_ureq().
 * NOTE(review): several short lines (closing of pfd[i].fd, the actual free
 * of the fake entry, counters, braces) are not visible in this view. */
784 static void close_client(unsigned i)
786 log(L_DEBUG, "closing client %u (fd %u,%u)", i, pfd[i].fd, cinfo[i].client_fd);
787 /* Paranoia. We had nasty bugs where client was closed twice. */
792 if (cinfo[i].client_fd && cinfo[i].client_fd != pfd[i].fd)
793 close(cinfo[i].client_fd);
794 pfd[i].fd = 0; /* flag as unused (coalescing needs this) */
795 busy_cbuf[cinfo[i].bufidx] = 0;
797 if (cinfo[i].cache_pp == NULL) {
798 user_req *resptr = cinfo[i].resptr;
800 log(L_DEBUG, "client %u: freeing fake cache entry %p", i, resptr);
804 /* Most of the time, it is not freed here,
805 * only refcounted--. Freeing happens
806 * if it was deleted from cache[] but retained
809 free_refcounted_ureq(&cinfo[i].resptr);
819 ** nscd API <-> C API conversion
822 typedef struct response_header {
823 uint32_t version_or_size;
828 typedef struct initgr_response_header {
829 uint32_t version_or_size;
832 /* code assumes gid_t == int32, let's check that */
833 int32_t gid[sizeof(gid_t) == sizeof(int32_t) ? 0 : -1];
834 /* char user_str[as_needed]; */
835 } initgr_response_header;
/* Build an INITGROUPS reply for username: look the user up with getpwnam()
 * and collect the group list with getgrouplist().
 * The reply buffer is grown with xrealloc() and getgrouplist() is retried
 * while it returns -1. version_or_size receives the total reply size in
 * bytes and ngrps the number of groups.
 * NOTE(review): the not-found path, the initial ngroups value and the return
 * statement are on lines not visible in this view. */
837 static initgr_response_header *obtain_initgroups(const char *username)
839 struct initgr_response_header *resp;
841 enum { MAGIC_OFFSET = sizeof(*resp) / sizeof(int32_t) };
845 pw = getpwnam(username);
848 resp->version_or_size = sizeof(*resp);
854 /* getgrouplist may be very expensive, it's much better to allocate
855 * a bit more than to run getgrouplist twice */
859 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
860 resp = xrealloc(resp, sz);
861 } while (getgrouplist(username, pw->pw_gid, (gid_t*) &resp->gid, &ngroups) == -1);
862 log(L_DEBUG, "ngroups=%d", ngroups);
/* Recompute the size from the group count getgrouplist() reported */
864 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
865 /* resp = xrealloc(resp, sz); - why bother */
866 resp->version_or_size = sz;
868 resp->ngrps = ngroups;
873 typedef struct pw_response_header {
874 uint32_t version_or_size;
877 int32_t pw_passwd_len;
880 int32_t pw_gecos_len;
882 int32_t pw_shell_len;
883 /* char pw_name[pw_name_len]; */
884 /* char pw_passwd[pw_passwd_len]; */
885 /* char pw_gecos[pw_gecos_len]; */
886 /* char pw_dir[pw_dir_len]; */
887 /* char pw_shell[pw_shell_len]; */
888 } pw_response_header;
/* Serialize a struct passwd into a pw_response_header followed by the
 * NUL-terminated strings name, passwd, gecos, dir and shell, in that order.
 * Each *_len field holds the string size including its NUL, and
 * version_or_size holds the total size of the reply.
 * NOTE(review): the handling of pw == NULL, the allocation of resp and the
 * return statement are on lines not visible in this view. */
890 static pw_response_header *marshal_passwd(struct passwd *pw)
893 pw_response_header *resp;
894 unsigned pw_name_len;
895 unsigned pw_passwd_len;
896 unsigned pw_gecos_len;
898 unsigned pw_shell_len;
899 unsigned sz = sizeof(*resp);
/* Total size = header + all five strings with their NULs */
901 sz += (pw_name_len = strsize(pw->pw_name));
902 sz += (pw_passwd_len = strsize(pw->pw_passwd));
903 sz += (pw_gecos_len = strsize(pw->pw_gecos));
904 sz += (pw_dir_len = strsize(pw->pw_dir));
905 sz += (pw_shell_len = strsize(pw->pw_shell));
908 resp->version_or_size = sz;
914 resp->pw_name_len = pw_name_len;
915 resp->pw_passwd_len = pw_passwd_len;
916 resp->pw_uid = pw->pw_uid;
917 resp->pw_gid = pw->pw_gid;
918 resp->pw_gecos_len = pw_gecos_len;
919 resp->pw_dir_len = pw_dir_len;
920 resp->pw_shell_len = pw_shell_len;
/* String data starts right after the fixed header */
921 p = (char*)(resp + 1);
922 strcpy(p, pw->pw_name); p += pw_name_len;
923 strcpy(p, pw->pw_passwd); p += pw_passwd_len;
924 strcpy(p, pw->pw_gecos); p += pw_gecos_len;
925 strcpy(p, pw->pw_dir); p += pw_dir_len;
926 strcpy(p, pw->pw_shell); p += pw_shell_len;
/* NOTE(review): the second argument is a pointer difference (ptrdiff_t)
 * printed with %u - the format and argument types do not match. */
927 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
932 typedef struct gr_response_header {
933 uint32_t version_or_size;
935 int32_t gr_name_len; /* strlen(gr->gr_name) + 1; */
936 int32_t gr_passwd_len; /* strlen(gr->gr_passwd) + 1; */
937 int32_t gr_gid; /* gr->gr_gid */
938 int32_t gr_mem_cnt; /* while (gr->gr_mem[gr_mem_cnt]) ++gr_mem_cnt; */
939 /* int32_t gr_mem_len[gr_mem_cnt]; */
940 /* char gr_name[gr_name_len]; */
941 /* char gr_passwd[gr_passwd_len]; */
942 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
943 /* char gr_gid_str[as_needed]; - huh? */
944 /* char orig_key[as_needed]; - needed?? I don't do this ATM... */
946 glibc adds gr_gid_str, but client doesn't get/use it:
947 writev(3, [{"\2\0\0\0\2\0\0\0\5\0\0\0", 12}, {"root\0", 5}], 2) = 17
948 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP, revents=POLLIN}], 1, 5000) = 1
949 read(3, "\2\0\0\0\1\0\0\0\10\0\0\0\4\0\0\0\0\0\0\0\0\0\0\0", 24) = 24
950 readv(3, [{"", 0}, {"root\0\0\0\0\0\0\0\0", 12}], 2) = 12
953 } gr_response_header;
/* Serialize a struct group into a gr_response_header followed by:
 * an array of member string sizes, the group name, the group password and
 * the member strings (all NUL-terminated). version_or_size holds the total
 * size of the reply.
 * NOTE(review): the handling of gr == NULL, the allocation of resp, the
 * counter updates inside the loops and the return statement are on lines
 * not visible in this view. */
955 static gr_response_header *marshal_group(struct group *gr)
958 gr_response_header *resp;
960 unsigned sz = sizeof(*resp);
962 sz += strsize(gr->gr_name);
963 sz += strsize(gr->gr_passwd);
/* gr_mem is a NULL-terminated array of member names */
965 while (gr->gr_mem[gr_mem_cnt]) {
966 sz += strsize(gr->gr_mem[gr_mem_cnt]);
969 /* for int32_t gr_mem_len[gr_mem_cnt]; */
970 sz += gr_mem_cnt * sizeof(int32_t);
973 resp->version_or_size = sz;
979 resp->gr_name_len = strsize(gr->gr_name);
980 resp->gr_passwd_len = strsize(gr->gr_passwd);
981 resp->gr_gid = gr->gr_gid;
982 resp->gr_mem_cnt = gr_mem_cnt;
/* Variable-length data starts right after the fixed header */
983 p = (char*)(resp + 1);
984 /* int32_t gr_mem_len[gr_mem_cnt]; */
986 while (gr->gr_mem[gr_mem_cnt]) {
987 *(uint32_t*)p = strsize(gr->gr_mem[gr_mem_cnt]);
991 /* char gr_name[gr_name_len]; */
992 strcpy(p, gr->gr_name);
993 p += strsize(gr->gr_name);
994 /* char gr_passwd[gr_passwd_len]; */
995 strcpy(p, gr->gr_passwd);
996 p += strsize(gr->gr_passwd);
997 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
999 while (gr->gr_mem[gr_mem_cnt]) {
1000 strcpy(p, gr->gr_mem[gr_mem_cnt]);
1001 p += strsize(gr->gr_mem[gr_mem_cnt]);
/* NOTE(review): pointer difference (ptrdiff_t) printed with %u */
1004 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
1009 typedef struct hst_response_header {
1010 uint32_t version_or_size;
1013 int32_t h_aliases_cnt;
1014 int32_t h_addrtype; /* AF_INET or AF_INET6 */
1015 int32_t h_length; /* 4 or 16 */
1016 int32_t h_addr_list_cnt;
1018 /* char h_name[h_name_len]; - we pad it to 4 bytes */
1019 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1020 /* char h_addr_list[h_addr_list_cnt][h_length]; - every one is the same size [h_length] (4 or 16) */
1021 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1022 } hst_response_header;
/* Serialize a struct hostent into a hst_response_header followed by:
 * the host name (size padded to a multiple of 4), an array of alias string
 * sizes, the addresses (h_length bytes each) and the alias strings.
 * version_or_size holds the total size of the reply.
 * NOTE(review): the handling of h == NULL (where error is set to
 * HOST_NOT_FOUND), the allocation of resp, the counter/pointer updates
 * inside the loops and the return statement are on lines not visible in
 * this view. */
1024 static hst_response_header *marshal_hostent(struct hostent *h)
1027 hst_response_header *resp;
1028 unsigned h_name_len;
1029 unsigned h_aliases_cnt;
1030 unsigned h_addr_list_cnt;
1031 unsigned sz = sizeof(*resp);
1033 /* char h_name[h_name_len] */
1034 sz += h_name_len = strsize_aligned4(h->h_name);
/* h_addr_list and h_aliases are NULL-terminated arrays: count them */
1035 h_addr_list_cnt = 0;
1036 while (h->h_addr_list[h_addr_list_cnt]) {
1039 /* char h_addr_list[h_addr_list_cnt][h_length] */
1040 sz += h_addr_list_cnt * h->h_length;
1042 while (h->h_aliases[h_aliases_cnt]) {
1043 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]] */
1044 sz += strsize(h->h_aliases[h_aliases_cnt]);
1047 /* uint32_t h_aliases_len[h_aliases_cnt] */
1048 sz += h_aliases_cnt * 4;
1051 resp->version_or_size = sz;
1053 /*resp->found = 0;*/
1054 resp->error = HOST_NOT_FOUND;
1058 resp->h_name_len = h_name_len;
1059 resp->h_aliases_cnt = h_aliases_cnt;
1060 resp->h_addrtype = h->h_addrtype;
1061 resp->h_length = h->h_length;
1062 resp->h_addr_list_cnt = h_addr_list_cnt;
1063 /*resp->error = 0;*/
/* Variable-length data starts right after the fixed header */
1064 p = (char*)(resp + 1);
1065 /* char h_name[h_name_len]; */
1066 strcpy(p, h->h_name);
1068 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1070 while (h->h_aliases[h_aliases_cnt]) {
1071 *(uint32_t*)p = strsize(h->h_aliases[h_aliases_cnt]);
1075 /* char h_addr_list[h_addr_list_cnt][h_length]; */
1076 h_addr_list_cnt = 0;
1077 while (h->h_addr_list[h_addr_list_cnt]) {
1078 memcpy(p, h->h_addr_list[h_addr_list_cnt], h->h_length);
1082 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1084 while (h->h_aliases[h_aliases_cnt]) {
1085 strcpy(p, h->h_aliases[h_aliases_cnt]);
1086 p += strsize(h->h_aliases[h_aliases_cnt]);
/* NOTE(review): pointer difference (ptrdiff_t) printed with %u */
1089 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
1094 /* Reply to addrinfo query */
1095 typedef struct ai_response_header {
1096 uint32_t version_or_size;
1102 /* char ai_addr[naddrs][4 or 16]; - addrslen bytes in total */
1103 /* char ai_family[naddrs]; - AF_INET[6] each (determines ai_addr[i] length) */
1104 /* char ai_canonname[canonlen]; */
1105 } ai_response_header;
/* Resolve hostname with getaddrinfo() (AI_CANONNAME, SOCK_STREAM hints) and
 * build an ai_response_header reply followed by: the raw addresses (4 bytes
 * for AF_INET entries, 16 otherwise), one family byte per address, and the
 * canonical name if one was returned.
 * The addrinfo list is deliberately not freed (see the comment at the end).
 * NOTE(review): the failure path, the list-walking loop headers, the
 * allocation of resp and the return statement are on lines not visible in
 * this view. */
1107 static ai_response_header *obtain_addrinfo(const char *hostname)
1109 struct addrinfo hints;
1110 struct addrinfo *ai;
1111 struct addrinfo *ap;
1112 ai_response_header *resp;
1116 unsigned naddrs = 0;
1117 unsigned addrslen = 0;
1118 unsigned canonlen = 0;
1120 memset(&hints, 0, sizeof(hints));
1121 hints.ai_flags = AI_CANONNAME;
1122 /* kills dups (one for each possible SOCK_xxx) */
1123 /* this matches glibc behavior */
1124 hints.ai_socktype = SOCK_STREAM;
1125 ai = NULL; /* on failure getaddrinfo may leave it as-is */
1126 err = getaddrinfo(hostname, NULL, &hints, &ai);
/* canonlen stays 0 when no canonical name was returned */
1130 if (ai->ai_canonname)
1131 sz += canonlen = strsize(ai->ai_canonname);
1135 addrslen += (ap->ai_family == AF_INET ? 4 : 16);
/* one family byte per address, plus the address bytes themselves */
1138 sz += naddrs + addrslen;
1141 resp->version_or_size = sz;
1144 /*resp->found = 0;*/
1148 resp->naddrs = naddrs;
1149 resp->addrslen = addrslen;
1150 resp->canonlen = canonlen;
/* p walks the address area; family walks the family-byte area behind it */
1151 p = (char*)(resp + 1);
1152 family = p + addrslen;
1155 /* char ai_family[naddrs]; */
1156 *family++ = ap->ai_family;
1157 /* char ai_addr[naddrs][4 or 16]; */
1158 if (ap->ai_family == AF_INET) {
1159 memcpy(p, &(((struct sockaddr_in*)(ap->ai_addr))->sin_addr), 4);
1162 memcpy(p, &(((struct sockaddr_in6*)(ap->ai_addr))->sin6_addr), 16);
1167 /* char ai_canonname[canonlen]; */
1168 if (ai->ai_canonname)
1169 strcpy(family, ai->ai_canonname);
/* NOTE(review): strsize(ai->ai_canonname) is evaluated here without the
 * NULL guard used above; canonlen already holds the guarded value. The
 * pointer difference is also printed with %u. */
1170 log(L_DEBUG, "sz:%u realsz:%u", sz, family + strsize(ai->ai_canonname) - (char*)resp);
1172 /* glibc 2.3.6 segfaults here sometimes
1173 * (maybe my mistake, fixed by "ai = NULL;" above).
1174 * Since we are in worker and are going to exit anyway, why bother? */
1175 /*freeaddrinfo(ai);*/
1184 /* one 8-element "cacheline" */
1185 typedef user_req *cacheline_t[8];
1186 static unsigned cache_size;
1187 /* Points to cacheline_t cache[cache_size] array, or in other words,
1188 * points to user_req* cache[cache_size][8] array */
1189 static cacheline_t *cache;
1190 static unsigned cached_cnt;
1191 static unsigned cache_access_cnt = 1; /* prevent division by zero */
1192 static unsigned cache_hit_cnt = 1;
1193 static unsigned last_age_time;
1194 static unsigned aging_interval_ms;
1195 static unsigned min_aging_interval_ms;
/* Return the address of the response stored behind a cached request:
 * the request size rounded up to a multiple of 4, counted from ureq. */
1197 static response_header *ureq_response(user_req *ureq)
1199 /* Skip query part, find answer part
1200 * (answer is 32-bit aligned) */
1201 return (void*) ((char*)ureq + ((ureq_size(ureq) + 3) & ~3));
1204 /* This hash is supposed to be good for short textual data */
/* Hash sz bytes at p, starting from the seed value hash.
 * Each step computes hash = (hash * 33) ^ next_byte.
 * NOTE(review): the loop over the sz bytes and the return statement are on
 * lines not visible in this view. */
1205 static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash)
1209 hash = (32 * hash + hash) ^ *key++;
/* Drop one reference to the entry *ureqp points to. Tagged "future" entries
 * (not CACHED_ENTRY) are handled separately from real cached blocks. For a
 * real block, a non-zero refcount is decremented; a zero refcount leads to
 * the "freeing" path.
 * NOTE(review): the NULL handling, the decrement, the free() call and any
 * clearing of *ureqp are on lines not visible in this view. */
1214 static void free_refcounted_ureq(user_req **ureqp)
1216 user_req *ureq = *ureqp;
1218 /* (when exactly can this happen?) */
1222 if (!CACHED_ENTRY(ureq))
1225 if (ureq->refcount) {
1227 log(L_DEBUG2, "--%p.refcount=%u", ureq, ureq->refcount);
1229 log(L_DEBUG2, "%p.refcount=0, freeing", ureq);
/* Look up the request ureq in the cache.
 * The request (from key_len onwards, seeded with the type) is hashed to
 * select one 8-entry cacheline. If an entry with identical type, key_len
 * and key is found, the address of that slot is returned. Otherwise a slot
 * is chosen - a free one if available, else the oldest real cached entry,
 * which is released - and filled with a "future" pointer to ureq; that
 * slot's address is returned.
 * If no slot can be reused (every entry has age 0), the "fake cache entry"
 * path is taken instead.
 * NOTE(review): many short lines (declarations, counters, continue/return
 * statements, braces) are not visible in this view; in particular the value
 * returned on the fake-entry path is not shown. */
1235 static user_req **lookup_in_cache(user_req *ureq)
1237 user_req **cacheline;
1241 unsigned ureq_sz = ureq_size(ureq);
1243 /* prevent overflow and division by zero */
1245 if ((int)cache_access_cnt < 0) {
1246 cache_access_cnt = (cache_access_cnt >> 1) + 1;
1247 cache_hit_cnt = (cache_hit_cnt >> 1) + 1;
1250 hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type);
1251 log(L_DEBUG2, "hash:%08x", hash);
1252 hash = hash % cache_size;
1253 cacheline = cache[hash];
/* Pass 1: search the cacheline for a matching entry, remembering a free slot */
1256 for (i = 0; i < 8; i++) {
1257 user_req *cached = CACHE_PTR(cacheline[i]);
1259 if (free_cache == -1)
1263 /* ureq->version is always 2 and is reused in cache
1264 * for other purposes, we need to skip it here */
1265 if (memcmp(&ureq->type, &cached->type, ureq_sz - offsetof(user_req, type)) == 0) {
1266 log(L_DEBUG, "found in cache[%u][%u]", hash, i);
1268 return &cacheline[i];
1272 if (free_cache >= 0) {
1275 log(L_DEBUG, "not found, using free cache[%u][%u]", hash, i);
/* No free slot: pick the entry with the largest cache_age() as the victim */
1279 unsigned oldest_idx = 0;
1280 unsigned oldest_age = 0;
1281 for (i = 0; i < 8; i++) {
1282 unsigned age = cache_age(cacheline[i]);
1283 if (age > oldest_age) {
1288 if (oldest_age == 0) {
1289 /* All entries in cacheline are "future" entries!
1290 * This is very unlikely, but we must still work correctly.
1291 * We call this "fake cache entry".
1292 * The data will be "cached" only for the duration
1293 * of this client's request lifetime.
1295 log(L_DEBUG, "not found, and cache[%u] is full: using fake cache entry", hash);
1299 log(L_DEBUG, "not found, freeing and reusing cache[%u][%u] (age %u)", hash, i, oldest_age);
1300 free_refcounted_ureq(&cacheline[i]);
1303 cacheline[i] = MAKE_FUTURE_PTR(ureq);
1304 return &cacheline[i];
/* Walk the cache and release entries.
 *   free_all - when non-zero, matching entries are released regardless of age
 *   srv      - service index to restrict the walk to, or -1 for all services
 * For real cached entries of a matching service, the entry's age is compared
 * with the positive or negative TTL configured for its service (chosen by
 * resp->found). When called for all services, aging_interval_ms is reset to
 * INT_MAX and then lowered while walking the surviving entries.
 * Logs how many entries were freed and how many remain.
 * NOTE(review): the loop header, the age/ttl comparison and the exact value
 * compared against aging_interval_ms are on lines not visible in this view. */
1307 static void age_cache(unsigned free_all, int srv)
1309 user_req **cp = *cache;
/* sv = entry count before aging, used for the "freed" statistic below */
1311 unsigned sv = cached_cnt;
1313 log(L_DEBUG, "aging cache, srv:%d, free_all:%u", srv, free_all);
1314 if (srv == -1 || free_all)
1315 aging_interval_ms = INT_MAX;
1318 user_req *cached = *cp;
/* Skip empty slots and tagged "future" entries */
1319 if (CACHED_ENTRY(cached) && cached != NULL) {
1320 int csrv = type_to_srv[cached->type];
1321 if (srv == -1 || srv == csrv) {
1324 free_refcounted_ureq(cp);
1326 unsigned age = cache_age(cached);
1327 response_header *resp = ureq_response(cached);
1328 unsigned ttl = (resp->found ? config.pttl : config.nttl)[csrv];
1330 log(L_DEBUG2, "freeing: age %u positive %d ttl %u", age, resp->found, ttl);
1332 free_refcounted_ureq(cp);
1333 } else if (srv == -1) {
1335 if (aging_interval_ms > ttl)
1336 aging_interval_ms = ttl;
1343 log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt);
1344 log(L_DEBUG2, "aging interval now %u ms", aging_interval_ms);
1352 /* Spawns a worker and feeds it with user query on stdin */
1353 /* Returns stdout fd of the worker, in blocking mode */
/* Two pipes are created: to_child carries the request, to_parent carries
 * the reply. The child moves their ends onto fds 0 and 1 and re-executes
 * this program under the name "worker_nscd". The parent writes
 * ureq_size(ureq) bytes of the request into to_child and returns the read
 * end of to_parent after marking it close-on-exec.
 * A failing vfork is fatal (perror_and_die).
 */
1354 static int create_and_feed_worker(user_req *ureq)
1360 } to_child, to_parent;
1362 /* NB: these pipe fds are in blocking mode and non-CLOEXECed */
1363 xpipe(&to_child.rd);
1364 xpipe(&to_parent.rd);
1367 if (pid < 0) /* error */
1368 perror_and_die("vfork");
1369 if (!pid) { /* child */
/* param receives the decimal text of 'debug'; the size is a generous
 * bound for the digits of an unsigned int plus the terminator.
 * NOTE(review): presumably handed to the worker as argv[1] - confirm */
1370 char param[sizeof(int)*3 + 2];
1374 close(to_parent.rd);
1375 xmovefd(to_child.rd, 0);
1376 xmovefd(to_parent.wr, 1);
1377 sprintf(param, "%u", debug);
1378 argv[0] = (char*) "worker_nscd";
1381 /* Re-exec ourself, cleaning up all allocated memory.
1382 * fds in parent are marked CLOEXEC and will be closed too
1384 /* Try link name first: it's better to have comm field
1385 * of "nscd" than "exe" (pgrep reported to fail to find us
1386 * by name when comm field contains "exe") */
/* argv+2 is passed as the environment pointer.
 * NOTE(review): presumably it points at argv's terminating NULL,
 * i.e. the worker starts with an empty environment - confirm */
1387 execve(self_exe_points_to, argv, argv+2);
1388 xexecve("/proc/self/exe", argv, argv+2);
1393 close(to_parent.wr);
1394 /* We do not expect child to block for any noticeably long time,
1395 * and also we expect write to be one-piece one:
1396 * ureq size is <= 1k and pipes are guaranteed to accept
1397 * at least PIPE_BUF at once */
1398 xsafe_write(to_child.wr, ureq, ureq_size(ureq));
1401 close_on_exec(to_parent.rd);
1402 return to_parent.rd;
/* Request being processed by the worker process. worker() points it at its
 * local request buffer so that worker_signal_handler() can log it. */
1405 static user_req *worker_ureq;
/* Returns a printable form of a request key, for log messages.
 * type: request type; buf: the request's key bytes.
 * For GETHOSTBYADDR the first 4 bytes of buf are taken as an IPv4 address
 * and formatted with inet_ntoa() (which returns a pointer to its own static
 * buffer). GETHOSTBYADDRv6 is handled by a separate branch.
 * NOTE(review): the result for other types is presumably buf itself - confirm.
 */
1408 static const char *req_str(unsigned type, const char *buf)
1410 if (type == GETHOSTBYADDR) {
1412 in.s_addr = *((uint32_t*)buf);
1413 return inet_ntoa(in);
1415 if (type == GETHOSTBYADDRv6) {
/* Declaration without a definition.
 * NOTE(review): presumably the alternative arm of a preprocessor
 * conditional around the definition above - confirm */
1421 const char *req_str(unsigned type, const char *buf);
/* Signal handler installed by worker() for SIGSEGV, SIGBUS, SIGILL, SIGFPE,
 * SIGABRT and SIGSTKFLT. Logs the signal number together with the request
 * (worker_ureq) that was being handled.
 * Two message variants exist: a detailed one (type name and printable key
 * via typestr[] and req_str()) and a terse one with numbers only.
 * NOTE(review): which variant is built is presumably chosen by a
 * preprocessor conditional - confirm.
 */
1424 static void worker_signal_handler(int sig)
1427 log(L_INFO, "worker:%d got sig:%d while handling req "
1428 "type:%d(%s) key_len:%d '%s'",
1430 worker_ureq->type, typestr[worker_ureq->type],
1431 worker_ureq->key_len,
1432 req_str(worker_ureq->type, worker_ureq->reqbuf)
1435 log(L_INFO, "worker:%d got sig:%d while handling req "
1436 "type:%d key_len:%d",
1438 worker_ureq->type, worker_ureq->key_len);
/* Body of a worker process (does not return).
 * param: decimal text of the debug level to use (parsed with atoi).
 *
 * Reads one user_req from fd 0, performs the libc lookup that corresponds
 * to ureq.type, marshals the result and writes it to fd 1:
 *   GETHOSTBYNAME / GETHOSTBYNAMEv6 -> gethostbyname / gethostbyname2(AF_INET6)
 *   GETHOSTBYADDR / GETHOSTBYADDRv6 -> gethostbyaddr(AF_INET / AF_INET6)
 *   GETPWBYNAME / GETPWBYUID        -> getpwnam / getpwuid
 *   GETGRBYNAME / GETGRBYGID        -> getgrnam / getgrgid
 *   GETAI                           -> obtain_addrinfo
 *   anything else                   -> obtain_initgroups
 */
1443 static void worker(const char *param) NORETURN;
1444 static void worker(const char *param)
1449 debug = atoi(param);
1451 worker_ureq = &ureq; /* for signal handler */
1453 /* Make sure we won't hang, but rather die */
1454 if (WORKER_TIMEOUT_SEC)
1455 alarm(WORKER_TIMEOUT_SEC);
1457 /* NB: fds 0, 1 are in blocking mode */
1459 /* We block here (for a short time) */
1460 /* Due to ureq size < PIPE_BUF read is atomic */
1461 /* No error or size checking: we trust the parent */
1462 safe_read(0, &ureq, sizeof(ureq));
/* From here on, fatal signals get logged together with the request */
1464 signal(SIGSEGV, worker_signal_handler);
1465 signal(SIGBUS, worker_signal_handler);
1466 signal(SIGILL, worker_signal_handler);
1467 signal(SIGFPE, worker_signal_handler);
1468 signal(SIGABRT, worker_signal_handler);
1470 signal(SIGSTKFLT, worker_signal_handler);
1473 if (ureq.type == GETHOSTBYNAME
1474 || ureq.type == GETHOSTBYNAMEv6
1476 resp = marshal_hostent(
1477 ureq.type == GETHOSTBYNAME
1478 ? gethostbyname(ureq.reqbuf)
1479 : gethostbyname2(ureq.reqbuf, AF_INET6)
1481 } else if (ureq.type == GETHOSTBYADDR
1482 || ureq.type == GETHOSTBYADDRv6
/* for address lookups the key is the address itself, key_len its size */
1484 resp = marshal_hostent(gethostbyaddr(ureq.reqbuf, ureq.key_len,
1485 (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6)
1487 } else if (ureq.type == GETPWBYNAME) {
1489 log(L_DEBUG2, "getpwnam('%s')", ureq.reqbuf);
1490 pw = getpwnam(ureq.reqbuf);
1491 log(L_DEBUG2, "getpwnam result:%p", pw);
1492 resp = marshal_passwd(pw);
1493 } else if (ureq.type == GETPWBYUID) {
/* numeric ids arrive as decimal text in reqbuf */
1494 resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf)));
1495 } else if (ureq.type == GETGRBYNAME) {
1496 struct group *gr = getgrnam(ureq.reqbuf);
1497 resp = marshal_group(gr);
1498 } else if (ureq.type == GETGRBYGID) {
1499 struct group *gr = getgrgid(atoi(ureq.reqbuf));
1500 resp = marshal_group(gr);
1501 } else if (ureq.type == GETAI) {
1502 resp = obtain_addrinfo(ureq.reqbuf);
1503 } else /*if (ureq.type == INITGROUPS)*/ {
1504 resp = obtain_initgroups(ureq.reqbuf);
/* A "not found" reply consists of the first 8 bytes of the response only */
1507 if (!((response_header*)resp)->found) {
1508 /* Parent knows about this special case */
1509 xfull_write(1, resp, 8);
1511 /* Responses can be big (getgrnam("guest") on a big user db),
1512 * we cannot rely on them being atomic. full_write loops
1514 xfull_write(1, resp, ((response_header*)resp)->version_or_size);
/* Files watched on behalf of each service, indexed by SRV_xxx.
 * Every element packs several NUL-terminated path names back to back;
 * check_files() steps through them with strlen() + 1. */
1524 static const char *const checked_filenames[] = {
1525 /* Note: compiler adds another \0 byte at the end of each array element,
1526 * so there are TWO \0's there.
1528 [SRV_PASSWD] = "/etc/passwd\0" "/etc/passwd.cache\0" "/etc/shadow\0",
1529 [SRV_GROUP] = "/etc/group\0" "/etc/group.cache\0",
1530 [SRV_HOSTS] = "/etc/hosts\0" "/etc/hosts.cache\0" "/etc/resolv.conf\0" "/etc/nsswitch.conf\0",
1531 /* ("foo.cache" files are maintained by libnss-cache) */
/* Fingerprint last computed by check_files() for each service;
 * a differing value on a later call means "files changed" */
1534 static long checked_status[ARRAY_SIZE(checked_filenames)];
/* Detects changes in the files that back service 'srv'.
 * For each file listed in checked_filenames[srv], the mtime, size and inode
 * number reported by stat() are XORed into one fingerprint 'v'.
 * If v differs from checked_status[srv], the new value is stored, the change
 * is logged and the service's cache entries are flushed through
 * age_cache(free_all = 1, srv).
 */
1536 static void check_files(int srv)
1539 const char *file = checked_filenames[srv];
/* tsb is zeroed first, so a file that cannot be stat'ed contributes zeros */
1544 memset(&tsb, 0, sizeof(tsb));
1545 stat(file, &tsb); /* ignore errors */
1546 /* Comparing struct stat's was giving false positives.
1547 * Extracting only those fields which are interesting:
1549 v ^= (long)tsb.st_mtime ^ (long)tsb.st_size ^ (long)tsb.st_ino; /* ^ (long)tsb.st_dev ? */
/* advance to the next packed file name */
1550 file += strlen(file) + 1;
1553 if (v != checked_status[srv]) {
1554 checked_status[srv] = v;
1555 log(L_INFO, "detected change in files related to service %d", srv);
1556 age_cache(/*free_all:*/ 1, srv);
1560 /* Returns 1 if we immediately have the answer */
/* i: index of the client's slot in pfd[] / cinfo[].
 *
 * Called once at least a request header has been read. Stages:
 *  - validate version, key_len and the number of bytes received
 *    (returns 0 while more bytes are still expected);
 *  - drop unsupported request types;
 *  - serve SHUTDOWN / INVALIDATE, which are honored for uid 0 only;
 *  - drop requests for disabled services and badly terminated keys;
 *  - look the request up in the cache: on a hit the cached response is
 *    attached to the client (refcount bumped) and 1 is returned;
 *    otherwise the client either waits for a lookup already in progress
 *    or a worker is started for it.
 * NOTE(review): the return values of the rejecting paths are not
 * documented here - confirm against the code.
 */
1561 static int handle_client(int i)
1564 user_req *ureq = cinfo[i].ureq;
1565 user_req **cache_pp;
1566 user_req *ureq_and_resp;
1569 log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'",
1570 ureq->version, ureq->type,
1571 ureq->type < ARRAY_SIZE(typestr) ? typestr[ureq->type] : "?",
1572 ureq->key_len, req_str(ureq->type, ureq->reqbuf));
1575 if (ureq->version != NSCD_VERSION) {
1576 log(L_INFO, "wrong version");
/* key_len is supplied by the client: refuse keys larger than reqbuf */
1580 if (ureq->key_len > sizeof(ureq->reqbuf)) {
1581 log(L_INFO, "bogus key_len %u - ignoring", ureq->key_len);
1585 if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) {
1586 log(L_INFO, "read %d, need to read %d",
1587 cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len);
1588 return 0; /* more to read */
1590 if (cinfo[i].bytecnt > USER_HDR_SIZE + ureq->key_len) {
1591 log(L_INFO, "read overflow: %u > %u",
1592 (int)cinfo[i].bytecnt, (int)(USER_HDR_SIZE + ureq->key_len));
1596 if (unsupported_ureq_type(ureq->type)) {
1597 /* We don't know this request. Just close the connection.
1598 * (glibc client interprets this like "not supported by this nscd")
1599 * Happens very often, thus DEBUG, not INFO */
1600 log(L_DEBUG, "unsupported query, dropping");
1605 hex_dump(cinfo[i].ureq, cinfo[i].bytecnt);
/* Administrative requests: the peer's credentials are obtained with
 * SO_PEERCRED and anything not coming from uid 0 is ignored */
1607 if (ureq->type == SHUTDOWN
1608 || ureq->type == INVALIDATE
1611 struct ucred caller;
1612 socklen_t optlen = sizeof(caller);
1613 if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) {
1614 log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno));
1618 if (caller.uid != 0) {
1619 log(L_INFO, "special request from non-root - ignoring");
1624 if (ureq->type == SHUTDOWN) {
1625 log(L_INFO, "got shutdown request, exiting");
/* INVALIDATE must carry a non-empty, NUL-terminated argument */
1628 if (!ureq->key_len || ureq->reqbuf[ureq->key_len - 1]) {
1629 log(L_INFO, "malformed invalidate request - ignoring");
1633 log(L_INFO, "got invalidate request, flushing cache");
1634 /* Frees entire cache. TODO: replace -1 with service (in ureq->reqbuf) */
1635 age_cache(/*free_all:*/ 1, -1);
1640 srv = type_to_srv[ureq->type];
1641 if (!config.srv_enable[srv]) {
1642 log(L_INFO, "service %d is disabled, dropping", srv);
/* Address lookups carry binary keys; every other non-empty key
 * has to end with a NUL byte */
1647 if (ureq->type != GETHOSTBYADDR
1648 && ureq->type != GETHOSTBYADDRv6
1650 if (ureq->key_len && ureq->reqbuf[ureq->key_len - 1] != '\0') {
1651 log(L_INFO, "badly terminated buffer");
1657 if (config.check_files[srv]) {
/* cache_pp may be NULL; in that case there is no slot to inspect */
1661 cache_pp = lookup_in_cache(ureq);
1662 ureq_and_resp = cache_pp ? *cache_pp : NULL;
1664 if (ureq_and_resp) {
1665 if (CACHED_ENTRY(ureq_and_resp)) {
1666 /* Found. Save ptr to response into cinfo and return */
1667 response_header *resp = ureq_response(ureq_and_resp);
1668 unsigned sz = resp->version_or_size;
1670 log(L_DEBUG, "sz:%u", sz);
1672 /* cache shouldn't free it under us! */
1673 if (++ureq_and_resp->refcount == 0) {
1674 error_and_die("BUG! ++%p.refcount rolled over to 0, exiting", ureq_and_resp);
1676 log(L_DEBUG2, "++%p.refcount=%u", ureq_and_resp, ureq_and_resp->refcount);
1677 pfd[i].events = POLLOUT; /* we want to write out */
1678 cinfo[i].resptr = ureq_and_resp;
1679 /*cinfo[i].respos = 0; - already is */
1680 /* prevent future matches with anything */
1681 cinfo[i].cache_pp = (void *) 1;
1682 return 1; /* "ready to write data out to client" */
1685 /* Not found. Remember a pointer where it will appear */
1686 cinfo[i].cache_pp = cache_pp;
1688 /* If it does not point to our own ureq buffer... */
1689 if (CACHE_PTR(ureq_and_resp) != ureq) {
1690 /* We are not the first client who wants this */
1691 log(L_DEBUG, "another request is in progress (%p), waiting for its result", ureq_and_resp);
1692 MARK_PTR_SHARED(cache_pp); /* "please inform us when it's ready" */
1693 /* "we do not wait for client anymore" */
1694 cinfo[i].client_fd = pfd[i].fd;
1695 /* Don't wait on fd. Worker response will unblock us */
1699 /* else: lookup_in_cache inserted (ureq & 1) into *cache_pp:
1700 * we are the first client to miss on this ureq. */
1703 /* Start worker thread */
1704 log(L_DEBUG, "stored %p in cache, starting a worker", ureq_and_resp);
1705 /* Now we will wait on worker's fd, not client's! */
/* the client's fd is parked in cinfo[i].client_fd while
 * pfd[i].fd refers to the worker's output pipe */
1706 cinfo[i].client_fd = pfd[i].fd;
1707 pfd[i].fd = create_and_feed_worker(ureq);
/* Switches client slot i into "write the response out" mode.
 * i:      index into pfd[] / cinfo[].
 * cached: block holding the response to send; stored in cinfo[i].resptr.
 *
 * If the client's fd was parked in cinfo[i].client_fd, it is put back into
 * pfd[i].fd and client_fd is cleared. The slot then polls for POLLOUT and
 * its timeout clock is restarted from g_now_ms.
 */
1711 static void prepare_for_writeout(unsigned i, user_req *cached)
1713 log(L_DEBUG2, "client %u: data is ready at %p", i, cached);
1715 if (cinfo[i].client_fd) {
1716 pfd[i].fd = cinfo[i].client_fd;
1717 cinfo[i].client_fd = 0; /* "we don't wait for worker reply" */
1719 pfd[i].events = POLLOUT;
1721 /* Writeout position etc */
1722 cinfo[i].resptr = cached;
1723 /*cinfo[i].respos = 0; - already is */
1724 /* if worker took some time to get info (e.g. DNS query),
1725 * prevent client timeout from triggering at once */
1726 cinfo[i].started_ms = g_now_ms;
1729 /* Worker seems to be ready to write the response.
1730 * When we return, response is fully read and stored in cache,
1731 * worker's fd is closed, pfd[i] and cinfo[i] are updated. */
/* i: index of the slot whose pfd[i].fd currently refers to the worker.
 *
 * The reply starts with a small fixed-size header (read into sz_and_found)
 * that carries the total response size and the 'found' flag; the rest is
 * read only for found entries. The newly allocated block holds a copy of
 * the request followed, at offset ureq_sz_aligned, by the response.
 * All clients recorded as waiting on the same cache slot are switched to
 * write-out mode, and the block's refcount is set accordingly.
 */
1732 static void handle_worker_response(int i)
1734 struct { /* struct response_header + small body */
1735 uint32_t version_or_size;
1741 response_header *resp;
1742 unsigned sz, resp_sz;
1743 unsigned ureq_sz_aligned;
1746 ureq = cinfo[i].ureq;
/* offset at which the response is placed behind a request */
1747 ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq;
1749 sz = full_read(pfd[i].fd, &sz_and_found, sizeof(sz_and_found));
1751 /* worker was killed? */
1752 log(L_DEBUG, "worker gave short reply:%u < 8", sz);
/* the size claimed by the worker is sanity-checked before allocating */
1756 resp_sz = sz_and_found.version_or_size;
1757 if (resp_sz < sz || resp_sz > 0x0fffffff) { /* 256 mb */
1758 error("BUG: bad size from worker:%u", resp_sz);
1762 /* Create new block of cached info */
1763 cached = xzalloc(ureq_sz_aligned + resp_sz);
1764 log(L_DEBUG2, "xzalloc(%u):%p sz:%u resp_sz:%u found:%u",
1765 ureq_sz_aligned + resp_sz, cached,
1767 (int)sz_and_found.found
1769 resp = (void*) (((char*) cached) + ureq_sz_aligned);
/* request first, then the part of the response read so far */
1770 memcpy(cached, ureq, ureq_size(ureq));
1771 memcpy(resp, &sz_and_found, sz);
1772 if (sz_and_found.found && resp_sz > sz) {
1773 /* We need to read data only if it's found
1774 * (otherwise worker sends only 8 bytes).
1776 * Replies can be big (getgrnam("guest") on a big user db),
1777 * we cannot rely on them being atomic. However, we know
1778 * that worker _always_ gives reply in one full_write(),
1779 * so we loop and read it all
1780 * (looping is implemented inside full_read())
1782 if (full_read(pfd[i].fd, ((char*) resp) + sz, resp_sz - sz) != resp_sz - sz) {
1783 /* worker was killed? */
1784 log(L_DEBUG, "worker gave short reply, free(%p)", cached);
1791 set_cache_timestamp(cached);
1792 hex_dump(resp, resp_sz);
1799 user_req **cache_pp = cinfo[i].cache_pp;
1800 if (cache_pp != NULL) { /* if not a fake entry */
1803 if (CACHE_SHARED(ureq)) {
1804 /* Other clients wait for this response too,
1805 * wake them (and us) up and set refcount = no_of_clients */
/* slots 0 and 1 are the listening sockets, hence j starts at 2 */
1808 for (j = 2; j < num_clients; j++) {
1809 if (cinfo[j].cache_pp == cache_pp) {
1810 /* This client uses the same cache entry */
1812 /* prevent future matches with anything */
1813 cinfo[j].cache_pp = (void *) 1;
1814 prepare_for_writeout(j, cached);
1819 /* prevent future matches with anything */
1820 cinfo[i].cache_pp = (void *) 1;
1824 prepare_for_writeout(i, cached);
1826 /* cache shouldn't free it under us! */
1828 cached->refcount = ref;
1829 log(L_DEBUG2, "%p.refcount=%u", cached, ref);
/* a new entry exists: fall back to the shortest aging interval */
1831 aging_interval_ms = min_aging_interval_ms;
/* Event loop of the server.
 *
 * Derives the minimal cache aging interval from the negative TTLs, then
 * repeatedly poll()s pfd[] and:
 *  - accepts connections on the two listening sockets (slots 0 and 1);
 *  - for every other slot writes out responses, reads worker replies,
 *    handles hang-ups and reads client requests;
 *  - ages the cache once aging_interval_ms has elapsed;
 *  - drops clients that exceeded CLIENT_TIMEOUT_MS;
 *  - compacts pfd[] / cinfo[] after connections were closed and re-enables
 *    accepting on the listening sockets.
 * A failing poll() is fatal (perror_and_die).
 */
1834 static void main_loop(void)
1836 /* 1/2 of smallest negative TTL */
1837 min_aging_interval_ms = config.nttl[0];
1838 if (min_aging_interval_ms > config.nttl[1]) min_aging_interval_ms = config.nttl[1];
1839 if (min_aging_interval_ms > config.nttl[2]) min_aging_interval_ms = config.nttl[2];
/* "| 1" keeps the interval from ever being zero */
1840 min_aging_interval_ms = (min_aging_interval_ms / 2) | 1;
1841 aging_interval_ms = min_aging_interval_ms;
/* Poll timeout: infinite when only the two listening slots exist and
 * nothing is cached; the aging interval while more clients can still be
 * accepted; SMALL_POLL_TIMEOUT_MS otherwise */
1847 r = SMALL_POLL_TIMEOUT_MS;
1848 if (num_clients <= 2 && !cached_cnt)
1849 r = -1; /* infinite */
1850 else if (num_clients < max_reqnum)
1851 r = aging_interval_ms;
1852 #if 0 /* Debug: leak detector */
1854 static unsigned long long cnt;
1855 static unsigned long low_malloc = -1L;
1856 static unsigned long low_sbrk = -1L;
1857 void *p = malloc(540); /* should not be too small */
1860 if ((unsigned long)p < low_malloc)
1861 low_malloc = (unsigned long)p;
1862 if ((unsigned long)s < low_sbrk)
1863 low_sbrk = (unsigned long)s;
1864 log(L_INFO, "poll %llu (%d ms). clients:%u cached:%u %u/%u malloc:%p (%lu), sbrk:%p (%lu)",
1865 cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt,
1866 p, (unsigned long)p - low_malloc,
1867 s, (unsigned long)s - low_sbrk);
1871 log(L_DEBUG, "poll %d ms. clients:%u cached:%u hit ratio:%u/%u",
1872 r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt);
1875 r = poll(pfd, num_clients, r);
1876 log(L_DEBUG2, "poll returns %d", r);
1879 perror_and_die("poll");
1883 /* Everything between polls never sleeps.
1884 * There is no blocking I/O (except when we talk to worker thread
1885 * which is guaranteed to not block us for long) */
1887 g_now_ms = monotonic_ms();
1889 goto skip_fd_checks;
/* Slots 0 and 1 are the listening sockets: accept new connections */
1891 for (i = 0; i < 2; i++) {
1893 if (!pfd[i].revents)
1895 /* pfd[i].revents = 0; - not needed */
1896 cfd = accept(pfd[i].fd, NULL, NULL);
1898 /* odd... poll() says we can accept but accept failed? */
1899 log(L_DEBUG2, "accept failed with %s", strerror(errno));
1904 /* x[num_clients] is next free element, taking it */
1905 log(L_DEBUG2, "new client %d, fd %d", num_clients, cfd);
1906 pfd[num_clients].fd = cfd;
1907 pfd[num_clients].events = POLLIN;
1908 /* this will make us do read() in next for() loop: */
1909 pfd[num_clients].revents = POLLIN;
1910 memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients]));
1911 /* cinfo[num_clients].bytecnt = 0; - done */
1912 cinfo[num_clients].started_ms = g_now_ms;
1913 cinfo[num_clients].bufidx = alloc_buf_no();
1914 cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx);
1916 if (num_clients >= max_reqnum) {
1917 /* stop accepting new connects for now */
1918 pfd[0].events = pfd[0].revents = 0;
1919 pfd[1].events = pfd[1].revents = 0;
/* i carries on from the loop above: the remaining slots
 * belong to clients (or to the workers serving them) */
1922 for (; i < num_clients; i++) {
1923 if (!pfd[i].revents)
1925 log(L_DEBUG2, "pfd[%d].revents:0x%x", i, pfd[i].revents);
1926 /* pfd[i].revents = 0; - not needed */
1928 /* "Write out result" case */
1929 if (pfd[i].revents == POLLOUT) {
1930 response_header *resp;
1932 if (!cinfo[i].resptr) {
1933 /* corner case: worker gave bad response earlier */
/* While cached, version_or_size holds the response size. It is
 * swapped to NSCD_VERSION for the duration of the write and
 * restored right after it */
1938 resp = ureq_response(cinfo[i].resptr);
1939 resp_sz = resp->version_or_size;
1940 resp->version_or_size = NSCD_VERSION;
1942 r = safe_write(pfd[i].fd, ((char*) resp) + cinfo[i].respos, resp_sz - cinfo[i].respos);
1943 resp->version_or_size = resp_sz;
1945 if (r < 0 && errno == EAGAIN) {
1946 log(L_DEBUG, "client %u: EAGAIN on write", i);
1949 if (r <= 0) { /* client isn't there anymore */
1950 log(L_DEBUG, "client %u is gone (write returned:%d err:%s)",
1951 i, r, errno ? strerror(errno) : "-");
/* respos counts the bytes of the response already sent */
1955 cinfo[i].respos += r;
1956 if (cinfo[i].respos >= resp_sz) {
1957 /* We wrote everything */
1958 /* No point in trying to get next request, it won't come.
1959 * glibc 2.4 client closes its end after each request,
1960 * without testing for EOF from server. strace:
1962 * read(3, "www.google.com\0\0", 16) = 16
1965 log(L_DEBUG, "client %u: sent answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1969 log(L_DEBUG, "client %u: sent partial answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1973 /* "Read reply from worker" case. Worker may be
1974 * already dead, revents may contain other bits too
1976 if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) {
1977 log(L_DEBUG, "reading response for client %u", i);
1978 handle_worker_response(i);
1979 /* We can immediately try to write a response
1984 /* POLLHUP means pfd[i].fd is closed by peer.
1985 * POLLHUP+POLLOUT[+POLLERR] is seen when we are writing out
1986 * and see that pfd[i].fd is closed by peer (for example,
1987 * it happens when client's result buffer is too small
1988 * to receive a huge GETGRBYNAME result).
1990 if ((pfd[i].revents & ~(POLLOUT+POLLERR)) == POLLHUP) {
1991 int is_client = (cinfo[i].client_fd == 0 || cinfo[i].client_fd == pfd[i].fd);
1992 log(L_INFO, "%s %u disappeared (got POLLHUP on fd %d)",
1993 is_client ? "client" : "worker",
2000 /* Read worker output anyway, error handling
2001 * in that function deals with short read.
2002 * Simply closing client is wrong: it leaks
2003 * shared future entries. */
2004 handle_worker_response(i);
2009 /* All strange and unexpected cases */
2010 if (pfd[i].revents != POLLIN) {
2011 /* Not just "can read", but some other bits are there */
2012 log(L_INFO, "client %u revents is strange:0x%x", i, pfd[i].revents);
2017 /* "Read request from client" case */
/* the read appends to what was received earlier and is capped
 * so that the request buffer cannot be overrun */
2018 r = safe_read(pfd[i].fd, (char*)(cinfo[i].ureq) + cinfo[i].bytecnt, MAX_USER_REQ_SIZE - cinfo[i].bytecnt);
2020 log(L_DEBUG2, "error reading from client: %s", strerror(errno));
2021 if (errno == EAGAIN)
2027 log(L_INFO, "premature EOF from client, dropping");
2031 cinfo[i].bytecnt += r;
/* the request is examined as soon as a complete header is present */
2032 if (cinfo[i].bytecnt >= sizeof(user_req_header)) {
2033 if (handle_client(i)) {
2034 /* Response is found in cache! */
2038 } /* for each client[2..num_clients-1] */
/* Periodic cache aging */
2042 if ((g_now_ms - last_age_time) >= aging_interval_ms) {
2043 last_age_time = g_now_ms;
2044 age_cache(/*free_all:*/ 0, -1);
2047 /* Close timed out client connections */
2048 for (i = 2; i < num_clients; i++) {
2049 if (pfd[i].fd != 0 /* not closed yet? */
2050 && cinfo[i].client_fd == 0 /* do we still wait for client, not worker? */
2051 && (g_now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS
2053 log(L_INFO, "timed out waiting for client %u (%u ms), dropping",
2054 i, (unsigned)(g_now_ms - cinfo[i].started_ms));
2062 /* We closed at least one client, coalesce pfd[], cinfo[] */
2063 if (min_closed + cnt_closed >= num_clients) {
2064 /* clients [min_closed..num_clients-1] are all closed */
2065 /* log(L_DEBUG, "taking shortcut"); - almost always happens */
2070 while (i < num_clients) {
2074 if (++i >= num_clients)
2078 cinfo[j++] = cinfo[i++];
2082 num_clients -= cnt_closed;
2083 log(L_DEBUG, "removing %d closed clients. clients:%d", cnt_closed, num_clients);
2084 min_closed = INT_MAX;
2086 /* start accepting new connects */
2087 pfd[0].events = POLLIN;
2088 pfd[1].events = POLLIN;
/* Filesystem locations used by the daemon: the pid file, the directory
 * holding it, the listening socket and the socket's legacy path. */
2097 #define NSCD_PIDFILE "/var/run/nscd/nscd.pid"
2098 #define NSCD_DIR "/var/run/nscd"
2099 #define NSCD_SOCKET "/var/run/nscd/socket"
2100 #define NSCD_SOCKET_OLD "/var/run/.nscd_socket"
/* NOTE(review): presumably records that write_pid() created the pid file,
 * for use by cleanup_on_signal() - confirm */
2102 static smallint wrote_pidfile;
/* Signal handler, registered in main() for SIGINT and SIGTERM.
 * Removes the pid file and both socket paths from the filesystem. */
2104 static void cleanup_on_signal(int sig)
2107 unlink(NSCD_PIDFILE);
2108 unlink(NSCD_SOCKET_OLD);
2109 unlink(NSCD_SOCKET);
/* Writes the id of the current process, followed by a newline,
 * to NSCD_PIDFILE (opened with mode "w", i.e. created or truncated). */
2113 static void write_pid(void)
2115 FILE *pid = fopen(NSCD_PIDFILE, "w");
2118 fprintf(pid, "%d\n", getpid());
2123 /* Open a listening nscd server socket */
/* name: filesystem path of the AF_UNIX stream socket.
 * The socket is marked close-on-exec, bound to 'name', made accessible to
 * everybody (mode 0666) and put into listening state with a backlog of
 * (max_reqnum/8) | 1. Every failure is fatal (perror_and_die).
 * NOTE(review): 'name' is copied into sun_path with strcpy() without a
 * length check; the callers in main() pass compile-time constants.
 */
2124 static int open_socket(const char *name)
2126 struct sockaddr_un sun;
2127 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
2129 perror_and_die("cannot create unix domain socket");
2131 close_on_exec(sock);
2132 sun.sun_family = AF_UNIX;
2133 strcpy(sun.sun_path, name);
2135 if (bind(sock, (struct sockaddr *) &sun, sizeof(sun)) < 0)
2136 perror_and_die("bind(%s)", name);
2137 if (chmod(name, 0666) < 0)
2138 perror_and_die("chmod(%s)", name);
2139 if (listen(sock, (max_reqnum/8) | 1) < 0)
2140 perror_and_die("listen");
/* Long command line options for getopt_long() in main(); 'val' is the
 * equivalent short option character. print_help_and_die() walks this table
 * in parallel with help[], stopping at the first entry whose val is '?'. */
2144 static const struct option longopt[] = {
2145 /* name, has_arg, int *flag, int val */
2146 { "debug" , no_argument , NULL, 'd' },
2147 { "config-file", required_argument, NULL, 'f' },
2148 { "invalidate" , required_argument, NULL, 'i' },
2149 { "shutdown" , no_argument , NULL, 'K' },
2150 { "nthreads" , required_argument, NULL, 't' },
2151 { "version" , no_argument , NULL, 'V' },
2152 { "help" , no_argument , NULL, '?' },
2153 { "usage" , no_argument , NULL, '?' },
2154 /* just exit(0). TODO: "test" connect? */
2155 { "statistic" , no_argument , NULL, 'g' },
2156 { "secure" , no_argument , NULL, 'S' }, /* ? */
/* Help texts printed by print_help_and_die(), which pairs them in order
 * with the entries of longopt[]. */
2160 static const char *const help[] = {
2161 "Do not daemonize; log to stderr (-dd: more verbosity)",
2162 "File to read configuration from",
2164 "Shut the server down",
2165 "Serve N requests in parallel",
/* Prints the usage header and then one line per option, combining the
 * short character and long name from longopt[] with the matching text
 * from help[]. The loop ends when the entry with val '?' is reached.
 * NOTE(review): presumably terminates the process afterwards, as the
 * name says - confirm. */
2169 static void print_help_and_die(void)
2171 const struct option *opt = longopt;
2172 const char *const *h = help;
2174 puts("Usage: nscd [OPTION...]\n"
2175 "Name Service Cache Daemon\n");
2177 printf("\t" "-%c,--%-11s %s\n", opt->val, opt->name, *h);
2180 } while (opt->val != '?');
/* Recognizes a service name in a configuration line.
 * srv: output, receives the index of the recognized service.
 * s:   NUL-terminated word; must be exactly "passwd", "group" or "hosts".
 * Returns a pointer to the first non-whitespace character after the word
 * and its terminator.
 * NOTE(review): "+ 6" fits the 5-letter names plus NUL; confirm that the
 * 6-letter "passwd" case advances s by one more before this point, and
 * what is returned for an unknown name (the caller reports an error). */
2184 static char *skip_service(int *srv, const char *s)
2186 if (strcmp("passwd", s) == 0) {
2189 } else if (strcmp("group", s) == 0) {
2191 } else if (strcmp("hosts", s) == 0) {
2196 return skip_whitespace(s + 6);
/* Configuration handler for directives that are recognized but
 * deliberately ignored. Neither argument is used; nothing happens. */
static void handle_null(const char *str, int srv)
{
	(void)str;
	(void)srv;
}
/* Configuration handler: stores a copy of str as config.logfile. */
2201 static void handle_logfile(const char *str, int srv)
2203 config.logfile = xstrdup(str);
/* Configuration handler: ORs the low 8 bits of the number parsed from str
 * into the global debug mask (bits already set are kept). */
2206 static void handle_debuglvl(const char *str, int srv)
2208 debug |= (uint8_t) getnum(str);
/* Configuration handler for the "threads" / "max-threads" directives.
 * Parses the number given in str.
 * NOTE(review): presumably used to adjust max_reqnum - confirm. */
2211 static void handle_threads(const char *str, int srv)
2213 unsigned n = getnum(str);
/* Configuration handler: stores a copy of str as config.user. */
2218 static void handle_user(const char *str, int srv)
2220 config.user = xstrdup(str);
/* Configuration handler: enables the cache of service srv when the value
 * starts with 'y' or 'Y' (the "| 0x20" folds the case), disables it
 * otherwise. */
2223 static void handle_enable(const char *str, int srv)
2225 config.srv_enable[srv] = ((str[0] | 0x20) == 'y');
/* Configuration handler: sets the positive time-to-live of service srv
 * to the number parsed from str. */
2228 static void handle_pttl(const char *str, int srv)
2230 config.pttl[srv] = getnum(str);
/* Configuration handler: sets the negative time-to-live of service srv
 * to the number parsed from str. */
2233 static void handle_nttl(const char *str, int srv)
2235 config.nttl[srv] = getnum(str);
/* Configuration handler: sets the suggested cache size of service srv
 * to the number parsed from str. */
2238 static void handle_size(const char *str, int srv)
2240 config.size[srv] = getnum(str);
/* Configuration handler: turns file-change checking for service srv on
 * when the value starts with 'y' or 'Y', off otherwise. */
2243 static void handle_chfiles(const char *str, int srv)
2245 config.check_files[srv] = ((str[0] | 0x20) == 'y');
/* Reads the configuration file and applies the directives found in it.
 * conffile: path of the file. Failure to open it is fatal unless it is
 *           the default configuration file.
 * warn:     NOTE(review): presumably controls whether problems are
 *           reported (main() clears it to avoid duplicate warnings) - confirm.
 *
 * Each line is cut at '#', split into whitespace-separated words and the
 * first word is looked up in the table below. In the table, the first
 * character of 'str' is a flag and the directive name starts at str + 1:
 * 'S' means a service name precedes the value (parsed by skip_service()),
 * '_' means the directive has no service argument.
 * Unknown directives and unknown service names are reported and ignored;
 * an overlong line is fatal.
 */
2248 static void parse_conffile(const char *conffile, int warn)
2250 static const struct confword {
2252 void (*handler)(const char *, int);
2254 { "_" "logfile" , handle_logfile },
2255 { "_" "debug-level" , handle_debuglvl },
2256 { "_" "threads" , handle_threads },
2257 { "_" "max-threads" , handle_threads },
2258 { "_" "server-user" , handle_user },
2259 /* ignore: any user can stat */
2260 { "_" "stat-user" , handle_null },
2261 { "_" "paranoia" , handle_null }, /* ? */
2262 /* ignore: design goal is to never crash/hang */
2263 { "_" "reload-count" , handle_null },
2264 { "_" "restart-interval" , handle_null },
2265 { "S" "enable-cache" , handle_enable },
2266 { "S" "positive-time-to-live" , handle_pttl },
2267 { "S" "negative-time-to-live" , handle_nttl },
2268 { "S" "suggested-size" , handle_size },
2269 { "S" "check-files" , handle_chfiles },
2270 { "S" "persistent" , handle_null }, /* ? */
2271 { "S" "shared" , handle_null }, /* ? */
2272 { "S" "auto-propagate" , handle_null }, /* ? */
2277 FILE *file = fopen(conffile, "r");
2281 if (conffile != default_conffile)
2282 perror_and_die("cannot open %s", conffile);
2286 while (fgets(buf, sizeof(buf), file) != NULL) {
2287 const struct confword *word;
2289 int len = strlen(buf);
/* no trailing newline: either the line did not fit into buf
 * or this is the unterminated last line of the file */
2293 if (buf[len-1] != '\n') {
2294 if (len >= sizeof(buf) - 1)
2295 error_and_die("%s:%d: line is too long", conffile, lineno);
2296 len++; /* last line, not terminated by '\n' */
2300 p = strchr(buf, '#');
2304 p = skip_whitespace(buf);
/* terminate the first word in place */
2307 *skip_non_whitespace(p) = '\0';
2310 if (strcmp(word->str + 1, p) == 0) {
/* step over the directive name and its terminator to the next word */
2312 p = skip_whitespace(p + strlen(p) + 1);
2313 *skip_non_whitespace(p) = '\0';
2314 if (word->str[0] == 'S') {
2315 char *p2 = skip_service(&srv, p);
2318 error("%s:%d: ignoring unknown service name '%s'", conffile, lineno, p);
2322 *skip_non_whitespace(p) = '\0';
2324 word->handler(p, srv);
2330 error("%s:%d: ignoring unknown directive '%s'", conffile, lineno, p);
2339 /* "XX,XX[,XX]..." -> gid_t[] */
/* Parses a comma-separated list of hexadecimal numbers (strtoul, base 16)
 * into a newly allocated array. As used by main(), element 0 is the uid
 * and the elements from index 1 on are group ids.
 * str:   the list to parse.
 * sizep: NOTE(review): presumably receives the number of elements
 *        (main() passes size - 1 to setgroups()) - confirm.
 * A conversion error or an unexpected separator is fatal
 * ("internal error"). */
2340 static gid_t* env_U_to_uid_and_gids(const char *str, int *sizep)
2351 ug = xmalloc(ng * sizeof(ug[0]));
2359 *gp++ = strtoul(sp, (char**)&sp, 16);
2360 if (errno || (*sp != ',' && *sp != '\0'))
2361 error_and_die("internal error");
/* Builds the string that carries the uid and the group ids of 'user' in
 * hexadecimal, comma-separated form. main() passes the result to putenv().
 * user: account name, resolved with getpwnam(); an unknown user is fatal.
 * The group list is obtained with getgrouplist(); when the first call
 * reports that the buffer is too small, the buffer is grown to the count
 * it returned and the call is repeated once, a second failure being fatal.
 * Returns a heap-allocated string.
 */
2372 static char* user_to_env_U(const char *user)
2379 pw = getpwnam(user);
2381 perror_and_die("user '%s' is not known", user);
2384 /* 0th cell will be used for uid */
2385 ug = xmalloc((1 + ng) * sizeof(ug[0]));
2386 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) {
2387 ug = xrealloc(ug, (1 + ng) * sizeof(ug[0]));
2388 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0)
2389 perror_and_die("can't get groups of user '%s'", user);
2394 /* How much do we need for "-Uxx,xx[,xx]..." string? */
/* per number: two hex digits for each byte of an unsigned long plus
 * two spare characters, which covers the separating comma */
2395 ug_str = xmalloc((sizeof(unsigned long)+1)*2 * ng + 3);
2401 sp += sprintf(sp, "%lx,", (unsigned long)(*gp++));
2410 /* not static - don't inline me, compiler! */
/* Resolves the /proc/self/exe symlink and stores a heap copy of the target
 * path in self_exe_points_to. A failing readlink() is fatal.
 * The buffer is PATH_MAX + 1 bytes and readlink() is given one byte less. */
2411 void readlink_self_exe(void);
2412 void readlink_self_exe(void)
2414 char buf[PATH_MAX + 1];
2415 ssize_t sz = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
2417 perror_and_die("readlink %s failed", "/proc/self/exe");
2419 self_exe_points_to = xstrdup(buf);
/* Client side of the shutdown and invalidate command line options
 * (does not return).
 * arg: NULL to request a shutdown; otherwise the argument of an invalidate
 *      request, sent including its terminating NUL as the request key.
 * Connects to the running daemon through NSCD_SOCKET and sends a request
 * header with NSCD_VERSION and type SHUTDOWN or INVALIDATE. Failure to
 * create the socket or to connect is fatal.
 */
2423 static void special_op(const char *arg) NORETURN;
2424 static void special_op(const char *arg)
2426 static const user_req_header ureq = { NSCD_VERSION, SHUTDOWN, 0 };
2428 struct sockaddr_un addr;
2431 sock = socket(PF_UNIX, SOCK_STREAM, 0);
2433 error_and_die("cannot create AF_UNIX socket");
2435 addr.sun_family = AF_UNIX;
2436 strcpy(addr.sun_path, NSCD_SOCKET);
2437 if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
2438 error_and_die("cannot connect to %s", NSCD_SOCKET);
2440 if (!arg) { /* shutdown */
2441 xfull_write(sock, &ureq, sizeof(ureq));
2442 printf("sent shutdown request, exiting\n");
2443 } else { /* invalidate */
/* key length counts the terminating NUL */
2444 size_t arg_len = strlen(arg) + 1;
2446 user_req_header req;
2449 reqdata.req.version = NSCD_VERSION;
2450 reqdata.req.type = INVALIDATE;
2451 reqdata.req.key_len = arg_len;
2452 memcpy(reqdata.arg, arg, arg_len);
/* sizeof(ureq) is the header size: header plus key are sent */
2453 xfull_write(sock, &reqdata, arg_len + sizeof(ureq));
2454 printf("sent invalidate(%s) request, exiting\n", arg);
2460 /* Callback for glibc-2.15 */
/* main() passes this function to __nss_disable_nscd().
 * dbidx and finfo are supplied by glibc; as the name says, the callback
 * is meant to have no effect. */
2462 static void do_nothing(size_t dbidx, struct traced_file *finfo)
2464 /* nscd from glibc-2.15 does something like this:
2465 if (!dbs[dbidx].enabled || !dbs[dbidx].check_file)
2467 add_file_to_watch_list(finfo->fname);
2471 /* This internal glibc function is called to disable trying to contact nscd.
2472 * We _are_ nscd, so we need to do the lookups, and not recurse.
2473 * Until 2.14, this function was taking no parameters.
2474 * In 2.15, it takes a function pointer from hell.
 * main() calls it first thing, passing do_nothing as that pointer.
2476 void __nss_disable_nscd(void (*hell)(size_t, struct traced_file*));
2479 int main(int argc, char **argv)
2484 const char *conffile;
2486 /* make sure we don't get recursive calls */
2487 __nss_disable_nscd(do_nothing);
2489 if (argv[0][0] == 'w') /* "worker_nscd" */
2495 /* Make sure stdio is not closed */
2496 n = xopen3("/dev/null", O_RDWR, 0);
2499 /* Close unexpected open file descriptors */
2500 n |= 0xff; /* start from at least fd# 255 */
2505 /* For idiotic kernels which disallow "exec /proc/self/exe" */
2506 readlink_self_exe();
2508 conffile = default_conffile;
2510 while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) {
2521 special_op(optarg); /* exits */
2523 /* shutdown server */
2524 special_op(NULL); /* exits */
2526 puts("unscd - nscd which does not hang, v."PROGRAM_VERSION);
2532 max_reqnum = getnum(optarg);
2538 print_help_and_die();
2541 /* Multiple -d can bump debug regardless of nscd.conf:
2542 * no -d or -d: 0, -dd: 1,
2543 * -ddd: 3, -dddd: 7, -ddddd: 15
2546 debug |= (((1U << opt_d_cnt) >> 1) - 1) & L_ALL;
2548 env_U = getenv("U");
2549 /* Avoid duplicate warnings if $U exists */
2550 parse_conffile(conffile, /* warn? */ (env_U == NULL));
2552 /* I have a user report of (broken?) ldap nss library
2553 * opening and never closing a socket to a ldap server,
2554 * even across fork() and exec(). This messes up
2555 * worker child's operations for the reporter.
2557 * This strengthens my belief that nscd _must not_ trust
2558 * nss libs to be written correctly.
2560 * Here, we need to jump through the hoops to guard against
2561 * such problems. If config file has server-user setting, we need
2562 * to setgroups + setuid. For that, we need to get uid and gid vector.
2563 * And that means possibly using buggy nss libs.
2564 * We will do it here, but then we will re-exec, passing uid+gids
2565 * in an environment variable.
2567 if (!env_U && config.user) {
2568 /* user_to_env_U() does getpwnam and getgrouplist */
2569 if (putenv(user_to_env_U(config.user)))
2570 error_and_die("out of memory");
2571 /* fds leaked by nss will be closed by execed copy */
2572 execv(self_exe_points_to, argv);
2573 xexecve("/proc/self/exe", argv, environ);
2576 /* Allocate dynamically sized stuff */
2577 max_reqnum += 2; /* account for 2 first "fake" clients */
2578 if (max_reqnum < 8) max_reqnum = 8; /* sanitize */
2579 /* Since refcount is a byte, can't serve more than 255-2 clients
2580 * at once. The rest will block in connect() */
2581 if (max_reqnum > 0xff) max_reqnum = 0xff;
2582 client_buf = xzalloc(max_reqnum * sizeof(client_buf[0]));
2583 busy_cbuf = xzalloc(max_reqnum * sizeof(busy_cbuf[0]));
2584 pfd = xzalloc(max_reqnum * sizeof(pfd[0]));
2585 cinfo = xzalloc(max_reqnum * sizeof(cinfo[0]));
2587 cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8;
2588 if (cache_size < 8) cache_size = 8; /* 8*8 = 64 entries min */
2589 if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */
2590 cache_size |= 1; /* force it to be odd */
2591 cache = xzalloc(cache_size * sizeof(cache[0]));
2593 /* Register cleanup hooks */
2594 signal(SIGINT, cleanup_on_signal);
2595 signal(SIGTERM, cleanup_on_signal);
2596 /* Don't die if a client closes a socket on us */
2597 signal(SIGPIPE, SIG_IGN);
2598 /* Avoid creating zombies */
2599 signal(SIGCHLD, SIG_IGN);
2601 /* Ensure workers don't have SIGALRM ignored */
2602 signal(SIGALRM, SIG_DFL);
2605 if (mkdir(NSCD_DIR, 0755) == 0) {
2606 /* prevent bad mode of NSCD_DIR if umask is e.g. 077 */
2607 chmod(NSCD_DIR, 0755);
2609 pfd[0].fd = open_socket(NSCD_SOCKET);
2610 pfd[1].fd = open_socket(NSCD_SOCKET_OLD);
2611 pfd[0].events = POLLIN;
2612 pfd[1].events = POLLIN;
2614 if (debug & D_DAEMON) {
2615 daemon(/*nochdir*/ 1, /*noclose*/ 0);
2616 if (config.logfile) {
2617 /* nochdir=1: relative paths still work as expected */
2618 xmovefd(xopen3(config.logfile, O_WRONLY|O_CREAT|O_TRUNC, 0666), 2);
2621 debug = 0; /* why bother? it's /dev/null'ed anyway */
2623 chdir("/"); /* compat */
2626 /* ignore job control signals */
2627 signal(SIGTTOU, SIG_IGN);
2628 signal(SIGTTIN, SIG_IGN);
2629 signal(SIGTSTP, SIG_IGN);
2632 log(L_ALL, "unscd v" PROGRAM_VERSION ", debug level 0x%x", debug & L_ALL);
2633 log(L_DEBUG, "max %u requests in parallel", max_reqnum - 2);
2634 log(L_DEBUG, "cache size %u x 8 entries", cache_size);
2638 gid_t *ug = env_U_to_uid_and_gids(env_U, &size);
2640 if (setgroups(size - 1, &ug[1]) || setgid(ug[1]))
2641 perror_and_die("cannot set groups for user '%s'", config.user);
2644 perror_and_die("cannot set uid to %u", (unsigned)(ug[0]));
2648 for (n = 0; n < 3; n++) {
2649 log(L_DEBUG, "%s cache enabled:%u pttl:%u nttl:%u",
2651 config.srv_enable[n],
2654 config.pttl[n] *= 1000;
2655 config.nttl[n] *= 1000;