1 /* This file is part of unscd, a complete nscd replacement.
2 * Copyright (C) 2007-2012 Denys Vlasenko. Licensed under the GPL version 2.
5 /* unscd is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; version 2 of the License.
9 * unscd is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You can download the GNU General Public License from the GNU website
15 * at http://www.gnu.org/ or write to the Free Software Foundation, Inc.,
16 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
21 gcc -Wall -Wunused-parameter -Os -o nscd nscd.c
23 gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common
28 nscd problems are not exactly unheard of. Over the years, there were
29 quite a few bugs in it. This leads people to invent babysitters
30 which restart crashed/hung nscd. This is ugly.
32 After looking at nscd source in glibc I arrived at the conclusion
33 that its design is contributing to this significantly. Even if nscd's
34 code is 100.00% perfect and bug-free, it can still suffer from bugs
35 in libraries it calls.
37 As designed, it's a multithreaded program which calls NSS libraries.
38 These libraries are not part of libc, they may be provided
39 by third-party projects (samba, ldap, you name it).
41 Thus nscd cannot be sure that libraries it calls do not have memory
42 or file descriptor leaks and other bugs.
44 Since nscd is a multithreaded program with a single shared cache,
45 any resource leak in any NSS library has cumulative effect.
46 Even if a NSS library leaks a file descriptor 0.01% of the time,
47 this will make nscd crash or hang after some time.
49 Of course bugs in NSS .so modules should be fixed, but meanwhile
50 I do want nscd which does not crash or lock up.
52 So I went ahead and wrote a replacement.
54 It is a single-threaded server process which offloads all NSS
55 lookups to worker children (not threads, but fully independent
56 processes). Cache hits are handled by parent. Only cache misses
57 start worker children. This design is immune against
58 resource leaks and hangs in NSS libraries.
60 It is also many times smaller.
62 Currently (v0.36) it emulates glibc nscd pretty closely
63 (handles same command line flags and config file), and is moderately tested.
65 Please note that as of 2008-08 it is not in wide use (yet?).
66 If you have trouble compiling it, see an incompatibility with
67 "standard" one or experience hangs/crashes, please report it to
68 vda.linux@googlemail.com
70 ***********************************************************************/
72 /* Make struct ucred appear in sys/socket.h */
74 /* For all good things */
91 #include <sys/socket.h>
93 #include <sys/types.h>
99 /* For inet_ntoa (for debug build only) */
100 #include <arpa/inet.h>
103 * 0.21 add SEGV reporting to worker
104 * 0.22 don't do freeaddrinfo() in GETAI worker, it's crashy
105 * 0.23 add parameter parsing
106 * 0.24 add conf file parsing, not using results yet
107 * 0.25 used some of conf file settings (not tested)
108 * 0.26 almost all conf file settings are wired up
109 * 0.27 a bit more of almost all conf file settings are wired up
110 * 0.28 optimized cache aging
111 * 0.29 implemented invalidate and shutdown options
112 * 0.30 fixed buglet (sizeof(ptr) != sizeof(array))
113 * 0.31 reduced client_info by one member
114 * 0.32 fix nttl/size defaults; simpler check for worker child in main()
115 * 0.33 tweak includes so that it builds on my new machine (64-bit userspace);
116 * do not die on unknown service name, just warn
117 * ("services" is a new service we don't support)
118 * 0.34 create /var/run/nscd/nscd.pid pidfile like glibc nscd 2.8 does;
119 * delay setuid'ing itself to server-user after log and pidfile are open
120 * 0.35 readlink /proc/self/exe and use result if execing /proc/self/exe fails
121 * 0.36 exercise extreme paranoia handling server-user option;
122 * a little bit more verbose logging:
123 * L_DEBUG2 log level added, use debug-level 7 to get it
124 * 0.37 users reported over-zealous "detected change in /etc/passwd",
125 * apparently stat() returns random garbage in unused padding
126 * on some systems. Made the check less paranoid.
127 * 0.38 log POLLHUP better
128 * 0.39 log answers to client better, log getpwnam in the worker,
129 * pass debug level value down to worker.
130 * 0.40 fix handling of shutdown and invalidate requests;
131 * fix bug with answer written in several pieces
132 * 0.40.1 set hints.ai_socktype = SOCK_STREAM in GETAI request
133 * 0.41 eliminate double caching of two near-simultaneous identical requests -
135 * 0.42 execute /proc/self/exe by link name first (better comm field)
136 * 0.43 fix off-by-one error in setgroups
137 * 0.44 make -d[ddd] bump up debug - easier to explain to users
138 * how to produce detailed log (no nscd.conf tweaking)
139 * 0.45 Fix out-of-bounds array access and log/pid file permissions -
140 * thanks to Sebastian Krahmer (krahmer AT suse.de)
141 * 0.46 fix a case when we forgot to remove a future entry on worker failure
142 * 0.47 fix nscd without -d to not bump debug level
143 * 0.48 fix for changes in __nss_disable_nscd API in glibc-2.15
144 * 0.49 minor tweaks to messages
145 * 0.50 add more files to watch for changes
146 * 0.51 fix a case where we forget to refcount-- the cached entry
148 #define PROGRAM_VERSION "0.51" /* version string; same number as the newest changelog entry */
150 #define DEBUG_BUILD 1 /* multiplier for the L_DEBUG / L_DEBUG2 / L_DUMP bits: 0 makes them constant 0 */
157 #define ARRAY_SIZE(x) ((unsigned)(sizeof(x) / sizeof((x)[0]))) /* element count; only meaningful for real arrays, not pointers */
159 #define NORETURN __attribute__ ((__noreturn__)) /* marks a function that never returns to its caller */
/* smallint: small integer type used for flags and tiny tables in this file.
 * It is a plain int when MY_CPU_HATES_CHARS is defined; the second typedef
 * (presumably the #else branch) makes it a signed char. */
162 #ifdef MY_CPU_HATES_CHARS
163 typedef int smallint;
165 typedef signed char smallint;
/* Debug-only log level bits. Each is multiplied by DEBUG_BUILD, so with
 * DEBUG_BUILD == 0 they all become constant 0. */
171 L_DEBUG = ((1 << 1) * DEBUG_BUILD),
172 L_DEBUG2 = ((1 << 2) * DEBUG_BUILD),
173 L_DUMP = ((1 << 3) * DEBUG_BUILD),
/* Bitmask of active debug/log flags (tested against D_STAMP and L_DUMP in
 * this file); starts out as D_DAEMON. */
179 static smallint debug = D_DAEMON;
/* Common message formatter behind error(), error_and_die(), perror_and_die()
 * and nscd_log().
 *   s, p   - printf-style format string and its argument list
 *   strerr - optional error description to append (callers in this file pass
 *            NULL or strerror(errno))
 * The message is assembled in msgbuf and written to stderr with fputs(). */
181 static void verror(const char *s, va_list p, const char *strerr)
184 int sz, rem, strerr_len;
/* With D_STAMP set, prefix the message with "HH:MM:SS.fffff " where the
 * fraction is tv_usec / 10, i.e. units of 10 microseconds. */
188 if (debug & D_STAMP) {
189 gettimeofday(&tv, NULL);
190 sz = sprintf(msgbuf, "%02u:%02u:%02u.%05u ",
191 (unsigned)((tv.tv_sec / (60*60)) % 24),
192 (unsigned)((tv.tv_sec / 60) % 60),
193 (unsigned)(tv.tv_sec % 60),
194 (unsigned)(tv.tv_usec / 10));
/* vsnprintf() returns the length the full output would have had, so sz may
 * end up larger than the buffer and rem may go negative. */
196 rem = sizeof(msgbuf) - sz;
197 sz += vsnprintf(msgbuf + sz, rem, s, p);
198 rem = sizeof(msgbuf) - sz; /* can be negative after this! */
/* The error text is appended only if it fits together with the separator,
 * newline and terminating NUL. */
201 strerr_len = strlen(strerr);
202 if (rem >= strerr_len + 4) { /* ": STRERR\n\0" */
205 strcpy(msgbuf + sz, strerr);
214 fputs(msgbuf, stderr);
/* Report a printf-style message through verror(), with no error text appended. */
217 static void error(const char *msg, ...)
221 verror(msg, p, NULL);
/* Report a printf-style message through verror() (no error text appended).
 * Declared NORETURN: control does not come back to the caller. */
225 static void error_and_die(const char *msg, ...) NORETURN;
226 static void error_and_die(const char *msg, ...)
230 verror(msg, p, NULL);
/* Like error_and_die(), but appends strerror(errno) to the message when
 * errno is nonzero. Declared NORETURN. */
235 static void perror_and_die(const char *msg, ...) NORETURN;
236 static void perror_and_die(const char *msg, ...)
240 /* Guard against "<error message>: Success" */
241 verror(msg, p, errno ? strerror(errno) : NULL);
/* Log a printf-style message through verror() with no error text.
 * mask is the level of this message; presumably it is checked against the
 * global debug flags before anything is printed - TODO confirm. */
246 static void nscd_log(int mask, const char *msg, ...)
251 verror(msg, p, NULL);
/* log(lvl, fmt, ...): forwards to nscd_log() only when lvl is nonzero.
 * Levels that are constant 0 (the L_DEBUG* bits when DEBUG_BUILD is 0) make
 * the call, including evaluation of its arguments, dead code. */
256 #define log(lvl, ...) do { if (lvl) nscd_log(lvl, __VA_ARGS__); } while (0)
/* Hex + ASCII dump of len bytes starting at ptr, written to stderr in rows
 * of up to 16 bytes. Active only when L_DUMP is set in debug. */
259 static void dump(const void *ptr, int len)
262 const unsigned char *buf;
265 if (!(debug & L_DUMP))
/* chunk = number of bytes in this row (16, or fewer for the last row) */
270 int chunk = ((len >= 16) ? 16 : len);
/* Each "%02x " conversion is 5 characters long, so advancing the format
 * pointer by (16-chunk)*5 drops the conversions a short row does not need. */
272 "%02x %02x %02x %02x %02x %02x %02x %02x "
273 "%02x %02x %02x %02x %02x %02x %02x %02x " + (16-chunk) * 5,
274 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
275 buf[8], buf[9],buf[10],buf[11],buf[12],buf[13],buf[14],buf[15]
/* Pad a short row (3 output columns per missing byte) so the ASCII part
 * lines up. */
277 fprintf(stderr, "%*s", (16-chunk) * 3, "");
/* ASCII column: bytes outside 32..126 are shown as '.' */
281 unsigned char c = *buf++;
282 *p++ = (c >= 32 && c < 127 ? c : '.');
/* Prototype-only declaration of dump() (presumably the non-debug branch of
 * the conditional around the definition - TODO confirm). */
290 void dump(const void *ptr, int len);
/* hex_dump(p, n): calls dump() only when L_DUMP is a nonzero constant. */
293 #define hex_dump(p,n) do { if (L_DUMP) dump(p,n); } while (0)
/* open(pathname, flags, mode) wrapper; failure is reported with
 * perror_and_die("open"), which does not return. */
295 static int xopen3(const char *pathname, int flags, int mode)
297 int fd = open(pathname, flags, mode);
299 perror_and_die("open");
/* Pipe creation wrapper (presumably around pipe(fds) - TODO confirm);
 * failure is reported with perror_and_die("pipe"), which does not return.
 * fds points to storage for the two descriptors. */
303 static void xpipe(int *fds)
306 perror_and_die("pipe");
/* execve() wrapper that never returns: if execve() comes back at all it has
 * failed, and the process dies with a message naming filename. */
309 static void xexecve(const char *filename, char **argv, char **envp) NORETURN;
310 static void xexecve(const char *filename, char **argv, char **envp)
312 execve(filename, argv, envp);
313 perror_and_die("cannot re-exec %s", filename);
/* Put fd into non-blocking mode: read the current file status flags and
 * write them back with O_NONBLOCK added. Dies on fcntl() failure. */
316 static void ndelay_on(int fd)
318 int fl = fcntl(fd, F_GETFL);
320 perror_and_die("F_GETFL");
321 if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0)
322 perror_and_die("setting O_NONBLOCK");
/* Mark fd close-on-exec. Note that F_SETFD is given FD_CLOEXEC alone, i.e.
 * the descriptor flags are overwritten, not OR-ed. Dies on failure. */
325 static void close_on_exec(int fd)
327 if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0)
328 perror_and_die("setting FD_CLOEXEC");
/* Current CLOCK_MONOTONIC time in milliseconds, obtained with a direct
 * clock_gettime syscall. The result is truncated to unsigned, so it wraps
 * around; dies if the syscall fails. */
331 static unsigned monotonic_ms(void)
334 if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts))
335 perror_and_die("clock_gettime(MONOTONIC)");
336 return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
/* Storage size of a C string: its length plus the terminating NUL. */
339 static unsigned strsize(const char *str)
341 return strlen(str) + 1;
/* Storage size of a C string (length + NUL) rounded up to a multiple of 4. */
344 static unsigned strsize_aligned4(const char *str)
346 return (strlen(str) + 1 + 3) & (~3);
/* read() that is restarted whenever it fails with EINTR. */
349 static ssize_t safe_read(int fd, void *buf, size_t count)
353 n = read(fd, buf, count);
354 } while (n < 0 && errno == EINTR);
/* Repeated safe_read() into buf, advancing the buffer after each piece.
 * A failing read is passed straight back to the caller. Presumably loops
 * until len bytes have arrived or EOF - TODO confirm. */
358 static ssize_t full_read(int fd, void *buf, size_t len)
364 cc = safe_read(fd, buf, len);
366 return cc; /* read() returns -1 on failure. */
369 buf = ((char *)buf) + cc;
/* One safe_read() that must deliver exactly len bytes; anything else
 * (error or short read) is fatal. */
377 static void xsafe_read(int fd, void *buf, size_t len)
379 if (len != safe_read(fd, buf, len))
380 perror_and_die("short read");
/* full_read() that must deliver exactly len bytes; anything else is fatal. */
382 static void xfull_read(int fd, void *buf, size_t len)
384 if (len != full_read(fd, buf, len))
385 perror_and_die("short read");
/* write() that is restarted whenever it fails with EINTR. */
389 static ssize_t safe_write(int fd, const void *buf, size_t count)
393 n = write(fd, buf, count);
394 } while (n < 0 && errno == EINTR);
/* Repeated safe_write() from buf, advancing the buffer after each piece.
 * A failing write is passed straight back to the caller. Presumably loops
 * until all len bytes are written - TODO confirm. */
398 static ssize_t full_write(int fd, const void *buf, size_t len)
405 cc = safe_write(fd, buf, len);
407 return cc; /* write() returns -1 on failure. */
409 buf = ((const char *)buf) + cc;
/* One safe_write() that must accept exactly count bytes; anything else
 * (error or partial write) is fatal. */
415 static void xsafe_write(int fd, const void *buf, size_t count)
417 if (count != safe_write(fd, buf, count))
418 perror_and_die("short write of %ld bytes", (long)count);
/* full_write() that must write exactly count bytes; anything else is fatal. */
420 static void xfull_write(int fd, const void *buf, size_t count)
422 if (count != full_write(fd, buf, count))
423 perror_and_die("short write of %ld bytes", (long)count);
/* Make to_fd refer to the same open file as from_fd via dup2(); nothing is
 * done when the two numbers are already equal. dup2() failure is fatal. */
426 static void xmovefd(int from_fd, int to_fd)
428 if (from_fd != to_fd) {
429 if (dup2(from_fd, to_fd) < 0)
430 perror_and_die("dup2");
/* Parse a non-negative decimal number. The string must start with a digit,
 * be consumed entirely by strtoul(), and not exceed INT_MAX / 1000;
 * otherwise the program dies with a message quoting the string. */
435 static unsigned getnum(const char *str)
437 if (str[0] >= '0' && str[0] <= '9') {
439 unsigned long l = strtoul(str, &p, 10);
440 /* must not overflow int even after x1000 */
441 if (!*p && l <= INT_MAX / 1000)
444 error_and_die("malformed or too big number '%s'", str);
/* Return a pointer to the first character of s that is not whitespace
 * (this may be the terminating NUL). The const qualifier of the argument is
 * dropped in the result, as the declared return type requires. */
static char *skip_whitespace(const char *s)
{
	/* NB: isspace('\0') returns 0, so the scan stops at the terminator.
	 * The cast to unsigned char matters: plain char may be signed, and
	 * passing a negative value (any byte >= 0x80) to a <ctype.h> function
	 * is undefined behavior. */
	while (isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
/* Return a pointer to the first whitespace character of s, or to its
 * terminating NUL if there is none. The const qualifier of the argument is
 * dropped in the result, as the declared return type requires. */
static char *skip_non_whitespace(const char *s)
{
	/* Cast to unsigned char: plain char may be signed, and a negative
	 * argument (any byte >= 0x80) to isspace() is undefined behavior. */
	while (*s && !isspace((unsigned char)*s))
		++s;
	return (char *) s;
}
/* malloc() wrapper: allocation failure ends the program with
 * "out of memory". */
460 static void *xmalloc(unsigned sz)
462 void *p = malloc(sz);
464 error_and_die("out of memory");
/* Allocate sz bytes with xmalloc(); presumably the block is zero-filled
 * before being returned, as the name suggests - TODO confirm. */
468 static void *xzalloc(unsigned sz)
470 void *p = xmalloc(sz);
/* realloc() wrapper: failure ends the program with "out of memory", so
 * overwriting p with the result cannot leak the old block. */
475 static void *xrealloc(void *p, unsigned size)
477 p = realloc(p, size);
479 error_and_die("out of memory");
/* strdup() wrapper: failure ends the program with "out of memory". */
483 static const char *xstrdup(const char *str)
485 const char *p = strdup(str);
487 error_and_die("out of memory");
/* Names of the three services, stored as fixed 7-byte strings. Presumably
 * indexed by SRV_PASSWD / SRV_GROUP / SRV_HOSTS - TODO confirm. */
502 static const char srv_name[3][7] = {
/* Per-service settings; every array has one slot per service (3 services). */
511 smallint srv_enable[3];
512 smallint check_files[3];
/* Built-in defaults follow: no log file, all services disabled, file
 * checking on, positive TTL 3600 and negative TTL 20/60/20 per service,
 * cache size 256 * 8 / 3 per service. The TTL units are not stated here. */
517 /* We try to closely mimic glibc nscd */
518 .logfile = NULL, /* default is to not have a log file */
520 .srv_enable = { 0, 0, 0 },
521 .check_files = { 1, 1, 1 },
522 .pttl = { 3600, 3600, 3600 },
523 .nttl = { 20, 60, 20 },
524 /* huh, what is the default cache size in glibc nscd? */
525 .size = { 256 * 8 / 3, 256 * 8 / 3, 256 * 8 / 3 },
/* Default configuration file path. */
528 static const char default_conffile[] = "/etc/nscd.conf";
/* Path tried first when re-executing ourselves as a worker; starts as
 * "/proc/self/exe" and is a non-const pointer, so it can be repointed. */
529 static const char *self_exe_points_to = "/proc/self/exe";
533 ** Clients, workers machinery
536 /* Header common to all requests */
/* USER_REQ_STRUCT expands to the three uint32_t header fields (12 bytes in
 * total) so the same layout can be embedded in more than one struct. */
537 #define USER_REQ_STRUCT \
538 uint32_t version; /* Version number of the daemon interface */ \
539 uint32_t type; /* Service requested */ \
540 uint32_t key_len; /* Key length */
542 typedef struct user_req_header {
/* Size limits and timeouts. MAX_USER_REQ_SIZE bounds a whole request
 * (header + key); USER_HDR_SIZE is the header part alone. */
548 MAX_USER_REQ_SIZE = 1024,
549 USER_HDR_SIZE = sizeof(user_req_header),
550 /* DNS queries time out after 20 seconds,
551 * we will allow for a bit more */
552 WORKER_TIMEOUT_SEC = 30,
553 CLIENT_TIMEOUT_MS = 100,
554 SMALL_POLL_TIMEOUT_MS = 200,
/* One request as received from a client and as kept in the cache.
 * Two overlapping views share the header area: the wire form, and the
 * cached form in which the space of .version is reused for bookkeeping
 * (a 24-bit timestamp among other things). reqbuf holds the key and fills
 * the struct up to MAX_USER_REQ_SIZE. */
557 typedef struct user_req {
559 struct { /* as came from client */
562 struct { /* when stored in cache, overlaps .version */
564 /* (timestamp24 * 256) == timestamp in ms */
565 unsigned timestamp24:24;
568 char reqbuf[MAX_USER_REQ_SIZE - USER_HDR_SIZE];
571 /* Compile-time check for correct size */
/* The array size becomes -1, a compile error, unless sizeof(user_req) is
 * exactly MAX_USER_REQ_SIZE. */
572 struct BUG_wrong_user_req_size {
573 char BUG_wrong_user_req_size[sizeof(user_req) == MAX_USER_REQ_SIZE ? 1 : -1];
/* Administrative request types (as opposed to lookups). */
585 SHUTDOWN, /* Shut the server down */
586 GETSTAT, /* Get the server statistic */
587 INVALIDATE, /* Invalidate one special cache */
/* Printable names of request types. Presumably indexed by the request type
 * number and used for logging - TODO confirm. The trailing comments record
 * implementation status of each type. */
599 static const char *const typestr[] = {
600 "GETPWBYNAME", /* done */
601 "GETPWBYUID", /* done */
602 "GETGRBYNAME", /* done */
603 "GETGRBYGID", /* done */
604 "GETHOSTBYNAME", /* done */
605 "GETHOSTBYNAMEv6", /* done */
606 "GETHOSTBYADDR", /* done */
607 "GETHOSTBYADDRv6", /* done */
608 "SHUTDOWN", /* done */
609 "GETSTAT", /* info? */
610 "INVALIDATE", /* done */
611 /* won't do: nscd passes a name of shmem segment
612 * which client can map and "see" the db */
614 "GETFDGR", /* won't do */
615 "GETFDHST", /* won't do */
617 "INITGROUPS", /* done */
618 "GETSERVBYNAME", /* prio 3 (no caching?) */
619 "GETSERVBYPORT", /* prio 3 (no caching?) */
620 "GETFDSERV" /* won't do */
/* Declaration-only variant of the same table. */
623 extern const char *const typestr[];
/* Maps a request type to the service (SRV_PASSWD / SRV_GROUP / SRV_HOSTS)
 * whose settings and cache policy apply to it. Types not listed get the
 * implicit initializer 0. */
625 static const smallint type_to_srv[] = {
626 [GETPWBYNAME ] = SRV_PASSWD,
627 [GETPWBYUID ] = SRV_PASSWD,
628 [GETGRBYNAME ] = SRV_GROUP,
629 [GETGRBYGID ] = SRV_GROUP,
630 [GETHOSTBYNAME ] = SRV_HOSTS,
631 [GETHOSTBYNAMEv6 ] = SRV_HOSTS,
632 [GETHOSTBYADDR ] = SRV_HOSTS,
633 [GETHOSTBYADDRv6 ] = SRV_HOSTS,
634 [GETAI ] = SRV_HOSTS,
635 [INITGROUPS ] = SRV_GROUP,
/* Returns nonzero for request types this server does not handle.
 * GETAI and INITGROUPS are accepted explicitly; GETSTAT and every type
 * numbered above INVALIDATE are rejected. The checks are order-dependent:
 * the two accepted types are tested before the "> INVALIDATE" rule. */
638 static int unsupported_ureq_type(unsigned type)
640 if (type == GETAI) return 0;
641 if (type == INITGROUPS) return 0;
642 if (type == GETSTAT) return 1;
643 if (type > INVALIDATE) return 1;
648 typedef struct client_info {
/* Per-connection state kept alongside pfd[i]. */
649 /* if client_fd != 0, we are waiting for the reply from worker
650 * on pfd[i].fd, and client_fd is saved client's fd
651 * (we need to put it back into pfd[i].fd later) */
653 unsigned bytecnt; /* bytes read from client */
654 unsigned bufidx; /* buffer# in global client_buf[] */
656 unsigned respos; /* response */
657 user_req *resptr; /* response */
658 user_req **cache_pp; /* cache entry address */
659 user_req *ureq; /* request (points to client_buf[x]) */
662 static unsigned g_now_ms; /* current time in ms as used by cache_age() / set_cache_timestamp() */
663 static int min_closed = INT_MAX; /* NOTE(review): presumably lowest closed client index, for coalescing - confirm */
664 static int cnt_closed = 0; /* NOTE(review): presumably number of closed-but-not-coalesced clients - confirm */
665 static int num_clients = 2; /* two listening sockets are "clients" too */
667 /* We read up to max_reqnum requests in parallel */
668 static unsigned max_reqnum = 14;
670 /* To be allocated at init to become client_buf[max_reqnum][MAX_USER_REQ_SIZE].
671 * Note: it is a pointer to [MAX_USER_REQ_SIZE] arrays,
672 * not [MAX_USER_REQ_SIZE] array of pointers.
674 static char (*client_buf)[MAX_USER_REQ_SIZE];
675 static char *busy_cbuf;
676 static struct pollfd *pfd;
677 static client_info *cinfo;
679 /* Request, response and cache data structures:
681 * cache[] (defined later):
682 * cacheline_t cache[cache_size] array, or in other words,
683 * user_req* cache[cache_size][8] array.
684 * Every client request is hashed, hash value determines which cache[x]
685 * will have the response stored in one of its 8 elements.
686 * Cache entries have this format: request, then padding to 32 bits,
688 * Addresses in cache[x][y] may be NULL or:
689 * (&client_buf[z]) & 1: the cache miss is in progress ("future entry"):
690 * "the data is not in the cache (yet), wait for it to appear"
691 * (&client_buf[z]) & 3: the cache miss is in progress and other clients
692 * also want the same data ("shared future entry")
693 * else (non-NULL but low two bits are 0): cached data in malloc'ed block
695 * Each of these is a [max_reqnum] sized array:
696 * pfd[i] - given to poll() to wait for requests and replies.
697 * .fd: first two pfd[i]: listening Unix domain sockets, else
698 * .fd: open fd to a client, for reading client's request, or
699 * .fd: open fd to a worker, to send request and get response back
700 * cinfo[i] - auxiliary client data for pfd[i]
701 * .client_fd: open fd to a client, in case we already had read its
702 * request and got a cache miss, and created a worker or
703 * wait for another client's worker.
704 * Otherwise, it's 0 and client's fd is in pfd[i].fd
705 * .bufidx: index in client_buf[] we store client's request in
706 * .ureq: = client_buf[bufidx]
707 * .bytecnt: size of the request
708 * .started_ms: used to time out unresponsive clients
709 * .resptr: initially NULL. Later, same as cache[x][y] pointer to a cached
710 * response, or (a rare case) a "fake cache" entry:
711 * all cache[hash(request)][0..7] blocks were found busy,
712 * the result won't be cached.
713 * .respos: "write-out to client" offset
714 * .cache_pp: initially NULL. Later, &cache[x][y] where the response is,
715 * or will be stored. Remains NULL if "fake cache" entry is in use
717 * When a client has received its reply (or otherwise closed (timeout etc)),
718 * corresponding pfd[i] and cinfo[i] are removed by shifting [i+1], [i+2] etc
719 * elements down, so that both arrays never have free holes.
720 * [num_clients] is always the first free element.
722 * Each of these also is a [max_reqnum] sized array, but indexes
723 * do not correspond directly to pfd[i] and cinfo[i]:
724 * client_buf[n][MAX_USER_REQ_SIZE] - buffers we read client requests into
725 * busy_cbuf[n] - bool flags marking busy client_buf[]
727 /* Possible reductions:
728 * fd, bufidx - uint8_t
729 * started_ms -> uint16_t started_s
730 * ureq - eliminate (derivable from bufidx?)
733 /* Are special bits 0? is it a true cached entry? */
/* Tagged-pointer helpers: the two low bits of a cache slot pointer carry
 * state (00 = real cached entry, x1 = future entry, 11 = shared future
 * entry). This relies on the pointed-to objects being at least 4-byte
 * aligned.
 * NOTE(review): the casts go through long, which assumes
 * sizeof(long) == sizeof(void*); intptr_t/uintptr_t would state the intent
 * portably. MARK_PTR_SHARED also writes a pointer object through a long
 * lvalue, which is questionable under strict aliasing - confirm the build
 * flags before relying on it. */
734 #define CACHED_ENTRY(p) ( ((long)(p) & 3) == 0 )
735 /* Are special bits 11? is it a shared future cache entry? */
736 #define CACHE_SHARED(p) ( ((long)(p) & 3) == 3 )
737 /* Return a ptr with special bits cleared (used for accessing data) */
738 #define CACHE_PTR(p) ( (void*) ((long)(p) & ~(long)3) )
739 /* Return a ptr with special bits set to x1: make future cache entry ptr */
740 #define MAKE_FUTURE_PTR(p) ( (void*) ((long)(p) | 1) )
741 /* Modify ptr, set special bits to 11: shared future cache entry */
742 #define MARK_PTR_SHARED(pp) ( *(long*)(pp) |= 3 )
/* Size in bytes of a request: the fixed header plus key_len bytes of key. */
744 static inline unsigned ureq_size(const user_req *ureq)
746 return sizeof(user_req_header) + ureq->key_len;
749 static unsigned cache_age(const user_req *ureq)
751 if (!CACHED_ENTRY(ureq))
753 return (uint32_t) (g_now_ms - (ureq->timestamp24 << 8));
/* Stamp ureq with the current time: g_now_ms in units of 256 ms, stored in
 * the 24-bit timestamp field (higher bits are dropped by the bit-field). */
756 static void set_cache_timestamp(user_req *ureq)
758 ureq->timestamp24 = g_now_ms >> 8;
/* Find a free client_buf[] slot, scanning round-robin over max_reqnum
 * slots starting from a remembered position (next_buf). A slot is free
 * when its busy_cbuf[] flag is 0. If a full cycle finds nothing, the
 * program dies: callers are expected never to ask when all are busy. */
761 static int alloc_buf_no(void)
766 next_buf = (next_buf + 1) % max_reqnum;
767 if (!busy_cbuf[cur]) {
771 } while (next_buf != n);
772 error_and_die("no free bufs?!");
/* Translate a buffer number into the address of client_buf[i]. */
775 static inline void *bufno2buf(int i)
777 return client_buf[i];
/* Forward declaration: close_client() needs it before its definition. */
780 static void free_refcounted_ureq(user_req **ureqp);
/* Tear down client slot i: close the saved client fd when it differs from
 * pfd[i].fd, mark pfd[i] unused, release the request buffer, and drop the
 * client's hold on its response (a private "fake cache" entry when
 * cache_pp is NULL, otherwise a refcounted cache entry). */
782 static void close_client(unsigned i)
784 log(L_DEBUG, "closing client %u (fd %u,%u)", i, pfd[i].fd, cinfo[i].client_fd);
785 /* Paranoia. We had nasty bugs where client was closed twice. */
790 if (cinfo[i].client_fd && cinfo[i].client_fd != pfd[i].fd)
791 close(cinfo[i].client_fd);
792 pfd[i].fd = 0; /* flag as unused (coalescing needs this) */
793 busy_cbuf[cinfo[i].bufidx] = 0;
795 if (cinfo[i].cache_pp == NULL) {
796 user_req *resptr = cinfo[i].resptr;
798 log(L_DEBUG, "client %u: freeing fake cache entry %p", i, resptr);
802 /* Most of the time, it is not freed here,
803 * only refcounted--. Freeing happens
804 * if it was deleted from cache[] but retained
807 free_refcounted_ureq(&cinfo[i].resptr);
817 ** nscd API <-> C API conversion
820 typedef struct response_header {
/* First field shared by all response headers in this file; the marshalling
 * code stores the total response size in it. */
821 uint32_t version_or_size;
/* Header of an INITGROUPS reply; the group ids follow it in gid[]. */
826 typedef struct initgr_response_header {
827 uint32_t version_or_size;
830 /* code assumes gid_t == int32, let's check that */
/* Zero-length array when the sizes match, negative size (compile error)
 * when they do not. */
831 int32_t gid[sizeof(gid_t) == sizeof(int32_t) ? 0 : -1];
832 /* char user_str[as_needed]; */
833 } initgr_response_header;
/* Build an INITGROUPS reply for username: look the user up with
 * getpwnam(), then collect the group list with getgrouplist(), growing the
 * reply buffer and retrying while getgrouplist() reports -1. The final
 * size is stored in version_or_size and the group count in ngrps. */
835 static initgr_response_header *obtain_initgroups(const char *username)
837 struct initgr_response_header *resp;
/* Header size expressed in int32_t units. */
839 enum { MAGIC_OFFSET = sizeof(*resp) / sizeof(int32_t) };
843 pw = getpwnam(username);
846 resp->version_or_size = sizeof(*resp);
852 /* getgrouplist may be very expensive, it's much better to allocate
853 * a bit more than to run getgrouplist twice */
857 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
858 resp = xrealloc(resp, sz);
859 } while (getgrouplist(username, pw->pw_gid, (gid_t*) &resp->gid, &ngroups) == -1);
860 log(L_DEBUG, "ngroups=%d", ngroups);
862 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
863 /* resp = xrealloc(resp, sz); - why bother */
864 resp->version_or_size = sz;
866 resp->ngrps = ngroups;
/* Header of a passwd reply. The *_len fields give the sizes (including the
 * NUL) of the strings that follow the header, in the order listed in the
 * trailing comments. */
871 typedef struct pw_response_header {
872 uint32_t version_or_size;
875 int32_t pw_passwd_len;
878 int32_t pw_gecos_len;
880 int32_t pw_shell_len;
881 /* char pw_name[pw_name_len]; */
882 /* char pw_passwd[pw_passwd_len]; */
883 /* char pw_gecos[pw_gecos_len]; */
884 /* char pw_dir[pw_dir_len]; */
885 /* char pw_shell[pw_shell_len]; */
886 } pw_response_header;
/* Serialize a struct passwd into a pw_response_header followed by the five
 * strings (name, passwd, gecos, dir, shell), each with its NUL.
 * The total size is computed first and stored in version_or_size. */
888 static pw_response_header *marshal_passwd(struct passwd *pw)
891 pw_response_header *resp;
892 unsigned pw_name_len;
893 unsigned pw_passwd_len;
894 unsigned pw_gecos_len;
896 unsigned pw_shell_len;
897 unsigned sz = sizeof(*resp);
/* Pass 1: measure every string and accumulate the total size. */
899 sz += (pw_name_len = strsize(pw->pw_name));
900 sz += (pw_passwd_len = strsize(pw->pw_passwd));
901 sz += (pw_gecos_len = strsize(pw->pw_gecos));
902 sz += (pw_dir_len = strsize(pw->pw_dir));
903 sz += (pw_shell_len = strsize(pw->pw_shell));
906 resp->version_or_size = sz;
912 resp->pw_name_len = pw_name_len;
913 resp->pw_passwd_len = pw_passwd_len;
914 resp->pw_uid = pw->pw_uid;
915 resp->pw_gid = pw->pw_gid;
916 resp->pw_gecos_len = pw_gecos_len;
917 resp->pw_dir_len = pw_dir_len;
918 resp->pw_shell_len = pw_shell_len;
/* Pass 2: copy the strings back to back right after the header. */
919 p = (char*)(resp + 1);
920 strcpy(p, pw->pw_name); p += pw_name_len;
921 strcpy(p, pw->pw_passwd); p += pw_passwd_len;
922 strcpy(p, pw->pw_gecos); p += pw_gecos_len;
923 strcpy(p, pw->pw_dir); p += pw_dir_len;
924 strcpy(p, pw->pw_shell); p += pw_shell_len;
/* NOTE(review): the second argument is a pointer difference (ptrdiff_t)
 * printed with %u - a type mismatch where ptrdiff_t is wider than int. */
925 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
/* Header of a group reply. It is followed by the member-length array, the
 * group name, the group password and the member names, as listed in the
 * trailing comments. */
930 typedef struct gr_response_header {
931 uint32_t version_or_size;
933 int32_t gr_name_len; /* strlen(gr->gr_name) + 1; */
934 int32_t gr_passwd_len; /* strlen(gr->gr_passwd) + 1; */
935 int32_t gr_gid; /* gr->gr_gid */
936 int32_t gr_mem_cnt; /* while (gr->gr_mem[gr_mem_cnt]) ++gr_mem_cnt; */
937 /* int32_t gr_mem_len[gr_mem_cnt]; */
938 /* char gr_name[gr_name_len]; */
939 /* char gr_passwd[gr_passwd_len]; */
940 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
941 /* char gr_gid_str[as_needed]; - huh? */
942 /* char orig_key[as_needed]; - needed?? I don't do this ATM... */
944 glibc adds gr_gid_str, but client doesn't get/use it:
945 writev(3, [{"\2\0\0\0\2\0\0\0\5\0\0\0", 12}, {"root\0", 5}], 2) = 17
946 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP, revents=POLLIN}], 1, 5000) = 1
947 read(3, "\2\0\0\0\1\0\0\0\10\0\0\0\4\0\0\0\0\0\0\0\0\0\0\0", 24) = 24
948 readv(3, [{"", 0}, {"root\0\0\0\0\0\0\0\0", 12}], 2) = 12
951 } gr_response_header;
/* Serialize a struct group into a gr_response_header followed by:
 * the array of member string sizes, the group name, the group password,
 * and the member names. gr->gr_mem is walked as a NULL-terminated array. */
953 static gr_response_header *marshal_group(struct group *gr)
956 gr_response_header *resp;
958 unsigned sz = sizeof(*resp);
/* Pass 1: total size = header + name + passwd + members + length array. */
960 sz += strsize(gr->gr_name);
961 sz += strsize(gr->gr_passwd);
963 while (gr->gr_mem[gr_mem_cnt]) {
964 sz += strsize(gr->gr_mem[gr_mem_cnt]);
967 /* for int32_t gr_mem_len[gr_mem_cnt]; */
968 sz += gr_mem_cnt * sizeof(int32_t);
971 resp->version_or_size = sz;
977 resp->gr_name_len = strsize(gr->gr_name);
978 resp->gr_passwd_len = strsize(gr->gr_passwd);
979 resp->gr_gid = gr->gr_gid;
980 resp->gr_mem_cnt = gr_mem_cnt;
/* Pass 2: emit the variable part directly after the header. */
981 p = (char*)(resp + 1);
982 /* int32_t gr_mem_len[gr_mem_cnt]; */
984 while (gr->gr_mem[gr_mem_cnt]) {
985 *(uint32_t*)p = strsize(gr->gr_mem[gr_mem_cnt]);
989 /* char gr_name[gr_name_len]; */
990 strcpy(p, gr->gr_name);
991 p += strsize(gr->gr_name);
992 /* char gr_passwd[gr_passwd_len]; */
993 strcpy(p, gr->gr_passwd);
994 p += strsize(gr->gr_passwd);
995 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
997 while (gr->gr_mem[gr_mem_cnt]) {
998 strcpy(p, gr->gr_mem[gr_mem_cnt]);
999 p += strsize(gr->gr_mem[gr_mem_cnt]);
/* NOTE(review): pointer difference (ptrdiff_t) printed with %u. */
1002 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
/* Header of a host reply. It is followed by the (4-byte padded) host name,
 * the alias-length array, the addresses and the alias strings, as listed
 * in the trailing comments. */
1007 typedef struct hst_response_header {
1008 uint32_t version_or_size;
1011 int32_t h_aliases_cnt;
1012 int32_t h_addrtype; /* AF_INET or AF_INET6 */
1013 int32_t h_length; /* 4 or 16 */
1014 int32_t h_addr_list_cnt;
1016 /* char h_name[h_name_len]; - we pad it to 4 bytes */
1017 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1018 /* char h_addr_list[h_addr_list_cnt][h_length]; - every one is the same size [h_length] (4 or 16) */
1019 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1020 } hst_response_header;
/* Serialize a struct hostent into a hst_response_header followed by the
 * padded host name, the alias-length array, the raw addresses and the
 * alias strings. h_addr_list and h_aliases are walked as NULL-terminated
 * arrays. A HOST_NOT_FOUND error code is also set on one path
 * (presumably when there is no hostent to marshal - TODO confirm). */
1022 static hst_response_header *marshal_hostent(struct hostent *h)
1025 hst_response_header *resp;
1026 unsigned h_name_len;
1027 unsigned h_aliases_cnt;
1028 unsigned h_addr_list_cnt;
1029 unsigned sz = sizeof(*resp);
/* Pass 1: compute the total size. The name is padded to a multiple of 4 so
 * that the uint32_t length array after it stays aligned. */
1031 /* char h_name[h_name_len] */
1032 sz += h_name_len = strsize_aligned4(h->h_name);
1033 h_addr_list_cnt = 0;
1034 while (h->h_addr_list[h_addr_list_cnt]) {
1037 /* char h_addr_list[h_addr_list_cnt][h_length] */
1038 sz += h_addr_list_cnt * h->h_length;
1040 while (h->h_aliases[h_aliases_cnt]) {
1041 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]] */
1042 sz += strsize(h->h_aliases[h_aliases_cnt]);
1045 /* uint32_t h_aliases_len[h_aliases_cnt] */
1046 sz += h_aliases_cnt * 4;
1049 resp->version_or_size = sz;
1051 /*resp->found = 0;*/
1052 resp->error = HOST_NOT_FOUND;
1056 resp->h_name_len = h_name_len;
1057 resp->h_aliases_cnt = h_aliases_cnt;
1058 resp->h_addrtype = h->h_addrtype;
1059 resp->h_length = h->h_length;
1060 resp->h_addr_list_cnt = h_addr_list_cnt;
1061 /*resp->error = 0;*/
/* Pass 2: emit the variable part directly after the header. */
1062 p = (char*)(resp + 1);
1063 /* char h_name[h_name_len]; */
1064 strcpy(p, h->h_name);
1066 /* uint32_t h_aliases_len[h_aliases_cnt]; */
1068 while (h->h_aliases[h_aliases_cnt]) {
1069 *(uint32_t*)p = strsize(h->h_aliases[h_aliases_cnt]);
1073 /* char h_addr_list[h_addr_list_cnt][h_length]; */
1074 h_addr_list_cnt = 0;
1075 while (h->h_addr_list[h_addr_list_cnt]) {
1076 memcpy(p, h->h_addr_list[h_addr_list_cnt], h->h_length);
1080 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
1082 while (h->h_aliases[h_aliases_cnt]) {
1083 strcpy(p, h->h_aliases[h_aliases_cnt]);
1084 p += strsize(h->h_aliases[h_aliases_cnt]);
/* NOTE(review): pointer difference (ptrdiff_t) printed with %u. */
1087 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
1092 /* Reply to addrinfo query */
/* The header is followed by the packed addresses, then one family byte per
 * address, then the canonical name, as listed in the trailing comments. */
1093 typedef struct ai_response_header {
1094 uint32_t version_or_size;
1100 /* char ai_addr[naddrs][4 or 16]; - addrslen bytes in total */
1101 /* char ai_family[naddrs]; - AF_INET[6] each (determines ai_addr[i] length) */
1102 /* char ai_canonname[canonlen]; */
1103 } ai_response_header;
/* Resolve hostname with getaddrinfo() (AI_CANONNAME, SOCK_STREAM only) and
 * serialize the result: header, packed addresses (4 bytes for AF_INET,
 * 16 otherwise), one family byte per address, then the canonical name.
 * The addrinfo list is deliberately not freed. */
1105 static ai_response_header *obtain_addrinfo(const char *hostname)
1107 struct addrinfo hints;
1108 struct addrinfo *ai;
1109 struct addrinfo *ap;
1110 ai_response_header *resp;
1114 unsigned naddrs = 0;
1115 unsigned addrslen = 0;
1116 unsigned canonlen = 0;
1118 memset(&hints, 0, sizeof(hints));
1119 hints.ai_flags = AI_CANONNAME;
1120 /* kills dups (one for each possible SOCK_xxx) */
1121 /* this matches glibc behavior */
1122 hints.ai_socktype = SOCK_STREAM;
1123 ai = NULL; /* on failure getaddrinfo may leave it as-is */
1124 err = getaddrinfo(hostname, NULL, &hints, &ai);
/* Pass 1: measure the canonical name and count addresses and their bytes. */
1128 if (ai->ai_canonname)
1129 sz += canonlen = strsize(ai->ai_canonname);
1133 addrslen += (ap->ai_family == AF_INET ? 4 : 16);
1136 sz += naddrs + addrslen;
1139 resp->version_or_size = sz;
1142 /*resp->found = 0;*/
1146 resp->naddrs = naddrs;
1147 resp->addrslen = addrslen;
1148 resp->canonlen = canonlen;
/* Pass 2: p walks the address area, family walks the family-byte area
 * that starts right after all addresses. */
1149 p = (char*)(resp + 1);
1150 family = p + addrslen;
1153 /* char ai_family[naddrs]; */
1154 *family++ = ap->ai_family;
1155 /* char ai_addr[naddrs][4 or 16]; */
1156 if (ap->ai_family == AF_INET) {
1157 memcpy(p, &(((struct sockaddr_in*)(ap->ai_addr))->sin_addr), 4);
1160 memcpy(p, &(((struct sockaddr_in6*)(ap->ai_addr))->sin6_addr), 16);
1165 /* char ai_canonname[canonlen]; */
1166 if (ai->ai_canonname)
1167 strcpy(family, ai->ai_canonname);
/* NOTE(review): strsize(ai->ai_canonname) below is evaluated without the
 * NULL guard used just above, so a NULL canonname would reach strlen();
 * the pointer difference is also printed with %u. */
1168 log(L_DEBUG, "sz:%u realsz:%u", sz, family + strsize(ai->ai_canonname) - (char*)resp);
1170 /* glibc 2.3.6 segfaults here sometimes
1171 * (maybe my mistake, fixed by "ai = NULL;" above).
1172 * Since we are in worker and are going to exit anyway, why bother? */
1173 /*freeaddrinfo(ai);*/
1182 /* one 8-element "cacheline" */
1183 typedef user_req *cacheline_t[8];
1184 static unsigned cache_size; /* number of cachelines in cache[] */
1185 /* Points to cacheline_t cache[cache_size] array, or in other words,
1186 * points to user_req* cache[cache_size][8] array */
1187 static cacheline_t *cache;
1188 static unsigned cached_cnt; /* entry counter reported by age_cache() as "remain" */
1189 static unsigned cache_access_cnt = 1; /* prevent division by zero */
1190 static unsigned cache_hit_cnt = 1; /* starts at 1 like cache_access_cnt; both are halved together on overflow */
1191 static unsigned last_age_time;
1192 static unsigned aging_interval_ms; /* recomputed by age_cache() */
1193 static unsigned min_aging_interval_ms;
/* Locate the response stored behind a cached request: it starts at the
 * request size rounded up to the next multiple of 4 bytes. */
1195 static response_header *ureq_response(user_req *ureq)
1197 /* Skip query part, find answer part
1198 * (answer is 32-bit aligned) */
1199 return (void*) ((char*)ureq + ((ureq_size(ureq) + 3) & ~3));
1202 /* This hash is supposed to be good for short textual data */
/* p/sz: bytes to hash; hash: initial value. Each step computes
 * hash * 33 (written as 32 * hash + hash) XOR the next byte. */
1203 static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash)
1207 hash = (32 * hash + hash) ^ *key++;
/* Drop one reference to the entry *ureqp points at. Pointers with tag bits
 * set (future entries) take a separate early path. For real entries the
 * refcount is decremented while it is nonzero; an entry whose refcount is
 * already 0 is freed. */
1212 static void free_refcounted_ureq(user_req **ureqp)
1214 user_req *ureq = *ureqp;
1216 if (!CACHED_ENTRY(ureq))
1219 if (ureq->refcount) {
1221 log(L_DEBUG2, "--%p.refcount=%u", ureq, ureq->refcount);
1223 log(L_DEBUG2, "%p.refcount=0, freeing", ureq);
/* Find the cache slot for request ureq.
 * The request (from key_len onward, seeded with the type) is hashed to
 * pick a cacheline; its 8 slots are compared against the request.
 * On a hit the address of the matching slot is returned. On a miss a slot
 * is chosen - a free one if available, otherwise the oldest real entry is
 * evicted - filled with a "future entry" pointer to ureq, and its address
 * is returned. If all 8 slots are future entries, no slot can be used and
 * the "fake cache entry" path is taken (return value for that path is
 * presumably NULL - TODO confirm). */
1229 static user_req **lookup_in_cache(user_req *ureq)
1231 user_req **cacheline;
1235 unsigned ureq_sz = ureq_size(ureq);
1237 /* prevent overflow and division by zero */
/* When the access counter reaches the sign bit, halve both counters; the
 * +1 keeps them nonzero. */
1239 if ((int)cache_access_cnt < 0) {
1240 cache_access_cnt = (cache_access_cnt >> 1) + 1;
1241 cache_hit_cnt = (cache_hit_cnt >> 1) + 1;
1244 hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type);
1245 log(L_DEBUG2, "hash:%08x", hash);
1246 hash = hash % cache_size;
1247 cacheline = cache[hash];
/* Scan the cacheline; tag bits are stripped before the entry is read. */
1250 for (i = 0; i < 8; i++) {
1251 user_req *cached = CACHE_PTR(cacheline[i]);
1253 if (free_cache == -1)
1257 /* ureq->version is always 2 and is reused in cache
1258 * for other purposes, we need to skip it here */
1259 if (memcmp(&ureq->type, &cached->type, ureq_sz - offsetof(user_req, type)) == 0) {
1260 log(L_DEBUG, "found in cache[%u][%u]", hash, i);
1262 return &cacheline[i];
1266 if (free_cache >= 0) {
1269 log(L_DEBUG, "not found, using free cache[%u][%u]", hash, i);
/* No free slot: look for the oldest entry. cache_age() is applied to the
 * raw (tagged) pointer. */
1273 unsigned oldest_idx = 0;
1274 unsigned oldest_age = 0;
1275 for (i = 0; i < 8; i++) {
1276 unsigned age = cache_age(cacheline[i]);
1277 if (age > oldest_age) {
1282 if (oldest_age == 0) {
1283 /* All entries in cacheline are "future" entries!
1284 * This is very unlikely, but we must still work correctly.
1285 * We call this "fake cache entry".
1286 * The data will be "cached" only for the duration
1287 * of this client's request lifetime.
1289 log(L_DEBUG, "not found, and cache[%u] is full: using fake cache entry", hash);
1293 log(L_DEBUG, "not found, freeing and reusing cache[%u][%u] (age %u)", hash, i, oldest_age);
1294 free_refcounted_ureq(&cacheline[i]);
/* Reserve the slot: store ureq's address tagged as a future entry. */
1297 cacheline[i] = MAKE_FUTURE_PTR(ureq);
1298 return &cacheline[i];
/* Walk the whole cache and release entries.
 *   free_all - nonzero: release entries regardless of age
 *   srv      - service to act on, or -1 for all services
 * Only real cached entries are considered (future entries and empty slots
 * are skipped). Without free_all, an entry's TTL is taken from
 * config.pttl or config.nttl for its service, depending on whether the
 * stored response has .found set. When srv == -1, aging_interval_ms is
 * recomputed as a minimum over TTL values of surviving entries. */
1301 static void age_cache(unsigned free_all, int srv)
1303 user_req **cp = *cache;
1305 unsigned sv = cached_cnt;
1307 log(L_DEBUG, "aging cache, srv:%d, free_all:%u", srv, free_all);
/* Start from "infinity"; lowered below as entries are inspected. */
1308 if (srv == -1 || free_all)
1309 aging_interval_ms = INT_MAX;
1312 user_req *cached = *cp;
1313 if (CACHED_ENTRY(cached) && cached != NULL) {
1314 int csrv = type_to_srv[cached->type];
1315 if (srv == -1 || srv == csrv) {
1318 free_refcounted_ureq(cp);
1320 unsigned age = cache_age(cached);
1321 response_header *resp = ureq_response(cached);
1322 unsigned ttl = (resp->found ? config.pttl : config.nttl)[csrv];
1324 log(L_DEBUG2, "freeing: age %u positive %d ttl %u", age, resp->found, ttl);
1326 free_refcounted_ureq(cp);
1327 } else if (srv == -1) {
1329 if (aging_interval_ms > ttl)
1330 aging_interval_ms = ttl;
/* sv holds the count from before the walk, so the difference is the number
 * of entries released. */
1337 log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt);
1338 log(L_DEBUG2, "aging interval now %u ms", aging_interval_ms);
/* Start a worker process for one request.
 *
 * Two pipes are created with xpipe(): to_child carries the request,
 * to_parent carries the reply.
 * Child side: closes to_parent.rd, moves to_child.rd onto fd 0 and
 * to_parent.wr onto fd 1, formats the current `debug` value as the
 * worker's parameter, sets argv[0] to "worker_nscd" and re-executes this
 * program - first via self_exe_points_to, then via "/proc/self/exe".
 * Parent side: closes to_parent.wr, writes ureq_size(ureq) bytes of the
 * request with xsafe_write(), marks to_parent.rd close-on-exec and
 * returns it.
 * A failed fork is fatal (perror_and_die("vfork")).
 */
1346 /* Spawns a worker and feeds it with user query on stdin */
1347 /* Returns stdout fd of the worker, in blocking mode */
1348 static int create_and_feed_worker(user_req *ureq)
1354 } to_child, to_parent;
1356 /* NB: these pipe fds are in blocking mode and non-CLOEXECed */
1357 xpipe(&to_child.rd);
1358 xpipe(&to_parent.rd);
1361 if (pid < 0) /* error */
1362 perror_and_die("vfork");
1363 if (!pid) { /* child */
1364 char param[sizeof(int)*3 + 2];
1368 close(to_parent.rd);
1369 xmovefd(to_child.rd, 0);
1370 xmovefd(to_parent.wr, 1);
1371 sprintf(param, "%u", debug);
1372 argv[0] = (char*) "worker_nscd";
1375 /* Re-exec ourself, cleaning up all allocated memory.
1376 * fds in parent are marked CLOEXEC and will be closed too
1378 /* Try link name first: it's better to have comm field
1379 * of "nscd" than "exe" (pgrep reported to fail to find us
1380 * by name when comm field contains "exe") */
1381 execve(self_exe_points_to, argv, argv+2);
1382 xexecve("/proc/self/exe", argv, argv+2);
1387 close(to_parent.wr);
1388 /* We do not expect child to block for any noticeably long time,
1389 * and also we expect write to be one-piece one:
1390 * ureq size is <= 1k and pipes are guaranteed to accept
1391 * at least PIPE_BUF at once */
1392 xsafe_write(to_child.wr, ureq, ureq_size(ureq));
1395 close_on_exec(to_parent.rd);
1396 return to_parent.rd;
/* worker_ureq: the request a worker process is serving; worker() points it
 * at its local request so that worker_signal_handler() can report it.
 *
 * req_str(): printable form of a request key for log messages.
 *   type: request type; buf: the raw key bytes.
 * For GETHOSTBYADDR the first 4 bytes of buf are taken as an IPv4 address
 * and formatted with inet_ntoa() (whose result may live in static storage,
 * so it must be consumed before the next call).
 * GETHOSTBYADDRv6 is handled by a separate branch.
 * NOTE(review): the trailing bare prototype presumably belongs to an
 * alternative build configuration in which only the declaration is
 * needed - confirm against the surrounding preprocessor conditionals.
 */
1399 static user_req *worker_ureq;
1402 static const char *req_str(unsigned type, const char *buf)
1404 if (type == GETHOSTBYADDR) {
1406 in.s_addr = *((uint32_t*)buf);
1407 return inet_ntoa(in);
1409 if (type == GETHOSTBYADDRv6) {
1415 const char *req_str(unsigned type, const char *buf);
/* Signal handler installed by worker() for fatal signals.
 * Logs, at L_INFO, which signal arrived and which request (worker_ureq)
 * was being handled. Two variants of the message exist: a detailed one
 * that adds the type name (typestr[]) and the key rendered by req_str(),
 * and a short one with only type and key_len.
 * NOTE(review): the variant is presumably selected at build time - confirm.
 *
 * sig: number of the delivered signal.
 */
1418 static void worker_signal_handler(int sig)
1421 log(L_INFO, "worker:%d got sig:%d while handling req "
1422 "type:%d(%s) key_len:%d '%s'",
1424 worker_ureq->type, typestr[worker_ureq->type],
1425 worker_ureq->key_len,
1426 req_str(worker_ureq->type, worker_ureq->reqbuf)
1429 log(L_INFO, "worker:%d got sig:%d while handling req "
1430 "type:%d key_len:%d",
1432 worker_ureq->type, worker_ureq->key_len);
/* Body of a worker process (declared NORETURN).
 *
 * param: decimal debug level passed by the parent; parsed with atoi().
 *
 * Sequence:
 * 1. Publish the local request through worker_ureq for the signal handler.
 * 2. Arm alarm(WORKER_TIMEOUT_SEC) when that constant is non-zero, so a
 *    stuck lookup kills the worker instead of hanging it.
 * 3. Read the request from fd 0 in one safe_read(); the parent is trusted,
 *    so the result is not validated.
 * 4. Install worker_signal_handler for SIGSEGV, SIGBUS, SIGILL, SIGFPE,
 *    SIGABRT and SIGSTKFLT.
 * 5. Dispatch on ureq.type to the matching libc lookup (gethostbyname*,
 *    gethostbyaddr, getpwnam, getpwuid, getgrnam, getgrgid) or helper
 *    (obtain_addrinfo, obtain_initgroups) and marshal the result.
 *    Numeric uid/gid keys arrive as text and are converted with atoi().
 * 6. Write the reply to fd 1: only the first 8 bytes when the response is
 *    "not found", otherwise version_or_size bytes.
 */
1437 static void worker(const char *param) NORETURN;
1438 static void worker(const char *param)
1443 debug = atoi(param);
1445 worker_ureq = &ureq; /* for signal handler */
1447 /* Make sure we won't hang, but rather die */
1448 if (WORKER_TIMEOUT_SEC)
1449 alarm(WORKER_TIMEOUT_SEC);
1451 /* NB: fds 0, 1 are in blocking mode */
1453 /* We block here (for a short time) */
1454 /* Due to ureq size < PIPE_BUF read is atomic */
1455 /* No error or size checking: we trust the parent */
1456 safe_read(0, &ureq, sizeof(ureq));
1458 signal(SIGSEGV, worker_signal_handler);
1459 signal(SIGBUS, worker_signal_handler);
1460 signal(SIGILL, worker_signal_handler);
1461 signal(SIGFPE, worker_signal_handler);
1462 signal(SIGABRT, worker_signal_handler);
1464 signal(SIGSTKFLT, worker_signal_handler);
1467 if (ureq.type == GETHOSTBYNAME
1468 || ureq.type == GETHOSTBYNAMEv6
1470 resp = marshal_hostent(
1471 ureq.type == GETHOSTBYNAME
1472 ? gethostbyname(ureq.reqbuf)
1473 : gethostbyname2(ureq.reqbuf, AF_INET6)
1475 } else if (ureq.type == GETHOSTBYADDR
1476 || ureq.type == GETHOSTBYADDRv6
1478 resp = marshal_hostent(gethostbyaddr(ureq.reqbuf, ureq.key_len,
1479 (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6)
1481 } else if (ureq.type == GETPWBYNAME) {
1483 log(L_DEBUG2, "getpwnam('%s')", ureq.reqbuf);
1484 pw = getpwnam(ureq.reqbuf);
1485 log(L_DEBUG2, "getpwnam result:%p", pw);
1486 resp = marshal_passwd(pw);
1487 } else if (ureq.type == GETPWBYUID) {
1488 resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf)));
1489 } else if (ureq.type == GETGRBYNAME) {
1490 struct group *gr = getgrnam(ureq.reqbuf);
1491 resp = marshal_group(gr);
1492 } else if (ureq.type == GETGRBYGID) {
1493 struct group *gr = getgrgid(atoi(ureq.reqbuf));
1494 resp = marshal_group(gr);
1495 } else if (ureq.type == GETAI) {
1496 resp = obtain_addrinfo(ureq.reqbuf);
1497 } else /*if (ureq.type == INITGROUPS)*/ {
1498 resp = obtain_initgroups(ureq.reqbuf);
1501 if (!((response_header*)resp)->found) {
1502 /* Parent knows about this special case */
1503 xfull_write(1, resp, 8);
1505 /* Responses can be big (getgrnam("guest") on a big user db),
1506 * we cannot rely on them being atomic. full_write loops
1508 xfull_write(1, resp, ((response_header*)resp)->version_or_size);
/* Files whose modification invalidates a service's cached data.
 * checked_filenames[] is indexed by service (SRV_PASSWD, SRV_GROUP,
 * SRV_HOSTS). Each element packs several NUL-terminated paths into one
 * string literal; the implicit terminator after the last explicit "\0"
 * yields an empty string that marks the end of the list.
 * checked_status[] holds, per service, the fingerprint last computed by
 * check_files().
 */
1518 static const char *const checked_filenames[] = {
1519 /* Note: compiler adds another \0 byte at the end of each array element,
1520 * so there are TWO \0's there.
1522 [SRV_PASSWD] = "/etc/passwd\0" "/etc/passwd.cache\0" "/etc/shadow\0",
1523 [SRV_GROUP] = "/etc/group\0" "/etc/group.cache\0",
1524 [SRV_HOSTS] = "/etc/hosts\0" "/etc/hosts.cache\0" "/etc/resolv.conf\0" "/etc/nsswitch.conf\0",
1525 /* ("foo.cache" files are maintained by libnss-cache) */
1528 static long checked_status[ARRAY_SIZE(checked_filenames)];
/* Detect changes in the files backing service `srv` and flush that
 * service's cache entries when something changed.
 *
 * For each path in checked_filenames[srv], stat() is called on a zeroed
 * struct (errors are ignored, so a missing file contributes zeros) and
 * st_mtime, st_size and st_ino are XOR-folded into `v`. The pointer then
 * advances past the path's NUL to the next packed path.
 * If `v` differs from checked_status[srv], the new value is stored, the
 * change is logged, and age_cache(1, srv) frees the service's entries.
 *
 * srv: service index into checked_filenames[] / checked_status[].
 */
1530 static void check_files(int srv)
1533 const char *file = checked_filenames[srv];
1538 memset(&tsb, 0, sizeof(tsb));
1539 stat(file, &tsb); /* ignore errors */
1540 /* Comparing struct stat's was giving false positives.
1541 * Extracting only those fields which are interesting:
1543 v ^= (long)tsb.st_mtime ^ (long)tsb.st_size ^ (long)tsb.st_ino; /* ^ (long)tsb.st_dev ? */
1544 file += strlen(file) + 1;
1547 if (v != checked_status[srv]) {
1548 checked_status[srv] = v;
1549 log(L_INFO, "detected change in files related to service %d", srv);
1550 age_cache(/*free_all:*/ 1, srv);
/* Process the request bytes received so far from client slot `i`.
 *
 * i: index into pfd[] / cinfo[].
 *
 * Validation, each failure being logged:
 *  - ureq->version must equal NSCD_VERSION;
 *  - key_len must not exceed sizeof(ureq->reqbuf);
 *  - fewer than USER_HDR_SIZE + key_len bytes received: return 0 and wait
 *    for more; more bytes than that is a "read overflow";
 *  - the request type must be supported and its service enabled;
 *  - SHUTDOWN and INVALIDATE are honoured only when SO_PEERCRED reports
 *    uid 0; INVALIDATE additionally needs a non-empty, NUL-terminated key
 *    and currently flushes the whole cache via age_cache(1, -1);
 *  - except for GETHOSTBYADDR / GETHOSTBYADDRv6, a non-empty key must end
 *    in '\0'.
 * NOTE(review): when config.check_files[srv] is set, check_files() is
 * presumably invoked before the lookup - confirm.
 *
 * Lookup via lookup_in_cache():
 *  - completed entry: its refcount is incremented (a rollover to 0 is
 *    fatal), the slot switches to POLLOUT, cinfo[i].resptr points at the
 *    entry, cache_pp is set to the sentinel (void *) 1, and 1 is returned;
 *  - entry pending for another client: the slot is marked shared with
 *    MARK_PTR_SHARED and this client stops waiting on its own fd
 *    (client_fd remembers it);
 *  - first miss: the client's fd is saved in client_fd and pfd[i].fd is
 *    replaced by the worker fd from create_and_feed_worker().
 */
1554 /* Returns 1 if we immediately have the answer */
1555 static int handle_client(int i)
1558 user_req *ureq = cinfo[i].ureq;
1559 user_req **cache_pp;
1560 user_req *ureq_and_resp;
1563 log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'",
1564 ureq->version, ureq->type,
1565 ureq->type < ARRAY_SIZE(typestr) ? typestr[ureq->type] : "?",
1566 ureq->key_len, req_str(ureq->type, ureq->reqbuf));
1569 if (ureq->version != NSCD_VERSION) {
1570 log(L_INFO, "wrong version");
1574 if (ureq->key_len > sizeof(ureq->reqbuf)) {
1575 log(L_INFO, "bogus key_len %u - ignoring", ureq->key_len);
1579 if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) {
1580 log(L_INFO, "read %d, need to read %d",
1581 cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len);
1582 return 0; /* more to read */
1584 if (cinfo[i].bytecnt > USER_HDR_SIZE + ureq->key_len) {
1585 log(L_INFO, "read overflow: %u > %u",
1586 (int)cinfo[i].bytecnt, (int)(USER_HDR_SIZE + ureq->key_len));
1590 if (unsupported_ureq_type(ureq->type)) {
1591 /* We don't know this request. Just close the connection.
1592 * (glibc client interprets this like "not supported by this nscd")
1593 * Happens very often, thus DEBUG, not INFO */
1594 log(L_DEBUG, "unsupported query, dropping");
1598 srv = type_to_srv[ureq->type];
1599 if (!config.srv_enable[srv]) {
1600 log(L_INFO, "service %d is disabled, dropping", srv);
1605 hex_dump(cinfo[i].ureq, cinfo[i].bytecnt);
1607 if (ureq->type == SHUTDOWN
1608 || ureq->type == INVALIDATE
1611 struct ucred caller;
1612 socklen_t optlen = sizeof(caller);
1613 if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) {
1614 log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno));
1618 if (caller.uid != 0) {
1619 log(L_INFO, "special request from non-root - ignoring");
1624 if (ureq->type == SHUTDOWN) {
1625 log(L_INFO, "got shutdown request, exiting");
1628 if (!ureq->key_len || ureq->reqbuf[ureq->key_len - 1]) {
1629 log(L_INFO, "malformed invalidate request - ignoring");
1633 log(L_INFO, "got invalidate request, flushing cache");
1634 /* Frees entire cache. TODO: replace -1 with service (in ureq->reqbuf) */
1635 age_cache(/*free_all:*/ 1, -1);
1640 if (ureq->type != GETHOSTBYADDR
1641 && ureq->type != GETHOSTBYADDRv6
1643 if (ureq->key_len && ureq->reqbuf[ureq->key_len - 1] != '\0') {
1644 log(L_INFO, "badly terminated buffer");
1650 if (config.check_files[srv]) {
1654 cache_pp = lookup_in_cache(ureq);
1655 ureq_and_resp = cache_pp ? *cache_pp : NULL;
1657 if (ureq_and_resp) {
1658 if (CACHED_ENTRY(ureq_and_resp)) {
1659 /* Found. Save ptr to response into cinfo and return */
1660 response_header *resp = ureq_response(ureq_and_resp);
1661 unsigned sz = resp->version_or_size;
1663 log(L_DEBUG, "sz:%u", sz);
1665 /* cache shouldn't free it under us! */
1666 if (++ureq_and_resp->refcount == 0) {
1667 error_and_die("BUG! ++%p.refcount rolled over to 0, exiting", ureq_and_resp);
1669 log(L_DEBUG2, "++%p.refcount=%u", ureq_and_resp, ureq_and_resp->refcount);
1670 pfd[i].events = POLLOUT; /* we want to write out */
1671 cinfo[i].resptr = ureq_and_resp;
1672 /*cinfo[i].respos = 0; - already is */
1673 /* prevent future matches with anything */
1674 cinfo[i].cache_pp = (void *) 1;
1675 return 1; /* "ready to write data out to client" */
1678 /* Not found. Remember a pointer where it will appear */
1679 cinfo[i].cache_pp = cache_pp;
1681 /* If it does not point to our own ureq buffer... */
1682 if (CACHE_PTR(ureq_and_resp) != ureq) {
1683 /* We are not the first client who wants this */
1684 log(L_DEBUG, "another request is in progress (%p), waiting for its result", ureq_and_resp);
1685 MARK_PTR_SHARED(cache_pp); /* "please inform us when it's ready" */
1686 /* "we do not wait for client anymore" */
1687 cinfo[i].client_fd = pfd[i].fd;
1688 /* Don't wait on fd. Worker response will unblock us */
1692 /* else: lookup_in_cache inserted (ureq & 1) into *cache_pp:
1693 * we are the first client to miss on this ureq. */
1696 /* Start worker thread */
1697 log(L_DEBUG, "stored %p in cache, starting a worker", ureq_and_resp);
1698 /* Now we will wait on worker's fd, not client's! */
1699 cinfo[i].client_fd = pfd[i].fd;
1700 pfd[i].fd = create_and_feed_worker(ureq);
/* Switch client slot `i` into "write the response" state.
 *
 * i:      index into pfd[] / cinfo[].
 * cached: block holding the request together with its response; stored in
 *         cinfo[i].resptr as the source for the write-out.
 *
 * If the slot was parked on something other than the client socket
 * (client_fd != 0), the saved client fd is put back into pfd[i].fd and
 * client_fd is cleared. The slot then polls for POLLOUT, and started_ms is
 * refreshed to g_now_ms so the client timeout is measured from now.
 */
1704 static void prepare_for_writeout(unsigned i, user_req *cached)
1706 log(L_DEBUG2, "client %u: data is ready at %p", i, cached);
1708 if (cinfo[i].client_fd) {
1709 pfd[i].fd = cinfo[i].client_fd;
1710 cinfo[i].client_fd = 0; /* "we don't wait for worker reply" */
1712 pfd[i].events = POLLOUT;
1714 /* Writeout position etc */
1715 cinfo[i].resptr = cached;
1716 /*cinfo[i].respos = 0; - already is */
1717 /* if worker took some time to get info (e.g. DNS query),
1718 * prevent client timeout from triggering at once */
1719 cinfo[i].started_ms = g_now_ms;
/* Read a worker's reply for client slot `i` and publish it.
 *
 * i: index into pfd[] / cinfo[]; pfd[i].fd is the worker's pipe.
 *
 * Steps:
 *  - full_read() the fixed-size reply header into sz_and_found; a short
 *    read is logged as "worker gave short reply";
 *  - reject a claimed size below the bytes already read or above
 *    0x0fffffff (logged as a BUG);
 *  - allocate one zeroed block of ureq_sz_aligned + resp_sz bytes, copy
 *    the request to its start and the header at offset ureq_sz_aligned;
 *  - when the response is "found" and larger than the header, read the
 *    remaining resp_sz - sz bytes; a short read there is logged together
 *    with the address of the block;
 *  - timestamp the block (set_cache_timestamp) and hex-dump the response;
 *  - unless this is a fake entry (cache_pp == NULL): if the pending entry
 *    was marked shared, every client slot from index 2 upward that has the
 *    same cache_pp gets the sentinel (void *) 1 and is switched to
 *    write-out; otherwise only this slot is;
 *  - store the reference count in cached->refcount and reset
 *    aging_interval_ms to min_aging_interval_ms.
 */
1722 /* Worker seems to be ready to write the response.
1723 * When we return, response is fully read and stored in cache,
1724 * worker's fd is closed, pfd[i] and cinfo[i] are updated. */
1725 static void handle_worker_response(int i)
1727 struct { /* struct response_header + small body */
1728 uint32_t version_or_size;
1734 response_header *resp;
1735 unsigned sz, resp_sz;
1736 unsigned ureq_sz_aligned;
1739 ureq = cinfo[i].ureq;
1740 ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq;
1742 sz = full_read(pfd[i].fd, &sz_and_found, sizeof(sz_and_found));
1744 /* worker was killed? */
1745 log(L_DEBUG, "worker gave short reply:%u < 8", sz);
1749 resp_sz = sz_and_found.version_or_size;
1750 if (resp_sz < sz || resp_sz > 0x0fffffff) { /* 256 mb */
1751 error("BUG: bad size from worker:%u", resp_sz);
1755 /* Create new block of cached info */
1756 cached = xzalloc(ureq_sz_aligned + resp_sz);
1757 log(L_DEBUG2, "xzalloc(%u):%p sz:%u resp_sz:%u found:%u",
1758 ureq_sz_aligned + resp_sz, cached,
1760 (int)sz_and_found.found
1762 resp = (void*) (((char*) cached) + ureq_sz_aligned);
1763 memcpy(cached, ureq, ureq_size(ureq));
1764 memcpy(resp, &sz_and_found, sz);
1765 if (sz_and_found.found && resp_sz > sz) {
1766 /* We need to read data only if it's found
1767 * (otherwise worker sends only 8 bytes).
1769 * Replies can be big (getgrnam("guest") on a big user db),
1770 * we cannot rely on them being atomic. However, we know
1771 * that worker _always_ gives reply in one full_write(),
1772 * so we loop and read it all
1773 * (looping is implemented inside full_read())
1775 if (full_read(pfd[i].fd, ((char*) resp) + sz, resp_sz - sz) != resp_sz - sz) {
1776 /* worker was killed? */
1777 log(L_DEBUG, "worker gave short reply, free(%p)", cached);
1784 set_cache_timestamp(cached);
1785 hex_dump(resp, resp_sz);
1792 user_req **cache_pp = cinfo[i].cache_pp;
1793 if (cache_pp != NULL) { /* if not a fake entry */
1796 if (CACHE_SHARED(ureq)) {
1797 /* Other clients wait for this response too,
1798 * wake them (and us) up and set refcount = no_of_clients */
1801 for (j = 2; j < num_clients; j++) {
1802 if (cinfo[j].cache_pp == cache_pp) {
1803 /* This client uses the same cache entry */
1805 /* prevent future matches with anything */
1806 cinfo[j].cache_pp = (void *) 1;
1807 prepare_for_writeout(j, cached);
1812 /* prevent future matches with anything */
1813 cinfo[i].cache_pp = (void *) 1;
1817 prepare_for_writeout(i, cached);
1819 /* cache shouldn't free it under us! */
1821 cached->refcount = ref;
1822 log(L_DEBUG2, "%p.refcount=%u", cached, ref);
1824 aging_interval_ms = min_aging_interval_ms;
/* The daemon's poll()-driven event loop.
 *
 * Set-up: min_aging_interval_ms becomes half of the smallest of
 * config.nttl[0..2], with bit 0 forced on so it is never zero;
 * aging_interval_ms starts at that value.
 *
 * Per iteration:
 *  - poll timeout: infinite when only the two listening slots exist and
 *    nothing is cached, aging_interval_ms while below max_reqnum clients,
 *    SMALL_POLL_TIMEOUT_MS otherwise;
 *  - poll(); a failure is fatal ("poll"); g_now_ms is refreshed from
 *    monotonic_ms();
 *  - slots 0 and 1 are the listening sockets: accept() a connection into
 *    slot num_clients, zero its cinfo, assign a request buffer, and
 *    pre-set revents to POLLIN so it is read in this same pass; once
 *    max_reqnum is reached, accepting is paused by clearing the listeners'
 *    events;
 *  - remaining slots, by revents:
 *      POLLOUT only         - write the (remaining part of the) response;
 *                             version_or_size is temporarily overwritten
 *                             with NSCD_VERSION for the write and restored
 *                             afterwards;
 *      POLLIN + client_fd   - a worker replied: handle_worker_response();
 *      POLLHUP              - peer closed; for a worker the response is
 *                             still read so shared pending entries are
 *                             not leaked;
 *      other than POLLIN    - logged as strange;
 *      POLLIN               - read more request bytes and, once a full
 *                             header is present, call handle_client();
 *  - age the cache when aging_interval_ms has elapsed since last_age_time;
 *  - drop clients idle longer than CLIENT_TIMEOUT_MS (only slots still
 *    waiting for the client, not for a worker);
 *  - compact pfd[] / cinfo[] after closures and resume accepting.
 */
1827 static void main_loop(void)
1829 /* 1/2 of smallest negative TTL */
1830 min_aging_interval_ms = config.nttl[0];
1831 if (min_aging_interval_ms > config.nttl[1]) min_aging_interval_ms = config.nttl[1];
1832 if (min_aging_interval_ms > config.nttl[2]) min_aging_interval_ms = config.nttl[2];
1833 min_aging_interval_ms = (min_aging_interval_ms / 2) | 1;
1834 aging_interval_ms = min_aging_interval_ms;
1840 r = SMALL_POLL_TIMEOUT_MS;
1841 if (num_clients <= 2 && !cached_cnt)
1842 r = -1; /* infinite */
1843 else if (num_clients < max_reqnum)
1844 r = aging_interval_ms;
1845 #if 0 /* Debug: leak detector */
1847 static unsigned long long cnt;
1848 static unsigned long low_malloc = -1L;
1849 static unsigned long low_sbrk = -1L;
1850 void *p = malloc(540); /* should not be too small */
1853 if ((unsigned long)p < low_malloc)
1854 low_malloc = (unsigned long)p;
1855 if ((unsigned long)s < low_sbrk)
1856 low_sbrk = (unsigned long)s;
1857 log(L_INFO, "poll %llu (%d ms). clients:%u cached:%u %u/%u malloc:%p (%lu), sbrk:%p (%lu)",
1858 cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt,
1859 p, (unsigned long)p - low_malloc,
1860 s, (unsigned long)s - low_sbrk);
1864 log(L_DEBUG, "poll %d ms. clients:%u cached:%u hit ratio:%u/%u",
1865 r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt);
1868 r = poll(pfd, num_clients, r);
1869 log(L_DEBUG2, "poll returns %d", r);
1872 perror_and_die("poll");
1876 /* Everything between polls never sleeps.
1877 * There is no blocking I/O (except when we talk to worker thread
1878 * which is guaranteed to not block us for long) */
1880 g_now_ms = monotonic_ms();
1882 goto skip_fd_checks;
1884 for (i = 0; i < 2; i++) {
1886 if (!pfd[i].revents)
1888 /* pfd[i].revents = 0; - not needed */
1889 cfd = accept(pfd[i].fd, NULL, NULL);
1891 /* odd... poll() says we can accept but accept failed? */
1892 log(L_DEBUG2, "accept failed with %s", strerror(errno));
1897 /* x[num_clients] is next free element, taking it */
1898 log(L_DEBUG2, "new client %d, fd %d", num_clients, cfd);
1899 pfd[num_clients].fd = cfd;
1900 pfd[num_clients].events = POLLIN;
1901 /* this will make us do read() in next for() loop: */
1902 pfd[num_clients].revents = POLLIN;
1903 memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients]));
1904 /* cinfo[num_clients].bytecnt = 0; - done */
1905 cinfo[num_clients].started_ms = g_now_ms;
1906 cinfo[num_clients].bufidx = alloc_buf_no();
1907 cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx);
1909 if (num_clients >= max_reqnum) {
1910 /* stop accepting new connects for now */
1911 pfd[0].events = pfd[0].revents = 0;
1912 pfd[1].events = pfd[1].revents = 0;
1915 for (; i < num_clients; i++) {
1916 if (!pfd[i].revents)
1918 log(L_DEBUG2, "pfd[%d].revents:0x%x", i, pfd[i].revents);
1919 /* pfd[i].revents = 0; - not needed */
1921 /* "Write out result" case */
1922 if (pfd[i].revents == POLLOUT) {
1923 response_header *resp;
1925 if (!cinfo[i].resptr) {
1926 /* corner case: worker gave bad response earlier */
1931 resp = ureq_response(cinfo[i].resptr);
1932 resp_sz = resp->version_or_size;
1933 resp->version_or_size = NSCD_VERSION;
1935 r = safe_write(pfd[i].fd, ((char*) resp) + cinfo[i].respos, resp_sz - cinfo[i].respos);
1936 resp->version_or_size = resp_sz;
1938 if (r < 0 && errno == EAGAIN) {
1939 log(L_DEBUG, "client %u: EAGAIN on write", i);
1942 if (r <= 0) { /* client isn't there anymore */
1943 log(L_DEBUG, "client %u is gone (write returned:%d err:%s)",
1944 i, r, errno ? strerror(errno) : "-");
1948 cinfo[i].respos += r;
1949 if (cinfo[i].respos >= resp_sz) {
1950 /* We wrote everything */
1951 /* No point in trying to get next request, it won't come.
1952 * glibc 2.4 client closes its end after each request,
1953 * without testing for EOF from server. strace:
1955 * read(3, "www.google.com\0\0", 16) = 16
1958 log(L_DEBUG, "client %u: sent answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1962 log(L_DEBUG, "client %u: sent partial answer %u/%u/%u bytes", i, r, cinfo[i].respos, resp_sz);
1966 /* "Read reply from worker" case. Worker may be
1967 * already dead, revents may contain other bits too
1969 if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) {
1970 log(L_DEBUG, "reading response for client %u", i);
1971 handle_worker_response(i);
1972 /* We can immediately try to write a response
1977 /* POLLHUP means pfd[i].fd is closed by peer.
1978 * POLLHUP+POLLOUT[+POLLERR] is seen when we are writing out
1979 * and see that pfd[i].fd is closed by peer (for example,
1980 * it happens when client's result buffer is too small
1981 * to receive a huge GETGRBYNAME result).
1983 if ((pfd[i].revents & ~(POLLOUT+POLLERR)) == POLLHUP) {
1984 int is_client = (cinfo[i].client_fd == 0 || cinfo[i].client_fd == pfd[i].fd);
1985 log(L_INFO, "%s %u disappeared (got POLLHUP on fd %d)",
1986 is_client ? "client" : "worker",
1993 /* Read worker output anyway, error handling
1994 * in that function deals with short read.
1995 * Simply closing client is wrong: it leaks
1996 * shared future entries. */
1997 handle_worker_response(i);
2002 /* All strange and unexpected cases */
2003 if (pfd[i].revents != POLLIN) {
2004 /* Not just "can read", but some other bits are there */
2005 log(L_INFO, "client %u revents is strange:0x%x", i, pfd[i].revents);
2010 /* "Read request from client" case */
2011 r = safe_read(pfd[i].fd, (char*)(cinfo[i].ureq) + cinfo[i].bytecnt, MAX_USER_REQ_SIZE - cinfo[i].bytecnt);
2013 log(L_DEBUG2, "error reading from client: %s", strerror(errno));
2014 if (errno == EAGAIN)
2020 log(L_INFO, "premature EOF from client, dropping");
2024 cinfo[i].bytecnt += r;
2025 if (cinfo[i].bytecnt >= sizeof(user_req_header)) {
2026 if (handle_client(i)) {
2027 /* Response is found in cache! */
2031 } /* for each client[2..num_clients-1] */
2035 if ((g_now_ms - last_age_time) >= aging_interval_ms) {
2036 last_age_time = g_now_ms;
2037 age_cache(/*free_all:*/ 0, -1);
2040 /* Close timed out client connections */
2041 for (i = 2; i < num_clients; i++) {
2042 if (pfd[i].fd != 0 /* not closed yet? */
2043 && cinfo[i].client_fd == 0 /* do we still wait for client, not worker? */
2044 && (g_now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS
2046 log(L_INFO, "timed out waiting for client %u (%u ms), dropping",
2047 i, (unsigned)(g_now_ms - cinfo[i].started_ms));
2055 /* We closed at least one client, coalesce pfd[], cinfo[] */
2056 if (min_closed + cnt_closed >= num_clients) {
2057 /* clients [min_closed..num_clients-1] are all closed */
2058 /* log(L_DEBUG, "taking shortcut"); - almost always happens */
2063 while (i < num_clients) {
2067 if (++i >= num_clients)
2071 cinfo[j++] = cinfo[i++];
2075 num_clients -= cnt_closed;
2076 log(L_DEBUG, "removing %d closed clients. clients:%d", cnt_closed, num_clients);
2077 min_closed = INT_MAX;
2079 /* start accepting new connects */
2080 pfd[0].events = POLLIN;
2081 pfd[1].events = POLLIN;
/* Filesystem locations used by the daemon:
 *  NSCD_PIDFILE    - written by write_pid(), removed by cleanup_on_signal();
 *  NSCD_DIR        - runtime directory created by main() with mode 0755;
 *  NSCD_SOCKET     - primary listening socket; also the socket special_op()
 *                    connects to;
 *  NSCD_SOCKET_OLD - second listening socket at the legacy path.
 * NOTE(review): wrote_pidfile presumably records whether the PID file was
 * created by this process - confirm where it is set and tested.
 */
2090 #define NSCD_PIDFILE "/var/run/nscd/nscd.pid"
2091 #define NSCD_DIR "/var/run/nscd"
2092 #define NSCD_SOCKET "/var/run/nscd/socket"
2093 #define NSCD_SOCKET_OLD "/var/run/.nscd_socket"
2095 static smallint wrote_pidfile;
/* Handler installed by main() for SIGINT and SIGTERM: removes the PID file
 * and both listening socket paths from the filesystem.
 *
 * sig: number of the delivered signal.
 */
2097 static void cleanup_on_signal(int sig)
2100 unlink(NSCD_PIDFILE);
2101 unlink(NSCD_SOCKET_OLD);
2102 unlink(NSCD_SOCKET);
/* Record this process's PID, as a decimal number followed by a newline,
 * in NSCD_PIDFILE (the file is opened with mode "w", replacing any
 * previous contents).
 */
2106 static void write_pid(void)
2108 FILE *pid = fopen(NSCD_PIDFILE, "w");
2111 fprintf(pid, "%d\n", getpid());
/* Create, bind and listen on a UNIX-domain stream socket.
 *
 * name: filesystem path of the socket. It is copied into sun_path with an
 *       unbounded strcpy(), so it must fit there; main() passes the
 *       compile-time constants NSCD_SOCKET and NSCD_SOCKET_OLD.
 *
 * The socket is marked close-on-exec so worker processes do not inherit
 * it, and the path is chmod'ed to 0666 so any user can connect.
 * The listen backlog is (max_reqnum / 8) | 1, i.e. always at least 1.
 * Every failure (socket, bind, chmod, listen) terminates the program via
 * perror_and_die().
 * Returns: the listening descriptor (main() stores it in pfd[].fd).
 */
2116 /* Open a listening nscd server socket */
2117 static int open_socket(const char *name)
2119 struct sockaddr_un sun;
2120 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
2122 perror_and_die("cannot create unix domain socket");
2124 close_on_exec(sock);
2125 sun.sun_family = AF_UNIX;
2126 strcpy(sun.sun_path, name);
2128 if (bind(sock, (struct sockaddr *) &sun, sizeof(sun)) < 0)
2129 perror_and_die("bind(%s)", name);
2130 if (chmod(name, 0666) < 0)
2131 perror_and_die("chmod(%s)", name);
2132 if (listen(sock, (max_reqnum/8) | 1) < 0)
2133 perror_and_die("listen");
/* Long-option table handed to getopt_long() in main().
 * The order of the leading entries matters: print_help_and_die() prints
 * entries from the start of this table next to the strings in help[],
 * and its loop condition tests for an entry whose val is '?'.
 * The entries after "usage" are accepted for command-line compatibility.
 */
2137 static const struct option longopt[] = {
2138 /* name, has_arg, int *flag, int val */
2139 { "debug" , no_argument , NULL, 'd' },
2140 { "config-file", required_argument, NULL, 'f' },
2141 { "invalidate" , required_argument, NULL, 'i' },
2142 { "shutdown" , no_argument , NULL, 'K' },
2143 { "nthreads" , required_argument, NULL, 't' },
2144 { "version" , no_argument , NULL, 'V' },
2145 { "help" , no_argument , NULL, '?' },
2146 { "usage" , no_argument , NULL, '?' },
2147 /* just exit(0). TODO: "test" connect? */
2148 { "statistic" , no_argument , NULL, 'g' },
2149 { "secure" , no_argument , NULL, 'S' }, /* ? */
/* Help texts printed by print_help_and_die(); entry N describes
 * longopt[N], so the two tables must be kept in the same order.
 */
2153 static const char *const help[] = {
2154 "Do not daemonize; log to stderr (-dd: more verbosity)",
2155 "File to read configuration from",
2157 "Shut the server down",
2158 "Serve N requests in parallel",
/* Print the usage banner followed by one line per option, formatted as
 * "-<short>,--<long name> <description>", taking the option from longopt[]
 * and the description from help[]. The loop runs until an entry whose val
 * is '?' (the "help" entry) is seen.
 * NOTE(review): per its name the function then terminates the program -
 * confirm the exit status.
 */
2162 static void print_help_and_die(void)
2164 const struct option *opt = longopt;
2165 const char *const *h = help;
2167 puts("Usage: nscd [OPTION...]\n"
2168 "Name Service Cache Daemon\n");
2170 printf("\t" "-%c,--%-11s %s\n", opt->val, opt->name, *h);
2173 } while (opt->val != '?');
/* Recognise a service name at `s` and step past it.
 *
 * srv: out parameter for the index of the recognised service.
 * s:   NUL-terminated word; it must equal "passwd", "group" or "hosts"
 *      exactly (strcmp), so the caller has to terminate the word first.
 *
 * Returns: pointer to the next non-whitespace character after the word
 * and its terminator. The constant 6 is strlen("group") + 1, which equals
 * strlen("hosts") + 1.
 * NOTE(review): "passwd" is one character longer, so its branch presumably
 * compensates, and an unrecognised name presumably yields NULL - confirm
 * both.
 */
2177 static char *skip_service(int *srv, const char *s)
2179 if (strcmp("passwd", s) == 0) {
2182 } else if (strcmp("group", s) == 0) {
2184 } else if (strcmp("hosts", s) == 0) {
2189 return skip_whitespace(s + 6);
/* Config handler that does nothing: used for directives that are
 * recognised but deliberately ignored. Both arguments are unused. */
2192 static void handle_null(const char *str, int srv) {}
/* Config handler for "logfile": stores a heap copy of the value in
 * config.logfile. `srv` is unused. */
2194 static void handle_logfile(const char *str, int srv)
2196 config.logfile = xstrdup(str);
/* Config handler for "debug-level": ORs the low 8 bits of the numeric
 * value into `debug`, so bits that are already set are kept. `srv` is
 * unused. */
2199 static void handle_debuglvl(const char *str, int srv)
2201 debug |= (uint8_t) getnum(str);
/* Config handler for "threads" / "max-threads": parses the numeric value
 * with getnum(). `srv` is unused.
 * NOTE(review): the value presumably feeds max_reqnum - confirm how it is
 * combined with a value given on the command line. */
2204 static void handle_threads(const char *str, int srv)
2206 unsigned n = getnum(str);
/* Config handler for "server-user": stores a heap copy of the user name in
 * config.user. `srv` is unused. */
2211 static void handle_user(const char *str, int srv)
2213 config.user = xstrdup(str);
/* Config handler for "enable-cache <service> <yes|no>": the service is
 * enabled when the value starts with 'y' or 'Y' (OR-ing with 0x20 folds
 * ASCII upper case to lower case); anything else disables it.
 * srv: index of the service being configured. */
2216 static void handle_enable(const char *str, int srv)
2218 config.srv_enable[srv] = ((str[0] | 0x20) == 'y');
/* Config handler for "positive-time-to-live": stores the numeric value as
 * the TTL of positive answers for service `srv`. main() later multiplies
 * the stored value by 1000. */
2221 static void handle_pttl(const char *str, int srv)
2223 config.pttl[srv] = getnum(str);
/* Config handler for "negative-time-to-live": stores the numeric value as
 * the TTL of negative answers for service `srv`. main() later multiplies
 * the stored value by 1000. */
2226 static void handle_nttl(const char *str, int srv)
2228 config.nttl[srv] = getnum(str);
/* Config handler for "suggested-size": stores the numeric value for
 * service `srv`. main() sums the three per-service sizes and divides by 8
 * to obtain the number of cache lines. */
2231 static void handle_size(const char *str, int srv)
2233 config.size[srv] = getnum(str);
/* Config handler for "check-files <service> <yes|no>": file checking for
 * service `srv` is enabled when the value starts with 'y' or 'Y' (OR-ing
 * with 0x20 folds ASCII upper case to lower case). */
2236 static void handle_chfiles(const char *str, int srv)
2238 config.check_files[srv] = ((str[0] | 0x20) == 'y');
/* Read the configuration file and apply its directives.
 *
 * conffile: path of the file. Failure to open it is fatal only when the
 *           path is not default_conffile.
 * warn:     main() passes non-zero when $U is not set, to avoid printing
 *           the same warnings twice across the re-exec.
 *           NOTE(review): confirm which messages this flag gates.
 *
 * The directive table pairs each keyword with a handler. The first
 * character of each table string is a tag, not part of the keyword:
 * "_" marks a global directive, "S" a per-service directive whose first
 * argument is a service name resolved with skip_service().
 *
 * Each line is read with fgets(); a line that fills the buffer without a
 * newline is a fatal "line is too long" error. Text after '#' is a
 * comment. The first word selects the directive (compared against the
 * table string with its tag skipped), the following word(s) are
 * NUL-terminated in place and passed to the handler. Unknown service
 * names and unknown directives are reported with error() and ignored.
 */
2241 static void parse_conffile(const char *conffile, int warn)
2243 static const struct confword {
2245 void (*handler)(const char *, int);
2247 { "_" "logfile" , handle_logfile },
2248 { "_" "debug-level" , handle_debuglvl },
2249 { "_" "threads" , handle_threads },
2250 { "_" "max-threads" , handle_threads },
2251 { "_" "server-user" , handle_user },
2252 /* ignore: any user can stat */
2253 { "_" "stat-user" , handle_null },
2254 { "_" "paranoia" , handle_null }, /* ? */
2255 /* ignore: design goal is to never crash/hang */
2256 { "_" "reload-count" , handle_null },
2257 { "_" "restart-interval" , handle_null },
2258 { "S" "enable-cache" , handle_enable },
2259 { "S" "positive-time-to-live" , handle_pttl },
2260 { "S" "negative-time-to-live" , handle_nttl },
2261 { "S" "suggested-size" , handle_size },
2262 { "S" "check-files" , handle_chfiles },
2263 { "S" "persistent" , handle_null }, /* ? */
2264 { "S" "shared" , handle_null }, /* ? */
2265 { "S" "auto-propagate" , handle_null }, /* ? */
2270 FILE *file = fopen(conffile, "r");
2274 if (conffile != default_conffile)
2275 perror_and_die("cannot open %s", conffile);
2279 while (fgets(buf, sizeof(buf), file) != NULL) {
2280 const struct confword *word;
2282 int len = strlen(buf);
2286 if (buf[len-1] != '\n') {
2287 if (len >= sizeof(buf) - 1)
2288 error_and_die("%s:%d: line is too long", conffile, lineno);
2289 len++; /* last line, not terminated by '\n' */
2293 p = strchr(buf, '#');
2297 p = skip_whitespace(buf);
2300 *skip_non_whitespace(p) = '\0';
2303 if (strcmp(word->str + 1, p) == 0) {
2305 p = skip_whitespace(p + strlen(p) + 1);
2306 *skip_non_whitespace(p) = '\0';
2307 if (word->str[0] == 'S') {
2308 char *p2 = skip_service(&srv, p);
2311 error("%s:%d: ignoring unknown service name '%s'", conffile, lineno, p);
2315 *skip_non_whitespace(p) = '\0';
2317 word->handler(p, srv);
2323 error("%s:%d: ignoring unknown directive '%s'", conffile, lineno, p);
/* Decode the value of the "U" environment variable.
 *
 * str:   comma-separated hexadecimal numbers (parsed with strtoul, base 16).
 * sizep: out parameter; main() treats the result as *sizep elements where
 *        element 0 is the uid and elements 1.. are the group ids.
 *
 * Returns: a freshly xmalloc'ed array of the decoded values.
 * The string is produced by this program itself before it re-executes, so
 * any conversion error, or a character other than ',' or NUL after a
 * number, is treated as an internal error and terminates the program.
 */
2332 /* "XX,XX[,XX]..." -> gid_t[] */
2333 static gid_t* env_U_to_uid_and_gids(const char *str, int *sizep)
2344 ug = xmalloc(ng * sizeof(ug[0]));
2352 *gp++ = strtoul(sp, (char**)&sp, 16);
2353 if (errno || (*sp != ',' && *sp != '\0'))
2354 error_and_die("internal error");
/* Build the environment string that carries the uid and group list of
 * `user` across the re-exec performed in main().
 *
 * user: account name from the "server-user" directive.
 *
 * getpwnam() resolves the account (an unknown user is fatal).
 * getgrouplist() fills the array from index 1 - index 0 is reserved for
 * the uid - and, if the first attempt reports that the buffer was too
 * small, the array is grown to the reported size and the call is retried
 * once; a second failure is fatal.
 * Each value is then appended to the result as lower-case hexadecimal
 * followed by ','.
 *
 * Returns: a heap-allocated string which main() hands to putenv().
 * NOTE(review): confirm the exact prefix of the string and how the
 * trailing ',' is handled.
 */
2365 static char* user_to_env_U(const char *user)
2372 pw = getpwnam(user);
2374 perror_and_die("user '%s' is not known", user);
2377 /* 0th cell will be used for uid */
2378 ug = xmalloc((1 + ng) * sizeof(ug[0]));
2379 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) {
2380 ug = xrealloc(ug, (1 + ng) * sizeof(ug[0]));
2381 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0)
2382 perror_and_die("can't get groups of user '%s'", user);
2387 /* How much do we need for "-Uxx,xx[,xx]..." string? */
2388 ug_str = xmalloc((sizeof(unsigned long)+1)*2 * ng + 3);
2394 sp += sprintf(sp, "%lx,", (unsigned long)(*gp++));
/* Resolve the "/proc/self/exe" symlink and keep a heap copy of the target
 * in self_exe_points_to, which the exec calls try before falling back to
 * "/proc/self/exe" itself. A readlink() failure is fatal.
 * One byte of the buffer is held back (sizeof(buf) - 1) to leave room for
 * a terminator.
 * NOTE(review): readlink() does not NUL-terminate its output, so buf must
 * be terminated at index sz before xstrdup() - confirm.
 */
2403 /* not static - don't inline me, compiler! */
2404 void readlink_self_exe(void);
2405 void readlink_self_exe(void)
2407 char buf[PATH_MAX + 1];
2408 ssize_t sz = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
2410 perror_and_die("readlink %s failed", "/proc/self/exe");
2412 self_exe_points_to = xstrdup(buf);
/* Client-side helper for the -K (shutdown) and -i (invalidate) options:
 * connects to the running daemon on NSCD_SOCKET, sends one control
 * request, prints a confirmation and never returns (NORETURN).
 *
 * arg: NULL sends a SHUTDOWN request consisting of the bare header;
 *      otherwise an INVALIDATE request is sent whose key is `arg`
 *      including its terminating NUL (key_len = strlen(arg) + 1).
 *
 * Failure to create the socket or to connect is fatal.
 * NOTE(review): `arg` is copied into reqdata.arg with memcpy(); confirm
 * that buffer's size bounds arg_len.
 */
2416 static void special_op(const char *arg) NORETURN;
2417 static void special_op(const char *arg)
2419 static const user_req_header ureq = { NSCD_VERSION, SHUTDOWN, 0 };
2421 struct sockaddr_un addr;
2424 sock = socket(PF_UNIX, SOCK_STREAM, 0);
2426 error_and_die("cannot create AF_UNIX socket");
2428 addr.sun_family = AF_UNIX;
2429 strcpy(addr.sun_path, NSCD_SOCKET);
2430 if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
2431 error_and_die("cannot connect to %s", NSCD_SOCKET);
2433 if (!arg) { /* shutdown */
2434 xfull_write(sock, &ureq, sizeof(ureq));
2435 printf("sent shutdown request, exiting\n");
2436 } else { /* invalidate */
2437 size_t arg_len = strlen(arg) + 1;
2439 user_req_header req;
2442 reqdata.req.version = NSCD_VERSION;
2443 reqdata.req.type = INVALIDATE;
2444 reqdata.req.key_len = arg_len;
2445 memcpy(reqdata.arg, arg, arg_len);
2446 xfull_write(sock, &reqdata, arg_len + sizeof(ureq));
2447 printf("sent invalidate(%s) request, exiting\n", arg);
/* Callback passed to __nss_disable_nscd() by main(). As its name says, it
 * performs no work: the only text inside the function is a note describing
 * what glibc-2.15's own nscd does in this callback.
 * dbidx, finfo: supplied by glibc; unused.
 */
2453 /* Callback for glibc-2.15 */
2455 static void do_nothing(size_t dbidx, struct traced_file *finfo)
2457 /* nscd from glibc-2.15 does something like this:
2458 if (!dbs[dbidx].enabled || !dbs[dbidx].check_file)
2460 add_file_to_watch_list(finfo->fname);
2464 /* This internal glibc function is called to disable trying to contact nscd.
2465 * We _are_ nscd, so we need to do the lookups, and not recurse.
2466 * Until 2.14, this function was taking no parameters.
2467 * In 2.15, it takes a function pointer from hell.
2469 void __nss_disable_nscd(void (*hell)(size_t, struct traced_file*));
2472 int main(int argc, char **argv)
2477 const char *conffile;
2479 /* make sure we don't get recursive calls */
2480 __nss_disable_nscd(do_nothing);
2482 if (argv[0][0] == 'w') /* "worker_nscd" */
2488 /* Make sure stdio is not closed */
2489 n = xopen3("/dev/null", O_RDWR, 0);
2492 /* Close unexpected open file descriptors */
2493 n |= 0xff; /* start from at least fd# 255 */
2498 /* For idiotic kernels which disallow "exec /proc/self/exe" */
2499 readlink_self_exe();
2501 conffile = default_conffile;
2503 while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) {
2514 special_op(optarg); /* exits */
2516 /* shutdown server */
2517 special_op(NULL); /* exits */
2519 puts("unscd - nscd which does not hang, v."PROGRAM_VERSION);
2525 max_reqnum = getnum(optarg);
2531 print_help_and_die();
2534 /* Multiple -d can bump debug regardless of nscd.conf:
2535 * no -d or -d: 0, -dd: 1,
2536 * -ddd: 3, -dddd: 7, -ddddd: 15
2539 debug |= (((1U << opt_d_cnt) >> 1) - 1) & L_ALL;
2541 env_U = getenv("U");
2542 /* Avoid duplicate warnings if $U exists */
2543 parse_conffile(conffile, /* warn? */ (env_U == NULL));
2545 /* I have a user report of (broken?) ldap nss library
2546 * opening and never closing a socket to a ldap server,
2547 * even across fork() and exec(). This messes up
2548 * worker child's operations for the reporter.
2550 * This strengthens my belief that nscd _must not_ trust
2551 * nss libs to be written correctly.
2553 * Here, we need to jump through the hoops to guard against
2554 * such problems. If config file has server-user setting, we need
2555 * to setgroups + setuid. For that, we need to get uid and gid vector.
2556 * And that means possibly using buggy nss libs.
2557 * We will do it here, but then we will re-exec, passing uid+gids
2558 * in an environment variable.
2560 if (!env_U && config.user) {
2561 /* user_to_env_U() does getpwnam and getgrouplist */
2562 if (putenv(user_to_env_U(config.user)))
2563 error_and_die("out of memory");
2564 /* fds leaked by nss will be closed by execed copy */
2565 execv(self_exe_points_to, argv);
2566 xexecve("/proc/self/exe", argv, environ);
2569 /* Allocate dynamically sized stuff */
2570 max_reqnum += 2; /* account for 2 first "fake" clients */
2571 if (max_reqnum < 8) max_reqnum = 8; /* sanitize */
2572 /* Since refcount is a byte, can't serve more than 255-2 clients
2573 * at once. The rest will block in connect() */
2574 if (max_reqnum > 0xff) max_reqnum = 0xff;
2575 client_buf = xzalloc(max_reqnum * sizeof(client_buf[0]));
2576 busy_cbuf = xzalloc(max_reqnum * sizeof(busy_cbuf[0]));
2577 pfd = xzalloc(max_reqnum * sizeof(pfd[0]));
2578 cinfo = xzalloc(max_reqnum * sizeof(cinfo[0]));
2580 cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8;
2581 if (cache_size < 8) cache_size = 8; /* 8*8 = 64 entries min */
2582 if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */
2583 cache_size |= 1; /* force it to be odd */
2584 cache = xzalloc(cache_size * sizeof(cache[0]));
2586 /* Register cleanup hooks */
2587 signal(SIGINT, cleanup_on_signal);
2588 signal(SIGTERM, cleanup_on_signal);
2589 /* Don't die if a client closes a socket on us */
2590 signal(SIGPIPE, SIG_IGN);
2591 /* Avoid creating zombies */
2592 signal(SIGCHLD, SIG_IGN);
2594 /* Ensure workers don't have SIGALRM ignored */
2595 signal(SIGALRM, SIG_DFL);
2598 if (mkdir(NSCD_DIR, 0755) == 0) {
2599 /* prevent bad mode of NSCD_DIR if umask is e.g. 077 */
2600 chmod(NSCD_DIR, 0755);
2602 pfd[0].fd = open_socket(NSCD_SOCKET);
2603 pfd[1].fd = open_socket(NSCD_SOCKET_OLD);
2604 pfd[0].events = POLLIN;
2605 pfd[1].events = POLLIN;
2607 if (debug & D_DAEMON) {
2608 daemon(/*nochdir*/ 1, /*noclose*/ 0);
2609 if (config.logfile) {
2610 /* nochdir=1: relative paths still work as expected */
2611 xmovefd(xopen3(config.logfile, O_WRONLY|O_CREAT|O_TRUNC, 0666), 2);
2614 debug = 0; /* why bother? it's /dev/null'ed anyway */
2616 chdir("/"); /* compat */
2619 /* ignore job control signals */
2620 signal(SIGTTOU, SIG_IGN);
2621 signal(SIGTTIN, SIG_IGN);
2622 signal(SIGTSTP, SIG_IGN);
2625 log(L_ALL, "unscd v" PROGRAM_VERSION ", debug level 0x%x", debug & L_ALL);
2626 log(L_DEBUG, "max %u requests in parallel", max_reqnum - 2);
2627 log(L_DEBUG, "cache size %u x 8 entries", cache_size);
2631 gid_t *ug = env_U_to_uid_and_gids(env_U, &size);
2633 if (setgroups(size - 1, &ug[1]) || setgid(ug[1]))
2634 perror_and_die("cannot set groups for user '%s'", config.user);
2637 perror_and_die("cannot set uid to %u", (unsigned)(ug[0]));
2641 for (n = 0; n < 3; n++) {
2642 log(L_DEBUG, "%s cache enabled:%u pttl:%u nttl:%u",
2644 config.srv_enable[n],
2647 config.pttl[n] *= 1000;
2648 config.nttl[n] *= 1000;