1 /* This file is part of unscd, a complete nscd replacement.
2 * Copyright (C) 2007 Denys Vlasenko. Licensed under the GPL version 2. */
4 /* unscd is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; version 2 of the License.
8 * unscd is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You can download the GNU General Public License from the GNU website
14 * at http://www.gnu.org/ or write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */
20 gcc -Os -o nscd nscd.c
22 gcc -fomit-frame-pointer -Wl,--sort-section -Wl,alignment -Wl,--sort-common
27 nscd problems are not exactly unheard of. Over the years, there were
28 quite a few bugs in it. This leads people to invent babysitters
29 which restart crashed/hung nscd. This is ugly.
31 After looking at nscd source in glibc I arrived at the conclusion
32 that its design is contributing to this significantly. Even if nscd's
33 code is 100.00% perfect and bug-free, it can still suffer from bugs
34 in libraries it calls.
36 As designed, it's a multithreaded program which calls NSS libraries.
37 These libraries are not part of libc, they may be provided
38 by third-party projects (samba, ldap, you name it).
40 Thus nscd cannot be sure that libraries it calls do not have memory
41 or file descriptor leaks and other bugs.
43 Since nscd is a multithreaded program with a single shared cache,
44 any resource leak in any NSS library has cumulative effect.
45 Even if an NSS library leaks a file descriptor 0.01% of the time,
46 this will make nscd crash or hang after some time.
48 Of course bugs in NSS .so modules should be fixed, but meanwhile
49 I do want nscd which does not crash or lock up.
51 So I went ahead and wrote a replacement.
53 It is a single-threaded server process which offloads all NSS
54 lookups to worker children (not threads, but fully independent
55 processes). Cache hits are handled by parent. Only cache misses
56 start worker children. This design is immune against
57 resource leaks and hangs in NSS libraries.
59 It is also many times smaller.
61 Currently (v0.36) it emulates glibc nscd pretty closely
62 (handles same command line flags and config file), and is moderately tested.
64 Please note that as of 2008-08 it is not in wide use (yet?).
65 If you have trouble compiling it, see an incompatibility with
66 "standard" one or experience hangs/crashes, please report it to
67 vda.linux@googlemail.com
69 ***********************************************************************/
71 /* Make struct ucred appear in sys/socket.h */
73 /* For all good things */
90 #include <sys/socket.h>
92 #include <sys/types.h>
98 /* For inet_ntoa (for debug build only) */
99 #include <arpa/inet.h>
102 * 0.21 add SEGV reporting to worker
103 * 0.22 don't do freeaddrinfo() in GETAI worker, it's crashy
104 * 0.23 add parameter parsing
105 * 0.24 add conf file parsing, not using results yet
106 * 0.25 used some of conf file settings (not tested)
107 * 0.26 almost all conf file settings are wired up
108 * 0.27 a bit more of almost all conf file settings are wired up
109 * 0.28 optimized cache aging
110 * 0.29 implemented invalidate and shutdown options
111 * 0.30 fixed buglet (sizeof(ptr) != sizeof(array))
112 * 0.31 reduced client_info by one member
113 * 0.32 fix nttl/size defaults; simpler check for worker child in main()
114 * 0.33 tweak includes so that it builds on my new machine (64-bit userspace);
115 * do not die on unknown service name, just warn
116 * ("services" is a new service we don't support)
117 * 0.34 create /var/run/nscd/nscd.pid pidfile like glibc nscd 2.8 does;
118 * delay setuid'ing itself to server-user after log and pidfile are open
119 * 0.35 readlink /proc/self/exe and use result if execing /proc/self/exe fails
120 * 0.36 exercise extreme paranoia handling server-user option;
121 * a little bit more verbose logging:
122 * L_DEBUG2 log level added, use debug-level 7 to get it
124 #define PROGRAM_VERSION "0.36"
126 #define DEBUG_BUILD 1
133 #define NORETURN __attribute__ ((__noreturn__))
136 #ifdef MY_CPU_HATES_CHARS
137 typedef int smallint;
139 typedef signed char smallint;
145 L_DEBUG = ((1 << 1) * DEBUG_BUILD),
146 L_DEBUG2 = ((1 << 2) * DEBUG_BUILD),
147 L_DUMP = ((1 << 3) * DEBUG_BUILD),
153 static smallint debug = D_DAEMON;
/* Common message formatter: builds the text in msgbuf and prints it on stderr.
 * s, p   - printf-style format string and its argument list.
 * strerr - extra error text; callers in this file pass either NULL or
 *          strerror(errno). It is appended only if the remaining space
 *          check below says it fits.
 * When D_STAMP is set in debug, an HH:MM:SS.fraction stamp derived from
 * gettimeofday() is written first. */
155 static void verror(const char *s, va_list p, const char *strerr)
158 int sz, rem, strerr_len;
162 if (debug & D_STAMP) {
163 gettimeofday(&tv, NULL);
164 sz = sprintf(msgbuf, "%02u:%02u:%02u.%05u ",
165 (unsigned)((tv.tv_sec / (60*60)) % 24),
166 (unsigned)((tv.tv_sec / 60) % 60),
167 (unsigned)(tv.tv_sec % 60),
168 (unsigned)(tv.tv_usec / 10));
170 rem = sizeof(msgbuf) - sz;
171 sz += vsnprintf(msgbuf + sz, rem, s, p);
172 rem = sizeof(msgbuf) - sz; /* can be negative after this! */
175 strerr_len = strlen(strerr);
176 if (rem >= strerr_len + 4) { /* ": STRERR\n\0" */
179 strcpy(msgbuf + sz, strerr);
188 fputs(msgbuf, stderr);
/* Prints a printf-style message via verror(), without an error string. */
191 static void error(const char *msg, ...)
195 verror(msg, p, NULL);
/* Prints a printf-style message via verror() (no error string).
 * Declared NORETURN; the call that terminates the process is not among
 * the visible lines. */
199 static void error_and_die(const char *msg, ...) NORETURN;
200 static void error_and_die(const char *msg, ...)
204 verror(msg, p, NULL);
/* Like error_and_die(), but also passes strerror(errno) to verror()
 * when errno is nonzero. Declared NORETURN. */
209 static void perror_and_die(const char *msg, ...) NORETURN;
210 static void perror_and_die(const char *msg, ...)
214 /* Guard against "<error message>: Success" */
215 verror(msg, p, errno ? strerror(errno) : NULL);
/* Logging entry point used by the log() macro: prints a printf-style
 * message via verror().
 * mask - log level bit(s) of this message; presumably compared against
 *        the global debug mask before printing - TODO confirm, the test
 *        is not among the visible lines. */
220 static void nscd_log(int mask, const char *msg, ...)
225 verror(msg, p, NULL);
/* Calls nscd_log() only when lvl is nonzero. Levels defined as
 * (bit * DEBUG_BUILD) evaluate to 0 when DEBUG_BUILD is 0, so such
 * calls are never made in that configuration. */
230 #define log(lvl, ...) do { if (lvl) nscd_log(lvl, __VA_ARGS__); } while (0)
/* Hex + ASCII dump of len bytes starting at ptr, in rows of up to 16 bytes.
 * Does nothing unless L_DUMP is set in debug.
 * The format string is advanced by (16-chunk)*5 characters so that a short
 * final row prints only chunk hex pairs; non-printable bytes are shown as '.'.
 * NOTE(review): buf[0]..buf[15] are always passed as arguments, so for a
 * short final row bytes beyond len are read (though not printed). */
233 static void dump(const void *ptr, int len)
236 const unsigned char *buf;
239 if (!(debug & L_DUMP))
244 int chunk = ((len >= 16) ? 16 : len);
246 "%02x %02x %02x %02x %02x %02x %02x %02x "
247 "%02x %02x %02x %02x %02x %02x %02x %02x " + (16-chunk) * 5,
248 buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7],
249 buf[8], buf[9],buf[10],buf[11],buf[12],buf[13],buf[14],buf[15]
251 fprintf(stderr, "%*s", (16-chunk) * 3, "");
255 unsigned char c = *buf++;
256 *p++ = (c >= 32 && c < 127 ? c : '.');
264 void dump(const void *ptr, int len);
267 #define hex_dump(p,n) do { if (L_DUMP) dump(p,n); } while (0)
/* open() wrapper: failure is reported through perror_and_die("open"). */
269 static int xopen3(const char *pathname, int flags, int mode)
271 int fd = open(pathname, flags, mode);
273 perror_and_die("open");
/* pipe() wrapper (the call itself is not among the visible lines):
 * failure is reported through perror_and_die("pipe").
 * fds - presumably receives the read and write ends, in that order. */
277 static void xpipe(int *fds)
280 perror_and_die("pipe");
/* execve() wrapper that never returns: if execve() comes back,
 * the failure is reported with the file name and the process dies. */
283 static void xexecve(const char *filename, char **argv, char **envp) NORETURN;
284 static void xexecve(const char *filename, char **argv, char **envp)
286 execve(filename, argv, envp);
287 perror_and_die("cannot re-exec %s", filename);
/* Adds O_NONBLOCK to the file status flags of fd, keeping the other flags.
 * Dies if either fcntl() call fails. */
290 static void ndelay_on(int fd)
292 int fl = fcntl(fd, F_GETFL);
294 perror_and_die("F_GETFL");
295 if (fcntl(fd, F_SETFL, fl | O_NONBLOCK) < 0)
296 perror_and_die("setting O_NONBLOCK");
/* Sets the descriptor flags of fd to FD_CLOEXEC; dies on failure. */
299 static void close_on_exec(int fd)
301 if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0)
302 perror_and_die("setting FD_CLOEXEC");
/* Returns the CLOCK_MONOTONIC time in milliseconds, obtained with a raw
 * clock_gettime syscall. The result is truncated to unsigned, so it wraps.
 * Dies if the syscall fails. */
305 static unsigned monotonic_ms(void)
308 if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts))
309 perror_and_die("clock_gettime(MONOTONIC)");
310 return ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
/* Size of the string including its terminating NUL. */
313 static unsigned strsize(const char *str)
315 return strlen(str) + 1;
/* Size of the string including its terminating NUL,
 * rounded up to a multiple of 4. */
318 static unsigned strsize_aligned4(const char *str)
320 return (strlen(str) + 1 + 3) & (~3);
/* read() that is retried for as long as it fails with EINTR. */
323 static ssize_t safe_read(int fd, void *buf, size_t count)
327 n = read(fd, buf, count);
328 } while (n < 0 && errno == EINTR);
/* Reads with safe_read(), advancing buf by the amount read each time;
 * presumably loops until len bytes are read or EOF - the loop control
 * is not among the visible lines. */
332 static ssize_t full_read(int fd, void *buf, size_t len)
338 cc = safe_read(fd, buf, len);
340 return cc; /* read() returns -1 on failure. */
343 buf = ((char *)buf) + cc;
/* Single safe_read() that must return exactly len bytes; dies otherwise. */
351 static void xsafe_read(int fd, void *buf, size_t len)
353 if (len != safe_read(fd, buf, len))
354 perror_and_die("short read");
/* full_read() that must return exactly len bytes; dies otherwise. */
356 static void xfull_read(int fd, void *buf, size_t len)
358 if (len != full_read(fd, buf, len))
359 perror_and_die("short read");
/* write() that is retried for as long as it fails with EINTR. */
363 static ssize_t safe_write(int fd, const void *buf, size_t count)
367 n = write(fd, buf, count);
368 } while (n < 0 && errno == EINTR);
/* Writes with safe_write(), advancing buf by the amount written each time;
 * presumably loops until all len bytes are written - the loop control
 * is not among the visible lines. */
372 static ssize_t full_write(int fd, const void *buf, size_t len)
379 cc = safe_write(fd, buf, len);
381 return cc; /* write() returns -1 on failure. */
383 buf = ((const char *)buf) + cc;
/* Single safe_write() that must write exactly count bytes; dies otherwise. */
389 static void xsafe_write(int fd, const void *buf, size_t count)
391 if (count != safe_write(fd, buf, count))
392 perror_and_die("short write of %ld bytes", (long)count);
/* full_write() that must write exactly count bytes; dies otherwise. */
394 static void xfull_write(int fd, const void *buf, size_t count)
396 if (count != full_write(fd, buf, count))
397 perror_and_die("short write of %ld bytes", (long)count);
/* Makes to_fd refer to the same file as from_fd using dup2(); a no-op when
 * both are equal. Dies if dup2() fails. (Whether from_fd is closed
 * afterwards is not visible here.) */
400 static void xmovefd(int from_fd, int to_fd)
402 if (from_fd != to_fd) {
403 if (dup2(from_fd, to_fd) < 0)
404 perror_and_die("dup2");
/* Parses a decimal number. The string must start with a digit, contain
 * nothing after the number, and the value must not exceed INT_MAX / 1000
 * (so it can later be multiplied by 1000). Anything else is fatal. */
409 static unsigned getnum(const char *str)
411 if (str[0] >= '0' && str[0] <= '9') {
413 unsigned long l = strtoul(str, &p, 10);
414 /* must not overflow int even after x1000 */
415 if (!*p && l <= INT_MAX / 1000)
418 error_and_die("malformed or too big number '%s'", str);
/* Returns a pointer to the first non-whitespace character of s
 * (possibly its terminating NUL).
 * The argument to isspace() is cast to unsigned char: passing a negative
 * plain char (any byte >= 0x80 where char is signed) is undefined. */
static char *skip_whitespace(const char *s)
{
	/* NB: isspace('\0') returns 0 */
	while (isspace((unsigned char)*s))
		++s;
	return (char *)s;
}
/* Returns a pointer to the first whitespace character of s, or to its
 * terminating NUL if there is none.
 * The argument to isspace() is cast to unsigned char: passing a negative
 * plain char (any byte >= 0x80 where char is signed) is undefined. */
static char *skip_non_whitespace(const char *s)
{
	while (*s && !isspace((unsigned char)*s))
		++s;
	return (char *)s;
}
/* malloc() wrapper; allocation failure is reported as "out of memory"
 * through error_and_die(). */
434 static void *xmalloc(unsigned sz)
436 void *p = malloc(sz);
438 error_and_die("out of memory");
/* Allocates sz bytes with xmalloc(); judging by the name the block is
 * zero-filled, but the clearing call is not among the visible lines. */
442 static void *xzalloc(unsigned sz)
444 void *p = xmalloc(sz);
/* realloc() wrapper; failure is reported as "out of memory"
 * through error_and_die(). */
449 static void *xrealloc(void *p, unsigned size)
451 p = realloc(p, size);
453 error_and_die("out of memory");
/* strdup() wrapper; failure is reported as "out of memory"
 * through error_and_die(). */
457 static const char *xstrdup(const char *str)
459 const char *p = strdup(str);
461 error_and_die("out of memory");
479 smallint srv_enable[3];
480 smallint check_files[3];
485 /* We try to closely mimic glibc nscd */
486 .logfile = NULL, /* default is to not have a log file */
488 .srv_enable = { 0, 0, 0 },
489 .check_files = { 1, 1, 1 },
490 .pttl = { 3600, 3600, 3600 },
491 .nttl = { 20, 60, 20 },
492 /* huh, what is the default cache size in glibc nscd? */
493 .size = { 256 * 8 / 3, 256 * 8 / 3, 256 * 8 / 3 },
496 static const char default_conffile[] = "/etc/nscd.conf";
497 static const char *self_exe_points_to = "/proc/self/exe";
501 ** Clients, workers machinery
504 /* Header common to all requests */
505 #define USER_REQ_STRUCT \
506 int32_t version; /* Version number of the daemon interface */ \
507 int32_t type; /* Service requested */ \
508 int32_t key_len; /* Key length */
510 typedef struct user_req_header {
516 MAX_USER_REQ_SIZE = 1024,
517 USER_HDR_SIZE = sizeof(user_req_header),
518 /* DNS queries time out after 20 seconds,
519 * we will allow for a bit more */
520 WORKER_TIMEOUT_SEC = 30,
521 CLIENT_TIMEOUT_MS = 100,
522 SMALL_POLL_TIMEOUT_MS = 200,
/* One client request: header fields followed by the key bytes in reqbuf.
 * Once a request is stored in the cache, the storage of the .version field
 * is reused for refcount (8 bits) and timestamp24 (24 bits).
 * reqbuf is sized so that header + reqbuf add up to MAX_USER_REQ_SIZE. */
525 typedef struct user_req {
527 struct { /* as came from client */
530 struct { /* when stored in cache, overlaps .version */
531 unsigned refcount:8; /* actually, can be 1 or 0 only */
532 /* (timestamp24 * 256) == timestamp in ms */
533 unsigned timestamp24:24;
536 char reqbuf[MAX_USER_REQ_SIZE - USER_HDR_SIZE];
539 /* Compile-time check for correct size */
/* The array gets size -1 (a compile error) unless
 * sizeof(user_req) == MAX_USER_REQ_SIZE. */
540 struct BUG_wrong_user_req_size {
541 char BUG_wrong_user_req_size[sizeof(user_req) == MAX_USER_REQ_SIZE ? 1 : -1];
553 SHUTDOWN, /* Shut the server down */
554 GETSTAT, /* Get the server statistic */
555 INVALIDATE, /* Invalidate one special cache */
/* Printable names of request types, used in log messages as
 * typestr[type]. The trailing comments record the implementation
 * status of each request type. */
567 static const char *const typestr[] = {
568 "GETPWBYNAME", /* done */
569 "GETPWBYUID", /* done */
570 "GETGRBYNAME", /* done */
571 "GETGRBYGID", /* done */
572 "GETHOSTBYNAME", /* done */
573 "GETHOSTBYNAMEv6", /* done */
574 "GETHOSTBYADDR", /* done */
575 "GETHOSTBYADDRv6", /* done */
576 "SHUTDOWN", /* done */
577 "GETSTAT", /* info? */
578 "INVALIDATE", /* done */
579 /* won't do: nscd passes a name of shmem segment
580 * which client can map and "see" the db */
582 "GETFDGR", /* won't do */
583 "GETFDHST", /* won't do */
585 "INITGROUPS", /* done */
586 "GETSERVBYNAME", /* prio 3 (no caching?) */
587 "GETSERVBYPORT", /* prio 3 (no caching?) */
588 "GETFDSERV" /* won't do */
591 extern const char *const typestr[];
/* Maps a request type to the service (SRV_PASSWD, SRV_GROUP or SRV_HOSTS)
 * whose settings and cache entries it belongs to. Only the request types
 * listed here have an explicit entry. */
593 static const smallint type_to_srv[] = {
594 [GETPWBYNAME ] = SRV_PASSWD,
595 [GETPWBYUID ] = SRV_PASSWD,
596 [GETGRBYNAME ] = SRV_GROUP,
597 [GETGRBYGID ] = SRV_GROUP,
598 [GETHOSTBYNAME ] = SRV_HOSTS,
599 [GETHOSTBYNAMEv6 ] = SRV_HOSTS,
600 [GETHOSTBYADDR ] = SRV_HOSTS,
601 [GETHOSTBYADDRv6 ] = SRV_HOSTS,
602 [GETAI ] = SRV_HOSTS,
603 [INITGROUPS ] = SRV_GROUP,
/* Returns 0 for GETAI and INITGROUPS and 1 for any other type greater than
 * GETHOSTBYADDRv6. The result for the remaining (smaller) types is
 * returned on a line not visible here - presumably 0. */
606 static int unsupported_ureq_type(unsigned type)
608 if (type == GETAI) return 0;
609 if (type == INITGROUPS) return 0;
610 if (type > GETHOSTBYADDRv6) return 1;
614 /* Possible reductions:
615 * fd, bufidx - uint8_t
616 * started_ms -> uint16_t started_s
617 * ureq - eliminate (derivable from bufidx?)
618 * cell - eliminate (derivable from resptr?)
/* Per-connection state, kept in the cinfo[] array and indexed in parallel
 * with pfd[]. */
620 typedef struct client_info {
621 /* if client_fd != 0, we are waiting for the reply from worker
622 * on pfd[i].fd, and client_fd is saved client's fd
623 * (we need to put it back into pfd[i].fd later) */
625 unsigned bytecnt; /* bytes read from client */
626 unsigned bufidx; /* buffer# in global client_buf[] */
628 unsigned respos; /* response */
630 user_req *resptr; /* response */
631 user_req *ureq; /* request (points to client_buf[x]) */
632 user_req **cell; /* cache cell ptr */
635 static int min_closed = INT_MAX;
636 static int cnt_closed = 0;
637 static int num_clients = 2; /* two listening sockets are "clients" too */
639 /* We read up to max_reqnum requests in parallel */
640 static unsigned max_reqnum = 14;
642 /* Each of these points to [max_reqnum] sized array */
643 static char (*client_buf)[MAX_USER_REQ_SIZE];
644 static char *busy_cbuf;
645 static struct pollfd *pfd;
646 static client_info *cinfo;
/* Size of a request as sent by the client: the fixed header
 * plus key_len bytes of key. */
648 static inline unsigned ureq_size(const user_req *ureq)
650 return sizeof(user_req_header) + ureq->key_len;
653 static unsigned cache_age(unsigned now_ms, const user_req *ureq)
655 return (uint32_t)now_ms - (ureq->timestamp24 << 8);
/* Stores now_ms in the entry's 24-bit timestamp field, in units of 256 ms
 * (the low 8 bits of now_ms are dropped). */
658 static void set_cache_timestamp(user_req *ureq, unsigned now_ms)
660 ureq->timestamp24 = now_ms >> 8;
/* Searches busy_cbuf[] round-robin for a free request buffer, starting
 * after the previously examined slot. Dies with "no free bufs?!" after
 * a full cycle without finding one. What is returned and how the slot is
 * marked busy is on lines not visible here. */
663 static int alloc_buf_no(void)
668 next_buf = (next_buf + 1) % max_reqnum;
669 if (!busy_cbuf[cur]) {
673 } while (next_buf != n);
674 error_and_die("no free bufs?!");
/* Converts a buffer number into the address of client_buf[i]. */
677 static inline void *bufno2buf(int i)
679 return client_buf[i];
/* Releases client slot i: marks pfd[i] as unused (fd = 0) and frees its
 * request buffer in busy_cbuf[]. Further cleanup, if any, is on lines
 * not visible here. */
682 static void close_client(int i)
684 log(L_DEBUG, "closing client %d (fd %d)", i, pfd[i].fd);
686 pfd[i].fd = 0; /* flag as unused */
687 busy_cbuf[cinfo[i].bufidx] = 0;
695 ** nscd API <-> C API conversion
/* Leading part shared by all response types. Other code in this file
 * casts responses to this type to read version_or_size and a "found"
 * member (declared on a line not visible here). */
698 typedef struct response_header {
699 uint32_t version_or_size;
/* Header of an INITGROUPS response; the group ids follow it in gid[].
 * gid[] has zero elements when gid_t is 32 bits wide and an invalid
 * (negative) size otherwise, so it doubles as a compile-time check. */
704 typedef struct initgr_response_header {
705 uint32_t version_or_size;
708 /* code assumes gid_t == int32, let's check that */
709 int32_t gid[sizeof(gid_t) == sizeof(int32_t) ? 0 : -1];
710 /* char user_str[as_needed]; */
711 } initgr_response_header;
/* Builds the INITGROUPS response for username.
 * Looks the user up with getpwnam(), then calls getgrouplist(), growing the
 * response buffer and retrying while it returns -1. On completion
 * version_or_size holds the total response size and ngrps the number of
 * groups. Handling of an unknown user is on lines not visible here. */
713 static initgr_response_header *obtain_initgroups(const char *username)
715 struct initgr_response_header *resp;
717 enum { MAGIC_OFFSET = sizeof(*resp) / sizeof(int32_t) };
721 pw = getpwnam(username);
724 resp->version_or_size = sizeof(*resp);
730 /* getgrouplist may be very expensive, it's much better to allocate
731 * a bit more than to run getgrouplist twice */
735 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
736 resp = xrealloc(resp, sz);
737 } while (getgrouplist(username, pw->pw_gid, (gid_t*) &resp->gid, &ngroups) == -1);
738 log(L_DEBUG, "ngroups=%d", ngroups);
740 sz = sizeof(*resp) + sizeof(resp->gid[0]) * ngroups;
741 /* resp = xrealloc(resp, sz); - why bother */
742 resp->version_or_size = sz;
744 resp->ngrps = ngroups;
/* Header of a passwd response. The strings listed in the trailing comments
 * follow the header back to back, each with the length given by the
 * matching *_len member. */
749 typedef struct pw_response_header {
750 uint32_t version_or_size;
753 int32_t pw_passwd_len;
756 int32_t pw_gecos_len;
758 int32_t pw_shell_len;
759 /* char pw_name[pw_name_len]; */
760 /* char pw_passwd[pw_passwd_len]; */
761 /* char pw_gecos[pw_gecos_len]; */
762 /* char pw_dir[pw_dir_len]; */
763 /* char pw_shell[pw_shell_len]; */
764 } pw_response_header;
/* Serializes a struct passwd into a pw_response_header followed by the
 * five strings (name, passwd, gecos, dir, shell), each including its NUL.
 * version_or_size is set to the total size. Allocation and the handling of
 * pw == NULL are on lines not visible here.
 * NOTE(review): the final debug log passes a pointer difference for a %u
 * conversion; a cast to unsigned would make the argument type match. */
766 static pw_response_header *marshal_passwd(struct passwd *pw)
769 pw_response_header *resp;
770 unsigned pw_name_len;
771 unsigned pw_passwd_len;
772 unsigned pw_gecos_len;
774 unsigned pw_shell_len;
775 unsigned sz = sizeof(*resp);
777 sz += (pw_name_len = strsize(pw->pw_name));
778 sz += (pw_passwd_len = strsize(pw->pw_passwd));
779 sz += (pw_gecos_len = strsize(pw->pw_gecos));
780 sz += (pw_dir_len = strsize(pw->pw_dir));
781 sz += (pw_shell_len = strsize(pw->pw_shell));
784 resp->version_or_size = sz;
790 resp->pw_name_len = pw_name_len;
791 resp->pw_passwd_len = pw_passwd_len;
792 resp->pw_uid = pw->pw_uid;
793 resp->pw_gid = pw->pw_gid;
794 resp->pw_gecos_len = pw_gecos_len;
795 resp->pw_dir_len = pw_dir_len;
796 resp->pw_shell_len = pw_shell_len;
797 p = (char*)(resp + 1);
798 strcpy(p, pw->pw_name); p += pw_name_len;
799 strcpy(p, pw->pw_passwd); p += pw_passwd_len;
800 strcpy(p, pw->pw_gecos); p += pw_gecos_len;
801 strcpy(p, pw->pw_dir); p += pw_dir_len;
802 strcpy(p, pw->pw_shell); p += pw_shell_len;
803 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
/* Header of a group response. It is followed by the member-length array,
 * the group name, the group password and the member names, as listed in
 * the trailing comments. */
808 typedef struct gr_response_header {
809 uint32_t version_or_size;
811 int32_t gr_name_len; /* strlen(gr->gr_name) + 1; */
812 int32_t gr_passwd_len; /* strlen(gr->gr_passwd) + 1; */
813 int32_t gr_gid; /* gr->gr_gid */
814 int32_t gr_mem_cnt; /* while (gr->gr_mem[gr_mem_cnt]) ++gr_mem_cnt; */
815 /* int32_t gr_mem_len[gr_mem_cnt]; */
816 /* char gr_name[gr_name_len]; */
817 /* char gr_passwd[gr_passwd_len]; */
818 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
819 /* char gr_gid_str[as_needed]; - huh? */
820 /* char orig_key[as_needed]; - needed?? I don't do this ATM... */
822 glibc adds gr_gid_str, but client doesn't get/use it:
823 writev(3, [{"\2\0\0\0\2\0\0\0\5\0\0\0", 12}, {"root\0", 5}], 2) = 17
824 poll([{fd=3, events=POLLIN|POLLERR|POLLHUP, revents=POLLIN}], 1, 5000) = 1
825 read(3, "\2\0\0\0\1\0\0\0\10\0\0\0\4\0\0\0\0\0\0\0\0\0\0\0", 24) = 24
826 readv(3, [{"", 0}, {"root\0\0\0\0\0\0\0\0", 12}], 2) = 12
829 } gr_response_header;
/* Serializes a struct group into a gr_response_header followed by:
 * one 32-bit length per member, the group name, the group password and the
 * member names (every string includes its NUL). version_or_size is set to
 * the total size. Allocation and the handling of gr == NULL are on lines
 * not visible here.
 * NOTE(review): the final debug log passes a pointer difference for a %u
 * conversion; a cast to unsigned would make the argument type match. */
831 static gr_response_header *marshal_group(struct group *gr)
834 gr_response_header *resp;
836 unsigned sz = sizeof(*resp);
838 sz += strsize(gr->gr_name);
839 sz += strsize(gr->gr_passwd);
841 while (gr->gr_mem[gr_mem_cnt]) {
842 sz += strsize(gr->gr_mem[gr_mem_cnt]);
845 /* for int32_t gr_mem_len[gr_mem_cnt]; */
846 sz += gr_mem_cnt * sizeof(int32_t);
849 resp->version_or_size = sz;
855 resp->gr_name_len = strsize(gr->gr_name);
856 resp->gr_passwd_len = strsize(gr->gr_passwd);
857 resp->gr_gid = gr->gr_gid;
858 resp->gr_mem_cnt = gr_mem_cnt;
859 p = (char*)(resp + 1);
860 /* int32_t gr_mem_len[gr_mem_cnt]; */
862 while (gr->gr_mem[gr_mem_cnt]) {
863 *(uint32_t*)p = strsize(gr->gr_mem[gr_mem_cnt]);
867 /* char gr_name[gr_name_len]; */
868 strcpy(p, gr->gr_name);
869 p += strsize(gr->gr_name);
870 /* char gr_passwd[gr_passwd_len]; */
871 strcpy(p, gr->gr_passwd);
872 p += strsize(gr->gr_passwd);
873 /* char gr_mem[gr_mem_cnt][gr_mem_len[i]]; */
875 while (gr->gr_mem[gr_mem_cnt]) {
876 strcpy(p, gr->gr_mem[gr_mem_cnt]);
877 p += strsize(gr->gr_mem[gr_mem_cnt]);
880 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
/* Header of a host response. It is followed by the (4-byte padded) host
 * name, the alias-length array, the address list and the alias strings,
 * as listed in the trailing comments. */
885 typedef struct hst_response_header {
886 uint32_t version_or_size;
889 int32_t h_aliases_cnt;
890 int32_t h_addrtype; /* AF_INET or AF_INET6 */
891 int32_t h_length; /* 4 or 16 */
892 int32_t h_addr_list_cnt;
894 /* char h_name[h_name_len]; - we pad it to 4 bytes */
895 /* uint32_t h_aliases_len[h_aliases_cnt]; */
896 /* char h_addr_list[h_addr_list_cnt][h_length]; - every one is the same size [h_length] (4 or 16) */
897 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
898 } hst_response_header;
/* Serializes a struct hostent into a hst_response_header followed by:
 * the host name padded to a multiple of 4 bytes, one 32-bit length per
 * alias, the addresses (h_length bytes each) and the alias strings.
 * version_or_size is set to the total size. Allocation and the handling of
 * h == NULL (error = HOST_NOT_FOUND is set on that path or as a default -
 * not determinable here) are on lines not visible here.
 * NOTE(review): the final debug log passes a pointer difference for a %u
 * conversion; a cast to unsigned would make the argument type match. */
900 static hst_response_header *marshal_hostent(struct hostent *h)
903 hst_response_header *resp;
905 unsigned h_aliases_cnt;
906 unsigned h_addr_list_cnt;
907 unsigned sz = sizeof(*resp);
909 /* char h_name[h_name_len] */
910 sz += h_name_len = strsize_aligned4(h->h_name);
912 while (h->h_addr_list[h_addr_list_cnt]) {
915 /* char h_addr_list[h_addr_list_cnt][h_length] */
916 sz += h_addr_list_cnt * h->h_length;
918 while (h->h_aliases[h_aliases_cnt]) {
919 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]] */
920 sz += strsize(h->h_aliases[h_aliases_cnt]);
923 /* uint32_t h_aliases_len[h_aliases_cnt] */
924 sz += h_aliases_cnt * 4;
927 resp->version_or_size = sz;
930 resp->error = HOST_NOT_FOUND;
934 resp->h_name_len = h_name_len;
935 resp->h_aliases_cnt = h_aliases_cnt;
936 resp->h_addrtype = h->h_addrtype;
937 resp->h_length = h->h_length;
938 resp->h_addr_list_cnt = h_addr_list_cnt;
940 p = (char*)(resp + 1);
941 /* char h_name[h_name_len]; */
942 strcpy(p, h->h_name);
944 /* uint32_t h_aliases_len[h_aliases_cnt]; */
946 while (h->h_aliases[h_aliases_cnt]) {
947 *(uint32_t*)p = strsize(h->h_aliases[h_aliases_cnt]);
951 /* char h_addr_list[h_addr_list_cnt][h_length]; */
953 while (h->h_addr_list[h_addr_list_cnt]) {
954 memcpy(p, h->h_addr_list[h_addr_list_cnt], h->h_length);
958 /* char h_aliases[h_aliases_cnt][h_aliases_len[i]]; */
960 while (h->h_aliases[h_aliases_cnt]) {
961 strcpy(p, h->h_aliases[h_aliases_cnt]);
962 p += strsize(h->h_aliases[h_aliases_cnt]);
965 log(L_DEBUG, "sz:%u realsz:%u", sz, p - (char*)resp);
970 /* Reply to addrinfo query */
/* The header is followed by the packed addresses, one family byte per
 * address and the canonical name, as listed in the trailing comments. */
971 typedef struct ai_response_header {
972 uint32_t version_or_size;
978 /* char ai_addr[naddrs][4 or 16]; - addrslen bytes in total */
979 /* char ai_family[naddrs]; - AF_INET[6] each (determines ai_addr[i] length) */
980 /* char ai_canonname[canonlen]; */
981 } ai_response_header;
/* Builds the GETAI response for hostname.
 * Calls getaddrinfo() with AI_CANONNAME, then packs every result's address
 * (4 bytes for AF_INET, 16 otherwise), one family byte per address and the
 * canonical name of the first result after the header. The result list is
 * deliberately not freed (see the comment at the end). Handling of a
 * getaddrinfo() failure is on lines not visible here.
 * NOTE(review): the final debug log calls strsize(ai->ai_canonname) without
 * the NULL check that guards the strcpy() just above it. */
983 static ai_response_header *obtain_addrinfo(const char *hostname)
985 struct addrinfo hints;
988 ai_response_header *resp;
993 unsigned addrslen = 0;
994 unsigned canonlen = 0;
996 memset(&hints, 0, sizeof(hints));
997 hints.ai_flags = AI_CANONNAME;
998 /* hints.ai_socktype = SOCK_STREAM; - can kill dups (one for each possible SOCK_xxx) */
999 ai = NULL; /* on failure getaddrinfo may leave it as-is */
1000 err = getaddrinfo(hostname, NULL, &hints, &ai);
1004 if (ai->ai_canonname)
1005 sz += canonlen = strsize(ai->ai_canonname);
1009 addrslen += (ap->ai_family == AF_INET ? 4 : 16);
1012 sz += naddrs + addrslen;
1015 resp->version_or_size = sz;
1018 /*resp->found = 0;*/
1022 resp->naddrs = naddrs;
1023 resp->addrslen = addrslen;
1024 resp->canonlen = canonlen;
1025 p = (char*)(resp + 1);
1026 family = p + addrslen;
1029 /* char ai_family[naddrs]; */
1030 *family++ = ap->ai_family;
1031 /* char ai_addr[naddrs][4 or 16]; */
1032 if (ap->ai_family == AF_INET) {
1033 memcpy(p, &(((struct sockaddr_in*)(ap->ai_addr))->sin_addr), 4);
1036 memcpy(p, &(((struct sockaddr_in6*)(ap->ai_addr))->sin6_addr), 16);
1041 /* char ai_canonname[canonlen]; */
1042 if (ai->ai_canonname)
1043 strcpy(family, ai->ai_canonname);
1044 log(L_DEBUG, "sz:%u realsz:%u", sz, family + strsize(ai->ai_canonname) - (char*)resp);
1046 /* glibc 2.3.6 segfaults here sometimes
1047 * (maybe my mistake, fixed by "ai = NULL;" above).
1048 * Since we are in worker and are going to exit anyway, why bother? */
1049 /*freeaddrinfo(ai);*/
1058 /* one 8-element "cacheline" */
1059 typedef user_req *cacheline_t[8];
1060 static unsigned cache_size;
1061 /* Points to cacheline_t cache[cache_size] array, or in other words,
1062 * points to user_req* cache[cache_size][8] array */
1063 static cacheline_t *cache;
1064 static unsigned cached_cnt;
1065 static unsigned cache_access_cnt = 1; /* prevent division by zero */
1066 static unsigned cache_hit_cnt = 1;
1067 static unsigned last_age_time;
1068 static unsigned aging_interval_ms;
1069 static unsigned min_aging_interval_ms;
/* A cached entry stores the request immediately followed by its response.
 * Returns the address of the response: the request start plus the request
 * size rounded up to a multiple of 4. */
1071 static response_header *ureq_response(user_req *ureq)
1073 /* Skip query part, find answer part
1074 * (answer is 32-bit aligned) */
1075 return (void*) ((char*)ureq + ((ureq_size(ureq) + 3) & ~3));
1078 /* This hash is supposed to be good for short textual data */
/* Each step computes hash = (hash * 33) ^ next_byte.
 * p/sz - data to hash; hash - initial value. The loop around the visible
 * step is not shown here. */
1079 static uint32_t bernstein_hash(void *p, unsigned sz, uint32_t hash)
1083 hash = (32 * hash + hash) ^ *key++;
/* Looks ureq up in the cache.
 * The key length and key bytes are hashed (seeded with the request type) to
 * pick one 8-entry cache line; *cellp is set to that line. The line's
 * entries are then compared with ureq from the .type member onwards
 * (.version is skipped because cached entries reuse it).
 * Presumably returns the matching cached entry or NULL - the return
 * statements are not among the visible lines.
 * Access/hit counters are halved before they could overflow. */
1088 static user_req *find_cell_and_response(user_req ***cellp, user_req *ureq)
1093 unsigned ureq_sz = ureq_size(ureq);
1095 /* prevent overflow and division by zero */
1096 if ((int)(cache_access_cnt+1) < 0) {
1097 cache_access_cnt = (cache_access_cnt >> 1) + 1;
1098 cache_hit_cnt = (cache_hit_cnt >> 1) + 1;
1102 hash = bernstein_hash(&ureq->key_len, ureq_sz - offsetof(user_req, key_len), ureq->type);
1103 log(L_DEBUG, "hash:%08x", hash);
1104 hash = hash % cache_size;
1105 (*cellp) = cell = cache[hash];
1107 for (i = 0; i < 8; i++) {
1110 // TODO: do secondary hash match
1111 /* ureq->version is always 2 and is reused in cache
1112 * for other purposes, we need to skip it here */
1113 if (memcmp(&ureq->type, &cell[i]->type, ureq_sz - offsetof(user_req, type)) == 0) {
1114 log(L_DEBUG, "found in cache[%u][%u]", hash, i);
1119 log(L_DEBUG, "not found in cache[%u][x]", hash);
/* Drops one reference to the entry *ureqp points to. If refcount is set it
 * is only cleared here; what happens otherwise (presumably the entry is
 * freed) and whether *ureqp is reset is on lines not visible here. */
1123 static void free_refcounted_ureq(user_req **ureqp)
1125 user_req *ureq = *ureqp;
1127 if (ureq->refcount) {
1128 ureq->refcount = 0; /* since it can be only 1 or 0... */
/* Stores new_cached in the 8-entry cache line cell.
 * A free slot is used if there is one; otherwise the entry with the
 * greatest age (relative to now_ms) is released and its slot reused.
 * In both cases aging_interval_ms is reset to min_aging_interval_ms. */
1135 static void save_in_cell(user_req **cell, user_req *new_cached, unsigned now_ms)
1137 unsigned oldest_idx = 0;
1138 unsigned oldest_age = 0;
1142 for (i = 0; i < 8; i++) {
1144 log(L_DEBUG, "using free cache[x][%u]", i);
1146 cell[i] = new_cached;
1147 aging_interval_ms = min_aging_interval_ms;
1150 age = cache_age(now_ms, cell[i]);
1151 if (age > oldest_age) {
1156 log(L_DEBUG, "freeing and reusing cache[x][%u] (age %u)", oldest_idx, oldest_age);
1157 if (cell[oldest_idx]) {
1158 free_refcounted_ureq(&cell[oldest_idx]);
1162 cell[oldest_idx] = new_cached;
1163 aging_interval_ms = min_aging_interval_ms;
/* Walks the cache and releases entries.
 * now_ms - current time; check_files() passes 0 to drop every entry of
 *          one service.
 * srv    - service to process, or -1 for all services.
 * For entries that are aged normally, the TTL is config.pttl[] for positive
 * (found) responses and config.nttl[] for negative ones. When srv is -1,
 * aging_interval_ms is lowered according to the TTLs of surviving entries.
 * The exact conditions separating "drop unconditionally" from "drop when
 * expired" are on lines not visible here. */
1166 static void age_cache(unsigned now_ms, int srv)
1168 user_req **cp = *cache;
1170 unsigned sv = cached_cnt;
1172 log(L_DEBUG, "aging cache, srv:%d, now:%u", srv, now_ms);
1173 if (srv == -1 || !now_ms)
1174 aging_interval_ms = INT_MAX;
1177 user_req *cached = *cp;
1179 int csrv = type_to_srv[cached->type];
1180 if (srv == -1 || srv == csrv) {
1183 free_refcounted_ureq(cp);
1185 unsigned age = cache_age(now_ms, cached);
1186 response_header *resp = ureq_response(cached);
1187 unsigned ttl = (resp->found ? config.pttl : config.nttl)[csrv];
1189 log(L_DEBUG, "freeing: age %u positive %d ttl %u", age, resp->found, ttl);
1191 free_refcounted_ureq(cp);
1192 } else if (srv == -1) {
1194 if (aging_interval_ms > ttl)
1195 aging_interval_ms = ttl;
1202 log(L_INFO, "aged cache, freed:%u, remain:%u", sv - cached_cnt, cached_cnt);
1203 if (srv == -1 || !now_ms)
1204 log(L_DEBUG, "aging interval now %u ms", aging_interval_ms);
1212 /* Spawns a worker and feeds it with user query on stdin */
1213 /* Returns stdout fd of the worker, in blocking mode */
/* Two pipes are created. The child moves its ends to fds 0 and 1 and
 * re-executes this program as "worker_nscd" with an empty environment,
 * first via /proc/self/exe and, if that fails, via self_exe_points_to.
 * The parent writes the request into the child's stdin pipe with a single
 * write and returns the read end of the child's stdout pipe. */
1214 static int create_and_feed_worker(user_req *ureq)
1216 static const char *const argv[] = { "worker_nscd", NULL };
1222 } to_child, to_parent;
1224 /* NB: these pipe fds are in blocking mode and non-CLOEXECed */
1225 xpipe(&to_child.rd);
1226 xpipe(&to_parent.rd);
1229 if (pid < 0) /* error */
1230 perror_and_die("vfork");
1231 if (!pid) { /* child */
1233 close(to_parent.rd);
1234 xmovefd(to_child.rd, 0);
1235 xmovefd(to_parent.wr, 1);
1236 /* Re-exec ourself, cleaning up all allocated memory.
1237 * fds in parent are marked CLOEXEC and will be closed too
1239 execve("/proc/self/exe", (char**)argv, (char**)(argv+1));
1240 xexecve(self_exe_points_to, (char**)argv, (char**)(argv+1));
1245 close(to_parent.wr);
1246 /* We do not expect child to block for any noticeably long time,
1247 * and also we expect write to be one-piece one:
1248 * ureq size is <= 1k and pipes are guaranteed to accept
1249 * at least PIPE_BUF at once */
1250 xsafe_write(to_child.wr, ureq, ureq_size(ureq));
1253 return to_parent.rd;
1256 static user_req *worker_ureq;
/* Returns a printable form of a request key for log messages.
 * For GETHOSTBYADDR the first four bytes of buf are formatted with
 * inet_ntoa(), so the result lives in inet_ntoa()'s static buffer.
 * The GETHOSTBYADDRv6 case and the default are on lines not visible here. */
1259 static const char *req_str(unsigned type, const char *buf)
1261 if (type == GETHOSTBYADDR) {
1263 in.s_addr = *((uint32_t*)buf);
1264 return inet_ntoa(in);
1266 if (type == GETHOSTBYADDRv6) {
1272 const char *req_str(unsigned type, const char *buf);
/* Signal handler installed by worker() for fatal signals: logs which
 * request (taken from the global worker_ureq) was being handled when the
 * signal arrived. Two variants of the message are visible; which one is
 * compiled or executed is selected on lines not visible here. */
1275 static void worker_signal_handler(int sig)
1278 log(L_INFO, "worker:%d got sig:%d while handling req "
1279 "type:%d(%s) key_len:%d '%s'",
1281 worker_ureq->type, typestr[worker_ureq->type],
1282 worker_ureq->key_len,
1283 req_str(worker_ureq->type, worker_ureq->reqbuf)
1286 log(L_INFO, "worker:%d got sig:%d while handling req "
1287 "type:%d key_len:%d",
1289 worker_ureq->type, worker_ureq->key_len);
/* Body of a worker process (never returns).
 * Arms an alarm of WORKER_TIMEOUT_SEC seconds, reads one request from
 * fd 0, installs worker_signal_handler for fatal signals, performs the
 * lookup selected by ureq.type and writes the marshalled response to fd 1.
 * A not-found response is sent as its first 8 bytes only; otherwise
 * version_or_size bytes are written.
 * Any type not matched by the earlier branches is handled as INITGROUPS. */
1294 static void worker(void) NORETURN;
1295 static void worker(void)
1300 worker_ureq = &ureq; /* for signal handler */
1302 /* Make sure we won't hang, but rather die */
1303 if (WORKER_TIMEOUT_SEC)
1304 alarm(WORKER_TIMEOUT_SEC);
1306 /* NB: fds 0, 1 are in blocking mode */
1308 /* We block here (for a short time) */
1309 /* Due to ureq size < PIPE_BUF read is atomic */
1310 /* No error or size checking: we trust the parent */
1311 safe_read(0, &ureq, sizeof(ureq));
1313 signal(SIGSEGV, worker_signal_handler);
1314 signal(SIGBUS, worker_signal_handler);
1315 signal(SIGILL, worker_signal_handler);
1316 signal(SIGFPE, worker_signal_handler);
1317 signal(SIGABRT, worker_signal_handler);
1318 signal(SIGSTKFLT, worker_signal_handler);
1320 if (ureq.type == GETHOSTBYNAME
1321 || ureq.type == GETHOSTBYNAMEv6
1323 resp = marshal_hostent(
1324 ureq.type == GETHOSTBYNAME
1325 ? gethostbyname(ureq.reqbuf)
1326 : gethostbyname2(ureq.reqbuf, AF_INET6)
1328 } else if (ureq.type == GETHOSTBYADDR
1329 || ureq.type == GETHOSTBYADDRv6
1331 resp = marshal_hostent(gethostbyaddr(ureq.reqbuf, ureq.key_len,
1332 (ureq.type == GETHOSTBYADDR ? AF_INET : AF_INET6)
1334 } else if (ureq.type == GETPWBYNAME) {
1335 resp = marshal_passwd(getpwnam(ureq.reqbuf));
1336 } else if (ureq.type == GETPWBYUID) {
1337 resp = marshal_passwd(getpwuid(atoi(ureq.reqbuf)));
1338 } else if (ureq.type == GETGRBYNAME) {
1339 struct group *gr = getgrnam(ureq.reqbuf);
1340 resp = marshal_group(gr);
1341 } else if (ureq.type == GETGRBYGID) {
1342 struct group *gr = getgrgid(atoi(ureq.reqbuf));
1343 resp = marshal_group(gr);
1344 } else if (ureq.type == GETAI) {
1345 resp = obtain_addrinfo(ureq.reqbuf);
1346 } else /*if (ureq.type == INITGROUPS)*/ {
1347 resp = obtain_initgroups(ureq.reqbuf);
1350 if (!((response_header*)resp)->found) {
1351 /* Parent knows about this special case */
1352 xfull_write(1, resp, 8);
1354 /* Responses can be big (getgrnam("guest") on a big user db),
1355 * we cannot rely on them being atomic. full_write loops
1357 xfull_write(1, resp, ((response_header*)resp)->version_or_size);
/* Per-service file whose change invalidates that service's cache
 * (see check_files()). Every row is sized for "/etc/passwd", the longest
 * of the three names. check_statbuf[] holds the last stat() result seen
 * for each file. */
1367 static const char check_filenames[][sizeof("/etc/passwd")] = {
1368 [SRV_PASSWD] = "/etc/passwd", /* "/etc/shadow"? */
1369 [SRV_GROUP] = "/etc/group",
1370 [SRV_HOSTS] = "/etc/hosts", /* "/etc/resolv.conf" "/etc/nsswitch.conf"? */
1373 static struct stat check_statbuf[sizeof(check_filenames) / sizeof(check_filenames[0])];
/* Detects a change of the file watched for service srv.
 * The file is stat()ed into a zeroed buffer (errors are ignored, leaving
 * zeros), the access time is cleared, and the result is compared bytewise
 * with the remembered one. On any difference the new result is remembered
 * and all cache entries of that service are dropped. */
1375 static void check_files(int srv)
1377 const char *file = check_filenames[srv];
1378 struct stat *sb = &check_statbuf[srv];
1379 struct stat tempbuf;
1381 memset(&tempbuf, 0, sizeof(tempbuf));
1382 stat(file, &tempbuf); /* ignore errors */
1383 tempbuf.st_atime = 0; /* this is not a change */
1384 if (memcmp(sb, &tempbuf, sizeof(tempbuf)) != 0) {
1385 log(L_INFO, "detected change in %s", file);
1386 memcpy(sb, &tempbuf, sizeof(tempbuf));
1387 age_cache(0, srv); /* frees entire cache */
1391 /* Returns 1 if we immediately have the answer */
/* Processes the request read so far from client slot i.
 * Order of checks: unsupported type, disabled service, incomplete or
 * over-long request, wrong protocol version, missing NUL terminator
 * (except for the address lookups, whose key is binary).
 * If config.check_files[] is set for the service, a watched-file check is
 * presumably run here (the call is not among the visible lines).
 * On a cache hit the slot is switched to POLLOUT with resptr pointing at
 * the cached entry; on a miss a worker is started and pfd[i].fd is
 * replaced by the worker's fd, the client's fd being saved in client_fd.
 * NOTE(review): the first debug log indexes typestr[] with the
 * client-supplied ureq->type before unsupported_ureq_type() is consulted -
 * confirm that the type is bounded before this point. */
1392 static int handle_client(int i)
1395 user_req *ureq = cinfo[i].ureq;
1397 user_req *ureq_and_resp;
1399 log(L_DEBUG, "version:%d type:%d(%s) key_len:%d '%s'",
1400 ureq->version, ureq->type, typestr[ureq->type],
1401 ureq->key_len, req_str(ureq->type, ureq->reqbuf));
1402 hex_dump(cinfo[i].ureq, cinfo[i].bytecnt);
1404 if (unsupported_ureq_type(ureq->type)) {
1405 /* We don't know this request. Just close the connection */
1406 /* (glibc client interprets this like "not supported by this nscd") */
1407 log(L_INFO, "unsupported query, dropping");
1411 srv = type_to_srv[ureq->type];
1412 if (!config.srv_enable[srv]) {
1413 log(L_INFO, "service %d is disabled, dropping", srv);
1418 if (cinfo[i].bytecnt < USER_HDR_SIZE + ureq->key_len) {
1419 log(L_INFO, "read %d, need %d more to read",
1420 cinfo[i].bytecnt, USER_HDR_SIZE + ureq->key_len);
1421 return 0; /* more to read */
1423 if (cinfo[i].bytecnt > USER_HDR_SIZE + ureq->key_len) {
1424 log(L_INFO, "read overflow");
1428 if (ureq->version != NSCD_VERSION) {
1429 log(L_INFO, "wrong version");
1433 if (ureq->type != GETHOSTBYADDR
1434 && ureq->type != GETHOSTBYADDRv6
1436 if (ureq->key_len && ureq->reqbuf[ureq->key_len - 1] != '\0') {
1437 log(L_INFO, "badly terminated buffer");
1443 if (config.check_files[srv]) {
1447 /* If in cache, save ptr to response into cinfo and return */
1448 ureq_and_resp = find_cell_and_response(&cell, ureq);
1449 if (ureq_and_resp) {
1450 response_header *resp = ureq_response(ureq_and_resp);
1451 unsigned sz = resp->version_or_size;
1452 log(L_DEBUG, "sz:%u", sz);
1454 ureq_and_resp->refcount = 1; /* cache shouldn't free it under us! */
1455 pfd[i].events = POLLOUT; /* we want to write out */
1456 cinfo[i].resptr = ureq_and_resp;
1457 cinfo[i].respos = 0;
1458 //cinfo[i].resp_sz = sz;
1462 /* Start worker process */
1463 cinfo[i].cell = cell;
1464 /* Now we will wait on worker's fd, not client's! */
1465 cinfo[i].client_fd = pfd[i].fd;
1466 pfd[i].fd = create_and_feed_worker(ureq);
1468 /* We can do it here, but we don't really need to.
1469 * We need to have client_buf[] big enough anyway for worst case scenario,
1470 * so we can simply keep cbuf allocated until we close a client.
1471 cinfo[i].ureq = NULL;
1472 busy_cbuf[cinfo[i].bufidx] = 0;
1477 /* When we return, reply is fully read and stored in cache,
1478 * worker's fd is closed, pfd[i] and cinfo[i] are updated. */
1479 static void handle_worker_response(int i, unsigned now_ms)
1481 response_header sz_and_found;
1483 user_req *ureq = cinfo[i].ureq;
1484 response_header *resp;
1486 unsigned ureq_sz_aligned = (char*)ureq_response(ureq) - (char*)ureq;
1488 /* Replies can be big (getgrnam("guest") on a big user db),
1489 * we cannot rely on them being atomic. However, we know that worker
1490 * _always_ gives reply in one full_write(), so loop and read it all
1491 * (looping is implemented inside full_read()) */
1492 resp_sz = full_read(pfd[i].fd, &sz_and_found, 8);
1494 /* worker was killed? */
1495 log(L_DEBUG, "worker gave short reply:%u != 8", resp_sz);
1499 resp_sz = sz_and_found.version_or_size;
1500 if (resp_sz < 8 || resp_sz > 0xfffffff) { /* 256 mb */
1501 error("BUG: bad size from worker:%u", resp_sz);
1505 /* Create new block of cached info */
1506 cached = xzalloc(ureq_sz_aligned + resp_sz);
1507 resp = (void*) ((char*)cached + ureq_sz_aligned);
1508 memcpy(cached, ureq, ureq_size(ureq));
1509 resp->version_or_size = resp_sz;
1510 resp->found = sz_and_found.found;
1511 if (sz_and_found.found) {
1512 /* We need to read data only if it's found
1513 * (otherwise worker sends only 8 bytes) */
1514 if (full_read(pfd[i].fd, resp->body, resp_sz - 8) != resp_sz - 8) {
1515 /* worker was killed? */
1516 log(L_DEBUG, "worker gave short reply");
1523 hex_dump(resp, resp_sz);
1525 cached->refcount = 1; /* cache shouldn't free it under us! */
1526 set_cache_timestamp(cached, now_ms);
1527 save_in_cell(cinfo[i].cell, cached, now_ms);
1532 /* schedule for writeout */
1533 pfd[i].fd = cinfo[i].client_fd;
1534 cinfo[i].client_fd = 0; /* no, we don't wait for worker reply anymore */
1535 pfd[i].events = POLLOUT;
1536 /* pfd[i].revents = 0; - not needed? */
1538 /* writeout position etc */
1539 cinfo[i].resptr = cached;
1540 cinfo[i].respos = 0;
1541 //cinfo[i].resp_sz = resp_sz;
1542 /* if worker took some time to get info (e.g. DNS query),
1543 * prevent client timeout from triggering at once */
1544 cinfo[i].started_ms = now_ms;
/*
 * Event loop of the daemon.
 *
 * pfd[0] and pfd[1] are the two listening sockets; entries from index 2 up
 * to num_clients-1 are clients (or, while a lookup is in flight, the worker
 * serving that client). Each iteration polls these descriptors, accepts new
 * connections, and for every ready entry either writes out a response,
 * reads a worker's reply, or reads more of the client's request.
 * Afterwards the cache is aged, timed-out clients are dropped and closed
 * slots are squeezed out of pfd[] and cinfo[].
 *
 * SHUTDOWN and INVALIDATE requests are honoured only when the peer's uid,
 * obtained with SO_PEERCRED, is 0. An accepted SHUTDOWN makes the function
 * return.
 */
1547 static void main_loop(void)
1549 /* 1/2 of smallest negative TTL */
1550 min_aging_interval_ms = config.nttl[0];
1551 if (min_aging_interval_ms > config.nttl[1]) min_aging_interval_ms = config.nttl[1];
1552 if (min_aging_interval_ms > config.nttl[2]) min_aging_interval_ms = config.nttl[2];
1553 min_aging_interval_ms = (min_aging_interval_ms / 2) | 1;
1554 aging_interval_ms = min_aging_interval_ms;
1561 r = SMALL_POLL_TIMEOUT_MS;
1562 if (num_clients <= 2 && !cached_cnt)
1563 r = -1; /* infinite */
1564 else if (num_clients < max_reqnum)
1565 r = aging_interval_ms;
1567 #if 0 /* Debug: leak detector */
1569 static unsigned long long cnt;
1570 void *p = malloc(240); /* should not be too small */
1573 log(L_INFO, "entering poll %llu (%d ms). num_clients:%u cached:%u %u/%u next malloc:%p, sbrk:%p",
1574 cnt, r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt, p, s);
1578 log(L_DEBUG, "entering poll (%d ms). num_clients:%u cached:%u hit_ratio:%u/%u",
1579 r, num_clients, cached_cnt, cache_hit_cnt, cache_access_cnt);
1582 r = poll(pfd, num_clients, r);
1583 log(L_DEBUG2, "poll returns %d", r);
1586 perror_and_die("poll");
1590 /* Everything between polls never sleeps.
1591 * There is no blocking I/O (except when we talk to worker thread
1592 * which is guaranteed to not block us for long) */
1594 now_ms = monotonic_ms();
1596 goto skip_fd_checks;
1598 for (i = 0; i < 2; i++) {
1600 if (!pfd[i].revents)
1602 /* pfd[i].revents = 0; - not needed */
1603 cfd = accept(pfd[i].fd, NULL, NULL);
1605 /* odd... poll() says we can accept but accept failed? */
1606 log(L_DEBUG2, "accept failed with %s", strerror(errno));
1611 /* x[num_clients] is next free element, taking it */
1612 pfd[num_clients].fd = cfd;
1613 pfd[num_clients].events = POLLIN;
1614 /* this will make us do read() in next for() loop: */
1615 pfd[num_clients].revents = POLLIN;
1616 memset(&cinfo[num_clients], 0, sizeof(cinfo[num_clients]));
1617 /* cinfo[num_clients].bytecnt = 0; - done */
1618 cinfo[num_clients].started_ms = now_ms;
1619 cinfo[num_clients].bufidx = alloc_buf_no();
1620 cinfo[num_clients].ureq = bufno2buf(cinfo[num_clients].bufidx);
1622 if (num_clients >= max_reqnum) {
1623 /* stop accepting new connects for now */
1624 pfd[0].events = pfd[0].revents = 0;
1625 pfd[1].events = pfd[1].revents = 0;
1628 for (; i < num_clients; i++) {
1629 if (!pfd[i].revents)
1631 log(L_DEBUG2, "pfd[%d].revents:0x%x", i, pfd[i].revents);
1632 /* pfd[i].revents = 0; - not needed */
1634 /* "Write out result" case */
1635 if (pfd[i].revents == POLLOUT) {
1636 response_header *resp;
1638 if (!cinfo[i].resptr) {
1639 /* corner case: worker gave bad response earlier */
1644 resp = ureq_response(cinfo[i].resptr);
1645 resp_sz = resp->version_or_size;
1646 resp->version_or_size = NSCD_VERSION;
/* respos counts BYTES already written, so the resume address must be
 * computed in bytes. Adding it to a response_header pointer would scale it
 * by sizeof(response_header) and send the wrong data after a partial write */
1647 r = safe_write(pfd[i].fd, (char*)resp + cinfo[i].respos, resp_sz - cinfo[i].respos);
1648 resp->version_or_size = resp_sz;
1650 if (r < 0 && errno == EAGAIN)
1652 if (r <= 0) { /* client isn't there anymore */
1654 free_refcounted_ureq(&cinfo[i].resptr);
1658 cinfo[i].respos += r;
1659 if (cinfo[i].respos >= resp_sz) {
1660 /* We wrote everything */
1661 /* No point in trying to get next request, it won't come.
1662 * glibc 2.4 client closes its end after each request,
1663 * without testing for EOF from server. strace:
1665 * read(3, "www.google.com\0\0", 16) = 16
1668 goto write_out_is_done;
1672 /* "Read reply from worker" case. Worker may be
1673 * already dead, revents may contain other bits too */
1674 if ((pfd[i].revents & POLLIN) && cinfo[i].client_fd) {
1675 log(L_DEBUG, "reading response for client %u", i);
1676 handle_worker_response(i, now_ms);
1677 /* We can immediately try to write a response
1682 /* All strange and unexpected cases */
1683 if (pfd[i].revents != POLLIN) {
1684 /* Not just "can read" - probably POLLHUP too */
1685 log(L_INFO, "client %u revents is strange:%x", i, pfd[i].revents);
1690 /* "Read request from client" case */
1691 r = safe_read(pfd[i].fd, (char*)(cinfo[i].ureq) + cinfo[i].bytecnt, MAX_USER_REQ_SIZE - cinfo[i].bytecnt);
1693 log(L_DEBUG2, "error reading from client: %s", strerror(errno));
1694 if (errno == EAGAIN)
1700 log(L_INFO, "premature EOF from client, dropping");
1704 cinfo[i].bytecnt += r;
1705 if (cinfo[i].bytecnt >= sizeof(user_req_header)) {
1706 if (cinfo[i].ureq->type == SHUTDOWN
1707 || cinfo[i].ureq->type == INVALIDATE
1709 const char *service;
1712 struct ucred caller;
1713 socklen_t optlen = sizeof(caller);
1714 if (getsockopt(pfd[i].fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0) {
1715 log(L_INFO, "ignoring special request - cannot get caller's id: %s", strerror(errno));
1719 if (caller.uid != 0) {
1720 log(L_INFO, "special request from non-root - ignoring");
1725 if (cinfo[i].ureq->type == SHUTDOWN) {
1726 log(L_INFO, "got shutdown request, exiting");
1727 return; /* exits nscd */
1729 len = cinfo[i].ureq->key_len;
/* The service name is the request key: it sits in the request buffer right
 * after the fixed header (not at the address of the ureq pointer field) */
1730 service = (char*)cinfo[i].ureq + sizeof(user_req_header);
1731 if (sizeof(user_req_header) + len != cinfo[i].bytecnt
1733 || service[len-1] != '\0'
1735 log(L_INFO, "malformed invalidate request - ignoring");
1739 log(L_INFO, "got invalidate request, flushing cache");
1740 age_cache(0, -1); /* frees entire cache. TODO: replace -1 with service */
1744 if (handle_client(i)) {
1745 /* Response is found in cache! */
1749 } /* for each client[2..num_clients-1] */
1753 if ((now_ms - last_age_time) >= aging_interval_ms) {
1754 last_age_time = now_ms;
1755 age_cache(now_ms, -1);
1758 /* Close timed out client connections */
1759 for (i = 2; i < num_clients; i++) {
1760 if (pfd[i].fd && !cinfo[i].client_fd
1761 && (now_ms - cinfo[i].started_ms) > CLIENT_TIMEOUT_MS
1763 log(L_INFO, "timed out waiting for client %u, dropping", i);
1771 /* We closed at least one client, coalesce pfd[], cinfo[] */
1772 if (min_closed + cnt_closed >= num_clients) {
1773 /* clients [min_closed..num_clients-1] are all closed */
1774 /* log(L_DEBUG, "taking shortcut"); - almost always happens */
1779 while (i < num_clients) {
1783 if (++i >= num_clients)
1787 cinfo[j++] = cinfo[i++];
1791 num_clients -= cnt_closed;
1792 log(L_DEBUG, "removing %d closed clients. num_clients:%d", cnt_closed, num_clients);
1793 min_closed = INT_MAX;
1795 /* start accepting new connects */
1796 pfd[0].events = POLLIN;
1797 pfd[1].events = POLLIN;
/* Filesystem locations used by the daemon: pid file, the directory that
 * holds it, and the path of the listening socket */
1806 #define NSCD_PIDFILE "/var/run/nscd/nscd.pid"
1807 #define NSCD_DIR "/var/run/nscd"
1808 #define NSCD_SOCKET "/var/run/nscd/socket"
/* Second path the daemon listens on; presumably the location expected by
 * older clients - TODO confirm */
1809 #define NSCD_SOCKET_OLD "/var/run/.nscd_socket"
/* NOTE(review): looks like a flag recording that this process created
 * NSCD_PIDFILE - confirm against write_pid() and cleanup_on_signal() */
1811 static smallint wrote_pidfile;
/* Handler installed for SIGINT and SIGTERM: removes the daemon's pid file
 * and both socket paths from the filesystem. sig is the signal number. */
1813 static void cleanup_on_signal(int sig)
/* NOTE(review): presumably the pid file is meant to be removed only when
 * this process created it (see wrote_pidfile) - confirm */
1816 unlink(NSCD_PIDFILE);
1817 unlink(NSCD_SOCKET_OLD);
1818 unlink(NSCD_SOCKET);
/* Store our process id in NSCD_PIDFILE as a decimal number followed by a
 * newline. The file is opened with "w", so any old content is replaced. */
1822 static void write_pid(void)
1824 FILE *pid = fopen(NSCD_PIDFILE, "w");
1827 fprintf(pid, "%d\n", getpid());
1832 /* Open a listening nscd server socket */
/* name: filesystem path the AF_UNIX stream socket is bound to.
 * Failures of socket(), bind(), chmod() and listen() are reported through
 * perror_and_die(). */
1833 static int open_socket(const char *name)
1835 struct sockaddr_un sun;
1836 int sock = socket(AF_UNIX, SOCK_STREAM, 0);
1838 perror_and_die("cannot create unix domain socket");
1840 close_on_exec(sock);
1841 sun.sun_family = AF_UNIX;
/* NOTE(review): unbounded copy into sun_path. main() passes only the short
 * constants NSCD_SOCKET and NSCD_SOCKET_OLD; any other caller would need a
 * length check. */
1842 strcpy(sun.sun_path, name);
1844 if (bind(sock, (struct sockaddr *) &sun, sizeof(sun)) < 0)
1845 perror_and_die("bind(%s)", name);
/* Mode 0666: every local user may connect to the socket */
1846 if (chmod(name, 0666) < 0)
1847 perror_and_die("chmod(%s)", name);
/* Backlog scales with the request limit; "| 1" keeps it nonzero */
1848 if (listen(sock, (max_reqnum/8) | 1) < 0)
1849 perror_and_die("listen");
/* Long options accepted by getopt_long() in main(); val is the equivalent
 * short option character. print_help_and_die() walks this table from the
 * top and stops at the first entry whose val is '?', printing one help[]
 * string per entry, so the order here must match help[]. */
1853 static const struct option longopt[] = {
1854 /* name, has_arg, int *flag, int val */
1855 { "debug" , no_argument , NULL, 'd' },
1856 { "config-file", required_argument, NULL, 'f' },
1857 { "invalidate" , required_argument, NULL, 'i' },
1858 { "shutdown" , no_argument , NULL, 'K' },
1859 { "nthreads" , required_argument, NULL, 't' },
1860 { "version" , no_argument , NULL, 'V' },
1861 { "help" , no_argument , NULL, '?' },
1862 { "usage" , no_argument , NULL, '?' },
1863 /* just exit(0). TODO: "test" connect? */
1864 { "statistic" , no_argument , NULL, 'g' },
1865 { "secure" , no_argument , NULL, 'S' }, /* ? */
/* One-line descriptions printed by print_help_and_die(), which steps
 * through this array in parallel with longopt[] */
1869 static const char *const help[] = {
1870 "Do not daemonize; log to stderr",
1871 "File to read configuration from",
1873 "Shut the server down",
1874 "Serve N requests in parallel",
/* Print the usage banner, then one line per option: short letter, long
 * name (left-justified, padded to 11 columns) and its help[] text. */
1878 static void print_help_and_die(void)
1880 const struct option *opt = longopt;
1881 const char *const *h = help;
1883 puts("Usage: nscd [OPTION...]\n"
1884 "Name Service Cache Daemon\n");
1886 printf("\t" "-%c,--%-11s %s\n", opt->val, opt->name, *h);
/* The "help" entry (val '?') ends the listing; it is not printed itself */
1889 } while (opt->val != '?');
/* Recognize a service name at s, which must be exactly "passwd", "group"
 * or "hosts" (strcmp, so the word has to be NUL-terminated already).
 * The caller passes &srv so the matching service can be reported back.
 * Returns a pointer to the first non-blank character after the name. */
1893 static char *skip_service(int *srv, const char *s)
1895 if (strcmp("passwd", s) == 0) {
1898 } else if (strcmp("group", s) == 0) {
1900 } else if (strcmp("hosts", s) == 0) {
/* s + 6 steps over a 5-letter name plus its terminator.
 * NOTE(review): "passwd" has 6 letters - confirm that branch accounts
 * for the extra character. */
1905 return skip_whitespace(s + 6);
/* Handler for directives that are accepted but deliberately ignored */
static void handle_null(const char *str, int srv)
{
	(void)str;
	(void)srv;
}
1910 static void handle_logfile(const char *str, int srv)
1912 config.logfile = xstrdup(str);
1915 static void handle_debuglvl(const char *str, int srv)
1917 debug |= getnum(str);
/* "threads" / "max-threads" directive: parses the numeric argument.
 * NOTE(review): presumably n feeds the limit on parallel requests
 * (max_reqnum) - confirm. srv is not meaningful for this directive. */
1920 static void handle_threads(const char *str, int srv)
1922 unsigned n = getnum(str);
1927 static void handle_user(const char *str, int srv)
1929 config.user = xstrdup(str);
1932 static void handle_enable(const char *str, int srv)
1934 config.srv_enable[srv] = ((str[0] | 0x20) == 'y');
1937 static void handle_pttl(const char *str, int srv)
1939 config.pttl[srv] = getnum(str);
1942 static void handle_nttl(const char *str, int srv)
1944 config.nttl[srv] = getnum(str);
1947 static void handle_size(const char *str, int srv)
1949 config.size[srv] = getnum(str);
1952 static void handle_chfiles(const char *str, int srv)
1954 config.check_files[srv] = ((str[0] | 0x20) == 'y');
/* Read conffile line by line and hand every recognized directive to its
 * handler.
 *
 * Each table entry's string starts with a one-character tag, followed
 * directly by the directive name: 'S' means the directive takes a service
 * name before its value (resolved with skip_service()); entries tagged '_'
 * take the value only. */
1957 static void parse_conffile(const char *conffile, int warn)
1959 static const struct confword {
1961 void (*handler)(const char *, int);
1963 { "_" "logfile" , handle_logfile },
1964 { "_" "debug-level" , handle_debuglvl },
1965 { "_" "threads" , handle_threads },
1966 { "_" "max-threads" , handle_threads },
1967 { "_" "server-user" , handle_user },
1968 /* ignore: any user can stat */
1969 { "_" "stat-user" , handle_null },
1970 { "_" "paranoia" , handle_null }, /* ? */
1971 /* ignore: design goal is to never crash/hang */
1972 { "_" "reload-count" , handle_null },
1973 { "_" "restart-interval" , handle_null },
1974 { "S" "enable-cache" , handle_enable },
1975 { "S" "positive-time-to-live" , handle_pttl },
1976 { "S" "negative-time-to-live" , handle_nttl },
1977 { "S" "suggested-size" , handle_size },
1978 { "S" "check-files" , handle_chfiles },
1979 { "S" "persistent" , handle_null }, /* ? */
1980 { "S" "shared" , handle_null }, /* ? */
1981 { "S" "auto-propagate" , handle_null }, /* ? */
1986 FILE *file = fopen(conffile, "r");
/* Pointer comparison: true only when a path other than the built-in
 * default was supplied */
1990 if (conffile != default_conffile)
1991 perror_and_die("cannot open %s", conffile);
1995 while (fgets(buf, sizeof(buf), file) != NULL) {
1996 const struct confword *word;
1998 int len = strlen(buf);
/* NOTE(review): buf[len-1] assumes len > 0 - confirm an empty read
 * cannot reach this point */
2002 if (buf[len-1] != '\n') {
2003 if (len >= sizeof(buf) - 1)
2004 error_and_die("%s:%d: line is too long", conffile, lineno);
2005 len++; /* last line, not terminated by '\n' */
2009 p = strchr(buf, '#');
2013 p = skip_whitespace(buf);
/* Cut the line after its first word, the directive name */
2016 *skip_non_whitespace(p) = '\0';
/* word->str + 1 skips the tag character of the table entry */
2019 if (strcmp(word->str + 1, p) == 0) {
/* Step over the terminator written above to reach the rest of the line.
 * NOTE(review): assumes text follows the directive name - confirm what
 * happens when the name is the last thing on the line. */
2021 p = skip_whitespace(p + strlen(p) + 1);
2022 *skip_non_whitespace(p) = '\0';
2023 if (word->str[0] == 'S') {
2024 char *p2 = skip_service(&srv, p);
2027 error("%s:%d: ignoring unknown service name '%s'", conffile, lineno, p);
2031 *skip_non_whitespace(p) = '\0';
2033 word->handler(p, srv);
2039 error("%s:%d: ignoring unknown directive '%s'", conffile, lineno, p);
2048 /* "XX,XX[,XX]..." -> gid_t[] */
/* Parses comma-separated hexadecimal numbers from str into a newly
 * allocated array. main() treats element 0 as the uid and passes &ug[1]
 * together with the value stored in *sizep to setgroups(). */
2049 static gid_t* env_U_to_uid_and_gids(const char *str, int *sizep)
2060 ug = xmalloc(ng * sizeof(ug[0]));
/* strtoul() advances sp to the first character it did not consume */
2068 *gp++ = strtoul(sp, (char**)&sp, 16);
/* Each number must be followed by ',' or by the end of the string.
 * NOTE(review): testing errno here presumes it was cleared before the
 * conversion - confirm. */
2069 if (errno || (*sp != ',' && *sp != '\0'))
2070 error_and_die("internal error");
/* Look up user's uid and group list and encode them as comma-separated
 * hexadecimal numbers in a newly allocated string. main() passes the
 * result to putenv(). An unknown user, or a group list that cannot be
 * obtained, is reported through perror_and_die(). */
2081 static char* user_to_env_U(const char *user)
2088 pw = getpwnam(user);
2090 perror_and_die("user '%s' is not known", user);
2093 /* 0th cell will be used for uid */
2094 ug = xmalloc((1 + ng) * sizeof(ug[0]));
/* A negative result means the array was too small; getgrouplist() has then
 * stored the required count in ng, so grow the array and retry once */
2095 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0) {
2096 ug = xrealloc(ug, (1 + ng) * sizeof(ug[0]));
2097 if (getgrouplist(user, pw->pw_gid, &ug[1], &ng) < 0)
2098 perror_and_die("can't get groups of user '%s'", user);
2103 /* How much do we need for "-Uxx,xx[,xx]..." string? */
/* Budget per number: two hex digits for each byte of an unsigned long, plus
 * two spare characters (one is needed for the ','); 3 more for the rest */
2104 ug_str = xmalloc((sizeof(unsigned long)+1)*2 * ng + 3);
2110 sp += sprintf(sp, "%lx,", (unsigned long)(*gp++));
2119 /* not static - don't inline me, compiler! */
2120 void readlink_self_exe(void)
2122 char buf[PATH_MAX + 1];
2123 ssize_t sz = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
2125 perror_and_die("readlink %s failed", "/proc/self/exe");
2127 self_exe_points_to = xstrdup(buf);
/* Client mode: connect to a running daemon on NSCD_SOCKET and send one
 * control request. With arg == NULL this is SHUTDOWN; otherwise it is
 * INVALIDATE with arg, including its terminating NUL, as the key.
 * Declared NORETURN; both branches end in error_and_die(). */
2131 static void special_op(const char *arg) NORETURN;
2132 static void special_op(const char *arg)
/* Ready-made SHUTDOWN request: header only, key length 0 */
2134 static const user_req_header ureq = { NSCD_VERSION, SHUTDOWN, 0 };
2136 struct sockaddr_un addr;
2139 sock = socket(PF_UNIX, SOCK_STREAM, 0);
2141 error_and_die("cannot create AF_UNIX socket");
2143 addr.sun_family = AF_UNIX;
2144 strcpy(addr.sun_path, NSCD_SOCKET);
2145 if (connect(sock, (struct sockaddr *) &addr, sizeof(addr)) < 0)
2146 error_and_die("cannot connect to %s", NSCD_SOCKET);
2148 if (!arg) { /* shutdown */
2149 xfull_write(sock, &ureq, sizeof(ureq));
2151 error_and_die("sent shutdown request, exiting");
2152 } else { /* invalidate */
/* The key length counts the terminating NUL */
2153 size_t arg_len = strlen(arg) + 1;
2155 user_req_header req;
2158 reqdata.req.version = NSCD_VERSION;
2159 reqdata.req.type = INVALIDATE;
2160 reqdata.req.key_len = arg_len;
2161 memcpy(reqdata.arg, arg, arg_len);
/* sizeof(ureq) is the header size: ureq has the same type as reqdata.req */
2162 xfull_write(sock, &reqdata, arg_len + sizeof(ureq));
2164 error_and_die("sent invalidate(%s) request, exiting", arg);
2169 /* This internal glibc function is called to disable trying to contact nscd.
2170 * We _are_ nscd, so we need to do the lookups, and not recurse. */
2171 void __nss_disable_nscd(void);
2173 int main(int argc, char **argv)
2177 const char *conffile;
2179 /* make sure we don't get recursive calls */
2180 __nss_disable_nscd();
2182 if (argv[0][0] == 'w') /* "worker_nscd" */
2188 /* For idiotic kernels which disallow "exec /proc/self/exe" */
2189 readlink_self_exe();
2191 conffile = default_conffile;
2192 while ((n = getopt_long(argc, argv, "df:i:KVgt:", longopt, NULL)) != -1) {
2202 special_op(optarg); /* exits */
2204 /* shutdown server */
2205 special_op(NULL); /* exits */
2207 puts("unscd - nscd which does not hang, v."PROGRAM_VERSION);
2213 max_reqnum = getnum(optarg);
2219 print_help_and_die();
2223 env_U = getenv("U");
2224 /* Avoid duplicate warnings if $U exists */
2225 parse_conffile(conffile, /* warn? */ (env_U == NULL));
2227 /* I have a user report of (broken?) ldap nss library
2228 * opening and never closing a socket to a ldap server,
2229 * even across fork() and exec(). This messes up
2230 * worker child's operations for the reporter.
2232 * This strengthens my belief that nscd _must not_ trust
2233 * nss libs to be written correctly.
2235 * Here, we need to jump through the hoops to guard against
2236 * such problems. If config file has server-user setting, we need
2237 * to setgroups + setuid. For that, we need to get uid and gid vector.
2238 * And that means possibly using buggy nss libs.
2239 * We will do it here, but then we will re-exec, passing uid+gids
2240 * in an environment variable.
2242 if (!env_U && config.user) {
2243 /* user_to_env_U() does getpwnam and getgrouplist */
2244 if (putenv(user_to_env_U(config.user)))
2245 error_and_die("out of memory");
2246 /* fds leaked by nss will be closed by execed copy */
2247 execv("/proc/self/exe", argv);
2248 xexecve(self_exe_points_to, argv, environ);
2251 /* Allocate dynamically sized stuff */
2252 max_reqnum += 2; /* account for 2 first "fake" clients */
2253 if (max_reqnum < 8) max_reqnum = 8; /* sanitize */
2254 if (max_reqnum > 0xffff) max_reqnum = 0xffff;
2255 log(L_DEBUG, "will handle %u requests in parallel", max_reqnum - 2);
2256 client_buf = xzalloc(max_reqnum * sizeof(client_buf[0]));
2257 busy_cbuf = xzalloc(max_reqnum * sizeof(busy_cbuf[0]));
2258 pfd = xzalloc(max_reqnum * sizeof(pfd[0]));
2259 cinfo = xzalloc(max_reqnum * sizeof(cinfo[0]));
2261 cache_size = (config.size[0] + config.size[1] + config.size[2]) / 8;
2262 if (cache_size < 64) cache_size = 64; /* 8*64 = 512 entries min */
2263 if (cache_size > 0xffff) cache_size = 0xffff; /* 8*64k entries max */
2264 cache_size |= 1; /* force it to be odd */
2265 log(L_DEBUG, "cache size %u x 8 entries", cache_size);
2266 cache = xzalloc(cache_size * sizeof(cache[0]));
2268 /* Make sure stdio is not closed */
2269 n = xopen3("/dev/null", O_RDWR, 0);
2272 /* Close unexpected open file descriptors */
2273 n |= 0xff; /* start from at least fd# 255 */
2278 /* Register cleanup hooks */
2279 signal(SIGINT, cleanup_on_signal);
2280 signal(SIGTERM, cleanup_on_signal);
2281 /* Don't die if a client closes a socket on us */
2282 signal(SIGPIPE, SIG_IGN);
2283 /* Avoid creating zombies */
2284 signal(SIGCHLD, SIG_IGN);
2286 /* Ensure workers don't have SIGALRM ignored */
2287 signal(SIGALRM, SIG_DFL);
2290 mkdir(NSCD_DIR, 0777);
2291 pfd[0].fd = open_socket(NSCD_SOCKET);
2292 pfd[1].fd = open_socket(NSCD_SOCKET_OLD);
2293 pfd[0].events = POLLIN;
2294 pfd[1].events = POLLIN;
2296 if (debug & D_DAEMON) {
2297 daemon(/*nochdir*/ 1, /*noclose*/ 0);
2298 if (config.logfile) {
2299 /* nochdir=1: relative paths still work as expected */
2300 xmovefd(xopen3(config.logfile, O_WRONLY|O_CREAT|O_TRUNC, 0666), 2);
2303 debug = 0; /* why bother? it's /dev/null'ed anyway */
2305 chdir("/"); /* compat */
2308 /* ignore job control signals */
2309 signal(SIGTTOU, SIG_IGN);
2310 signal(SIGTTIN, SIG_IGN);
2311 signal(SIGTSTP, SIG_IGN);
2316 gid_t *ug = env_U_to_uid_and_gids(env_U, &size);
2317 if (setgroups(size, &ug[1]))
2318 perror_and_die("cannot set groups for user '%s'", config.user);
2320 perror_and_die("cannot set uid to %u", (unsigned)(ug[0]));
2324 log(L_ALL, "nscd v" PROGRAM_VERSION ", debug level %x", debug & L_ALL);
2325 log(L_DEBUG, "passwd cache: %d pttl %u nttl %u",
2326 config.srv_enable[SRV_PASSWD],
2327 config.pttl[SRV_PASSWD],
2328 config.nttl[SRV_PASSWD]);
2329 log(L_DEBUG, " group cache: %d pttl %u nttl %u",
2330 config.srv_enable[SRV_GROUP ],
2331 config.pttl[SRV_GROUP],
2332 config.nttl[SRV_GROUP]);
2333 log(L_DEBUG, " hosts cache: %d pttl %u nttl %u",
2334 config.srv_enable[SRV_HOSTS ],
2335 config.pttl[SRV_HOSTS],
2336 config.nttl[SRV_HOSTS]);
2338 for (n = 0; n < 3; n++) {
2339 config.pttl[n] *= 1000;
2340 config.nttl[n] *= 1000;