1#define _GNU_SOURCE
2#include <stdlib.h>
3#include <stdio.h>
4#include <string.h>
5#include <unistd.h>
6#include <stdnoreturn.h>
7#include <sys/types.h>
8#include <sys/stat.h>
9#include <sys/xattr.h>
10#include <fcntl.h>
11#include <dirent.h>
12#include <errno.h>
13#include <linux/capability.h>
14#include <sys/prctl.h>
15#include <limits.h>
16#include <stdint.h>
17#include <syscall.h>
18#include <byteswap.h>
19
20// imported from glibc
21#include "unsecvars.h"
22
23#ifndef SOURCE_PROG
24#error SOURCE_PROG should be defined via preprocessor commandline
25#endif
26
// Aborts the process when `expr` is false, printing the failed expression.
// Unlike <assert.h>'s assert(), this check is not disabled by NDEBUG.
#define ASSERT(expr) ((expr) ? (void) 0 : assert_failure(#expr))

// The process environment, as maintained by libc.
extern char **environ;

// Name of the environment variable whose presence enables debug output.
static const char *const wrapper_debug = "WRAPPER_DEBUG";

// Capability number of CAP_SETPCAP. <linux/capability.h> normally provides
// it already; only define it here when it is missing.
#ifndef CAP_SETPCAP
#define CAP_SETPCAP 8
#endif

// LE32_TO_H converts a 32-bit little-endian value to host byte order.
//
// An undefined macro evaluates to 0 inside #if, so without this guard a
// missing byte-order header would silently select the byte-swapping branch
// on every platform.
#if !defined(__BYTE_ORDER) || !defined(__BIG_ENDIAN)
#error "__BYTE_ORDER and __BIG_ENDIAN must be defined (e.g. via <endian.h>) before this point"
#endif

#if __BYTE_ORDER == __BIG_ENDIAN
#define LE32_TO_H(x) bswap_32(x)
#else
#define LE32_TO_H(x) (x)
#endif
42
// Reports a failed ASSERT on stderr and terminates the process.
//
// `assertion` is the text of the expression that evaluated to false.
// This function never returns: it ends the process via abort().
static noreturn void assert_failure(const char *assertion) {
    FILE *const sink = stderr;

    fprintf(sink, "Assertion `%s` in NixOS's wrapper.c failed.\n", assertion);
    // Push the message out before abort() tears the process down.
    fflush(sink);

    abort();
}
48
// Reads the number stored in /proc/sys/kernel/cap_last_cap.
//
// On success, stores the value in `*last_cap` and returns 0.
// On failure, prints a diagnostic to stderr, leaves `*last_cap` untouched
// and returns a negative errno-style code (never 0).
int get_last_cap(unsigned *last_cap) {
    static const char path[] = "/proc/sys/kernel/cap_last_cap";

    FILE *file = fopen(path, "r");
    if (file == NULL) {
        int saved_errno = errno;
        fprintf(stderr, "failed to open %s: %s\n", path, strerror(saved_errno));
        // Never report success for a failed open, even if errno was not set.
        return saved_errno != 0 ? -saved_errno : -EIO;
    }

    unsigned value = 0;
    errno = 0;
    int res = fscanf(file, "%u", &value);
    // Capture errno before fclose() has a chance to overwrite it.
    int saved_errno = errno;
    fclose(file);

    // fscanf returns the number of converted items: EOF on a read error or
    // empty file, 0 when the contents are not a number. Only 1 is success.
    if (res != 1) {
        if (saved_errno == 0) {
            // Empty or malformed contents do not set errno.
            saved_errno = EINVAL;
        }
        fprintf(stderr, "could not read number from %s: %s\n", path, strerror(saved_errno));
        return -saved_errno;
    }

    *last_cap = value;
    return 0;
}
65
66// Given the path to this program, fetch its configured capability set
67// (as set by `setcap ... /path/to/file`) and raise those capabilities
68// into the Ambient set.
69static int make_caps_ambient(const char *self_path) {
70 struct vfs_ns_cap_data data = {};
71 int r = getxattr(self_path, "security.capability", &data, sizeof(data));
72
73 if (r < 0) {
74 if (errno == ENODATA) {
75 // no capabilities set
76 return 0;
77 }
78 fprintf(stderr, "cannot get capabilities for %s: %s", self_path, strerror(errno));
79 return 1;
80 }
81
82 size_t size;
83 uint32_t version = LE32_TO_H(data.magic_etc) & VFS_CAP_REVISION_MASK;
84 switch (version) {
85 case VFS_CAP_REVISION_1:
86 size = VFS_CAP_U32_1;
87 break;
88 case VFS_CAP_REVISION_2:
89 case VFS_CAP_REVISION_3:
90 size = VFS_CAP_U32_3;
91 break;
92 default:
93 fprintf(stderr, "BUG! Unsupported capability version 0x%x on %s. Report to NixOS bugtracker\n", version, self_path);
94 return 1;
95 }
96
97 const struct __user_cap_header_struct header = {
98 .version = _LINUX_CAPABILITY_VERSION_3,
99 .pid = getpid(),
100 };
101 struct __user_cap_data_struct user_data[2] = {};
102
103 for (size_t i = 0; i < size; i++) {
104 // merge inheritable & permitted into one
105 user_data[i].permitted = user_data[i].inheritable =
106 LE32_TO_H(data.data[i].inheritable) | LE32_TO_H(data.data[i].permitted);
107 }
108
109 if (syscall(SYS_capset, &header, &user_data) < 0) {
110 fprintf(stderr, "failed to inherit capabilities: %s", strerror(errno));
111 return 1;
112 }
113 unsigned last_cap;
114 r = get_last_cap(&last_cap);
115 if (r < 0) {
116 return 1;
117 }
118 uint64_t set = user_data[0].permitted | (uint64_t)user_data[1].permitted << 32;
119 for (unsigned cap = 0; cap < last_cap; cap++) {
120 if (!(set & (1ULL << cap))) {
121 continue;
122 }
123
124 // Check for the cap_setpcap capability, we set this on the
125 // wrapper so it can elevate the capabilities to the Ambient
126 // set but we do not want to propagate it down into the
127 // wrapped program.
128 //
129 // TODO: what happens if that's the behavior you want
130 // though???? I'm preferring a strict vs. loose policy here.
131 if (cap == CAP_SETPCAP) {
132 if(getenv(wrapper_debug)) {
133 fprintf(stderr, "cap_setpcap in set, skipping it\n");
134 }
135 continue;
136 }
137 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, (unsigned long) cap, 0, 0)) {
138 fprintf(stderr, "cannot raise the capability %d into the ambient set: %s\n", cap, strerror(errno));
139 return 1;
140 }
141 if (getenv(wrapper_debug)) {
142 fprintf(stderr, "raised %d into the ambient capability set\n", cap);
143 }
144 }
145
146 return 0;
147}
148
149// These are environment variable aliases for glibc tunables.
150// This list shouldn't grow further, since this is a legacy mechanism.
151// Any future tunables are expected to only be accessible through GLIBC_TUNABLES.
152//
153// They are not included in the glibc-provided UNSECURE_ENVVARS list,
154// since any SUID executable ignores them. This wrapper also serves
155// executables that are merely granted ambient capabilities, rather than
// being SUID, and hence don't run in secure mode. We'd like to defend
// those in depth as well, so we clear these explicitly.
158//
159// Except for MALLOC_CHECK_ (which is marked SXID_ERASE), these are all
160// marked SXID_IGNORE (ignored in secure mode), so even the glibc version
161// of this wrapper would leave them intact.
//
// Format: a sequence of variable names, each followed by a NUL byte, so
// that further NUL-separated names can be appended by string-literal
// concatenation.
#define UNSECURE_ENVVARS_TUNABLES          \
    "MALLOC_CHECK_"          "\0"          \
    "MALLOC_TOP_PAD_"        "\0"          \
    "MALLOC_PERTURB_"        "\0"          \
    "MALLOC_MMAP_THRESHOLD_" "\0"          \
    "MALLOC_TRIM_THRESHOLD_" "\0"          \
    "MALLOC_MMAP_MAX_"       "\0"          \
    "MALLOC_ARENA_MAX"       "\0"          \
    "MALLOC_ARENA_TEST"      "\0"
171
172int main(int argc, char **argv) {
173 ASSERT(argc >= 1);
174
175 // argv[0] goes into a lot of places, to a far greater degree than other elements
176 // of argv. glibc has had buffer overflows relating to argv[0], eg CVE-2023-6246.
177 // Since we expect the wrappers to be invoked from either $PATH or /run/wrappers/bin,
178 // there should be no reason to pass any particularly large values here, so we can
179 // be strict for strictness' sake.
180 ASSERT(strlen(argv[0]) < 512);
181
182 int debug = getenv(wrapper_debug) != NULL;
183
184 // Drop insecure environment variables explicitly
185 //
186 // glibc does this automatically in SUID binaries, but we'd like to cover this:
187 //
188 // a) before it gets to glibc
189 // b) in binaries that are only granted ambient capabilities by the wrapper,
190 // but don't run with an altered effective UID/GID, nor directly gain
191 // capabilities themselves, and thus don't run in secure mode.
192 //
193 // We're using musl, which doesn't drop environment variables in secure mode,
194 // and we'd also like glibc-specific variables to be covered.
195 //
196 // If we don't explicitly unset them, it's quite easy to just set LD_PRELOAD,
197 // have it passed through to the wrapped program, and gain privileges.
198 for (char *unsec = UNSECURE_ENVVARS_TUNABLES UNSECURE_ENVVARS; *unsec; unsec = strchr(unsec, 0) + 1) {
199 if (debug) {
200 fprintf(stderr, "unsetting %s\n", unsec);
201 }
202 unsetenv(unsec);
203 }
204
205 // Read the capabilities set on the wrapper and raise them in to
206 // the ambient set so the program we're wrapping receives the
207 // capabilities too!
208 if (make_caps_ambient("/proc/self/exe") != 0) {
209 return 1;
210 }
211
212 execve(SOURCE_PROG, argv, environ);
213
214 fprintf(stderr, "%s: cannot run `%s': %s\n",
215 argv[0], SOURCE_PROG, strerror(errno));
216
217 return 1;
218}