1{ config, pkgs, lib, ... }:
2
3let
4 toplevelConfig = config;
5 inherit (lib) types;
6 inherit (import ../system/boot/systemd-lib.nix {
7 inherit config pkgs lib;
8 }) mkPathSafeName;
9in {
10 options.systemd.services = lib.mkOption {
11 type = types.attrsOf (types.submodule ({ name, config, ... }: {
options.confinement.enable = lib.mkOption {
  # Opt-in switch for confining this particular service; off unless the
  # service definition sets it explicitly.
  description = ''
    If set, all the required runtime store paths for this service are
    bind-mounted into a <literal>tmpfs</literal>-based <citerefentry>
    <refentrytitle>chroot</refentrytitle>
    <manvolnum>2</manvolnum>
    </citerefentry>.
  '';
  default = false;
  type = types.bool;
};
23
options.confinement.fullUnit = lib.mkOption {
  # Selects between the closure of the whole unit and the closure of just
  # the Exec* entries when the chroot contents are computed.
  description = ''
    Whether to include the full closure of the systemd unit file into the
    chroot, instead of just the dependencies for the executables.

    <warning><para>While it may be tempting to just enable this option to
    make things work quickly, please be aware that this might add paths
    to the closure of the chroot that you didn't anticipate. It's better
    to use <option>confinement.packages</option> to <emphasis
    role="strong">explicitly</emphasis> add additional store paths to the
    chroot.</para></warning>
  '';
  default = false;
  type = types.bool;
};
39
options.confinement.packages = lib.mkOption {
  # Accepts derivations as well as plain strings; strings are expected to
  # carry the store paths they refer to in their string context.
  type = with types; listOf (either str package);
  default = [];
  description = let
    # Render a serviceConfig sub-option name as a DocBook <option> element.
    scOption = optName: "<option>serviceConfig.${optName}</option>";

    # Every Exec* option except ExecStart, which is appended separately so
    # that the sentence ends in "... and ExecStart".
    leadingOpts = lib.concatStringsSep ", " (map scOption [
      "ExecReload" "ExecStartPost" "ExecStartPre" "ExecStop" "ExecStopPost"
    ]);
  in ''
    Additional packages or strings with context to add to the closure of
    the chroot. By default, this includes all the packages from the
    ${leadingOpts} and ${scOption "ExecStart"} options. If you want to have all the
    dependencies of this systemd unit, you can use
    <option>confinement.fullUnit</option>.

    <note><para>The store paths listed in <option>path</option> are
    <emphasis role="strong">not</emphasis> included in the closure as
    well as paths from other options except those listed
    above.</para></note>
  '';
};
61
options.confinement.binSh = lib.mkOption {
  # A null value disables /bin/sh inside the chroot altogether.
  type = types.nullOr types.path;
  default = toplevelConfig.environment.binsh;
  # Wrapped in literalExample so the manual renders the default as a Nix
  # expression instead of as a quoted string literal.
  defaultText = lib.literalExample "config.environment.binsh";
  # The example has to be a complete Nix expression, hence the surrounding
  # double quotes; the antiquotation is escaped so it is shown verbatim
  # rather than being evaluated while the documentation is built.
  example = lib.literalExample ''"''${pkgs.dash}/bin/dash"'';
  description = ''
    The program to make available as <filename>/bin/sh</filename> inside
    the chroot. If this is set to <literal>null</literal>, no
    <filename>/bin/sh</filename> is provided at all.

    This is useful for some applications, which for example use the
    <citerefentry>
    <refentrytitle>system</refentrytitle>
    <manvolnum>3</manvolnum>
    </citerefentry> library function to execute commands.
  '';
};
79
options.confinement.mode = lib.mkOption {
  # Only these two modes exist; the first one is the default.
  description = ''
    The value <literal>full-apivfs</literal> (the default) sets up
    private <filename class="directory">/dev</filename>, <filename
    class="directory">/proc</filename>, <filename
    class="directory">/sys</filename> and <filename
    class="directory">/tmp</filename> file systems in a separate user
    name space.

    If this is set to <literal>chroot-only</literal>, only the file
    system name space is set up along with the call to <citerefentry>
    <refentrytitle>chroot</refentrytitle>
    <manvolnum>2</manvolnum>
    </citerefentry>.

    <note><para>This doesn't cover network namespaces and is solely for
    file system level isolation.</para></note>
  '';
  default = "full-apivfs";
  type = types.enum [ "full-apivfs" "chroot-only" ];
};
101
config = let
  cfg = config.confinement;

  # Shared value for all API-VFS/namespace related flags below: enabled in
  # "full-apivfs" mode, disabled otherwise. It is wrapped in mkDefault so a
  # service can still override individual flags.
  apivfsDefault = lib.mkDefault (cfg.mode == "full-apivfs");

  # serviceConfig options whose values are collected for the chroot closure
  # when fullUnit is off.
  execOpts = [
    "ExecReload" "ExecStart" "ExecStartPost" "ExecStartPre" "ExecStop"
    "ExecStopPost"
  ];

  # Values of all Exec* options that are actually set. Each one may be a
  # single value or a (possibly nested) list, so everything is flattened
  # into one list; unset options contribute nothing.
  execPkgs = lib.concatMap
    (opt: lib.flatten (config.serviceConfig.${opt} or []))
    execOpts;

  # JSON dump of the complete generated unit, used as a single entry when
  # fullUnit is on (presumably so its string context pulls in every store
  # path the unit references).
  fullUnitPkgs = [
    (builtins.toJSON toplevelConfig.systemd.units."${name}.service")
  ];

  # The shell is only added if one is configured at all.
  shPkgs = if cfg.binSh == null then [] else [ cfg.binSh ];
in lib.mkIf cfg.enable {
  serviceConfig = {
    # An empty directory in the store serves as the root of the chroot.
    RootDirectory =
      pkgs.runCommand "${mkPathSafeName name}-chroot" {} ''mkdir "$out"'';
    TemporaryFileSystem = "/";
    PrivateMounts = lib.mkDefault true;
  } // lib.genAttrs [
    # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt
    # to change some of these to default to true.
    #
    # If we run in chroot-only mode, having something like PrivateDevices
    # set to true by default will mount /dev within the chroot, whereas
    # with "chroot-only" it's expected that there are no /dev, /proc and
    # /sys file systems available.
    #
    # However, if this suddenly becomes true, the attack surface will
    # increase, so let's explicitly set these options to true/false
    # depending on the mode.
    "MountAPIVFS"
    "PrivateDevices"
    "PrivateTmp"
    "PrivateUsers"
    "ProtectControlGroups"
    "ProtectKernelModules"
    "ProtectKernelTunables"
  ] (_: apivfsDefault);

  confinement.packages =
    (if cfg.fullUnit then fullUnitPkgs else execPkgs) ++ shPkgs;
};
144 }));
145 };
146
config.assertions = let
  # Only services that opted into confinement are checked.
  confined = lib.filterAttrs
    (_: svc: svc.confinement.enable)
    config.systemd.services;

  # Build the list of assertions for one confined service. Each assertion
  # rejects a serviceConfig setting that cannot be combined with
  # confinement.enable.
  mkAssertions = name: cfg: let
    sc = cfg.serviceConfig;

    # Common first half of every message, naming the offending option and
    # the service it was set on.
    prefix = optName:
      "The 'serviceConfig' option '${optName}' for service '${name}'"
      + " is enabled in conjunction with 'confinement.enable'";
  in [
    { assertion = !(sc.RootDirectoryStartOnly or false);
      message = prefix "RootDirectoryStartOnly"
        + ", but right now systemd doesn't support restricting"
        + " bind-mounts to 'ExecStart'. Please either define a separate"
        + " service or find a way to run commands other than ExecStart"
        + " within the chroot.";
    }
    { assertion = !(sc.DynamicUser or false);
      message = prefix "DynamicUser"
        + ". Please create a dedicated user via the 'users.users' option"
        + " instead as this combination is currently not supported.";
    }
    { # Passes when ProtectSystem is unset or set to exactly `false`.
      assertion = (sc.ProtectSystem or false) == false;
      message = prefix "ProtectSystem"
        + ". ProtectSystem is not compatible with service confinement as"
        + " it fails to remount /usr within our chroot. Please disable"
        + " the option.";
    }
  ];
in lib.concatLists (lib.mapAttrsToList mkAssertions confined);
169
# For every confined service, add a generated package to systemd.packages
# that ships a unit file fragment containing one BindReadOnlyPaths= entry per
# store path the service needs inside its chroot.
config.systemd.packages = lib.concatLists (lib.mapAttrsToList (name: cfg: let
  # Text file listing all confinement.packages entries, one per line. It is
  # the root from which the closure below is computed.
  rootPaths = let
    contents = lib.concatStringsSep "\n" cfg.confinement.packages;
  in pkgs.writeText "${mkPathSafeName name}-string-contexts.txt" contents;

  chrootPaths = pkgs.runCommand "${mkPathSafeName name}-chroot-paths" {
    closureInfo = pkgs.closureInfo { inherit rootPaths; };
    serviceName = "${name}.service";
    # The listing file itself is part of the closure but is of no use inside
    # the chroot, so the loop below skips it.
    excludedPath = rootPaths;
  } ''
    mkdir -p "$out/lib/systemd/system"
    serviceFile="$out/lib/systemd/system/$serviceName"

    echo '[Service]' > "$serviceFile"

    # /bin/sh is special here, because the option value could contain a
    # symlink and we need to properly resolve it.
    ${lib.optionalString (cfg.confinement.binSh != null) ''
      binsh=${lib.escapeShellArg cfg.confinement.binSh}
      realprog="$(readlink -e "$binsh")"
      echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
    ''}

    # Read every line verbatim: -r disables backslash processing and the
    # empty IFS keeps leading and trailing whitespace intact.
    while IFS= read -r storePath; do
      if [ -L "$storePath" ]; then
        # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
        # so let's just bind-mount the target to that location.
        echo "BindReadOnlyPaths=$(readlink -e "$storePath"):$storePath"
      elif [ "$storePath" != "$excludedPath" ]; then
        echo "BindReadOnlyPaths=$storePath"
      fi
    done < "$closureInfo/store-paths" >> "$serviceFile"
  '';
in lib.optional cfg.confinement.enable chrootPaths) config.systemd.services);
204}