1{ config, lib, pkgs, ... }:
2
3with lib;
4
5let
6
# The `nixos-container` command-line tool: ./nixos-container.pl run
# through substituteAll and installed as an executable under bin/,
# with a bash completion file added next to it.
nixos-container =
  let
    # Directory put on perl's include path via -I.
    fileSlurpLib = "${pkgs.perlPackages.FileSlurp}/lib/perl5/site_perl";
  in pkgs.substituteAll {
    name = "nixos-container";
    src = ./nixos-container.pl;
    dir = "bin";
    isExecutable = true;

    # Values made available to the script template (presumably as
    # @perl@, @su@ and @utillinux@ placeholders -- confirm against
    # nixos-container.pl).
    perl = "${pkgs.perl}/bin/perl -I${fileSlurpLib}";
    su = "${pkgs.shadow.su}/bin/su";
    utillinux = pkgs.utillinux;

    # Ship the bash completion script alongside the tool.
    postInstall = ''
      t=$out/etc/bash_completion.d
      mkdir -p $t
      cp ${./nixos-container-completion.sh} $t/nixos-container
    '';
  };
22
# The container's init script, a small wrapper around the regular
# NixOS stage-2 init script.  It is started by systemd-nspawn with
# the path of the real init script as its first argument, and reads
# PRIVATE_NETWORK, HOST_ADDRESS and LOCAL_ADDRESS from its
# environment.
containerInit = pkgs.writeScript "container-init"
  ''
    #! ${pkgs.stdenv.shell} -e

    # Initialise the container side of the veth pair.
    if [ "$PRIVATE_NETWORK" = 1 ]; then
      ip link set host0 name eth0
      ip link set dev eth0 up
      if [ -n "$HOST_ADDRESS" ]; then
        ip route add "$HOST_ADDRESS" dev eth0
        ip route add default via "$HOST_ADDRESS"
      fi
      if [ -n "$LOCAL_ADDRESS" ]; then
        ip addr add "$LOCAL_ADDRESS" dev eth0
      fi
    fi

    # Start the regular stage 2 init script, passing the bind-mounted
    # notification socket from the host to allow the container
    # systemd to signal readiness to the host systemd.
    NOTIFY_SOCKET=/var/lib/private/host-notify exec "$1"
  '';
47
# Platform of the host configuration; handed to eval-config.nix when a
# container's `config` is evaluated.
inherit (config.nixpkgs) system;
49
# Submodule describing a single bind mount of a container.  The
# attribute name under which the mount is declared becomes the
# default mount point.
bindMountOpts = { name, config, ... }: {

  options.mountPoint = mkOption {
    type = types.str;
    example = "/mnt/usb";
    description = "Mount point on the container file system.";
  };

  # When left at null, only the mount point is passed to the bind flag.
  options.hostPath = mkOption {
    type = types.nullOr types.str;
    default = null;
    example = "/home/alice";
    description = "Location of the host path to be mounted.";
  };

  options.isReadOnly = mkOption {
    type = types.bool;
    default = true;
    example = true;
    description = "Determine whether the mounted path will be accessed in read-only mode.";
  };

  config.mountPoint = mkDefault name;

};
77
# Render one bind-mount declaration `d` (see bindMountOpts) as a single
# command-line flag with a leading space: " --bind-ro=..." for
# read-only mounts, " --bind=..." otherwise.  The argument is
# "hostPath:mountPoint", or just the mount point when hostPath is null.
mkBindFlag = d:
  let
    option = if d.isReadOnly then "--bind-ro" else "--bind";
    target =
      if d.hostPath == null
      then "${d.mountPoint}"
      else "${d.hostPath}:${d.mountPoint}";
  in " ${option}=${target}";
82
# Concatenate the flags for every bind mount in the attribute set `bs`
# into one string (each flag already carries its own leading space).
mkBindFlags = bs: concatStrings (map mkBindFlag (lib.attrValues bs));
84
85in
86
87{
88 options = {
89
# Set to true (with this module's generated extraConfig) for systems
# that are evaluated as containers of another NixOS system.
boot.isContainer = mkOption {
  default = false;
  type = types.bool;
  description = ''
    Whether this NixOS machine is a lightweight container running
    in another NixOS system.
  '';
};
98
# Gates the whole `config` section of this module.  Defaults to on,
# except when this system is itself a container.
boot.enableContainers = mkOption {
  default = !(config.boot.isContainer);
  type = types.bool;
  description = ''
    Whether to enable support for nixos containers.
  '';
};
106
# Declarative containers: an attribute set mapping a container name to
# its settings.  Each entry is a submodule; when its `config` option is
# defined, `path` is derived from it by evaluating a complete NixOS
# configuration.
containers = mkOption {
  type = types.attrsOf (types.submodule (
    { config, options, name, ... }:
    {
      options = {

        config = mkOption {
          description = ''
            A specification of the desired configuration of this
            container, as a NixOS module.
          '';
        };

        path = mkOption {
          type = types.path;
          example = "/nix/var/nix/profiles/containers/webserver";
          description = ''
            As an alternative to specifying
            <option>config</option>, you can specify the path to
            the evaluated NixOS system configuration, typically a
            symlink to a system profile.
          '';
        };

        privateNetwork = mkOption {
          type = types.bool;
          default = false;
          description = ''
            Whether to give the container its own private virtual
            Ethernet interface.  The interface is called
            <literal>eth0</literal>, and is hooked up to the interface
            <literal>ve-<replaceable>container-name</replaceable></literal>
            on the host.  If this option is not set, then the
            container shares the network interfaces of the host,
            and can bind to any port on any interface.
          '';
        };

        hostAddress = mkOption {
          type = types.nullOr types.str;
          default = null;
          example = "10.231.136.1";
          description = ''
            The IPv4 address assigned to the host interface.
          '';
        };

        localAddress = mkOption {
          type = types.nullOr types.str;
          default = null;
          example = "10.231.136.2";
          description = ''
            The IPv4 address assigned to <literal>eth0</literal>
            in the container.
          '';
        };

        interfaces = mkOption {
          # types.str, consistent with the other string options in
          # this module (types.string is the legacy spelling).
          type = types.listOf types.str;
          default = [];
          example = [ "eth1" "eth2" ];
          description = ''
            The list of interfaces to be moved into the container.
          '';
        };

        autoStart = mkOption {
          type = types.bool;
          default = false;
          description = ''
            Whether the container is automatically started at boot-time.
          '';
        };

        bindMounts = mkOption {
          type = types.loaOf types.optionSet;
          options = [ bindMountOpts ];
          default = {};
          example = { "/home" = { hostPath = "/home/alice";
                                  isReadOnly = false; };
                    };

          description =
            ''
              An extra list of directories that is bound to the container.
            '';
        };

      };

      config = mkMerge
        [ # Only derive `path` when the user supplied `config`;
          # otherwise `path` has to be set explicitly.
          (mkIf options.config.isDefined {
            path = (import ../../lib/eval-config.nix {
              inherit system;
              modules =
                let extraConfig =
                  { boot.isContainer = true;
                    networking.hostName = mkDefault name;
                    networking.useDHCP = false;
                  };
                in [ extraConfig config.config ];
              prefix = [ "containers" name ];
            }).config.system.build.toplevel;
          })
        ];
    }));

  default = {};
  example = literalExample
    ''
      { webserver =
          { path = "/nix/var/nix/profiles/webserver";
          };
        database =
          { config =
              { config, pkgs, ... }:
              { services.postgresql.enable = true;
                services.postgresql.package = pkgs.postgresql92;
              };
          };
      }
    '';
  description = ''
    A set of NixOS system configurations to be run as lightweight
    containers.  Each container appears as a service
    <literal>container-<replaceable>name</replaceable></literal>
    on the host system, allowing it to be started and stopped via
    <command>systemctl</command> .
  '';
};
237
238 };
239
240
241 config = mkIf (config.boot.enableContainers) {
242
# Template unit that runs one container under systemd-nspawn; the
# instance name (%i) is the container name.  Per-container settings
# (SYSTEM_PATH, PRIVATE_NETWORK, HOST_ADDRESS, LOCAL_ADDRESS,
# INTERFACES, EXTRA_NSPAWN_FLAGS) come from /etc/containers/%i.conf,
# which is optional (note the "-" prefix on EnvironmentFile).
systemd.services."container@" =
  { description = "Container '%i'";

    unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ];

    path = [ pkgs.iproute ];

    environment.INSTANCE = "%i";
    environment.root = "/var/lib/containers/%i";

    preStart =
      ''
        # Clean up existing machined registration and interfaces.
        machinectl terminate "$INSTANCE" 2> /dev/null || true

        if [ "$PRIVATE_NETWORK" = 1 ]; then
          ip link del dev "ve-$INSTANCE" 2> /dev/null || true
        fi
      '';

    # NOTE(review): MACVLANS is consumed below but is not written by
    # the /etc/containers generator in this module; presumably it is
    # provided by other tooling -- confirm.
    script =
      ''
        mkdir -p -m 0755 "$root/etc" "$root/var/lib"
        mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
        if ! [ -e "$root/etc/os-release" ]; then
          touch "$root/etc/os-release"
        fi

        mkdir -p -m 0755 \
          "/nix/var/nix/profiles/per-container/$INSTANCE" \
          "/nix/var/nix/gcroots/per-container/$INSTANCE"

        cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

        if [ "$PRIVATE_NETWORK" = 1 ]; then
          extraFlags+=" --network-veth"
        fi

        for iface in $INTERFACES; do
          extraFlags+=" --network-interface=$iface"
        done

        for iface in $MACVLANS; do
          extraFlags+=" --network-macvlan=$iface"
        done

        # If the host is 64-bit and the container is 32-bit, add a
        # --personality flag.
        ${optionalString (config.nixpkgs.system == "x86_64-linux") ''
          if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
            extraFlags+=" --personality=x86"
          fi
        ''}

        # Run systemd-nspawn without startup notification (we'll
        # wait for the container systemd to signal readiness).
        EXIT_ON_REBOOT=1 NOTIFY_SOCKET= \
        exec ${config.systemd.package}/bin/systemd-nspawn \
          --keep-unit \
          -M "$INSTANCE" -D "$root" $extraFlags \
          $EXTRA_NSPAWN_FLAGS \
          --bind-ro=/nix/store \
          --bind-ro=/nix/var/nix/db \
          --bind-ro=/nix/var/nix/daemon-socket \
          --bind=/run/systemd/notify:/var/lib/private/host-notify \
          --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
          --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
          --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
          --setenv HOST_ADDRESS="$HOST_ADDRESS" \
          --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
          --setenv PATH="$PATH" \
          ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
      '';

    postStart =
      ''
        if [ "$PRIVATE_NETWORK" = 1 ]; then
          ifaceHost=ve-$INSTANCE
          ip link set dev $ifaceHost up
          if [ -n "$HOST_ADDRESS" ]; then
            ip addr add $HOST_ADDRESS dev $ifaceHost
          fi
          if [ -n "$LOCAL_ADDRESS" ]; then
            ip route add $LOCAL_ADDRESS dev $ifaceHost
          fi
        fi

        # Get the leader PID so that we can signal it in
        # preStop. We can't use machinectl there because D-Bus
        # might be shutting down. FIXME: in systemd 219 we can
        # just signal systemd-nspawn to do a clean shutdown.
        machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
      '';

    preStop =
      ''
        pid="$(cat "/run/containers/$INSTANCE.pid")"
        if [ -n "$pid" ]; then
          kill -RTMIN+4 "$pid"
        fi
        rm -f "/run/containers/$INSTANCE.pid"
      '';

    restartIfChanged = false;
    #reloadIfChanged = true; # FIXME

    serviceConfig = {
      ExecReload = pkgs.writeScript "reload-container"
        ''
          #! ${pkgs.stdenv.shell} -e
          ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
            bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
        '';

      SyslogIdentifier = "container %i";

      EnvironmentFile = "-/etc/containers/%i.conf";

      Type = "notify";

      NotifyAccess = "all";

      # Note that on reboot, systemd-nspawn returns 133, so this
      # unit will be restarted. On poweroff, it returns 0, so the
      # unit won't be restarted.
      RestartForceExitStatus = "133";
      SuccessExitStatus = "133";

      Restart = "on-failure";

      # Hack: we don't want to kill systemd-nspawn, since preStop
      # already sends SIGRTMIN+4 to the container's leader process
      # to shut down the container cleanly.  But systemd requires
      # sending a signal (at least if we want remaining processes
      # to be killed after the timeout).  So send an ignored signal.
      KillMode = "mixed";
      KillSignal = "WINCH";
    };
  };
389
# Generate a configuration file in /etc/containers for each declared
# container.  The container@ service loads it as its EnvironmentFile,
# and all-containers sources it to look for AUTO_START.
environment.etc =
  let
    # Build the "containers/<name>.conf" entry for a single container.
    mkConfEntry = name: cfg: nameValuePair "containers/${name}.conf"
      { text =
          ''
            SYSTEM_PATH=${cfg.path}
            ${optionalString cfg.privateNetwork ''
              PRIVATE_NETWORK=1
              ${optionalString (cfg.hostAddress != null) ''
                HOST_ADDRESS=${cfg.hostAddress}
              ''}
              ${optionalString (cfg.localAddress != null) ''
                LOCAL_ADDRESS=${cfg.localAddress}
              ''}
            ''}
            INTERFACES="${toString cfg.interfaces}"
            ${optionalString cfg.autoStart ''
              AUTO_START=1
            ''}
            EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts}"
          '';
      };
  in mapAttrs' mkConfEntry config.containers;
413
# Generate /etc/hosts entries for the containers: one
# "<localAddress> <name>.containers" line per container that has a
# local address; containers without one contribute nothing.
networking.extraHosts =
  let
    hostsLine = name: cfg:
      if cfg.localAddress == null then "" else
        ''
          ${cfg.localAddress} ${name}.containers
        '';
  in concatStrings (mapAttrsToList hostsLine config.containers);
419
# Keep dhcpcd away from the host-side veth interfaces, which are
# named "ve-<container>".
networking.dhcpcd.denyInterfaces = singleton "ve-*";

# Make the nixos-container tool available system-wide.
environment.systemPackages = singleton nixos-container;
423
# Start containers at boot time: a oneshot unit that sources every
# /etc/containers/*.conf and starts container@<name>.service for each
# file that sets AUTO_START=1.  It exits non-zero if any start failed.
systemd.services.all-containers = {
  description = "All Containers";

  wantedBy = [ "multi-user.target" ];

  # Skip the unit entirely when no container configuration exists.
  unitConfig = { ConditionDirectoryNotEmpty = "/etc/containers"; };

  serviceConfig = { Type = "oneshot"; };

  script =
    ''
      res=0
      shopt -s nullglob
      for i in /etc/containers/*.conf; do
        AUTO_START=
        source "$i"
        if [ "$AUTO_START" = 1 ]; then
          systemctl start "container@$(basename "$i" .conf).service" || res=1
        fi
      done
      exit $res
    ''; # */
};
448
449 };
450}