1{ config, lib, pkgs, ... }:
2
3with lib;
4
5let
6
7 # The container's init script, a small wrapper around the regular
8 # NixOS stage-2 init script.
9 containerInit = (cfg:
10 let
11 renderExtraVeth = (name: cfg:
12 ''
13 echo "Bringing ${name} up"
14 ip link set dev ${name} up
15 ${optionalString (cfg.localAddress != null) ''
16 echo "Setting ip for ${name}"
17 ip addr add ${cfg.localAddress} dev ${name}
18 ''}
19 ${optionalString (cfg.localAddress6 != null) ''
20 echo "Setting ip6 for ${name}"
21 ip -6 addr add ${cfg.localAddress6} dev ${name}
22 ''}
23 ${optionalString (cfg.hostAddress != null) ''
24 echo "Setting route to host for ${name}"
25 ip route add ${cfg.hostAddress} dev ${name}
26 ''}
27 ${optionalString (cfg.hostAddress6 != null) ''
28 echo "Setting route6 to host for ${name}"
29 ip -6 route add ${cfg.hostAddress6} dev ${name}
30 ''}
31 ''
32 );
33 in
34 pkgs.writeScript "container-init"
35 ''
36 #! ${pkgs.runtimeShell} -e
37
38 # Exit early if we're asked to shut down.
39 trap "exit 0" SIGRTMIN+3
40
41 # Initialise the container side of the veth pair.
42 if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
43 [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
44 [ -n "$HOST_BRIDGE" ]; then
45 ip link set host0 name eth0
46 ip link set dev eth0 up
47
48 if [ -n "$LOCAL_ADDRESS" ]; then
49 ip addr add $LOCAL_ADDRESS dev eth0
50 fi
51 if [ -n "$LOCAL_ADDRESS6" ]; then
52 ip -6 addr add $LOCAL_ADDRESS6 dev eth0
53 fi
54 if [ -n "$HOST_ADDRESS" ]; then
55 ip route add $HOST_ADDRESS dev eth0
56 ip route add default via $HOST_ADDRESS
57 fi
58 if [ -n "$HOST_ADDRESS6" ]; then
59 ip -6 route add $HOST_ADDRESS6 dev eth0
60 ip -6 route add default via $HOST_ADDRESS6
61 fi
62 fi
63
64 ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
65
66 # Start the regular stage 2 script.
67 # We source instead of exec to not lose an early stop signal, which is
68 # also the only _reliable_ shutdown signal we have since early stop
69 # does not execute ExecStop* commands.
70 set +e
71 . "$1"
72 ''
73 );
74
75 nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");
76
77 startScript = cfg:
78 ''
79 mkdir -p -m 0755 "$root/etc" "$root/var/lib"
80 mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
81 if ! [ -e "$root/etc/os-release" ]; then
82 touch "$root/etc/os-release"
83 fi
84
85 if ! [ -e "$root/etc/machine-id" ]; then
86 touch "$root/etc/machine-id"
87 fi
88
89 mkdir -p -m 0755 \
90 "/nix/var/nix/profiles/per-container/$INSTANCE" \
91 "/nix/var/nix/gcroots/per-container/$INSTANCE"
92
93 cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
94
95 if [ "$PRIVATE_NETWORK" = 1 ]; then
96 extraFlags+=" --private-network"
97 fi
98
99 if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
100 [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
101 extraFlags+=" --network-veth"
102 fi
103
104 if [ -n "$HOST_PORT" ]; then
105 OIFS=$IFS
106 IFS=","
107 for i in $HOST_PORT
108 do
109 extraFlags+=" --port=$i"
110 done
111 IFS=$OIFS
112 fi
113
114 if [ -n "$HOST_BRIDGE" ]; then
115 extraFlags+=" --network-bridge=$HOST_BRIDGE"
116 fi
117
118 extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)}"
119
120 for iface in $INTERFACES; do
121 extraFlags+=" --network-interface=$iface"
122 done
123
124 for iface in $MACVLANS; do
125 extraFlags+=" --network-macvlan=$iface"
126 done
127
128 # If the host is 64-bit and the container is 32-bit, add a
129 # --personality flag.
130 ${optionalString (config.nixpkgs.localSystem.system == "x86_64-linux") ''
131 if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
132 extraFlags+=" --personality=x86"
133 fi
134 ''}
135
136 # Run systemd-nspawn without startup notification (we'll
137 # wait for the container systemd to signal readiness)
138 # Kill signal handling means systemd-nspawn will pass a system-halt signal
139 # to the container systemd when it receives SIGTERM for container shutdown;
140 # containerInit and stage2 have to handle this as well.
141 exec ${config.systemd.package}/bin/systemd-nspawn \
142 --keep-unit \
143 -M "$INSTANCE" -D "$root" $extraFlags \
144 $EXTRA_NSPAWN_FLAGS \
145 --notify-ready=yes \
146 --kill-signal=SIGRTMIN+3 \
147 --bind-ro=/nix/store \
148 --bind-ro=/nix/var/nix/db \
149 --bind-ro=/nix/var/nix/daemon-socket \
150 --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
151 --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
152 ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
153 --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
154 --setenv HOST_BRIDGE="$HOST_BRIDGE" \
155 --setenv HOST_ADDRESS="$HOST_ADDRESS" \
156 --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
157 --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
158 --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
159 --setenv HOST_PORT="$HOST_PORT" \
160 --setenv PATH="$PATH" \
161 ${optionalString cfg.ephemeral "--ephemeral"} \
162 ${if cfg.additionalCapabilities != null && cfg.additionalCapabilities != [] then
163 ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"'' else ""
164 } \
165 ${if cfg.tmpfs != null && cfg.tmpfs != [] then
166 ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}'' else ""
167 } \
168 ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
169 '';
170
171 preStartScript = cfg:
172 ''
173 # Clean up existing machined registration and interfaces.
174 machinectl terminate "$INSTANCE" 2> /dev/null || true
175
176 if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
177 [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
178 ip link del dev "ve-$INSTANCE" 2> /dev/null || true
179 ip link del dev "vb-$INSTANCE" 2> /dev/null || true
180 fi
181
182 ${concatStringsSep "\n" (
183 mapAttrsToList (name: cfg:
184 "ip link del dev ${name} 2> /dev/null || true "
185 ) cfg.extraVeths
186 )}
187 '';
188
189 postStartScript = (cfg:
190 let
191 ipcall = cfg: ipcmd: variable: attribute:
192 if cfg.${attribute} == null then
193 ''
194 if [ -n "${variable}" ]; then
195 ${ipcmd} add ${variable} dev $ifaceHost
196 fi
197 ''
198 else
199 "${ipcmd} add ${cfg.${attribute}} dev $ifaceHost";
200 renderExtraVeth = name: cfg:
201 if cfg.hostBridge != null then
202 ''
203 # Add ${name} to bridge ${cfg.hostBridge}
204 ip link set dev ${name} master ${cfg.hostBridge} up
205 ''
206 else
207 ''
208 echo "Bring ${name} up"
209 ip link set dev ${name} up
210 # Set IPs and routes for ${name}
211 ${optionalString (cfg.hostAddress != null) ''
212 ip addr add ${cfg.hostAddress} dev ${name}
213 ''}
214 ${optionalString (cfg.hostAddress6 != null) ''
215 ip -6 addr add ${cfg.hostAddress6} dev ${name}
216 ''}
217 ${optionalString (cfg.localAddress != null) ''
218 ip route add ${cfg.localAddress} dev ${name}
219 ''}
220 ${optionalString (cfg.localAddress6 != null) ''
221 ip -6 route add ${cfg.localAddress6} dev ${name}
222 ''}
223 '';
224 in
225 ''
226 if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
227 [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
228 if [ -z "$HOST_BRIDGE" ]; then
229 ifaceHost=ve-$INSTANCE
230 ip link set dev $ifaceHost up
231
232 ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
233 ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
234 ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
235 ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
236 fi
237 fi
238 ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
239 ''
240 );
241
242 serviceDirectives = cfg: {
243 ExecReload = pkgs.writeScript "reload-container"
244 ''
245 #! ${pkgs.runtimeShell} -e
246 ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
247 bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
248 '';
249
250 SyslogIdentifier = "container %i";
251
252 EnvironmentFile = "-/etc/containers/%i.conf";
253
254 Type = "notify";
255
256 RuntimeDirectory = lib.optional cfg.ephemeral "containers/%i";
257
258 # Note that on reboot, systemd-nspawn returns 133, so this
259 # unit will be restarted. On poweroff, it returns 0, so the
260 # unit won't be restarted.
261 RestartForceExitStatus = "133";
262 SuccessExitStatus = "133";
263
264 # Some containers take long to start
265 # especially when you automatically start many at once
266 TimeoutStartSec = cfg.timeoutStartSec;
267
268 Restart = "on-failure";
269
270 Slice = "machine.slice";
271 Delegate = true;
272
273 # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown
274 # signal (SIGRTMIN+3) for the inner container.
275 KillMode = "mixed";
276 KillSignal = "TERM";
277
278 DevicePolicy = "closed";
279 DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
280 };
281
282 system = config.nixpkgs.localSystem.system;
283 kernelVersion = config.boot.kernelPackages.kernel.version;
284
285 bindMountOpts = { name, ... }: {
286
287 options = {
288 mountPoint = mkOption {
289 example = "/mnt/usb";
290 type = types.str;
291 description = "Mount point on the container file system.";
292 };
293 hostPath = mkOption {
294 default = null;
295 example = "/home/alice";
296 type = types.nullOr types.str;
297 description = "Location of the host path to be mounted.";
298 };
299 isReadOnly = mkOption {
300 default = true;
301 type = types.bool;
302 description = "Determine whether the mounted path will be accessed in read-only mode.";
303 };
304 };
305
306 config = {
307 mountPoint = mkDefault name;
308 };
309
310 };
311
312 allowedDeviceOpts = { ... }: {
313 options = {
314 node = mkOption {
315 example = "/dev/net/tun";
316 type = types.str;
317 description = "Path to device node";
318 };
319 modifier = mkOption {
320 example = "rw";
321 type = types.str;
322 description = ''
323 Device node access modifier. Takes a combination
324 <literal>r</literal> (read), <literal>w</literal> (write), and
325 <literal>m</literal> (mknod). See the
326 <literal>systemd.resource-control(5)</literal> man page for more
327 information.'';
328 };
329 };
330 };
331
332 mkBindFlag = d:
333 let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
334 mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
335 in flagPrefix + mountstr ;
336
337 mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);
338
339 networkOptions = {
340 hostBridge = mkOption {
341 type = types.nullOr types.str;
342 default = null;
343 example = "br0";
344 description = ''
345 Put the host-side of the veth-pair into the named bridge.
346 Only one of hostAddress* or hostBridge can be given.
347 '';
348 };
349
350 forwardPorts = mkOption {
351 type = types.listOf (types.submodule {
352 options = {
353 protocol = mkOption {
354 type = types.str;
355 default = "tcp";
356 description = "The protocol specifier for port forwarding between host and container";
357 };
358 hostPort = mkOption {
359 type = types.int;
360 description = "Source port of the external interface on host";
361 };
362 containerPort = mkOption {
363 type = types.nullOr types.int;
364 default = null;
365 description = "Target port of container";
366 };
367 };
368 });
369 default = [];
370 example = [ { protocol = "tcp"; hostPort = 8080; containerPort = 80; } ];
371 description = ''
372 List of forwarded ports from host to container. Each forwarded port
373 is specified by protocol, hostPort and containerPort. By default,
374 protocol is tcp and hostPort and containerPort are assumed to be
375 the same if containerPort is not explicitly given.
376 '';
377 };
378
379
380 hostAddress = mkOption {
381 type = types.nullOr types.str;
382 default = null;
383 example = "10.231.136.1";
384 description = ''
385 The IPv4 address assigned to the host interface.
386 (Not used when hostBridge is set.)
387 '';
388 };
389
390 hostAddress6 = mkOption {
391 type = types.nullOr types.str;
392 default = null;
393 example = "fc00::1";
394 description = ''
395 The IPv6 address assigned to the host interface.
396 (Not used when hostBridge is set.)
397 '';
398 };
399
400 localAddress = mkOption {
401 type = types.nullOr types.str;
402 default = null;
403 example = "10.231.136.2";
404 description = ''
405 The IPv4 address assigned to the interface in the container.
406 If a hostBridge is used, this should be given with netmask to access
407 the whole network. Otherwise the default netmask is /32 and routing is
408 set up from localAddress to hostAddress and back.
409 '';
410 };
411
412 localAddress6 = mkOption {
413 type = types.nullOr types.str;
414 default = null;
415 example = "fc00::2";
416 description = ''
417 The IPv6 address assigned to the interface in the container.
418 If a hostBridge is used, this should be given with netmask to access
419 the whole network. Otherwise the default netmask is /128 and routing is
420 set up from localAddress6 to hostAddress6 and back.
421 '';
422 };
423
424 };
425
426 dummyConfig =
427 {
428 extraVeths = {};
429 additionalCapabilities = [];
430 ephemeral = false;
431 timeoutStartSec = "1min";
432 allowedDevices = [];
433 hostAddress = null;
434 hostAddress6 = null;
435 localAddress = null;
436 localAddress6 = null;
437 tmpfs = null;
438 };
439
440in
441
442{
443 options = {
444
445 boot.isContainer = mkOption {
446 type = types.bool;
447 default = false;
448 description = ''
449 Whether this NixOS machine is a lightweight container running
450 in another NixOS system.
451 '';
452 };
453
454 boot.enableContainers = mkOption {
455 type = types.bool;
456 default = true;
457 description = ''
458 Whether to enable support for NixOS containers. Defaults to true
459 (at no cost if containers are not actually used).
460 '';
461 };
462
463 containers = mkOption {
464 type = types.attrsOf (types.submodule (
465 { config, options, name, ... }:
466 {
467 options = {
468 config = mkOption {
469 description = ''
470 A specification of the desired configuration of this
471 container, as a NixOS module.
472 '';
473 type = lib.mkOptionType {
474 name = "Toplevel NixOS config";
475 merge = loc: defs: (import "${toString config.nixpkgs}/nixos/lib/eval-config.nix" {
476 inherit system;
477 modules =
478 let
479 extraConfig = {
480 _file = "module at ${__curPos.file}:${toString __curPos.line}";
481 config = {
482 boot.isContainer = true;
483 networking.hostName = mkDefault name;
484 networking.useDHCP = false;
485 assertions = [
486 {
487 assertion =
488 (builtins.compareVersions kernelVersion "5.8" <= 0)
489 -> config.privateNetwork
490 -> stringLength name <= 11;
491 message = ''
492 Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
493 not be longer than 11 characters, because the container's interface name is derived from it.
494 You should either make the container name shorter or upgrade to a more recent kernel that
495 supports interface altnames (i.e. at least Linux 5.8 - please see https://github.com/NixOS/nixpkgs/issues/38509
496 for details).
497 '';
498 }
499 ];
500 };
501 };
502 in [ extraConfig ] ++ (map (x: x.value) defs);
503 prefix = [ "containers" name ];
504 }).config;
505 };
506 };
507
508 path = mkOption {
509 type = types.path;
510 example = "/nix/var/nix/profiles/per-container/webserver";
511 description = ''
512 As an alternative to specifying
513 <option>config</option>, you can specify the path to
514 the evaluated NixOS system configuration, typically a
515 symlink to a system profile.
516 '';
517 };
518
519 additionalCapabilities = mkOption {
520 type = types.listOf types.str;
521 default = [];
522 example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
523 description = ''
524 Grant additional capabilities to the container. See the
525 capabilities(7) and systemd-nspawn(1) man pages for more
526 information.
527 '';
528 };
529
530 nixpkgs = mkOption {
531 type = types.path;
532 default = pkgs.path;
533 defaultText = literalExpression "pkgs.path";
534 description = ''
535 A path to the nixpkgs that provide the modules, pkgs and lib for evaluating the container.
536
537 To only change the <literal>pkgs</literal> argument used inside the container modules,
538 set the <literal>nixpkgs.*</literal> options in the container <option>config</option>.
539 Setting <literal>config.nixpkgs.pkgs = pkgs</literal> speeds up the container evaluation
540 by reusing the system pkgs, but the <literal>nixpkgs.config</literal> option in the
541 container config is ignored in this case.
542 '';
543 };
544
545 ephemeral = mkOption {
546 type = types.bool;
547 default = false;
548 description = ''
549 Runs container in ephemeral mode with the empty root filesystem at boot.
550 This way container will be bootstrapped from scratch on each boot
551 and will be cleaned up on shutdown leaving no traces behind.
552 Useful for completely stateless, reproducible containers.
553
554 Note that this option might require to do some adjustments to the container configuration,
555 e.g. you might want to set
556 <varname>systemd.network.networks.$interface.dhcpV4Config.ClientIdentifier</varname> to "mac"
557 if you use <varname>macvlans</varname> option.
558 This way dhcp client identifier will be stable between the container restarts.
559
560 Note that the container journal will not be linked to the host if this option is enabled.
561 '';
562 };
563
564 enableTun = mkOption {
565 type = types.bool;
566 default = false;
567 description = ''
568 Allows the container to create and setup tunnel interfaces
569 by granting the <literal>NET_ADMIN</literal> capability and
570 enabling access to <literal>/dev/net/tun</literal>.
571 '';
572 };
573
574 privateNetwork = mkOption {
575 type = types.bool;
576 default = false;
577 description = ''
578 Whether to give the container its own private virtual
579 Ethernet interface. The interface is called
580 <literal>eth0</literal>, and is hooked up to the interface
581 <literal>ve-<replaceable>container-name</replaceable></literal>
582 on the host. If this option is not set, then the
583 container shares the network interfaces of the host,
584 and can bind to any port on any interface.
585 '';
586 };
587
588 interfaces = mkOption {
589 type = types.listOf types.str;
590 default = [];
591 example = [ "eth1" "eth2" ];
592 description = ''
593 The list of interfaces to be moved into the container.
594 '';
595 };
596
597 macvlans = mkOption {
598 type = types.listOf types.str;
599 default = [];
600 example = [ "eth1" "eth2" ];
601 description = ''
602 The list of host interfaces from which macvlans will be
603 created. For each interface specified, a macvlan interface
604 will be created and moved to the container.
605 '';
606 };
607
608 extraVeths = mkOption {
609 type = with types; attrsOf (submodule { options = networkOptions; });
610 default = {};
611 description = ''
612 Extra veth-pairs to be created for the container.
613 '';
614 };
615
616 autoStart = mkOption {
617 type = types.bool;
618 default = false;
619 description = ''
620 Whether the container is automatically started at boot-time.
621 '';
622 };
623
624 timeoutStartSec = mkOption {
625 type = types.str;
626 default = "1min";
627 description = ''
628 Time for the container to start. In case of a timeout,
629 the container processes get killed.
630 See <citerefentry><refentrytitle>systemd.time</refentrytitle>
631 <manvolnum>7</manvolnum></citerefentry>
632 for more information about the format.
633 '';
634 };
635
636 bindMounts = mkOption {
637 type = with types; attrsOf (submodule bindMountOpts);
638 default = {};
639 example = literalExpression ''
640 { "/home" = { hostPath = "/home/alice";
641 isReadOnly = false; };
642 }
643 '';
644
645 description =
646 ''
647 An extra list of directories that is bound to the container.
648 '';
649 };
650
651 allowedDevices = mkOption {
652 type = with types; listOf (submodule allowedDeviceOpts);
653 default = [];
654 example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
655 description = ''
656 A list of device nodes to which the containers has access to.
657 '';
658 };
659
660 tmpfs = mkOption {
661 type = types.listOf types.str;
662 default = [];
663 example = [ "/var" ];
664 description = ''
665 Mounts a set of tmpfs file systems into the container.
666 Multiple paths can be specified.
667 Valid items must conform to the --tmpfs argument
668 of systemd-nspawn. See systemd-nspawn(1) for details.
669 '';
670 };
671
672 extraFlags = mkOption {
673 type = types.listOf types.str;
674 default = [];
675 example = [ "--drop-capability=CAP_SYS_CHROOT" ];
676 description = ''
677 Extra flags passed to the systemd-nspawn command.
678 See systemd-nspawn(1) for details.
679 '';
680 };
681
682 # Removed option. See `checkAssertion` below for the accompanying error message.
683 pkgs = mkOption { visible = false; };
684 } // networkOptions;
685
686 config = let
687 # Throw an error when removed option `pkgs` is used.
688 # Because this is a submodule we cannot use `mkRemovedOptionModule` or option `assertions`.
689 optionPath = "containers.${name}.pkgs";
690 files = showFiles options.pkgs.files;
691 checkAssertion = if options.pkgs.isDefined then throw ''
692 The option definition `${optionPath}' in ${files} no longer has any effect; please remove it.
693
694 Alternatively, you can use the following options:
695 - containers.${name}.nixpkgs
696 This sets the nixpkgs (and thereby the modules, pkgs and lib) that
697 are used for evaluating the container.
698
699 - containers.${name}.config.nixpkgs.pkgs
700 This only sets the `pkgs` argument used inside the container modules.
701 ''
702 else null;
703 in {
704 path = builtins.seq checkAssertion
705 mkIf options.config.isDefined config.config.system.build.toplevel;
706 };
707 }));
708
709 default = {};
710 example = literalExpression
711 ''
712 { webserver =
713 { path = "/nix/var/nix/profiles/webserver";
714 };
715 database =
716 { config =
717 { config, pkgs, ... }:
718 { services.postgresql.enable = true;
719 services.postgresql.package = pkgs.postgresql_9_6;
720
721 system.stateVersion = "17.03";
722 };
723 };
724 }
725 '';
726 description = ''
727 A set of NixOS system configurations to be run as lightweight
728 containers. Each container appears as a service
729 <literal>container-<replaceable>name</replaceable></literal>
730 on the host system, allowing it to be started and stopped via
731 <command>systemctl</command>.
732 '';
733 };
734
735 };
736
737
738 config = mkIf (config.boot.enableContainers) (let
739
740 unit = {
741 description = "Container '%i'";
742
743 unitConfig.RequiresMountsFor = "/var/lib/containers/%i";
744
745 path = [ pkgs.iproute2 ];
746
747 environment = {
748 root = "/var/lib/containers/%i";
749 INSTANCE = "%i";
750 };
751
752 preStart = preStartScript dummyConfig;
753
754 script = startScript dummyConfig;
755
756 postStart = postStartScript dummyConfig;
757
758 restartIfChanged = false;
759
760 serviceConfig = serviceDirectives dummyConfig;
761 };
762 in {
763 systemd.targets.multi-user.wants = [ "machines.target" ];
764
765 systemd.services = listToAttrs (filter (x: x.value != null) (
766 # The generic container template used by imperative containers
767 [{ name = "container@"; value = unit; }]
768 # declarative containers
769 ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let
770 containerConfig = cfg // (
771 if cfg.enableTun then
772 {
773 allowedDevices = cfg.allowedDevices
774 ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
775 additionalCapabilities = cfg.additionalCapabilities
776 ++ [ "CAP_NET_ADMIN" ];
777 }
778 else {});
779 in
780 recursiveUpdate unit {
781 preStart = preStartScript containerConfig;
782 script = startScript containerConfig;
783 postStart = postStartScript containerConfig;
784 serviceConfig = serviceDirectives containerConfig;
785 unitConfig.RequiresMountsFor = lib.optional (!containerConfig.ephemeral) "/var/lib/containers/%i";
786 environment.root = if containerConfig.ephemeral then "/run/containers/%i" else "/var/lib/containers/%i";
787 } // (
788 if containerConfig.autoStart then
789 {
790 wantedBy = [ "machines.target" ];
791 wants = [ "network.target" ];
792 after = [ "network.target" ];
793 restartTriggers = [
794 containerConfig.path
795 config.environment.etc."containers/${name}.conf".source
796 ];
797 restartIfChanged = true;
798 }
799 else {})
800 )) config.containers)
801 ));
802
803 # Generate a configuration file in /etc/containers for each
804 # container so that container@.target can get the container
805 # configuration.
806 environment.etc =
807 let mkPortStr = p: p.protocol + ":" + (toString p.hostPort) + ":" + (if p.containerPort == null then toString p.hostPort else toString p.containerPort);
808 in mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf"
809 { text =
810 ''
811 SYSTEM_PATH=${cfg.path}
812 ${optionalString cfg.privateNetwork ''
813 PRIVATE_NETWORK=1
814 ${optionalString (cfg.hostBridge != null) ''
815 HOST_BRIDGE=${cfg.hostBridge}
816 ''}
817 ${optionalString (length cfg.forwardPorts > 0) ''
818 HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
819 ''}
820 ${optionalString (cfg.hostAddress != null) ''
821 HOST_ADDRESS=${cfg.hostAddress}
822 ''}
823 ${optionalString (cfg.hostAddress6 != null) ''
824 HOST_ADDRESS6=${cfg.hostAddress6}
825 ''}
826 ${optionalString (cfg.localAddress != null) ''
827 LOCAL_ADDRESS=${cfg.localAddress}
828 ''}
829 ${optionalString (cfg.localAddress6 != null) ''
830 LOCAL_ADDRESS6=${cfg.localAddress6}
831 ''}
832 ''}
833 INTERFACES="${toString cfg.interfaces}"
834 MACVLANS="${toString cfg.macvlans}"
835 ${optionalString cfg.autoStart ''
836 AUTO_START=1
837 ''}
838 EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts +
839 optionalString (cfg.extraFlags != [])
840 (" " + concatStringsSep " " cfg.extraFlags)}"
841 '';
842 }) config.containers;
843
844 # Generate /etc/hosts entries for the containers.
845 networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
846 ''
847 ${head (splitString "/" cfg.localAddress)} ${name}.containers
848 '') config.containers);
849
850 networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];
851
852 services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
853 # Don't manage interfaces created by nixos-container.
854 ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
855 '';
856
857 environment.systemPackages = [ pkgs.nixos-container ];
858
859 boot.kernelModules = [
860 "bridge"
861 "macvlan"
862 "tap"
863 "tun"
864 ];
865 });
866}