1{ config, lib, pkgs, ... }@host:
2
3with lib;
4
5let
6
7 configurationPrefix = optionalString (versionAtLeast config.system.stateVersion "22.05") "nixos-";
8 configurationDirectoryName = "${configurationPrefix}containers";
9 configurationDirectory = "/etc/${configurationDirectoryName}";
10 stateDirectory = "/var/lib/${configurationPrefix}containers";
11
12 nixos-container = pkgs.nixos-container.override {
13 inherit stateDirectory configurationDirectory;
14 };
15
16 # The container's init script, a small wrapper around the regular
17 # NixOS stage-2 init script.
18 containerInit = (cfg:
19 let
20 renderExtraVeth = (name: cfg:
21 ''
22 echo "Bringing ${name} up"
23 ip link set dev ${name} up
24 ${optionalString (cfg.localAddress != null) ''
25 echo "Setting ip for ${name}"
26 ip addr add ${cfg.localAddress} dev ${name}
27 ''}
28 ${optionalString (cfg.localAddress6 != null) ''
29 echo "Setting ip6 for ${name}"
30 ip -6 addr add ${cfg.localAddress6} dev ${name}
31 ''}
32 ${optionalString (cfg.hostAddress != null) ''
33 echo "Setting route to host for ${name}"
34 ip route add ${cfg.hostAddress} dev ${name}
35 ''}
36 ${optionalString (cfg.hostAddress6 != null) ''
37 echo "Setting route6 to host for ${name}"
38 ip -6 route add ${cfg.hostAddress6} dev ${name}
39 ''}
40 ''
41 );
42 in
43 pkgs.writeScript "container-init"
44 ''
45 #! ${pkgs.runtimeShell} -e
46
47 # Exit early if we're asked to shut down.
48 trap "exit 0" SIGRTMIN+3
49
50 # Initialise the container side of the veth pair.
51 if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
52 [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
53 [ -n "$HOST_BRIDGE" ]; then
54 ip link set host0 name eth0
55 ip link set dev eth0 up
56
57 if [ -n "$LOCAL_ADDRESS" ]; then
58 ip addr add $LOCAL_ADDRESS dev eth0
59 fi
60 if [ -n "$LOCAL_ADDRESS6" ]; then
61 ip -6 addr add $LOCAL_ADDRESS6 dev eth0
62 fi
63 if [ -n "$HOST_ADDRESS" ]; then
64 ip route add $HOST_ADDRESS dev eth0
65 ip route add default via $HOST_ADDRESS
66 fi
67 if [ -n "$HOST_ADDRESS6" ]; then
68 ip -6 route add $HOST_ADDRESS6 dev eth0
69 ip -6 route add default via $HOST_ADDRESS6
70 fi
71 fi
72
73 ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
74
75 # Start the regular stage 2 script.
76 # We source instead of exec to not lose an early stop signal, which is
77 # also the only _reliable_ shutdown signal we have since early stop
78 # does not execute ExecStop* commands.
79 set +e
80 . "$1"
81 ''
82 );
83
84 nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");
85
86 startScript = cfg:
87 ''
88 mkdir -p -m 0755 "$root/etc" "$root/var/lib"
89 mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/nixos-containers
90 if ! [ -e "$root/etc/os-release" ]; then
91 touch "$root/etc/os-release"
92 fi
93
94 if ! [ -e "$root/etc/machine-id" ]; then
95 touch "$root/etc/machine-id"
96 fi
97
98 mkdir -p -m 0755 \
99 "/nix/var/nix/profiles/per-container/$INSTANCE" \
100 "/nix/var/nix/gcroots/per-container/$INSTANCE"
101
102 cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
103
104 if [ "$PRIVATE_NETWORK" = 1 ]; then
105 extraFlags+=" --private-network"
106 fi
107
108 if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
109 [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
110 extraFlags+=" --network-veth"
111 fi
112
113 if [ -n "$HOST_PORT" ]; then
114 OIFS=$IFS
115 IFS=","
116 for i in $HOST_PORT
117 do
118 extraFlags+=" --port=$i"
119 done
120 IFS=$OIFS
121 fi
122
123 if [ -n "$HOST_BRIDGE" ]; then
124 extraFlags+=" --network-bridge=$HOST_BRIDGE"
125 fi
126
127 extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)}"
128
129 for iface in $INTERFACES; do
130 extraFlags+=" --network-interface=$iface"
131 done
132
133 for iface in $MACVLANS; do
134 extraFlags+=" --network-macvlan=$iface"
135 done
136
137 # If the host is 64-bit and the container is 32-bit, add a
138 # --personality flag.
139 ${optionalString (pkgs.stdenv.hostPlatform.system == "x86_64-linux") ''
140 if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
141 extraFlags+=" --personality=x86"
142 fi
143 ''}
144
145 export SYSTEMD_NSPAWN_UNIFIED_HIERARCHY=1
146
147 # Run systemd-nspawn without startup notification (we'll
148 # wait for the container systemd to signal readiness)
149 # Kill signal handling means systemd-nspawn will pass a system-halt signal
150 # to the container systemd when it receives SIGTERM for container shutdown;
151 # containerInit and stage2 have to handle this as well.
152 exec ${config.systemd.package}/bin/systemd-nspawn \
153 --keep-unit \
154 -M "$INSTANCE" -D "$root" $extraFlags \
155 $EXTRA_NSPAWN_FLAGS \
156 --notify-ready=yes \
157 --kill-signal=SIGRTMIN+3 \
158 --bind-ro=/nix/store \
159 --bind-ro=/nix/var/nix/db \
160 --bind-ro=/nix/var/nix/daemon-socket \
161 --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
162 --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
163 ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
164 --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
165 --setenv HOST_BRIDGE="$HOST_BRIDGE" \
166 --setenv HOST_ADDRESS="$HOST_ADDRESS" \
167 --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
168 --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
169 --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
170 --setenv HOST_PORT="$HOST_PORT" \
171 --setenv PATH="$PATH" \
172 ${optionalString cfg.ephemeral "--ephemeral"} \
173 ${optionalString (cfg.additionalCapabilities != null && cfg.additionalCapabilities != [])
174 ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"''
175 } \
176 ${optionalString (cfg.tmpfs != null && cfg.tmpfs != [])
177 ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}''
178 } \
179 ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
180 '';
181
182 preStartScript = cfg:
183 ''
184 # Clean up existing machined registration and interfaces.
185 machinectl terminate "$INSTANCE" 2> /dev/null || true
186
187 if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
188 [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
189 ip link del dev "ve-$INSTANCE" 2> /dev/null || true
190 ip link del dev "vb-$INSTANCE" 2> /dev/null || true
191 fi
192
193 ${concatStringsSep "\n" (
194 mapAttrsToList (name: cfg:
195 "ip link del dev ${name} 2> /dev/null || true "
196 ) cfg.extraVeths
197 )}
198 '';
199
200 postStartScript = (cfg:
201 let
202 ipcall = cfg: ipcmd: variable: attribute:
203 if cfg.${attribute} == null then
204 ''
205 if [ -n "${variable}" ]; then
206 ${ipcmd} add ${variable} dev $ifaceHost
207 fi
208 ''
209 else
210 "${ipcmd} add ${cfg.${attribute}} dev $ifaceHost";
211 renderExtraVeth = name: cfg:
212 if cfg.hostBridge != null then
213 ''
214 # Add ${name} to bridge ${cfg.hostBridge}
215 ip link set dev ${name} master ${cfg.hostBridge} up
216 ''
217 else
218 ''
219 echo "Bring ${name} up"
220 ip link set dev ${name} up
221 # Set IPs and routes for ${name}
222 ${optionalString (cfg.hostAddress != null) ''
223 ip addr add ${cfg.hostAddress} dev ${name}
224 ''}
225 ${optionalString (cfg.hostAddress6 != null) ''
226 ip -6 addr add ${cfg.hostAddress6} dev ${name}
227 ''}
228 ${optionalString (cfg.localAddress != null) ''
229 ip route add ${cfg.localAddress} dev ${name}
230 ''}
231 ${optionalString (cfg.localAddress6 != null) ''
232 ip -6 route add ${cfg.localAddress6} dev ${name}
233 ''}
234 '';
235 in
236 ''
237 if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
238 [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
239 if [ -z "$HOST_BRIDGE" ]; then
240 ifaceHost=ve-$INSTANCE
241 ip link set dev $ifaceHost up
242
243 ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
244 ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
245 ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
246 ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
247 fi
248 fi
249 ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
250 ''
251 );
252
253 serviceDirectives = cfg: {
254 ExecReload = pkgs.writeScript "reload-container"
255 ''
256 #! ${pkgs.runtimeShell} -e
257 ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
258 bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
259 '';
260
261 SyslogIdentifier = "container %i";
262
263 EnvironmentFile = "-${configurationDirectory}/%i.conf";
264
265 Type = "notify";
266
267 RuntimeDirectory = lib.optional cfg.ephemeral "${configurationDirectoryName}/%i";
268
269 # Note that on reboot, systemd-nspawn returns 133, so this
270 # unit will be restarted. On poweroff, it returns 0, so the
271 # unit won't be restarted.
272 RestartForceExitStatus = "133";
273 SuccessExitStatus = "133";
274
275 # Some containers take long to start
276 # especially when you automatically start many at once
277 TimeoutStartSec = cfg.timeoutStartSec;
278
279 Restart = "on-failure";
280
281 Slice = "machine.slice";
282 Delegate = true;
283
284 # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown
285 # signal (SIGRTMIN+3) for the inner container.
286 KillMode = "mixed";
287 KillSignal = "TERM";
288
289 DevicePolicy = "closed";
290 DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
291 };
292
293 kernelVersion = config.boot.kernelPackages.kernel.version;
294
295 bindMountOpts = { name, ... }: {
296
297 options = {
298 mountPoint = mkOption {
299 example = "/mnt/usb";
300 type = types.str;
301 description = lib.mdDoc "Mount point on the container file system.";
302 };
303 hostPath = mkOption {
304 default = null;
305 example = "/home/alice";
306 type = types.nullOr types.str;
307 description = lib.mdDoc "Location of the host path to be mounted.";
308 };
309 isReadOnly = mkOption {
310 default = true;
311 type = types.bool;
312 description = lib.mdDoc "Determine whether the mounted path will be accessed in read-only mode.";
313 };
314 };
315
316 config = {
317 mountPoint = mkDefault name;
318 };
319
320 };
321
322 allowedDeviceOpts = { ... }: {
323 options = {
324 node = mkOption {
325 example = "/dev/net/tun";
326 type = types.str;
327 description = lib.mdDoc "Path to device node";
328 };
329 modifier = mkOption {
330 example = "rw";
331 type = types.str;
332 description = lib.mdDoc ''
333 Device node access modifier. Takes a combination
334 `r` (read), `w` (write), and
335 `m` (mknod). See the
336 `systemd.resource-control(5)` man page for more
337 information.'';
338 };
339 };
340 };
341
342 mkBindFlag = d:
343 let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
344 mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
345 in flagPrefix + mountstr ;
346
347 mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);
348
349 networkOptions = {
350 hostBridge = mkOption {
351 type = types.nullOr types.str;
352 default = null;
353 example = "br0";
354 description = lib.mdDoc ''
355 Put the host-side of the veth-pair into the named bridge.
356 Only one of hostAddress* or hostBridge can be given.
357 '';
358 };
359
360 forwardPorts = mkOption {
361 type = types.listOf (types.submodule {
362 options = {
363 protocol = mkOption {
364 type = types.str;
365 default = "tcp";
366 description = lib.mdDoc "The protocol specifier for port forwarding between host and container";
367 };
368 hostPort = mkOption {
369 type = types.int;
370 description = lib.mdDoc "Source port of the external interface on host";
371 };
372 containerPort = mkOption {
373 type = types.nullOr types.int;
374 default = null;
375 description = lib.mdDoc "Target port of container";
376 };
377 };
378 });
379 default = [];
380 example = [ { protocol = "tcp"; hostPort = 8080; containerPort = 80; } ];
381 description = lib.mdDoc ''
382 List of forwarded ports from host to container. Each forwarded port
383 is specified by protocol, hostPort and containerPort. By default,
384 protocol is tcp and hostPort and containerPort are assumed to be
385 the same if containerPort is not explicitly given.
386 '';
387 };
388
389
390 hostAddress = mkOption {
391 type = types.nullOr types.str;
392 default = null;
393 example = "10.231.136.1";
394 description = lib.mdDoc ''
395 The IPv4 address assigned to the host interface.
396 (Not used when hostBridge is set.)
397 '';
398 };
399
400 hostAddress6 = mkOption {
401 type = types.nullOr types.str;
402 default = null;
403 example = "fc00::1";
404 description = lib.mdDoc ''
405 The IPv6 address assigned to the host interface.
406 (Not used when hostBridge is set.)
407 '';
408 };
409
410 localAddress = mkOption {
411 type = types.nullOr types.str;
412 default = null;
413 example = "10.231.136.2";
414 description = lib.mdDoc ''
415 The IPv4 address assigned to the interface in the container.
416 If a hostBridge is used, this should be given with netmask to access
417 the whole network. Otherwise the default netmask is /32 and routing is
418 set up from localAddress to hostAddress and back.
419 '';
420 };
421
422 localAddress6 = mkOption {
423 type = types.nullOr types.str;
424 default = null;
425 example = "fc00::2";
426 description = lib.mdDoc ''
427 The IPv6 address assigned to the interface in the container.
428 If a hostBridge is used, this should be given with netmask to access
429 the whole network. Otherwise the default netmask is /128 and routing is
430 set up from localAddress6 to hostAddress6 and back.
431 '';
432 };
433
434 };
435
436 dummyConfig =
437 {
438 extraVeths = {};
439 additionalCapabilities = [];
440 ephemeral = false;
441 timeoutStartSec = "1min";
442 allowedDevices = [];
443 hostAddress = null;
444 hostAddress6 = null;
445 localAddress = null;
446 localAddress6 = null;
447 tmpfs = null;
448 };
449
450in
451
452{
453 options = {
454
455 boot.isContainer = mkOption {
456 type = types.bool;
457 default = false;
458 description = lib.mdDoc ''
459 Whether this NixOS machine is a lightweight container running
460 in another NixOS system.
461 '';
462 };
463
464 boot.enableContainers = mkOption {
465 type = types.bool;
466 default = true;
467 description = lib.mdDoc ''
468 Whether to enable support for NixOS containers. Defaults to true
469 (at no cost if containers are not actually used).
470 '';
471 };
472
473 containers = mkOption {
474 type = types.attrsOf (types.submodule (
475 { config, options, name, ... }:
476 {
477 options = {
478 config = mkOption {
479 description = lib.mdDoc ''
480 A specification of the desired configuration of this
481 container, as a NixOS module.
482 '';
483 type = lib.mkOptionType {
484 name = "Toplevel NixOS config";
485 merge = loc: defs: (import "${toString config.nixpkgs}/nixos/lib/eval-config.nix" {
486 modules =
487 let
488 extraConfig = { options, ... }: {
489 _file = "module at ${__curPos.file}:${toString __curPos.line}";
490 config = {
491 nixpkgs = if options.nixpkgs?hostPlatform && host.options.nixpkgs.hostPlatform.isDefined
492 then { inherit (host.config.nixpkgs) hostPlatform; }
493 else { inherit (host.config.nixpkgs) localSystem; }
494 ;
495 boot.isContainer = true;
496 networking.hostName = mkDefault name;
497 networking.useDHCP = false;
498 assertions = [
499 {
500 assertion =
501 (builtins.compareVersions kernelVersion "5.8" <= 0)
502 -> config.privateNetwork
503 -> stringLength name <= 11;
504 message = ''
505 Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
506 not be longer than 11 characters, because the container's interface name is derived from it.
507 You should either make the container name shorter or upgrade to a more recent kernel that
508 supports interface altnames (i.e. at least Linux 5.8 - please see https://github.com/NixOS/nixpkgs/issues/38509
509 for details).
510 '';
511 }
512 ];
513 };
514 };
515 in [ extraConfig ] ++ (map (x: x.value) defs);
516 prefix = [ "containers" name ];
517 inherit (config) specialArgs;
518
519 # The system is inherited from the host above.
520 # Set it to null, to remove the "legacy" entrypoint's non-hermetic default.
521 system = null;
522 }).config;
523 };
524 };
525
526 path = mkOption {
527 type = types.path;
528 example = "/nix/var/nix/profiles/per-container/webserver";
529 description = lib.mdDoc ''
530 As an alternative to specifying
531 {option}`config`, you can specify the path to
532 the evaluated NixOS system configuration, typically a
533 symlink to a system profile.
534 '';
535 };
536
537 additionalCapabilities = mkOption {
538 type = types.listOf types.str;
539 default = [];
540 example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
541 description = lib.mdDoc ''
542 Grant additional capabilities to the container. See the
543 capabilities(7) and systemd-nspawn(1) man pages for more
544 information.
545 '';
546 };
547
548 nixpkgs = mkOption {
549 type = types.path;
550 default = pkgs.path;
551 defaultText = literalExpression "pkgs.path";
552 description = lib.mdDoc ''
553 A path to the nixpkgs that provide the modules, pkgs and lib for evaluating the container.
554
555 To only change the `pkgs` argument used inside the container modules,
556 set the `nixpkgs.*` options in the container {option}`config`.
557 Setting `config.nixpkgs.pkgs = pkgs` speeds up the container evaluation
558 by reusing the system pkgs, but the `nixpkgs.config` option in the
559 container config is ignored in this case.
560 '';
561 };
562
563 specialArgs = mkOption {
564 type = types.attrsOf types.unspecified;
565 default = {};
566 description = lib.mdDoc ''
567 A set of special arguments to be passed to NixOS modules.
568 This will be merged into the `specialArgs` used to evaluate
569 the NixOS configurations.
570 '';
571 };
572
573 ephemeral = mkOption {
574 type = types.bool;
575 default = false;
576 description = lib.mdDoc ''
577 Runs container in ephemeral mode with the empty root filesystem at boot.
578 This way container will be bootstrapped from scratch on each boot
579 and will be cleaned up on shutdown leaving no traces behind.
580 Useful for completely stateless, reproducible containers.
581
582 Note that this option might require to do some adjustments to the container configuration,
583 e.g. you might want to set
584 {var}`systemd.network.networks.$interface.dhcpV4Config.ClientIdentifier` to "mac"
585 if you use {var}`macvlans` option.
586 This way dhcp client identifier will be stable between the container restarts.
587
588 Note that the container journal will not be linked to the host if this option is enabled.
589 '';
590 };
591
592 enableTun = mkOption {
593 type = types.bool;
594 default = false;
595 description = lib.mdDoc ''
596 Allows the container to create and setup tunnel interfaces
597 by granting the `NET_ADMIN` capability and
598 enabling access to `/dev/net/tun`.
599 '';
600 };
601
602 privateNetwork = mkOption {
603 type = types.bool;
604 default = false;
605 description = lib.mdDoc ''
606 Whether to give the container its own private virtual
607 Ethernet interface. The interface is called
608 `eth0`, and is hooked up to the interface
609 `ve-«container-name»`
610 on the host. If this option is not set, then the
611 container shares the network interfaces of the host,
612 and can bind to any port on any interface.
613 '';
614 };
615
616 interfaces = mkOption {
617 type = types.listOf types.str;
618 default = [];
619 example = [ "eth1" "eth2" ];
620 description = lib.mdDoc ''
621 The list of interfaces to be moved into the container.
622 '';
623 };
624
625 macvlans = mkOption {
626 type = types.listOf types.str;
627 default = [];
628 example = [ "eth1" "eth2" ];
629 description = lib.mdDoc ''
630 The list of host interfaces from which macvlans will be
631 created. For each interface specified, a macvlan interface
632 will be created and moved to the container.
633 '';
634 };
635
636 extraVeths = mkOption {
637 type = with types; attrsOf (submodule { options = networkOptions; });
638 default = {};
639 description = lib.mdDoc ''
640 Extra veth-pairs to be created for the container.
641 '';
642 };
643
644 autoStart = mkOption {
645 type = types.bool;
646 default = false;
647 description = lib.mdDoc ''
648 Whether the container is automatically started at boot-time.
649 '';
650 };
651
652 restartIfChanged = mkOption {
653 type = types.bool;
654 default = true;
655 description = lib.mdDoc ''
656 Whether the container should be restarted during a NixOS
657 configuration switch if its definition has changed.
658 '';
659 };
660
661 timeoutStartSec = mkOption {
662 type = types.str;
663 default = "1min";
664 description = lib.mdDoc ''
665 Time for the container to start. In case of a timeout,
666 the container processes get killed.
667 See {manpage}`systemd.time(7)`
668 for more information about the format.
669 '';
670 };
671
672 bindMounts = mkOption {
673 type = with types; attrsOf (submodule bindMountOpts);
674 default = {};
675 example = literalExpression ''
676 { "/home" = { hostPath = "/home/alice";
677 isReadOnly = false; };
678 }
679 '';
680
681 description =
682 lib.mdDoc ''
683 An extra list of directories that is bound to the container.
684 '';
685 };
686
687 allowedDevices = mkOption {
688 type = with types; listOf (submodule allowedDeviceOpts);
689 default = [];
690 example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
691 description = lib.mdDoc ''
692 A list of device nodes to which the containers has access to.
693 '';
694 };
695
696 tmpfs = mkOption {
697 type = types.listOf types.str;
698 default = [];
699 example = [ "/var" ];
700 description = lib.mdDoc ''
701 Mounts a set of tmpfs file systems into the container.
702 Multiple paths can be specified.
703 Valid items must conform to the --tmpfs argument
704 of systemd-nspawn. See systemd-nspawn(1) for details.
705 '';
706 };
707
708 extraFlags = mkOption {
709 type = types.listOf types.str;
710 default = [];
711 example = [ "--drop-capability=CAP_SYS_CHROOT" ];
712 description = lib.mdDoc ''
713 Extra flags passed to the systemd-nspawn command.
714 See systemd-nspawn(1) for details.
715 '';
716 };
717
718 # Removed option. See `checkAssertion` below for the accompanying error message.
719 pkgs = mkOption { visible = false; };
720 } // networkOptions;
721
722 config = let
723 # Throw an error when removed option `pkgs` is used.
724 # Because this is a submodule we cannot use `mkRemovedOptionModule` or option `assertions`.
725 optionPath = "containers.${name}.pkgs";
726 files = showFiles options.pkgs.files;
727 checkAssertion = if options.pkgs.isDefined then throw ''
728 The option definition `${optionPath}' in ${files} no longer has any effect; please remove it.
729
730 Alternatively, you can use the following options:
731 - containers.${name}.nixpkgs
732 This sets the nixpkgs (and thereby the modules, pkgs and lib) that
733 are used for evaluating the container.
734
735 - containers.${name}.config.nixpkgs.pkgs
736 This only sets the `pkgs` argument used inside the container modules.
737 ''
738 else null;
739 in {
740 path = builtins.seq checkAssertion
741 mkIf options.config.isDefined config.config.system.build.toplevel;
742 };
743 }));
744
745 default = {};
746 example = literalExpression
747 ''
748 { webserver =
749 { path = "/nix/var/nix/profiles/webserver";
750 };
751 database =
752 { config =
753 { config, pkgs, ... }:
754 { services.postgresql.enable = true;
755 services.postgresql.package = pkgs.postgresql_14;
756
757 system.stateVersion = "${lib.trivial.release}";
758 };
759 };
760 }
761 '';
762 description = lib.mdDoc ''
763 A set of NixOS system configurations to be run as lightweight
764 containers. Each container appears as a service
765 `container-«name»`
766 on the host system, allowing it to be started and stopped via
767 {command}`systemctl`.
768 '';
769 };
770
771 };
772
773
774 config = mkIf (config.boot.enableContainers) (let
775
776 unit = {
777 description = "Container '%i'";
778
779 unitConfig.RequiresMountsFor = "${stateDirectory}/%i";
780
781 path = [ pkgs.iproute2 ];
782
783 environment = {
784 root = "${stateDirectory}/%i";
785 INSTANCE = "%i";
786 };
787
788 preStart = preStartScript dummyConfig;
789
790 script = startScript dummyConfig;
791
792 postStart = postStartScript dummyConfig;
793
794 restartIfChanged = false;
795
796 serviceConfig = serviceDirectives dummyConfig;
797 };
798 in {
799 warnings =
800 (optional (config.virtualisation.containers.enable && versionOlder config.system.stateVersion "22.05") ''
801 Enabling both boot.enableContainers & virtualisation.containers on system.stateVersion < 22.05 is unsupported.
802 '');
803
804 systemd.targets.multi-user.wants = [ "machines.target" ];
805
806 systemd.services = listToAttrs (filter (x: x.value != null) (
807 # The generic container template used by imperative containers
808 [{ name = "container@"; value = unit; }]
809 # declarative containers
810 ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let
811 containerConfig = cfg // (
812 optionalAttrs cfg.enableTun
813 {
814 allowedDevices = cfg.allowedDevices
815 ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
816 additionalCapabilities = cfg.additionalCapabilities
817 ++ [ "CAP_NET_ADMIN" ];
818 }
819 );
820 in
821 recursiveUpdate unit {
822 preStart = preStartScript containerConfig;
823 script = startScript containerConfig;
824 postStart = postStartScript containerConfig;
825 serviceConfig = serviceDirectives containerConfig;
826 unitConfig.RequiresMountsFor = lib.optional (!containerConfig.ephemeral) "${stateDirectory}/%i";
827 environment.root = if containerConfig.ephemeral then "/run/nixos-containers/%i" else "${stateDirectory}/%i";
828 } // (
829 optionalAttrs containerConfig.autoStart
830 {
831 wantedBy = [ "machines.target" ];
832 wants = [ "network.target" ];
833 after = [ "network.target" ];
834 restartTriggers = [
835 containerConfig.path
836 config.environment.etc."${configurationDirectoryName}/${name}.conf".source
837 ];
838 restartIfChanged = containerConfig.restartIfChanged;
839 }
840 )
841 )) config.containers)
842 ));
843
844 # Generate a configuration file in /etc/nixos-containers for each
845 # container so that container@.target can get the container
846 # configuration.
847 environment.etc =
848 let mkPortStr = p: p.protocol + ":" + (toString p.hostPort) + ":" + (if p.containerPort == null then toString p.hostPort else toString p.containerPort);
849 in mapAttrs' (name: cfg: nameValuePair "${configurationDirectoryName}/${name}.conf"
850 { text =
851 ''
852 SYSTEM_PATH=${cfg.path}
853 ${optionalString cfg.privateNetwork ''
854 PRIVATE_NETWORK=1
855 ${optionalString (cfg.hostBridge != null) ''
856 HOST_BRIDGE=${cfg.hostBridge}
857 ''}
858 ${optionalString (length cfg.forwardPorts > 0) ''
859 HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
860 ''}
861 ${optionalString (cfg.hostAddress != null) ''
862 HOST_ADDRESS=${cfg.hostAddress}
863 ''}
864 ${optionalString (cfg.hostAddress6 != null) ''
865 HOST_ADDRESS6=${cfg.hostAddress6}
866 ''}
867 ${optionalString (cfg.localAddress != null) ''
868 LOCAL_ADDRESS=${cfg.localAddress}
869 ''}
870 ${optionalString (cfg.localAddress6 != null) ''
871 LOCAL_ADDRESS6=${cfg.localAddress6}
872 ''}
873 ''}
874 INTERFACES="${toString cfg.interfaces}"
875 MACVLANS="${toString cfg.macvlans}"
876 ${optionalString cfg.autoStart ''
877 AUTO_START=1
878 ''}
879 EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts +
880 optionalString (cfg.extraFlags != [])
881 (" " + concatStringsSep " " cfg.extraFlags)}"
882 '';
883 }) config.containers;
884
885 # Generate /etc/hosts entries for the containers.
886 networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
887 ''
888 ${head (splitString "/" cfg.localAddress)} ${name}.containers
889 '') config.containers);
890
891 networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];
892
893 services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
894 # Don't manage interfaces created by nixos-container.
895 ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
896 '';
897
898 environment.systemPackages = [
899 nixos-container
900 ];
901
902 boot.kernelModules = [
903 "bridge"
904 "macvlan"
905 "tap"
906 "tun"
907 ];
908 });
909
910 meta.buildDocsInSandbox = false;
911}