# Web file-viewer header residue (not part of the module): at 24.11-pre, 34 kB, view raw
# NixOS module for declarative and imperative NixOS containers.
#
# It declares `boot.isContainer`, `boot.enableContainers` and the
# `containers.<name>` options, and, when `boot.enableContainers` is set,
# generates the `container@` systemd template service, one
# `container@<name>` service per declared container, and a per-container
# environment file under `configurationDirectory`.
{ config, lib, pkgs, ... }@host:

with lib;

let

  # From stateVersion 22.05 on, the directories are prefixed with "nixos-".
  configurationPrefix = optionalString (versionAtLeast config.system.stateVersion "22.05") "nixos-";
  configurationDirectoryName = "${configurationPrefix}containers";
  configurationDirectory = "/etc/${configurationDirectoryName}";
  stateDirectory = "/var/lib/${configurationPrefix}containers";

  # nixos-container built against the directories chosen above.
  nixos-container = pkgs.nixos-container.override {
    inherit stateDirectory configurationDirectory;
  };

  # The container's init script, a small wrapper around the regular
  # NixOS stage-2 init script.
  #
  # cfg: container configuration; only `cfg.extraVeths` is read here.
  # Returns the result of `pkgs.writeScript`; the script takes the path of
  # the real init script as "$1" and sources it after network setup.
  containerInit = (cfg:
    let
      # Shell fragment configuring the container side of one extra veth.
      # `cfg` here is the per-veth option set, shadowing the outer `cfg`.
      renderExtraVeth = (name: cfg:
        ''
          echo "Bringing ${name} up"
          ip link set dev ${name} up
          ${optionalString (cfg.localAddress != null) ''
            echo "Setting ip for ${name}"
            ip addr add ${cfg.localAddress} dev ${name}
          ''}
          ${optionalString (cfg.localAddress6 != null) ''
            echo "Setting ip6 for ${name}"
            ip -6 addr add ${cfg.localAddress6} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress != null) ''
            echo "Setting route to host for ${name}"
            ip route add ${cfg.hostAddress} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress6 != null) ''
            echo "Setting route6 to host for ${name}"
            ip -6 route add ${cfg.hostAddress6} dev ${name}
          ''}
        ''
      );
    in
      pkgs.writeScript "container-init"
      ''
        #! ${pkgs.runtimeShell} -e

        # Exit early if we're asked to shut down.
        trap "exit 0" SIGRTMIN+3

        # Initialise the container side of the veth pair.
        if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
           [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
           [ -n "$HOST_BRIDGE" ]; then
          ip link set host0 name eth0
          ip link set dev eth0 up

          if [ -n "$LOCAL_ADDRESS" ]; then
            ip addr add $LOCAL_ADDRESS dev eth0
          fi
          if [ -n "$LOCAL_ADDRESS6" ]; then
            ip -6 addr add $LOCAL_ADDRESS6 dev eth0
          fi
          if [ -n "$HOST_ADDRESS" ]; then
            ip route add $HOST_ADDRESS dev eth0
            ip route add default via $HOST_ADDRESS
          fi
          if [ -n "$HOST_ADDRESS6" ]; then
            ip -6 route add $HOST_ADDRESS6 dev eth0
            ip -6 route add default via $HOST_ADDRESS6
          fi
        fi

        ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}

        # Start the regular stage 2 script.
        # We source instead of exec to not lose an early stop signal, which is
        # also the only _reliable_ shutdown signal we have since early stop
        # does not execute ExecStop* commands.
        set +e
        . "$1"
      ''
    );

  # systemd-nspawn flag for one extra veth pair; only the name is used.
  nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");

  # Body of the service's main script: prepares the container root,
  # accumulates systemd-nspawn flags from the environment variables
  # (PRIVATE_NETWORK, HOST_ADDRESS, HOST_PORT, ...) and finally execs
  # systemd-nspawn with `containerInit cfg` as the container's init.
  #
  # cfg: reads `extraVeths`, `ephemeral`, `additionalCapabilities`, `tmpfs`.
  startScript = cfg:
    ''
      mkdir -p -m 0755 "$root/etc" "$root/var/lib"
      mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/nixos-containers
      if ! [ -e "$root/etc/os-release" ]; then
        touch "$root/etc/os-release"
      fi

      if ! [ -e "$root/etc/machine-id" ]; then
        touch "$root/etc/machine-id"
      fi

      mkdir -p -m 0755 \
        "/nix/var/nix/profiles/per-container/$INSTANCE" \
        "/nix/var/nix/gcroots/per-container/$INSTANCE"

      cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

      if [ "$PRIVATE_NETWORK" = 1 ]; then
        extraFlags+=" --private-network"
      fi

      if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
         [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
        extraFlags+=" --network-veth"
      fi

      if [ -n "$HOST_PORT" ]; then
        OIFS=$IFS
        IFS=","
        for i in $HOST_PORT
        do
            extraFlags+=" --port=$i"
        done
        IFS=$OIFS
      fi

      if [ -n "$HOST_BRIDGE" ]; then
        extraFlags+=" --network-bridge=$HOST_BRIDGE"
      fi

      extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)}"

      for iface in $INTERFACES; do
        extraFlags+=" --network-interface=$iface"
      done

      for iface in $MACVLANS; do
        extraFlags+=" --network-macvlan=$iface"
      done

      # If the host is 64-bit and the container is 32-bit, add a
      # --personality flag.
      ${optionalString (pkgs.stdenv.hostPlatform.system == "x86_64-linux") ''
        if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
          extraFlags+=" --personality=x86"
        fi
      ''}

      export SYSTEMD_NSPAWN_UNIFIED_HIERARCHY=1

      # Run systemd-nspawn without startup notification (we'll
      # wait for the container systemd to signal readiness)
      # Kill signal handling means systemd-nspawn will pass a system-halt signal
      # to the container systemd when it receives SIGTERM for container shutdown;
      # containerInit and stage2 have to handle this as well.
      exec ${config.systemd.package}/bin/systemd-nspawn \
        --keep-unit \
        -M "$INSTANCE" -D "$root" $extraFlags \
        $EXTRA_NSPAWN_FLAGS \
        --notify-ready=yes \
        --kill-signal=SIGRTMIN+3 \
        --bind-ro=/nix/store \
        --bind-ro=/nix/var/nix/db \
        --bind-ro=/nix/var/nix/daemon-socket \
        --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
        --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
        ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
        --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
        --setenv HOST_BRIDGE="$HOST_BRIDGE" \
        --setenv HOST_ADDRESS="$HOST_ADDRESS" \
        --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
        --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
        --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
        --setenv HOST_PORT="$HOST_PORT" \
        --setenv PATH="$PATH" \
        ${optionalString cfg.ephemeral "--ephemeral"} \
        ${optionalString (cfg.additionalCapabilities != null && cfg.additionalCapabilities != [])
          ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"''
        } \
        ${optionalString (cfg.tmpfs != null && cfg.tmpfs != [])
          ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}''
        } \
        ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
    '';

  # Body of the service's preStart: terminates a leftover machined
  # registration and deletes leftover host-side interfaces. Every command
  # is best-effort (`|| true`).
  #
  # cfg: only `cfg.extraVeths` is read.
  preStartScript = cfg:
    ''
      # Clean up existing machined registration and interfaces.
      machinectl terminate "$INSTANCE" 2> /dev/null || true

      if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
         [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
        ip link del dev "ve-$INSTANCE" 2> /dev/null || true
        ip link del dev "vb-$INSTANCE" 2> /dev/null || true
      fi

      ${concatStringsSep "\n" (
        mapAttrsToList (name: cfg:
          "ip link del dev ${name} 2> /dev/null || true "
        ) cfg.extraVeths
      )}
    '';

  # Body of the service's postStart: configures the host side of the
  # primary veth pair (unless a bridge is used) and of every extra veth.
  #
  # cfg: reads the address options and `extraVeths`.
  postStartScript = (cfg:
    let
      # Emits "<ipcmd> add <value> dev $ifaceHost".
      # When cfg.<attribute> is null the value comes from the shell
      # expression `variable`, guarded by a non-empty check at run time;
      # otherwise the configured value is substituted at evaluation time.
      ipcall = cfg: ipcmd: variable: attribute:
        if cfg.${attribute} == null then
          ''
            if [ -n "${variable}" ]; then
              ${ipcmd} add ${variable} dev $ifaceHost
            fi
          ''
        else
          "${ipcmd} add ${cfg.${attribute}} dev $ifaceHost";
      # Host-side setup for one extra veth: either enslave it to the
      # configured bridge, or bring it up and add addresses and routes.
      renderExtraVeth = name: cfg:
        if cfg.hostBridge != null then
          ''
            # Add ${name} to bridge ${cfg.hostBridge}
            ip link set dev ${name} master ${cfg.hostBridge} up
          ''
        else
          ''
            echo "Bring ${name} up"
            ip link set dev ${name} up
            # Set IPs and routes for ${name}
            ${optionalString (cfg.hostAddress != null) ''
              ip addr add ${cfg.hostAddress} dev ${name}
            ''}
            ${optionalString (cfg.hostAddress6 != null) ''
              ip -6 addr add ${cfg.hostAddress6} dev ${name}
            ''}
            ${optionalString (cfg.localAddress != null) ''
              ip route add ${cfg.localAddress} dev ${name}
            ''}
            ${optionalString (cfg.localAddress6 != null) ''
              ip -6 route add ${cfg.localAddress6} dev ${name}
            ''}
          '';
    in
      ''
        if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
           [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
          if [ -z "$HOST_BRIDGE" ]; then
            ifaceHost=ve-$INSTANCE
            ip link set dev $ifaceHost up

            ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
            ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
            ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
            ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
          fi
        fi
        ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
      ''
  );

  # serviceConfig shared by the template unit and the per-container units.
  #
  # cfg: reads `ephemeral`, `timeoutStartSec` and `allowedDevices`.
  serviceDirectives = cfg: {
    ExecReload = pkgs.writeScript "reload-container"
      ''
        #! ${pkgs.runtimeShell} -e
        ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
          bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
      '';

    SyslogIdentifier = "container %i";

    # The leading "-" makes a missing file non-fatal.
    EnvironmentFile = "-${configurationDirectory}/%i.conf";

    Type = "notify";

    RuntimeDirectory = lib.optional cfg.ephemeral "${configurationDirectoryName}/%i";

    # Note that on reboot, systemd-nspawn returns 133, so this
    # unit will be restarted. On poweroff, it returns 0, so the
    # unit won't be restarted.
    RestartForceExitStatus = "133";
    SuccessExitStatus = "133";

    # Some containers take long to start
    # especially when you automatically start many at once
    TimeoutStartSec = cfg.timeoutStartSec;

    Restart = "on-failure";

    Slice = "machine.slice";
    Delegate = true;

    # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown
    # signal (SIGRTMIN+3) for the inner container.
    KillMode = "mixed";
    KillSignal = "TERM";

    DevicePolicy = "closed";
    DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
  };

  kernelVersion = config.boot.kernelPackages.kernel.version;

  # Submodule for one `bindMounts` entry; `mountPoint` defaults to the
  # attribute name.
  bindMountOpts = { name, ... }: {

    options = {
      mountPoint = mkOption {
        example = "/mnt/usb";
        type = types.str;
        description = "Mount point on the container file system.";
      };
      hostPath = mkOption {
        default = null;
        example = "/home/alice";
        type = types.nullOr types.str;
        description = "Location of the host path to be mounted.";
      };
      isReadOnly = mkOption {
        default = true;
        type = types.bool;
        description = "Determine whether the mounted path will be accessed in read-only mode.";
      };
    };

    config = {
      mountPoint = mkDefault name;
    };

  };

  # Submodule for one `allowedDevices` entry.
  allowedDeviceOpts = { ... }: {
    options = {
      node = mkOption {
        example = "/dev/net/tun";
        type = types.str;
        description = "Path to device node";
      };
      modifier = mkOption {
        example = "rw";
        type = types.str;
        description = ''
          Device node access modifier. Takes a combination
          `r` (read), `w` (write), and
          `m` (mknod). See the
          `systemd.resource-control(5)` man page for more
          information.'';
      };
    };
  };

  # Renders one bind mount as " --bind=..." or " --bind-ro=..." (note the
  # leading space); the "hostPath:" part is omitted when hostPath is null.
  mkBindFlag = d:
    let flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
        mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
    in flagPrefix + mountstr ;

  # Concatenates the flags for every value of the `bindMounts` attrset.
  mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);

  # Network options shared by the container itself and by each entry of
  # `extraVeths`.
  networkOptions = {
    hostBridge = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "br0";
      description = ''
        Put the host-side of the veth-pair into the named bridge.
        Only one of hostAddress* or hostBridge can be given.
      '';
    };

    forwardPorts = mkOption {
      type = types.listOf (types.submodule {
        options = {
          protocol = mkOption {
            type = types.str;
            default = "tcp";
            description = "The protocol specifier for port forwarding between host and container";
          };
          hostPort = mkOption {
            type = types.int;
            description = "Source port of the external interface on host";
          };
          containerPort = mkOption {
            type = types.nullOr types.int;
            default = null;
            description = "Target port of container";
          };
        };
      });
      default = [];
      example = [ { protocol = "tcp"; hostPort = 8080; containerPort = 80; } ];
      description = ''
        List of forwarded ports from host to container. Each forwarded port
        is specified by protocol, hostPort and containerPort. By default,
        protocol is tcp and hostPort and containerPort are assumed to be
        the same if containerPort is not explicitly given.
      '';
    };


    hostAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.1";
      description = ''
        The IPv4 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    hostAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::1";
      description = ''
        The IPv6 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    localAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.2";
      description = ''
        The IPv4 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /32 and routing is
        set up from localAddress to hostAddress and back.
      '';
    };

    localAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::2";
      description = ''
        The IPv6 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /128 and routing is
        set up from localAddress6 to hostAddress6 and back.
      '';
    };

  };

  # Stand-in configuration used to render the generic `container@`
  # template unit, for which no declarative container config exists.
  dummyConfig =
    {
      extraVeths = {};
      additionalCapabilities = [];
      ephemeral = false;
      timeoutStartSec = "1min";
      allowedDevices = [];
      hostAddress = null;
      hostAddress6 = null;
      localAddress = null;
      localAddress6 = null;
      tmpfs = null;
    };

in

{
  options = {

    boot.isContainer = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether this NixOS machine is a lightweight container running
        in another NixOS system.
      '';
    };

    boot.enableContainers = mkOption {
      type = types.bool;
      default = true;
      description = ''
        Whether to enable support for NixOS containers. Defaults to true
        (at no cost if containers are not actually used).
      '';
    };

    containers = mkOption {
      type = types.attrsOf (types.submodule (
        { config, options, name, ... }:
        {
          options = {
            config = mkOption {
              description = ''
                A specification of the desired configuration of this
                container, as a NixOS module.
              '';
              type = lib.mkOptionType {
                name = "Toplevel NixOS config";
                # Evaluates all definitions as one NixOS system using the
                # container's own `nixpkgs` path, with `extraConfig`
                # prepended to the module list.
                merge = loc: defs: (import "${toString config.nixpkgs}/nixos/lib/eval-config.nix" {
                  modules =
                    let
                      extraConfig = { options, ... }: {
                        _file = "module at ${__curPos.file}:${toString __curPos.line}";
                        config = {
                          nixpkgs = if options.nixpkgs?hostPlatform && host.options.nixpkgs.hostPlatform.isDefined
                                    then { inherit (host.config.nixpkgs) hostPlatform; }
                                    else { inherit (host.config.nixpkgs) localSystem; }
                          ;
                          boot.isContainer = true;
                          networking.hostName = mkDefault name;
                          networking.useDHCP = false;
                          assertions = [
                            {
                              # Only kernels strictly older than 5.8 are
                              # subject to the name-length limit; the
                              # message below states that 5.8 itself is
                              # sufficient, so 5.8 must not trip this.
                              assertion =
                                (versionOlder kernelVersion "5.8")
                                -> config.privateNetwork
                                -> stringLength name <= 11;
                              message = ''
                                Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
                                not be longer than 11 characters, because the container's interface name is derived from it.
                                You should either make the container name shorter or upgrade to a more recent kernel that
                                supports interface altnames (i.e. at least Linux 5.8 - please see https://github.com/NixOS/nixpkgs/issues/38509
                                for details).
                              '';
                            }
                            {
                              assertion = !lib.strings.hasInfix "_" name;
                              message = ''
                                Names containing underscores are not allowed in nixos-containers. Please rename the container '${name}'
                              '';
                            }
                          ];
                        };
                      };
                    in [ extraConfig ] ++ (map (x: x.value) defs);
                  prefix = [ "containers" name ];
                  inherit (config) specialArgs;

                  # The system is inherited from the host above.
                  # Set it to null, to remove the "legacy" entrypoint's non-hermetic default.
                  system = null;
                }).config;
              };
            };

            path = mkOption {
              type = types.path;
              example = "/nix/var/nix/profiles/per-container/webserver";
              description = ''
                As an alternative to specifying
                {option}`config`, you can specify the path to
                the evaluated NixOS system configuration, typically a
                symlink to a system profile.
              '';
            };

            additionalCapabilities = mkOption {
              type = types.listOf types.str;
              default = [];
              example = [ "CAP_NET_ADMIN" "CAP_MKNOD" ];
              description = ''
                Grant additional capabilities to the container. See the
                capabilities(7) and systemd-nspawn(1) man pages for more
                information.
              '';
            };

            nixpkgs = mkOption {
              type = types.path;
              default = pkgs.path;
              defaultText = literalExpression "pkgs.path";
              description = ''
                A path to the nixpkgs that provide the modules, pkgs and lib for evaluating the container.

                To only change the `pkgs` argument used inside the container modules,
                set the `nixpkgs.*` options in the container {option}`config`.
                Setting `config.nixpkgs.pkgs = pkgs` speeds up the container evaluation
                by reusing the system pkgs, but the `nixpkgs.config` option in the
                container config is ignored in this case.
              '';
            };

            specialArgs = mkOption {
              type = types.attrsOf types.unspecified;
              default = {};
              description = ''
                A set of special arguments to be passed to NixOS modules.
                This will be merged into the `specialArgs` used to evaluate
                the NixOS configurations.
              '';
            };

            ephemeral = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Runs container in ephemeral mode with the empty root filesystem at boot.
                This way container will be bootstrapped from scratch on each boot
                and will be cleaned up on shutdown leaving no traces behind.
                Useful for completely stateless, reproducible containers.

                Note that this option might require to do some adjustments to the container configuration,
                e.g. you might want to set
                {var}`systemd.network.networks.$interface.dhcpV4Config.ClientIdentifier` to "mac"
                if you use {var}`macvlans` option.
                This way dhcp client identifier will be stable between the container restarts.

                Note that the container journal will not be linked to the host if this option is enabled.
              '';
            };

            enableTun = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Allows the container to create and setup tunnel interfaces
                by granting the `NET_ADMIN` capability and
                enabling access to `/dev/net/tun`.
              '';
            };

            privateNetwork = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Whether to give the container its own private virtual
                Ethernet interface. The interface is called
                `eth0`, and is hooked up to the interface
                `ve-«container-name»`
                on the host. If this option is not set, then the
                container shares the network interfaces of the host,
                and can bind to any port on any interface.
              '';
            };

            interfaces = mkOption {
              type = types.listOf types.str;
              default = [];
              example = [ "eth1" "eth2" ];
              description = ''
                The list of interfaces to be moved into the container.
              '';
            };

            macvlans = mkOption {
              type = types.listOf types.str;
              default = [];
              example = [ "eth1" "eth2" ];
              description = ''
                The list of host interfaces from which macvlans will be
                created. For each interface specified, a macvlan interface
                will be created and moved to the container.
              '';
            };

            extraVeths = mkOption {
              type = with types; attrsOf (submodule { options = networkOptions; });
              default = {};
              description = ''
                Extra veth-pairs to be created for the container.
              '';
            };

            autoStart = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Whether the container is automatically started at boot-time.
              '';
            };

            restartIfChanged = mkOption {
              type = types.bool;
              default = true;
              description = ''
                Whether the container should be restarted during a NixOS
                configuration switch if its definition has changed.
              '';
            };

            timeoutStartSec = mkOption {
              type = types.str;
              default = "1min";
              description = ''
                Time for the container to start. In case of a timeout,
                the container processes get killed.
                See {manpage}`systemd.time(7)`
                for more information about the format.
              '';
            };

            bindMounts = mkOption {
              type = with types; attrsOf (submodule bindMountOpts);
              default = {};
              example = literalExpression ''
                { "/home" = { hostPath = "/home/alice";
                              isReadOnly = false; };
                }
              '';

              description = ''
                An extra list of directories that is bound to the container.
              '';
            };

            allowedDevices = mkOption {
              type = with types; listOf (submodule allowedDeviceOpts);
              default = [];
              example = [ { node = "/dev/net/tun"; modifier = "rw"; } ];
              description = ''
                A list of device nodes to which the containers has access to.
              '';
            };

            tmpfs = mkOption {
              type = types.listOf types.str;
              default = [];
              example = [ "/var" ];
              description = ''
                Mounts a set of tmpfs file systems into the container.
                Multiple paths can be specified.
                Valid items must conform to the --tmpfs argument
                of systemd-nspawn. See systemd-nspawn(1) for details.
              '';
            };

            extraFlags = mkOption {
              type = types.listOf types.str;
              default = [];
              example = [ "--drop-capability=CAP_SYS_CHROOT" ];
              description = ''
                Extra flags passed to the systemd-nspawn command.
                See systemd-nspawn(1) for details.
              '';
            };

            # Removed option. See `checkAssertion` below for the accompanying error message.
            pkgs = mkOption { visible = false; };
          } // networkOptions;

          config = let
            # Throw an error when removed option `pkgs` is used.
            # Because this is a submodule we cannot use `mkRemovedOptionModule` or option `assertions`.
            optionPath = "containers.${name}.pkgs";
            files = showFiles options.pkgs.files;
            checkAssertion = if options.pkgs.isDefined then throw ''
              The option definition `${optionPath}' in ${files} no longer has any effect; please remove it.

              Alternatively, you can use the following options:
              - containers.${name}.nixpkgs
                This sets the nixpkgs (and thereby the modules, pkgs and lib) that
                are used for evaluating the container.

              - containers.${name}.config.nixpkgs.pkgs
                This only sets the `pkgs` argument used inside the container modules.
            ''
            else null;
          in {
            # `seq` forces checkAssertion (and thus the throw) before the
            # path definition is produced.
            path = builtins.seq checkAssertion
              mkIf options.config.isDefined config.config.system.build.toplevel;
          };
        }));

      default = {};
      example = literalExpression
        ''
          { webserver =
              { path = "/nix/var/nix/profiles/webserver";
              };
            database =
              { config =
                  { config, pkgs, ... }:
                  { services.postgresql.enable = true;
                    services.postgresql.package = pkgs.postgresql_14;

                    system.stateVersion = "${lib.trivial.release}";
                  };
              };
          }
        '';
      description = ''
        A set of NixOS system configurations to be run as lightweight
        containers. Each container appears as a service
        `container-«name»`
        on the host system, allowing it to be started and stopped via
        {command}`systemctl`.
      '';
    };

  };


  config = mkMerge [
    {
      warnings = optional (!config.boot.enableContainers && config.containers != {})
        "containers.<name> is used, but boot.enableContainers is false. To use containers.<name>, set boot.enableContainers to true.";
    }

    (mkIf (config.boot.enableContainers) (let
      # Generic template unit, rendered from `dummyConfig`; the
      # per-container units below are built on top of it.
      unit = {
        description = "Container '%i'";

        unitConfig.RequiresMountsFor = "${stateDirectory}/%i";

        path = [ pkgs.iproute2 ];

        environment = {
          root = "${stateDirectory}/%i";
          INSTANCE = "%i";
        };

        preStart = preStartScript dummyConfig;

        script = startScript dummyConfig;

        postStart = postStartScript dummyConfig;

        restartIfChanged = false;

        serviceConfig = serviceDirectives dummyConfig;
      };
    in {
      warnings =
        (optional (config.virtualisation.containers.enable && versionOlder config.system.stateVersion "22.05") ''
          Enabling both boot.enableContainers & virtualisation.containers on system.stateVersion < 22.05 is unsupported.
        '');

      systemd.targets.multi-user.wants = [ "machines.target" ];

      systemd.services = listToAttrs (filter (x: x.value != null) (
        # The generic container template used by imperative containers
        [{ name = "container@"; value = unit; }]
        # declarative containers
        ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (let
          # `enableTun` adds the tun device and CAP_NET_ADMIN on top of
          # the user's configuration.
          containerConfig = cfg // (
            optionalAttrs cfg.enableTun
              {
                allowedDevices = cfg.allowedDevices
                  ++ [ { node = "/dev/net/tun"; modifier = "rw"; } ];
                additionalCapabilities = cfg.additionalCapabilities
                  ++ [ "CAP_NET_ADMIN" ];
              }
          );
        in
          recursiveUpdate unit {
            preStart = preStartScript containerConfig;
            script = startScript containerConfig;
            postStart = postStartScript containerConfig;
            serviceConfig = serviceDirectives containerConfig;
            unitConfig.RequiresMountsFor = lib.optional (!containerConfig.ephemeral) "${stateDirectory}/%i"
              ++ builtins.map
                (d: if d.hostPath != null then d.hostPath else d.mountPoint)
                (builtins.attrValues cfg.bindMounts);
            environment.root = if containerConfig.ephemeral then "/run/nixos-containers/%i" else "${stateDirectory}/%i";
          } // (
            optionalAttrs containerConfig.autoStart
              {
                wantedBy = [ "machines.target" ];
                wants = [ "network.target" ];
                after = [ "network.target" ];
                restartTriggers = [
                  containerConfig.path
                  config.environment.etc."${configurationDirectoryName}/${name}.conf".source
                ];
                restartIfChanged = containerConfig.restartIfChanged;
              }
          )
        )) config.containers)
      ));

      # Generate a configuration file in /etc/nixos-containers for each
      # container so that the container@ service can read the container
      # configuration through its EnvironmentFile.
      environment.etc =
        let mkPortStr = p: p.protocol + ":" + (toString p.hostPort) + ":" + (if p.containerPort == null then toString p.hostPort else toString p.containerPort);
        in mapAttrs' (name: cfg: nameValuePair "${configurationDirectoryName}/${name}.conf"
        { text =
            ''
              SYSTEM_PATH=${cfg.path}
              ${optionalString cfg.privateNetwork ''
                PRIVATE_NETWORK=1
                ${optionalString (cfg.hostBridge != null) ''
                  HOST_BRIDGE=${cfg.hostBridge}
                ''}
                ${optionalString (length cfg.forwardPorts > 0) ''
                  HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
                ''}
                ${optionalString (cfg.hostAddress != null) ''
                  HOST_ADDRESS=${cfg.hostAddress}
                ''}
                ${optionalString (cfg.hostAddress6 != null) ''
                  HOST_ADDRESS6=${cfg.hostAddress6}
                ''}
                ${optionalString (cfg.localAddress != null) ''
                  LOCAL_ADDRESS=${cfg.localAddress}
                ''}
                ${optionalString (cfg.localAddress6 != null) ''
                  LOCAL_ADDRESS6=${cfg.localAddress6}
                ''}
              ''}
              INTERFACES="${toString cfg.interfaces}"
              MACVLANS="${toString cfg.macvlans}"
              ${optionalString cfg.autoStart ''
                AUTO_START=1
              ''}
              EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts +
                optionalString (cfg.extraFlags != [])
                  (" " + concatStringsSep " " cfg.extraFlags)}"
            '';
        }) config.containers;

      # Generate /etc/hosts entries for the containers.
      networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
        ''
          ${head (splitString "/" cfg.localAddress)} ${name}.containers
        '') config.containers);

      networking.dhcpcd.denyInterfaces = [ "ve-*" "vb-*" ];

      services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
        # Don't manage interfaces created by nixos-container.
        ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
      '';

      environment.systemPackages = [
        nixos-container
      ];

      boot.kernelModules = [
        "bridge"
        "macvlan"
        "tap"
        "tun"
      ];
    }))
  ];

  meta.buildDocsInSandbox = false;
}