# at master 42 kB view raw  (repository web-view header captured with the file; not part of the module)
# NixOS module implementing `containers.<name>`: NixOS systems run as
# lightweight containers through systemd-nspawn.  It defines the
# `container@` template unit (used by imperative containers), one
# `container@<name>` unit per declarative container, and the per-container
# environment file under /etc that parameterises those units.
{
  config,
  lib,
  pkgs,
  ...
}@host:

with lib;

let

  # Since stateVersion 22.05 the on-disk directories carry a "nixos-" prefix.
  configurationPrefix = optionalString (versionAtLeast config.system.stateVersion "22.05") "nixos-";
  configurationDirectoryName = "${configurationPrefix}containers";
  configurationDirectory = "/etc/${configurationDirectoryName}";
  stateDirectory = "/var/lib/${configurationPrefix}containers";

  # The `nixos-container` CLI, built against the directories chosen above.
  nixos-container = pkgs.nixos-container.override {
    inherit stateDirectory configurationDirectory;
  };

  # The container's init script, a small wrapper around the regular
  # NixOS stage-2 init script.
  #
  # Takes the container configuration `cfg` and returns a script derivation.
  # The script configures `eth0` (renamed from `host0`) from the
  # HOST_*/LOCAL_* environment variables, brings up every interface in
  # `cfg.extraVeths`, and then sources the stage-2 init passed as "$1".
  containerInit = (
    cfg:
    let
      # Shell fragment that brings up one extra veth (container side) and
      # applies whichever of its addresses/routes are non-null.
      renderExtraVeth = (
        name: cfg: ''
          echo "Bringing ${name} up"
          ip link set dev ${name} up
          ${optionalString (cfg.localAddress != null) ''
            echo "Setting ip for ${name}"
            ip addr add ${cfg.localAddress} dev ${name}
          ''}
          ${optionalString (cfg.localAddress6 != null) ''
            echo "Setting ip6 for ${name}"
            ip -6 addr add ${cfg.localAddress6} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress != null) ''
            echo "Setting route to host for ${name}"
            ip route add ${cfg.hostAddress} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress6 != null) ''
            echo "Setting route6 to host for ${name}"
            ip -6 route add ${cfg.hostAddress6} dev ${name}
          ''}
        ''
      );
    in
    pkgs.writeScript "container-init" ''
      #! ${pkgs.runtimeShell} -e

      # Exit early if we're asked to shut down.
      trap "exit 0" SIGRTMIN+3

      # Initialise the container side of the veth pair.
      if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
         [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
         [ -n "$HOST_BRIDGE" ]; then
        ip link set host0 name eth0
        ip link set dev eth0 up

        if [ -n "$LOCAL_ADDRESS" ]; then
          ip addr add $LOCAL_ADDRESS dev eth0
        fi
        if [ -n "$LOCAL_ADDRESS6" ]; then
          ip -6 addr add $LOCAL_ADDRESS6 dev eth0
        fi
        if [ -n "$HOST_ADDRESS" ]; then
          ip route add $HOST_ADDRESS dev eth0
          ip route add default via $HOST_ADDRESS
        fi
        if [ -n "$HOST_ADDRESS6" ]; then
          ip -6 route add $HOST_ADDRESS6 dev eth0
          ip -6 route add default via $HOST_ADDRESS6
        fi
      fi

      ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}

      # Start the regular stage 2 script.
      # We source instead of exec to not lose an early stop signal, which is
      # also the only _reliable_ shutdown signal we have since early stop
      # does not execute ExecStop* commands.
      set +e
      . "$1"
    ''
  );

  # systemd-nspawn flag for one extra veth; the per-veth config is unused here.
  nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");

  # Body of the unit's main script.  Prepares "$root", collects nspawn flags
  # from the environment (populated from the per-container .conf file) and
  # from `cfg`, then execs systemd-nspawn with `containerInit cfg` as init.
  startScript = cfg: ''
    # Declare root explicitly to avoid shellcheck warnings, it comes from the env
    declare root

    mkdir -p "$root/etc" "$root/var/lib"
    chmod 0755 "$root/etc" "$root/var/lib"
    mkdir -p "$root/var/lib/private" "$root/root" /run/nixos-containers
    chmod 0700 "$root/var/lib/private" "$root/root" /run/nixos-containers
    if ! [ -e "$root/etc/os-release" ] && ! [ -h "$root/etc/os-release" ]; then
      touch "$root/etc/os-release"
    fi

    if ! [ -e "$root/etc/machine-id" ]; then
      touch "$root/etc/machine-id"
    fi

    mkdir -p \
      "/nix/var/nix/profiles/per-container/$INSTANCE" \
      "/nix/var/nix/gcroots/per-container/$INSTANCE"
    chmod 0755 \
      "/nix/var/nix/profiles/per-container/$INSTANCE" \
      "/nix/var/nix/gcroots/per-container/$INSTANCE"

    cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

    if [ -n "$FLAKE" ] && [ ! -e "/nix/var/nix/profiles/per-container/$INSTANCE/system" ]; then
      # we create the etc/nixos-container config file, then if we utilize the update function, we can then build all the necessary system files for the container
      ${lib.getExe nixos-container} update "$INSTANCE"
    fi

    declare -a extraFlags

    if [ "$PRIVATE_NETWORK" = 1 ]; then
      extraFlags+=("--private-network")
    fi

    NIX_BIND_OPT=""
    if [ -n "$PRIVATE_USERS" ]; then
      extraFlags+=("--private-users=$PRIVATE_USERS")
      if [[
        "$PRIVATE_USERS" = "pick"
        || ("$PRIVATE_USERS" =~ ^[[:digit:]]+$ && "$PRIVATE_USERS" -gt 0)
      ]]; then
        # when user namespacing is enabled, we use `idmap` mount option so that
        # bind mounts under /nix get proper owner (and not nobody/nogroup).
        NIX_BIND_OPT=":idmap"
      fi
    fi

    if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
       [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
      extraFlags+=("--network-veth")
    fi

    if [ -n "$HOST_PORT" ]; then
      OIFS=$IFS
      IFS=","
      for i in $HOST_PORT
      do
          extraFlags+=("--port=$i")
      done
      IFS=$OIFS
    fi

    if [ -n "$HOST_BRIDGE" ]; then
      extraFlags+=("--network-bridge=$HOST_BRIDGE")
    fi

    if [ -n "$NETWORK_NAMESPACE_PATH" ]; then
      extraFlags+=("--network-namespace-path=$NETWORK_NAMESPACE_PATH")
    fi

    extraFlags+=(${lib.escapeShellArgs (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)})

    for iface in $INTERFACES; do
      extraFlags+=("--network-interface=$iface")
    done

    for iface in $MACVLANS; do
      extraFlags+=("--network-macvlan=$iface")
    done

    # If the host is 64-bit and the container is 32-bit, add a
    # --personality flag.
    ${optionalString (pkgs.stdenv.hostPlatform.system == "x86_64-linux") ''
      if [ "$(< "''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system")" = i686-linux ]; then
        extraFlags+=("--personality=x86")
      fi
    ''}

    export SYSTEMD_NSPAWN_UNIFIED_HIERARCHY=1

    # Run systemd-nspawn without startup notification (we'll
    # wait for the container systemd to signal readiness)
    # Kill signal handling means systemd-nspawn will pass a system-halt signal
    # to the container systemd when it receives SIGTERM for container shutdown;
    # containerInit and stage2 have to handle this as well.
    # TODO: fix shellcheck issue properly
    # shellcheck disable=SC2086
    exec ${config.systemd.package}/bin/systemd-nspawn \
      --keep-unit \
      -M "$INSTANCE" -D "$root" "''${extraFlags[@]}" \
      --notify-ready=yes \
      --kill-signal=SIGRTMIN+3 \
      --bind-ro=/nix/store:/nix/store$NIX_BIND_OPT \
      --bind-ro=/nix/var/nix/db:/nix/var/nix/db$NIX_BIND_OPT \
      --bind-ro=/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket$NIX_BIND_OPT \
      --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles$NIX_BIND_OPT" \
      --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots$NIX_BIND_OPT" \
      ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
      --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
      --setenv PRIVATE_USERS="$PRIVATE_USERS" \
      --setenv HOST_BRIDGE="$HOST_BRIDGE" \
      --setenv HOST_ADDRESS="$HOST_ADDRESS" \
      --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
      --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
      --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
      --setenv HOST_PORT="$HOST_PORT" \
      --setenv PATH="$PATH" \
      ${optionalString cfg.ephemeral "--ephemeral"} \
      ${
        optionalString (
          cfg.additionalCapabilities != null && cfg.additionalCapabilities != [ ]
        ) ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"''
      } \
      ${
        optionalString (
          cfg.tmpfs != null && cfg.tmpfs != [ ]
        ) ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}''
      } \
      $EXTRA_NSPAWN_FLAGS \
      ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
  '';

  # Body of the unit's preStart: best-effort removal of a previous machined
  # registration and of host-side interfaces (`ve-*`, `vb-*`, extra veths).
  # Every command tolerates failure (`|| true`).
  preStartScript = cfg: ''
    # Clean up existing machined registration and interfaces.
    machinectl terminate "$INSTANCE" 2> /dev/null || true

    if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
       [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
      ip link del dev "ve-$INSTANCE" 2> /dev/null || true
      ip link del dev "vb-$INSTANCE" 2> /dev/null || true
    fi

    ${concatStringsSep "\n" (
      mapAttrsToList (name: cfg: "ip link del dev ${name} 2> /dev/null || true ") cfg.extraVeths
    )}
  '';

  # Body of the unit's postStart: configures the host side of the veth pair
  # (only when no bridge is used) and the host side of every extra veth.
  postStartScript = (
    cfg:
    let
      # Emits `<ipcmd> add <value> dev "$ifaceHost"`.  When `cfg.<attribute>`
      # is null the value comes from the shell variable `variable` at run
      # time (guarded by a non-empty check); otherwise the configured value
      # is baked into the script.
      ipcall =
        cfg: ipcmd: variable: attribute:
        if cfg.${attribute} == null then
          ''
            if [ -n "${variable}" ]; then
              ${ipcmd} add "${variable}" dev "$ifaceHost"
            fi
          ''
        else
          # An indented string (opened by `''`) that begins with an interpolation.
          ''${ipcmd} add ${cfg.${attribute}} dev "$ifaceHost"'';
      # Host-side setup for one extra veth: either enslave it to its bridge,
      # or bring it up and apply its non-null addresses and routes.
      renderExtraVeth =
        name: cfg:
        if cfg.hostBridge != null then
          ''
            # Add ${name} to bridge ${cfg.hostBridge}
            ip link set dev "${name}" master "${cfg.hostBridge}" up
          ''
        else
          ''
            echo "Bring ${name} up"
            ip link set dev "${name}" up
            # Set IPs and routes for ${name}
            ${optionalString (cfg.hostAddress != null) ''
              ip addr add ${cfg.hostAddress} dev "${name}"
            ''}
            ${optionalString (cfg.hostAddress6 != null) ''
              ip -6 addr add ${cfg.hostAddress6} dev "${name}"
            ''}
            ${optionalString (cfg.localAddress != null) ''
              ip route add ${cfg.localAddress} dev "${name}"
            ''}
            ${optionalString (cfg.localAddress6 != null) ''
              ip -6 route add ${cfg.localAddress6} dev "${name}"
            ''}
          '';
    in
    ''
      if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
         [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
        if [ -z "$HOST_BRIDGE" ]; then
          ifaceHost=ve-$INSTANCE
          ip link set dev "$ifaceHost" up

          ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
          ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
          ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
          ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
        fi
      fi
      ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
    ''
  );

  # `serviceConfig` for a container unit, derived from the container `cfg`.
  serviceDirectives = cfg: {
    ExecReload = pkgs.writeScript "reload-container" ''
      #! ${pkgs.runtimeShell} -e
      ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
        bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
    '';

    SyslogIdentifier = "container %i";

    # Leading "-": a missing environment file is not an error.
    EnvironmentFile = "-${configurationDirectory}/%i.conf";

    Type = "notify";

    # NOTE(review): for ephemeral containers `environment.root` is hard-coded
    # to /run/nixos-containers/%i, while this directory name follows
    # `configurationDirectoryName` ("containers" before stateVersion 22.05).
    # Confirm whether the two are meant to coincide on old state versions.
    RuntimeDirectory = lib.optional cfg.ephemeral "${configurationDirectoryName}/%i";

    # Note that on reboot, systemd-nspawn returns 133, so this
    # unit will be restarted. On poweroff, it returns 0, so the
    # unit won't be restarted.
    RestartForceExitStatus = "133";
    SuccessExitStatus = "133";

    # Some containers take long to start
    # especially when you automatically start many at once
    TimeoutStartSec = cfg.timeoutStartSec;

    Restart = "on-failure";

    Slice = "machine.slice";
    Delegate = true;

    # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown
    # signal (SIGRTMIN+3) for the inner container.
    KillMode = "mixed";
    KillSignal = "TERM";

    DevicePolicy = "closed";
    DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
  };

  # Host kernel version; used by the container-name length assertion.
  kernelVersion = config.boot.kernelPackages.kernel.version;

  # Submodule for one `bindMounts` entry.  The attribute name is the default
  # mount point.
  bindMountOpts =
    { name, ... }:
    {

      options = {
        mountPoint = mkOption {
          example = "/mnt/usb";
          type = types.str;
          description = "Mount point on the container file system.";
        };
        hostPath = mkOption {
          default = null;
          example = "/home/alice";
          type = types.nullOr types.str;
          description = "Location of the host path to be mounted.";
        };
        isReadOnly = mkOption {
          default = true;
          type = types.bool;
          description = "Determine whether the mounted path will be accessed in read-only mode.";
        };
      };

      config = {
        mountPoint = mkDefault name;
      };

    };

  # Submodule for one `allowedDevices` entry.
  allowedDeviceOpts =
    { ... }:
    {
      options = {
        node = mkOption {
          example = "/dev/net/tun";
          type = types.str;
          description = "Path to device node";
        };
        modifier = mkOption {
          example = "rw";
          type = types.str;
          description = ''
            Device node access modifier. Takes a combination
            `r` (read), `w` (write), and
            `m` (mknod). See the
            {manpage}`systemd.resource-control(5)` man page for more
            information.'';
        };
      };
    };

  # Renders one bind mount as a systemd-nspawn flag with a leading space:
  # " --bind-ro=<spec>" or " --bind=<spec>", where <spec> is
  # "hostPath:mountPoint" or just "mountPoint" when hostPath is null.
  mkBindFlag =
    d:
    let
      flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
      mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
    in
    flagPrefix + mountstr;

  # Concatenates the flags for every entry of a `bindMounts` attrset.
  mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);

  # Network options shared by the container itself and by each `extraVeths`
  # entry.
  networkOptions = {
    hostBridge = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "br0";
      description = ''
        Put the host-side of the veth-pair into the named bridge.
        Only one of hostAddress* or hostBridge can be given.
      '';
    };

    forwardPorts = mkOption {
      type = types.listOf (
        types.submodule {
          options = {
            protocol = mkOption {
              type = types.str;
              default = "tcp";
              description = "The protocol specifier for port forwarding between host and container";
            };
            hostPort = mkOption {
              type = types.port;
              description = "Source port of the external interface on host";
            };
            containerPort = mkOption {
              type = types.nullOr types.port;
              default = null;
              description = "Target port of container";
            };
          };
        }
      );
      default = [ ];
      example = [
        {
          protocol = "tcp";
          hostPort = 8080;
          containerPort = 80;
        }
      ];
      description = ''
        List of forwarded ports from host to container. Each forwarded port
        is specified by protocol, hostPort and containerPort. By default,
        protocol is tcp and hostPort and containerPort are assumed to be
        the same if containerPort is not explicitly given.
      '';
    };

    hostAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.1";
      description = ''
        The IPv4 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    hostAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::1";
      description = ''
        The IPv6 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    localAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.2";
      description = ''
        The IPv4 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /32 and routing is
        set up from localAddress to hostAddress and back.
      '';
    };

    localAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::2";
      description = ''
        The IPv6 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /128 and routing is
        set up from localAddress6 to hostAddress6 and back.
      '';
    };

  };

  # Placeholder configuration used to render the generic `container@`
  # template unit, whose real settings arrive through the environment file.
  dummyConfig = {
    extraVeths = { };
    additionalCapabilities = [ ];
    ephemeral = false;
    timeoutStartSec = "1min";
    allowedDevices = [ ];
    hostAddress = null;
    hostAddress6 = null;
    localAddress = null;
    localAddress6 = null;
    tmpfs = null;
  };

in

{
  options = {

    boot.isContainer = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether this NixOS machine is a lightweight container running
        in another NixOS system.
      '';
    };

    boot.enableContainers = mkOption {
      type = types.bool;
      default = config.containers != { };
      defaultText = lib.literalExpression "config.containers != { }";
      description = ''
        Whether to enable support for NixOS containers.
      '';
    };

    containers = mkOption {
      type = types.attrsOf (
        types.submodule (
          {
            config,
            options,
            name,
            ...
          }:
          {
            options = {
              config = mkOption {
                description = ''
                  A specification of the desired configuration of this
                  container, as a NixOS module.
                '';
                # Custom type: merging the definitions evaluates a complete
                # NixOS configuration from `containers.<name>.nixpkgs`, with
                # `extraConfig` prepended to the user's modules.
                type = lib.mkOptionType {
                  name = "Toplevel NixOS config";
                  merge =
                    loc: defs:
                    (import "${toString config.nixpkgs}/nixos/lib/eval-config.nix" {
                      modules =
                        let
                          extraConfig =
                            { options, ... }:
                            {
                              _file = "module at ${__curPos.file}:${toString __curPos.line}";
                              config = {
                                # Inherit the host platform, using whichever
                                # option the container's nixpkgs provides.
                                nixpkgs =
                                  if options.nixpkgs ? hostPlatform then
                                    { inherit (host.pkgs.stdenv) hostPlatform; }
                                  else
                                    { localSystem = host.pkgs.stdenv.hostPlatform; };
                                boot.isContainer = true;
                                networking.hostName = mkDefault name;
                                networking.useDHCP = false;
                                assertions = [
                                  {
                                    # NOTE(review): `<= 0` also matches a kernel
                                    # whose version is exactly "5.8", although the
                                    # message says 5.8 is sufficient — confirm
                                    # which of the two is intended.
                                    assertion =
                                      (builtins.compareVersions kernelVersion "5.8" <= 0)
                                      -> config.privateNetwork
                                      -> stringLength name <= 11;
                                    message = ''
                                      Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
                                      not be longer than 11 characters, because the container's interface name is derived from it.
                                      You should either make the container name shorter or upgrade to a more recent kernel that
                                      supports interface altnames (i.e. at least Linux 5.8 - please see https://github.com/NixOS/nixpkgs/issues/38509
                                      for details).
                                    '';
                                  }
                                  {
                                    assertion = !lib.strings.hasInfix "_" name;
                                    message = ''
                                      Names containing underscores are not allowed in nixos-containers. Please rename the container '${name}'
                                    '';
                                  }
                                ];
                              };
                            };
                        in
                        [ extraConfig ] ++ (map (x: x.value) defs);
                      prefix = [
                        "containers"
                        name
                      ];
                      inherit (config) specialArgs;

                      # The system is inherited from the host above.
                      # Set it to null, to remove the "legacy" entrypoint's non-hermetic default.
                      system = null;
                    }).config;
                };
              };

              path = mkOption {
                type = types.path;
                example = "/nix/var/nix/profiles/per-container/webserver";
                description = ''
                  As an alternative to specifying
                  {option}`config`, you can specify the path to
                  the evaluated NixOS system configuration, typically a
                  symlink to a system profile.
                '';
              };

              additionalCapabilities = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [
                  "CAP_NET_ADMIN"
                  "CAP_MKNOD"
                ];
                description = ''
                  Grant additional capabilities to the container.  See the
                  {manpage}`capabilities(7)` and {manpage}`systemd-nspawn(1)` man pages for more
                  information.
                '';
              };

              nixpkgs = mkOption {
                type = types.path;
                default = pkgs.path;
                defaultText = literalExpression "pkgs.path";
                description = ''
                  A path to the nixpkgs that provide the modules, pkgs and lib for evaluating the container.

                  To only change the `pkgs` argument used inside the container modules,
                  set the `nixpkgs.*` options in the container {option}`config`.
                  Setting `config.nixpkgs.pkgs = pkgs` speeds up the container evaluation
                  by reusing the system pkgs, but the `nixpkgs.config` option in the
                  container config is ignored in this case.
                '';
              };

              specialArgs = mkOption {
                type = types.attrsOf types.unspecified;
                default = { };
                description = ''
                  A set of special arguments to be passed to NixOS modules.
                  This will be merged into the `specialArgs` used to evaluate
                  the NixOS configurations.
                '';
              };

              ephemeral = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Runs container in ephemeral mode with the empty root filesystem at boot.
                  This way container will be bootstrapped from scratch on each boot
                  and will be cleaned up on shutdown leaving no traces behind.
                  Useful for completely stateless, reproducible containers.

                  Note that this option might require to do some adjustments to the container configuration,
                  e.g. you might want to set
                  {var}`systemd.network.networks.$interface.dhcpV4Config.ClientIdentifier` to "mac"
                  if you use {var}`macvlans` option.
                  This way dhcp client identifier will be stable between the container restarts.

                  Note that the container journal will not be linked to the host if this option is enabled.
                '';
              };

              enableTun = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Allows the container to create and setup tunnel interfaces
                  by granting the `NET_ADMIN` capability and
                  enabling access to `/dev/net/tun`.
                '';
              };

              privateNetwork = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Whether to give the container its own private virtual
                  Ethernet interface.  The interface is called
                  `eth0`, and is hooked up to the interface
                  `ve-«container-name»`
                  on the host.  If this option is not set, then the
                  container shares the network interfaces of the host,
                  and can bind to any port on any interface.
                '';
              };

              networkNamespace = mkOption {
                type = types.nullOr types.path;
                default = null;
                description = ''
                  Takes the path to a file representing a kernel network namespace that the container
                  shall run in. The specified path should refer to a (possibly bind-mounted) network
                  namespace file, as exposed by the kernel below /proc/<PID>/ns/net. This makes the
                  container enter the given network namespace. One of the typical use cases is to give
                  a network namespace under /run/netns created by {manpage}`ip-netns(8)`.
                  Note that this option cannot be used together with other network-related options,
                  such as --private-network or --network-interface=.
                '';
              };

              privateUsers = mkOption {
                type = types.either types.ints.u32 (
                  types.enum [
                    "no"
                    "identity"
                    "pick"
                  ]
                );
                default = "no";
                description = ''
                  Whether to give the container its own private UIDs/GIDs space (user namespacing).
                  Disabled by default (`no`).

                  If set to a number (usually above host's UID/GID range: 65536),
                  user namespacing is enabled and the container UID/GIDs will start at that number.

                  If set to `identity`, mostly equivalent to `0`, this will only provide
                  process capability isolation (no UID/GID isolation, as they are the same as host).

                  If set to `pick`, user namespacing is enabled and the UID/GID range is automatically chosen,
                  so that no overlapping UID/GID ranges are assigned to multiple containers.
                  This is the recommended option as it enhances container security massively and operates fully automatically in most cases.

                  See <https://www.freedesktop.org/software/systemd/man/latest/systemd-nspawn.html#--private-users=> for details.
                '';
              };

              interfaces = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [
                  "eth1"
                  "eth2"
                ];
                description = ''
                  The list of interfaces to be moved into the container.
                '';
              };

              macvlans = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [
                  "eth1"
                  "eth2"
                ];
                description = ''
                  The list of host interfaces from which macvlans will be
                  created. For each interface specified, a macvlan interface
                  will be created and moved to the container.
                '';
              };

              extraVeths = mkOption {
                type =
                  with types;
                  attrsOf (submodule {
                    options = networkOptions;
                  });
                default = { };
                description = ''
                  Extra veth-pairs to be created for the container.
                '';
              };

              autoStart = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Whether the container is automatically started at boot-time.
                '';
              };

              restartIfChanged = mkOption {
                type = types.bool;
                default = true;
                description = ''
                  Whether the container should be restarted during a NixOS
                  configuration switch if its definition has changed.
                '';
              };

              timeoutStartSec = mkOption {
                type = types.str;
                default = "1min";
                description = ''
                  Time for the container to start. In case of a timeout,
                  the container processes get killed.
                  See {manpage}`systemd.time(7)`
                  for more information about the format.
                '';
              };

              bindMounts = mkOption {
                type = with types; attrsOf (submodule bindMountOpts);
                default = { };
                example = literalExpression ''
                  { "/home" = { hostPath = "/home/alice";
                                isReadOnly = false; };
                  }
                '';

                description = ''
                  An extra list of directories that is bound to the container.
                '';
              };

              allowedDevices = mkOption {
                type = with types; listOf (submodule allowedDeviceOpts);
                default = [ ];
                example = [
                  {
                    node = "/dev/net/tun";
                    modifier = "rwm";
                  }
                ];
                description = ''
                  A list of device nodes to which the containers has access to.
                '';
              };

              tmpfs = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [ "/var" ];
                description = ''
                  Mounts a set of tmpfs file systems into the container.
                  Multiple paths can be specified.
                  Valid items must conform to the --tmpfs argument
                  of systemd-nspawn. See {manpage}`systemd-nspawn(1)` for details.
                '';
              };

              extraFlags = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [ "--drop-capability=CAP_SYS_CHROOT" ];
                description = ''
                  Extra flags passed to the systemd-nspawn command.
                  See {manpage}`systemd-nspawn(1)` for details.
                '';
              };

              flake = lib.mkOption {
                type = lib.types.nullOr lib.types.str;
                default = null;
                example = "github:NixOS/nixpkgs/master";
                description = ''
                  The Flake URI of the NixOS configuration to use for the container.
                  Replaces the option {option}`containers.<name>.path`.
                '';
              };

              # Removed option.  See `checkAssertion` below for the accompanying error message.
              pkgs = mkOption { visible = false; };
            }
            // networkOptions;

            config =
              let
                # Throw an error when removed option `pkgs` is used.
                # Because this is a submodule we cannot use `mkRemovedOptionModule` or option `assertions`.
                optionPath = "containers.${name}.pkgs";
                files = showFiles options.pkgs.files;
                checkAssertion =
                  if options.pkgs.isDefined then
                    throw ''
                      The option definition `${optionPath}' in ${files} no longer has any effect; please remove it.

                      Alternatively, you can use the following options:
                      - containers.${name}.nixpkgs
                        This sets the nixpkgs (and thereby the modules, pkgs and lib) that
                        are used for evaluating the container.

                      - containers.${name}.config.nixpkgs.pkgs
                        This only sets the `pkgs` argument used inside the container modules.
                    ''
                  else if options.config.isDefined && (options.flake.value != null) then
                    # The condition tests `config` (not `path`), so the message
                    # names the option the user actually has to remove.
                    throw ''
                      The options 'containers.${name}.config' and 'containers.${name}.flake' cannot both be set.
                    ''
                  else
                    null;
              in
              {
                # `builtins.seq` forces `checkAssertion` (possibly throwing)
                # and then yields `mkMerge`, which is applied to the list.
                path = builtins.seq checkAssertion mkMerge [
                  (mkIf options.config.isDefined config.config.system.build.toplevel)
                  (mkIf (config.flake != null) "/nix/var/nix/profiles/per-container/${name}")
                ];
              };
          }
        )
      );

      default = { };
      example = literalExpression ''
        { webserver =
            { path = "/nix/var/nix/profiles/webserver";
            };
          database =
            { config =
                { config, pkgs, ... }:
                { services.postgresql.enable = true;
                  services.postgresql.package = pkgs.postgresql_14;

                  system.stateVersion = "${lib.trivial.release}";
                };
            };
        }
      '';
      description = ''
        A set of NixOS system configurations to be run as lightweight
        containers.  Each container appears as a service
        `container-«name»`
        on the host system, allowing it to be started and stopped via
        {command}`systemctl`.
      '';
    };

  };

  config = mkMerge [
    {
      warnings =
        optional (!config.boot.enableContainers && config.containers != { })
          "containers.<name> is used, but boot.enableContainers is false. To use containers.<name>, set boot.enableContainers to true.";

      # One assertion per container.  Each entry must be an attrset with
      # `assertion` and `message`; a bare string here would surface as a type
      # error instead of the intended message.
      assertions = mapAttrsToList (name: cfg: {
        assertion = !(cfg.networkNamespace != null && (cfg.privateNetwork || cfg.interfaces != [ ]));
        message = "containers.${name}.networkNamespace is mutually exclusive to containers.${name}.privateNetwork and containers.${name}.interfaces.";
      }) config.containers;
    }

    (mkIf (config.boot.enableContainers) (
      let
        # Generic `container@` template unit, rendered from `dummyConfig`.
        # Declarative containers start from this and override the
        # config-dependent parts.
        unit = {
          description = "Container '%i'";

          unitConfig.RequiresMountsFor = "${stateDirectory}/%i";

          path = [
            pkgs.iproute2
            config.nix.package
          ];

          environment = {
            root = "${stateDirectory}/%i";
            INSTANCE = "%i";
          };

          preStart = preStartScript dummyConfig;

          script = startScript dummyConfig;

          postStart = postStartScript dummyConfig;

          restartIfChanged = false;

          serviceConfig = serviceDirectives dummyConfig;
        };
      in
      {
        warnings = (
          optional
            (config.virtualisation.containers.enable && versionOlder config.system.stateVersion "22.05")
            ''
              Enabling both boot.enableContainers & virtualisation.containers on system.stateVersion < 22.05 is unsupported.
            ''
        );

        systemd.targets.multi-user.wants = [ "machines.target" ];

        systemd.services = listToAttrs (
          filter (x: x.value != null) (
            # The generic container template used by imperative containers
            [
              {
                name = "container@";
                value = unit;
              }
            ]
            # declarative containers
            ++ (mapAttrsToList (
              name: cfg:
              nameValuePair "container@${name}" (
                let
                  # `cfg` plus the device/capability additions implied by
                  # `enableTun`, or by a private network combined with user
                  # namespacing ("pick" or a positive integer).
                  containerConfig =
                    cfg
                    // (optionalAttrs cfg.enableTun {
                      allowedDevices = cfg.allowedDevices ++ [
                        {
                          node = "/dev/net/tun";
                          modifier = "rwm";
                        }
                      ];
                      additionalCapabilities = cfg.additionalCapabilities ++ [ "CAP_NET_ADMIN" ];
                    })
                    // (optionalAttrs
                      (
                        !cfg.enableTun
                        && cfg.privateNetwork
                        && (cfg.privateUsers == "pick" || (builtins.isInt cfg.privateUsers && cfg.privateUsers > 0))
                      )
                      {
                        allowedDevices = cfg.allowedDevices ++ [
                          {
                            node = "/dev/net/tun";
                            modifier = "rwm";
                          }
                        ];
                      }
                    );
                in
                recursiveUpdate unit {
                  preStart = preStartScript containerConfig;
                  script = startScript containerConfig;
                  postStart = postStartScript containerConfig;
                  serviceConfig = serviceDirectives containerConfig;
                  unitConfig.RequiresMountsFor =
                    lib.optional (!containerConfig.ephemeral) "${stateDirectory}/%i"
                    ++ builtins.map (d: if d.hostPath != null then d.hostPath else d.mountPoint) (
                      builtins.attrValues cfg.bindMounts
                    );
                  environment.root =
                    if containerConfig.ephemeral then "/run/nixos-containers/%i" else "${stateDirectory}/%i";
                }
                // (optionalAttrs containerConfig.autoStart {
                  wantedBy = [ "machines.target" ];
                  wants = [ "network.target" ] ++ (map (i: "sys-subsystem-net-devices-${i}.device") cfg.interfaces);
                  after = [ "network.target" ] ++ (map (i: "sys-subsystem-net-devices-${i}.device") cfg.interfaces);
                  restartTriggers = [
                    containerConfig.path
                    config.environment.etc."${configurationDirectoryName}/${name}.conf".source
                  ];
                  restartIfChanged = containerConfig.restartIfChanged;
                })
              )
            ) config.containers)
          )
        );

        # Generate a configuration file in /etc/nixos-containers for each
        # container so that container@.service can get the container
        # configuration.
        environment.etc =
          let
            # "protocol:hostPort:containerPort"; containerPort falls back to
            # hostPort when null.
            mkPortStr =
              p:
              p.protocol
              + ":"
              + (toString p.hostPort)
              + ":"
              + (if p.containerPort == null then toString p.hostPort else toString p.containerPort);
          in
          mapAttrs' (
            name: cfg:
            nameValuePair "${configurationDirectoryName}/${name}.conf" {
              text = ''
                ${optionalString (cfg.flake == null) ''
                  SYSTEM_PATH=${cfg.path}
                ''}
                ${optionalString (cfg.flake != null) ''
                  FLAKE=${cfg.flake}
                ''}
                ${optionalString cfg.privateNetwork ''
                  PRIVATE_NETWORK=1
                  ${optionalString (cfg.hostBridge != null) ''
                    HOST_BRIDGE=${cfg.hostBridge}
                  ''}
                  ${optionalString (length cfg.forwardPorts > 0) ''
                    HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
                  ''}
                  ${optionalString (cfg.hostAddress != null) ''
                    HOST_ADDRESS=${cfg.hostAddress}
                  ''}
                  ${optionalString (cfg.hostAddress6 != null) ''
                    HOST_ADDRESS6=${cfg.hostAddress6}
                  ''}
                  ${optionalString (cfg.localAddress != null) ''
                    LOCAL_ADDRESS=${cfg.localAddress}
                  ''}
                  ${optionalString (cfg.localAddress6 != null) ''
                    LOCAL_ADDRESS6=${cfg.localAddress6}
                  ''}
                ''}
                ${optionalString (cfg.networkNamespace != null) ''
                  NETWORK_NAMESPACE_PATH=${cfg.networkNamespace}
                ''}
                PRIVATE_USERS=${toString cfg.privateUsers}
                INTERFACES="${toString cfg.interfaces}"
                MACVLANS="${toString cfg.macvlans}"
                ${optionalString cfg.autoStart ''
                  AUTO_START=1
                ''}
                EXTRA_NSPAWN_FLAGS="${
                  mkBindFlags cfg.bindMounts
                  + optionalString (cfg.extraFlags != [ ]) (" " + concatStringsSep " " cfg.extraFlags)
                }"
              '';
            }
          ) config.containers;

        # Generate /etc/hosts entries for the containers.
        networking.extraHosts = concatStrings (
          mapAttrsToList (
            name: cfg:
            optionalString (cfg.localAddress != null) ''
              ${head (splitString "/" cfg.localAddress)} ${name}.containers
            ''
          ) config.containers
        );

        networking.dhcpcd.denyInterfaces = [
          "ve-*"
          "vb-*"
        ];

        services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
          # Don't manage interfaces created by nixos-container.
          ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
        '';

        environment.systemPackages = [
          nixos-container
        ];

        boot.kernelModules = [
          "bridge"
          "macvlan"
          "tap"
          "tun"
        ];
      }
    ))
  ];

  meta.buildDocsInSandbox = false;
}