at 25.11-pre 41 kB view raw
# NixOS module for declarative and imperative NixOS containers.
#
# It declares the options `boot.isContainer`, `boot.enableContainers` and
# `containers.<name>`, and, when `boot.enableContainers` is set, generates
# the `container@` systemd template unit plus one `container@<name>` unit per
# declared container. Each unit runs the container through `systemd-nspawn`.
{
  config,
  lib,
  pkgs,
  ...
}@host:

with lib;

let

  # Directory names are prefixed with "nixos-" when system.stateVersion is at
  # least 22.05; otherwise the prefix is empty.
  configurationPrefix = optionalString (versionAtLeast config.system.stateVersion "22.05") "nixos-";
  configurationDirectoryName = "${configurationPrefix}containers";
  configurationDirectory = "/etc/${configurationDirectoryName}";
  stateDirectory = "/var/lib/${configurationPrefix}containers";

  # The nixos-container package, overridden to use the directories computed above.
  nixos-container = pkgs.nixos-container.override {
    inherit stateDirectory configurationDirectory;
  };

  # The container's init script, a small wrapper around the regular
  # NixOS stage-2 init script.
  containerInit = (
    cfg:
    let
      # Container-side setup commands for one extra veth interface:
      # bring it up, add its addresses and add routes to the host addresses.
      renderExtraVeth = (
        name: cfg: ''
          echo "Bringing ${name} up"
          ip link set dev ${name} up
          ${optionalString (cfg.localAddress != null) ''
            echo "Setting ip for ${name}"
            ip addr add ${cfg.localAddress} dev ${name}
          ''}
          ${optionalString (cfg.localAddress6 != null) ''
            echo "Setting ip6 for ${name}"
            ip -6 addr add ${cfg.localAddress6} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress != null) ''
            echo "Setting route to host for ${name}"
            ip route add ${cfg.hostAddress} dev ${name}
          ''}
          ${optionalString (cfg.hostAddress6 != null) ''
            echo "Setting route6 to host for ${name}"
            ip -6 route add ${cfg.hostAddress6} dev ${name}
          ''}
        ''
      );
    in
    pkgs.writeScript "container-init" ''
      #! ${pkgs.runtimeShell} -e

      # Exit early if we're asked to shut down.
      trap "exit 0" SIGRTMIN+3

      # Initialise the container side of the veth pair.
      if [ -n "$HOST_ADDRESS" ] || [ -n "$HOST_ADDRESS6" ] ||
         [ -n "$LOCAL_ADDRESS" ] || [ -n "$LOCAL_ADDRESS6" ] ||
         [ -n "$HOST_BRIDGE" ]; then
        ip link set host0 name eth0
        ip link set dev eth0 up

        if [ -n "$LOCAL_ADDRESS" ]; then
          ip addr add $LOCAL_ADDRESS dev eth0
        fi
        if [ -n "$LOCAL_ADDRESS6" ]; then
          ip -6 addr add $LOCAL_ADDRESS6 dev eth0
        fi
        if [ -n "$HOST_ADDRESS" ]; then
          ip route add $HOST_ADDRESS dev eth0
          ip route add default via $HOST_ADDRESS
        fi
        if [ -n "$HOST_ADDRESS6" ]; then
          ip -6 route add $HOST_ADDRESS6 dev eth0
          ip -6 route add default via $HOST_ADDRESS6
        fi
      fi

      ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}

      # Start the regular stage 2 script.
      # We source instead of exec to not lose an early stop signal, which is
      # also the only _reliable_ shutdown signal we have since early stop
      # does not execute ExecStop* commands.
      set +e
      . "$1"
    ''
  );

  # systemd-nspawn flag requesting one extra veth pair; only the attribute
  # name is used, the per-veth configuration is ignored here.
  nspawnExtraVethArgs = (name: cfg: "--network-veth-extra=${name}");

  # Main script of the container unit: prepares the container root directory,
  # assembles the systemd-nspawn flags from the environment variables and the
  # given container configuration, then execs systemd-nspawn.
  startScript = cfg: ''
    # Declare root explicitly to avoid shellcheck warnings, it comes from the env
    declare root

    mkdir -p "$root/etc" "$root/var/lib"
    chmod 0755 "$root/etc" "$root/var/lib"
    mkdir -p "$root/var/lib/private" "$root/root" /run/nixos-containers
    chmod 0700 "$root/var/lib/private" "$root/root" /run/nixos-containers
    if ! [ -e "$root/etc/os-release" ] && ! [ -h "$root/etc/os-release" ]; then
      touch "$root/etc/os-release"
    fi

    if ! [ -e "$root/etc/machine-id" ]; then
      touch "$root/etc/machine-id"
    fi

    mkdir -p \
      "/nix/var/nix/profiles/per-container/$INSTANCE" \
      "/nix/var/nix/gcroots/per-container/$INSTANCE"
    chmod 0755 \
      "/nix/var/nix/profiles/per-container/$INSTANCE" \
      "/nix/var/nix/gcroots/per-container/$INSTANCE"

    cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

    declare -a extraFlags

    if [ "$PRIVATE_NETWORK" = 1 ]; then
      extraFlags+=("--private-network")
    fi

    NIX_BIND_OPT=""
    if [ -n "$PRIVATE_USERS" ]; then
      extraFlags+=("--private-users=$PRIVATE_USERS")
      if [[
        "$PRIVATE_USERS" = "pick"
        || ("$PRIVATE_USERS" =~ ^[[:digit:]]+$ && "$PRIVATE_USERS" -gt 0)
      ]]; then
        # when user namespacing is enabled, we use `idmap` mount option so that
        # bind mounts under /nix get proper owner (and not nobody/nogroup).
        NIX_BIND_OPT=":idmap"
      fi
    fi

    if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
       [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
      extraFlags+=("--network-veth")
    fi

    if [ -n "$HOST_PORT" ]; then
      OIFS=$IFS
      IFS=","
      for i in $HOST_PORT
      do
        extraFlags+=("--port=$i")
      done
      IFS=$OIFS
    fi

    if [ -n "$HOST_BRIDGE" ]; then
      extraFlags+=("--network-bridge=$HOST_BRIDGE")
    fi

    if [ -n "$NETWORK_NAMESPACE_PATH" ]; then
      extraFlags+=("--network-namespace-path=$NETWORK_NAMESPACE_PATH")
    fi

    extraFlags+=(${lib.escapeShellArgs (mapAttrsToList nspawnExtraVethArgs cfg.extraVeths)})

    for iface in $INTERFACES; do
      extraFlags+=("--network-interface=$iface")
    done

    for iface in $MACVLANS; do
      extraFlags+=("--network-macvlan=$iface")
    done

    # If the host is 64-bit and the container is 32-bit, add a
    # --personality flag.
    ${optionalString (pkgs.stdenv.hostPlatform.system == "x86_64-linux") ''
      if [ "$(< "''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system")" = i686-linux ]; then
        extraFlags+=("--personality=x86")
      fi
    ''}

    export SYSTEMD_NSPAWN_UNIFIED_HIERARCHY=1

    # Run systemd-nspawn without startup notification (we'll
    # wait for the container systemd to signal readiness)
    # Kill signal handling means systemd-nspawn will pass a system-halt signal
    # to the container systemd when it receives SIGTERM for container shutdown;
    # containerInit and stage2 have to handle this as well.
    # TODO: fix shellcheck issue properly
    # shellcheck disable=SC2086
    exec ${config.systemd.package}/bin/systemd-nspawn \
      --keep-unit \
      -M "$INSTANCE" -D "$root" "''${extraFlags[@]}" \
      --notify-ready=yes \
      --kill-signal=SIGRTMIN+3 \
      --bind-ro=/nix/store:/nix/store$NIX_BIND_OPT \
      --bind-ro=/nix/var/nix/db:/nix/var/nix/db$NIX_BIND_OPT \
      --bind-ro=/nix/var/nix/daemon-socket:/nix/var/nix/daemon-socket$NIX_BIND_OPT \
      --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles$NIX_BIND_OPT" \
      --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots$NIX_BIND_OPT" \
      ${optionalString (!cfg.ephemeral) "--link-journal=try-guest"} \
      --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
      --setenv PRIVATE_USERS="$PRIVATE_USERS" \
      --setenv HOST_BRIDGE="$HOST_BRIDGE" \
      --setenv HOST_ADDRESS="$HOST_ADDRESS" \
      --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
      --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
      --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
      --setenv HOST_PORT="$HOST_PORT" \
      --setenv PATH="$PATH" \
      ${optionalString cfg.ephemeral "--ephemeral"} \
      ${
        optionalString (
          cfg.additionalCapabilities != null && cfg.additionalCapabilities != [ ]
        ) ''--capability="${concatStringsSep "," cfg.additionalCapabilities}"''
      } \
      ${
        optionalString (
          cfg.tmpfs != null && cfg.tmpfs != [ ]
        ) ''--tmpfs=${concatStringsSep " --tmpfs=" cfg.tmpfs}''
      } \
      $EXTRA_NSPAWN_FLAGS \
      ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
  '';

  # Pre-start script: terminates any existing machine of the same name and
  # deletes the host-side veth interfaces (failures are ignored).
  preStartScript = cfg: ''
    # Clean up existing machined registration and interfaces.
    machinectl terminate "$INSTANCE" 2> /dev/null || true

    if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
       [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
      ip link del dev "ve-$INSTANCE" 2> /dev/null || true
      ip link del dev "vb-$INSTANCE" 2> /dev/null || true
    fi

    ${concatStringsSep "\n" (
      mapAttrsToList (name: cfg: "ip link del dev ${name} 2> /dev/null || true ") cfg.extraVeths
    )}
  '';

  # Post-start script: configures the host side of the veth pair(s).
  postStartScript = (
    cfg:
    let
      # Emits an `<ipcmd> add … dev "$ifaceHost"` command. If the attribute is
      # null in `cfg`, the value is taken from the given shell variable at
      # runtime (guarded by a non-empty check); otherwise the configured value
      # is embedded directly.
      ipcall =
        cfg: ipcmd: variable: attribute:
        if cfg.${attribute} == null then
          ''
            if [ -n "${variable}" ]; then
              ${ipcmd} add "${variable}" dev "$ifaceHost"
            fi
          ''
        else
          ''${ipcmd} add ${cfg.${attribute}} dev "$ifaceHost"'';
      # Host-side setup for one extra veth: either attach it to the configured
      # bridge, or bring it up and add the host addresses and container routes.
      renderExtraVeth =
        name: cfg:
        if cfg.hostBridge != null then
          ''
            # Add ${name} to bridge ${cfg.hostBridge}
            ip link set dev "${name}" master "${cfg.hostBridge}" up
          ''
        else
          ''
            echo "Bring ${name} up"
            ip link set dev "${name}" up
            # Set IPs and routes for ${name}
            ${optionalString (cfg.hostAddress != null) ''
              ip addr add ${cfg.hostAddress} dev "${name}"
            ''}
            ${optionalString (cfg.hostAddress6 != null) ''
              ip -6 addr add ${cfg.hostAddress6} dev "${name}"
            ''}
            ${optionalString (cfg.localAddress != null) ''
              ip route add ${cfg.localAddress} dev "${name}"
            ''}
            ${optionalString (cfg.localAddress6 != null) ''
              ip -6 route add ${cfg.localAddress6} dev "${name}"
            ''}
          '';
    in
    ''
      if [ -n "$HOST_ADDRESS" ] || [ -n "$LOCAL_ADDRESS" ] ||
         [ -n "$HOST_ADDRESS6" ] || [ -n "$LOCAL_ADDRESS6" ]; then
        if [ -z "$HOST_BRIDGE" ]; then
          ifaceHost=ve-$INSTANCE
          ip link set dev "$ifaceHost" up

          ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
          ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
          ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
          ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
        fi
      fi
      ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth cfg.extraVeths)}
    ''
  );

  # serviceConfig directives of the container unit for the given container
  # configuration.
  serviceDirectives = cfg: {
    ExecReload = pkgs.writeScript "reload-container" ''
      #! ${pkgs.runtimeShell} -e
      ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
        bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
    '';

    SyslogIdentifier = "container %i";

    EnvironmentFile = "-${configurationDirectory}/%i.conf";

    Type = "notify";

    RuntimeDirectory = lib.optional cfg.ephemeral "${configurationDirectoryName}/%i";

    # Note that on reboot, systemd-nspawn returns 133, so this
    # unit will be restarted. On poweroff, it returns 0, so the
    # unit won't be restarted.
    RestartForceExitStatus = "133";
    SuccessExitStatus = "133";

    # Some containers take long to start
    # especially when you automatically start many at once
    TimeoutStartSec = cfg.timeoutStartSec;

    Restart = "on-failure";

    Slice = "machine.slice";
    Delegate = true;

    # We rely on systemd-nspawn turning a SIGTERM to itself into a shutdown
    # signal (SIGRTMIN+3) for the inner container.
    KillMode = "mixed";
    KillSignal = "TERM";

    DevicePolicy = "closed";
    DeviceAllow = map (d: "${d.node} ${d.modifier}") cfg.allowedDevices;
  };

  # Version of the host's configured kernel; used by the container name
  # length assertion.
  kernelVersion = config.boot.kernelPackages.kernel.version;

  # Submodule describing one bind mount; the mount point defaults to the
  # attribute name.
  bindMountOpts =
    { name, ... }:
    {

      options = {
        mountPoint = mkOption {
          example = "/mnt/usb";
          type = types.str;
          description = "Mount point on the container file system.";
        };
        hostPath = mkOption {
          default = null;
          example = "/home/alice";
          type = types.nullOr types.str;
          description = "Location of the host path to be mounted.";
        };
        isReadOnly = mkOption {
          default = true;
          type = types.bool;
          description = "Determine whether the mounted path will be accessed in read-only mode.";
        };
      };

      config = {
        mountPoint = mkDefault name;
      };

    };

  # Submodule describing one device node the container may access.
  allowedDeviceOpts =
    { ... }:
    {
      options = {
        node = mkOption {
          example = "/dev/net/tun";
          type = types.str;
          description = "Path to device node";
        };
        modifier = mkOption {
          example = "rw";
          type = types.str;
          description = ''
            Device node access modifier. Takes a combination
            `r` (read), `w` (write), and
            `m` (mknod). See the
            {manpage}`systemd.resource-control(5)` man page for more
            information.'';
        };
      };
    };

  # Renders one bind mount as a ` --bind-ro=` / ` --bind=` flag (with a leading
  # space). Without a hostPath only the mount point is passed.
  mkBindFlag =
    d:
    let
      flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
      mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
    in
    flagPrefix + mountstr;

  # Concatenates the flags of all bind mounts in the given attribute set.
  mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);

  # Network options shared by the container itself and by each entry of
  # `extraVeths`.
  networkOptions = {
    hostBridge = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "br0";
      description = ''
        Put the host-side of the veth-pair into the named bridge.
        Only one of hostAddress* or hostBridge can be given.
      '';
    };

    forwardPorts = mkOption {
      type = types.listOf (
        types.submodule {
          options = {
            protocol = mkOption {
              type = types.str;
              default = "tcp";
              description = "The protocol specifier for port forwarding between host and container";
            };
            hostPort = mkOption {
              type = types.int;
              description = "Source port of the external interface on host";
            };
            containerPort = mkOption {
              type = types.nullOr types.int;
              default = null;
              description = "Target port of container";
            };
          };
        }
      );
      default = [ ];
      example = [
        {
          protocol = "tcp";
          hostPort = 8080;
          containerPort = 80;
        }
      ];
      description = ''
        List of forwarded ports from host to container. Each forwarded port
        is specified by protocol, hostPort and containerPort. By default,
        protocol is tcp and hostPort and containerPort are assumed to be
        the same if containerPort is not explicitly given.
      '';
    };

    hostAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.1";
      description = ''
        The IPv4 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    hostAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::1";
      description = ''
        The IPv6 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    localAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.2";
      description = ''
        The IPv4 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /32 and routing is
        set up from localAddress to hostAddress and back.
      '';
    };

    localAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::2";
      description = ''
        The IPv6 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /128 and routing is
        set up from localAddress6 to hostAddress6 and back.
      '';
    };

  };

  # Placeholder container configuration used to render the generic
  # `container@` template unit, whose real settings come from the
  # environment file at runtime.
  dummyConfig = {
    extraVeths = { };
    additionalCapabilities = [ ];
    ephemeral = false;
    timeoutStartSec = "1min";
    allowedDevices = [ ];
    hostAddress = null;
    hostAddress6 = null;
    localAddress = null;
    localAddress6 = null;
    tmpfs = null;
  };

in

{
  options = {

    boot.isContainer = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether this NixOS machine is a lightweight container running
        in another NixOS system.
      '';
    };

    boot.enableContainers = mkOption {
      type = types.bool;
      default = true;
      description = ''
        Whether to enable support for NixOS containers. Defaults to true
        (at no cost if containers are not actually used).
      '';
    };

    containers = mkOption {
      type = types.attrsOf (
        types.submodule (
          {
            config,
            options,
            name,
            ...
          }:
          {
            options = {
              config = mkOption {
                description = ''
                  A specification of the desired configuration of this
                  container, as a NixOS module.
                '';
                type = lib.mkOptionType {
                  name = "Toplevel NixOS config";
                  merge =
                    loc: defs:
                    (import "${toString config.nixpkgs}/nixos/lib/eval-config.nix" {
                      modules =
                        let
                          extraConfig =
                            { options, ... }:
                            {
                              _file = "module at ${__curPos.file}:${toString __curPos.line}";
                              config = {
                                nixpkgs =
                                  if options.nixpkgs ? hostPlatform then
                                    { inherit (host.pkgs.stdenv) hostPlatform; }
                                  else
                                    { localSystem = host.pkgs.stdenv.hostPlatform; };
                                boot.isContainer = true;
                                networking.hostName = mkDefault name;
                                networking.useDHCP = false;
                                assertions = [
                                  {
                                    # Only kernels strictly older than 5.8 are subject to the
                                    # name length limit, matching the message below.
                                    assertion =
                                      (versionOlder kernelVersion "5.8")
                                      -> config.privateNetwork
                                      -> stringLength name <= 11;
                                    message = ''
                                      Container name `${name}` is too long: When `privateNetwork` is enabled, container names can
                                      not be longer than 11 characters, because the container's interface name is derived from it.
                                      You should either make the container name shorter or upgrade to a more recent kernel that
                                      supports interface altnames (i.e. at least Linux 5.8 - please see https://github.com/NixOS/nixpkgs/issues/38509
                                      for details).
                                    '';
                                  }
                                  {
                                    assertion = !lib.strings.hasInfix "_" name;
                                    message = ''
                                      Names containing underscores are not allowed in nixos-containers. Please rename the container '${name}'
                                    '';
                                  }
                                ];
                              };
                            };
                        in
                        [ extraConfig ] ++ (map (x: x.value) defs);
                      prefix = [
                        "containers"
                        name
                      ];
                      inherit (config) specialArgs;

                      # The system is inherited from the host above.
                      # Set it to null, to remove the "legacy" entrypoint's non-hermetic default.
                      system = null;
                    }).config;
                };
              };

              path = mkOption {
                type = types.path;
                example = "/nix/var/nix/profiles/per-container/webserver";
                description = ''
                  As an alternative to specifying
                  {option}`config`, you can specify the path to
                  the evaluated NixOS system configuration, typically a
                  symlink to a system profile.
                '';
              };

              additionalCapabilities = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [
                  "CAP_NET_ADMIN"
                  "CAP_MKNOD"
                ];
                description = ''
                  Grant additional capabilities to the container.  See the
                  {manpage}`capabilities(7)` and {manpage}`systemd-nspawn(1)` man pages for more
                  information.
                '';
              };

              nixpkgs = mkOption {
                type = types.path;
                default = pkgs.path;
                defaultText = literalExpression "pkgs.path";
                description = ''
                  A path to the nixpkgs that provide the modules, pkgs and lib for evaluating the container.

                  To only change the `pkgs` argument used inside the container modules,
                  set the `nixpkgs.*` options in the container {option}`config`.
                  Setting `config.nixpkgs.pkgs = pkgs` speeds up the container evaluation
                  by reusing the system pkgs, but the `nixpkgs.config` option in the
                  container config is ignored in this case.
                '';
              };

              specialArgs = mkOption {
                type = types.attrsOf types.unspecified;
                default = { };
                description = ''
                  A set of special arguments to be passed to NixOS modules.
                  This will be merged into the `specialArgs` used to evaluate
                  the NixOS configurations.
                '';
              };

              ephemeral = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Runs container in ephemeral mode with the empty root filesystem at boot.
                  This way container will be bootstrapped from scratch on each boot
                  and will be cleaned up on shutdown leaving no traces behind.
                  Useful for completely stateless, reproducible containers.

                  Note that this option might require to do some adjustments to the container configuration,
                  e.g. you might want to set
                  {var}`systemd.network.networks.$interface.dhcpV4Config.ClientIdentifier` to "mac"
                  if you use {var}`macvlans` option.
                  This way dhcp client identifier will be stable between the container restarts.

                  Note that the container journal will not be linked to the host if this option is enabled.
                '';
              };

              enableTun = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Allows the container to create and setup tunnel interfaces
                  by granting the `NET_ADMIN` capability and
                  enabling access to `/dev/net/tun`.
                '';
              };

              privateNetwork = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Whether to give the container its own private virtual
                  Ethernet interface.  The interface is called
                  `eth0`, and is hooked up to the interface
                  `ve-«container-name»`
                  on the host.  If this option is not set, then the
                  container shares the network interfaces of the host,
                  and can bind to any port on any interface.
                '';
              };

              networkNamespace = mkOption {
                type = types.nullOr types.path;
                default = null;
                description = ''
                  Takes the path to a file representing a kernel network namespace that the container
                  shall run in. The specified path should refer to a (possibly bind-mounted) network
                  namespace file, as exposed by the kernel below /proc/<PID>/ns/net. This makes the
                  container enter the given network namespace. One of the typical use cases is to give
                  a network namespace under /run/netns created by {manpage}`ip-netns(8)`.
                  Note that this option cannot be used together with other network-related options,
                  such as --private-network or --network-interface=.
                '';
              };

              privateUsers = mkOption {
                type = types.either types.ints.u32 (
                  types.enum [
                    "no"
                    "identity"
                    "pick"
                  ]
                );
                default = "no";
                description = ''
                  Whether to give the container its own private UIDs/GIDs space (user namespacing).
                  Disabled by default (`no`).

                  If set to a number (usually above host's UID/GID range: 65536),
                  user namespacing is enabled and the container UID/GIDs will start at that number.

                  If set to `identity`, mostly equivalent to `0`, this will only provide
                  process capability isolation (no UID/GID isolation, as they are the same as host).

                  If set to `pick`, user namespacing is enabled and the UID/GID range is automatically chosen,
                  so that no overlapping UID/GID ranges are assigned to multiple containers.
                  This is the recommended option as it enhances container security massively and operates fully automatically in most cases.

                  See https://www.freedesktop.org/software/systemd/man/latest/systemd-nspawn.html#--private-users= for details.
                '';
              };

              interfaces = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [
                  "eth1"
                  "eth2"
                ];
                description = ''
                  The list of interfaces to be moved into the container.
                '';
              };

              macvlans = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [
                  "eth1"
                  "eth2"
                ];
                description = ''
                  The list of host interfaces from which macvlans will be
                  created. For each interface specified, a macvlan interface
                  will be created and moved to the container.
                '';
              };

              extraVeths = mkOption {
                type =
                  with types;
                  attrsOf (submodule {
                    options = networkOptions;
                  });
                default = { };
                description = ''
                  Extra veth-pairs to be created for the container.
                '';
              };

              autoStart = mkOption {
                type = types.bool;
                default = false;
                description = ''
                  Whether the container is automatically started at boot-time.
                '';
              };

              restartIfChanged = mkOption {
                type = types.bool;
                default = true;
                description = ''
                  Whether the container should be restarted during a NixOS
                  configuration switch if its definition has changed.
                '';
              };

              timeoutStartSec = mkOption {
                type = types.str;
                default = "1min";
                description = ''
                  Time for the container to start. In case of a timeout,
                  the container processes get killed.
                  See {manpage}`systemd.time(7)`
                  for more information about the format.
                '';
              };

              bindMounts = mkOption {
                type = with types; attrsOf (submodule bindMountOpts);
                default = { };
                example = literalExpression ''
                  { "/home" = { hostPath = "/home/alice";
                                isReadOnly = false; };
                  }
                '';

                description = ''
                  An extra list of directories that is bound to the container.
                '';
              };

              allowedDevices = mkOption {
                type = with types; listOf (submodule allowedDeviceOpts);
                default = [ ];
                example = [
                  {
                    node = "/dev/net/tun";
                    modifier = "rwm";
                  }
                ];
                description = ''
                  A list of device nodes to which the container has access.
                '';
              };

              tmpfs = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [ "/var" ];
                description = ''
                  Mounts a set of tmpfs file systems into the container.
                  Multiple paths can be specified.
                  Valid items must conform to the --tmpfs argument
                  of systemd-nspawn. See {manpage}`systemd-nspawn(1)` for details.
                '';
              };

              extraFlags = mkOption {
                type = types.listOf types.str;
                default = [ ];
                example = [ "--drop-capability=CAP_SYS_CHROOT" ];
                description = ''
                  Extra flags passed to the systemd-nspawn command.
                  See {manpage}`systemd-nspawn(1)` for details.
                '';
              };

              # Removed option. See `checkAssertion` below for the accompanying error message.
              pkgs = mkOption { visible = false; };
            }
            // networkOptions;

            config =
              let
                # Throw an error when removed option `pkgs` is used.
                # Because this is a submodule we cannot use `mkRemovedOptionModule` or option `assertions`.
                optionPath = "containers.${name}.pkgs";
                files = showFiles options.pkgs.files;
                checkAssertion =
                  if options.pkgs.isDefined then
                    throw ''
                      The option definition `${optionPath}' in ${files} no longer has any effect; please remove it.

                      Alternatively, you can use the following options:
                      - containers.${name}.nixpkgs
                        This sets the nixpkgs (and thereby the modules, pkgs and lib) that
                        are used for evaluating the container.

                      - containers.${name}.config.nixpkgs.pkgs
                        This only sets the `pkgs` argument used inside the container modules.
                    ''
                  else
                    null;
              in
              {
                # Force the removed-option check first, then define `path` from
                # the evaluated container system only when `config` is defined.
                path = builtins.seq checkAssertion (
                  mkIf options.config.isDefined config.config.system.build.toplevel
                );
              };
          }
        )
      );

      default = { };
      example = literalExpression ''
        { webserver =
            { path = "/nix/var/nix/profiles/webserver";
            };
          database =
            { config =
                { config, pkgs, ... }:
                { services.postgresql.enable = true;
                  services.postgresql.package = pkgs.postgresql_14;

                  system.stateVersion = "${lib.trivial.release}";
                };
            };
        }
      '';
      description = ''
        A set of NixOS system configurations to be run as lightweight
        containers.  Each container appears as a service
        `container-«name»`
        on the host system, allowing it to be started and stopped via
        {command}`systemctl`.
      '';
    };

  };

  config = mkMerge [
    {
      warnings =
        optional (!config.boot.enableContainers && config.containers != { })
          "containers.<name> is used, but boot.enableContainers is false. To use containers.<name>, set boot.enableContainers to true.";

      # One assertion record per container. Entries must be
      # `{ assertion; message; }` attribute sets: the assertion holds unless
      # networkNamespace is combined with privateNetwork or interfaces.
      assertions = mapAttrsToList (name: cfg: {
        assertion = (cfg.networkNamespace != null) -> !(cfg.privateNetwork || cfg.interfaces != [ ]);
        message = "containers.${name}.networkNamespace is mutually exclusive to containers.${name}.privateNetwork and containers.${name}.interfaces.";
      }) config.containers;
    }

    (mkIf (config.boot.enableContainers) (
      let
        # Generic unit definition; used as-is for the `container@` template and
        # as the base that declarative containers override.
        unit = {
          description = "Container '%i'";

          unitConfig.RequiresMountsFor = "${stateDirectory}/%i";

          path = [ pkgs.iproute2 ];

          environment = {
            root = "${stateDirectory}/%i";
            INSTANCE = "%i";
          };

          preStart = preStartScript dummyConfig;

          script = startScript dummyConfig;

          postStart = postStartScript dummyConfig;

          restartIfChanged = false;

          serviceConfig = serviceDirectives dummyConfig;
        };
      in
      {
        warnings = (
          optional
            (config.virtualisation.containers.enable && versionOlder config.system.stateVersion "22.05")
            ''
              Enabling both boot.enableContainers & virtualisation.containers on system.stateVersion < 22.05 is unsupported.
            ''
        );

        systemd.targets.multi-user.wants = [ "machines.target" ];

        systemd.services = listToAttrs (
          filter (x: x.value != null) (
            # The generic container template used by imperative containers
            [
              {
                name = "container@";
                value = unit;
              }
            ]
            # declarative containers
            ++ (mapAttrsToList (
              name: cfg:
              nameValuePair "container@${name}" (
                let
                  # The container configuration, extended with access to
                  # /dev/net/tun (and CAP_NET_ADMIN for enableTun) where needed.
                  containerConfig =
                    cfg
                    // (optionalAttrs cfg.enableTun {
                      allowedDevices = cfg.allowedDevices ++ [
                        {
                          node = "/dev/net/tun";
                          modifier = "rwm";
                        }
                      ];
                      additionalCapabilities = cfg.additionalCapabilities ++ [ "CAP_NET_ADMIN" ];
                    })
                    // (optionalAttrs
                      (
                        !cfg.enableTun
                        && cfg.privateNetwork
                        && (cfg.privateUsers == "pick" || (builtins.isInt cfg.privateUsers && cfg.privateUsers > 0))
                      )
                      {
                        allowedDevices = cfg.allowedDevices ++ [
                          {
                            node = "/dev/net/tun";
                            modifier = "rwm";
                          }
                        ];
                      }
                    );
                in
                recursiveUpdate unit {
                  preStart = preStartScript containerConfig;
                  script = startScript containerConfig;
                  postStart = postStartScript containerConfig;
                  serviceConfig = serviceDirectives containerConfig;
                  unitConfig.RequiresMountsFor =
                    lib.optional (!containerConfig.ephemeral) "${stateDirectory}/%i"
                    ++ builtins.map (d: if d.hostPath != null then d.hostPath else d.mountPoint) (
                      builtins.attrValues cfg.bindMounts
                    );
                  environment.root =
                    if containerConfig.ephemeral then "/run/nixos-containers/%i" else "${stateDirectory}/%i";
                }
                // (optionalAttrs containerConfig.autoStart {
                  wantedBy = [ "machines.target" ];
                  wants = [ "network.target" ] ++ (map (i: "sys-subsystem-net-devices-${i}.device") cfg.interfaces);
                  after = [ "network.target" ] ++ (map (i: "sys-subsystem-net-devices-${i}.device") cfg.interfaces);
                  restartTriggers = [
                    containerConfig.path
                    config.environment.etc."${configurationDirectoryName}/${name}.conf".source
                  ];
                  restartIfChanged = containerConfig.restartIfChanged;
                })
              )
            ) config.containers)
          )
        );

        # Generate a configuration file in /etc/nixos-containers for each
        # container so that the container@ service can get the container
        # configuration (it is loaded through EnvironmentFile).
        environment.etc =
          let
            # Renders one forwarded port as "<protocol>:<hostPort>:<containerPort>";
            # the container port falls back to the host port when unset.
            mkPortStr =
              p:
              p.protocol
              + ":"
              + (toString p.hostPort)
              + ":"
              + (if p.containerPort == null then toString p.hostPort else toString p.containerPort);
          in
          mapAttrs' (
            name: cfg:
            nameValuePair "${configurationDirectoryName}/${name}.conf" {
              text = ''
                SYSTEM_PATH=${cfg.path}
                ${optionalString cfg.privateNetwork ''
                  PRIVATE_NETWORK=1
                  ${optionalString (cfg.hostBridge != null) ''
                    HOST_BRIDGE=${cfg.hostBridge}
                  ''}
                  ${optionalString (length cfg.forwardPorts > 0) ''
                    HOST_PORT=${concatStringsSep "," (map mkPortStr cfg.forwardPorts)}
                  ''}
                  ${optionalString (cfg.hostAddress != null) ''
                    HOST_ADDRESS=${cfg.hostAddress}
                  ''}
                  ${optionalString (cfg.hostAddress6 != null) ''
                    HOST_ADDRESS6=${cfg.hostAddress6}
                  ''}
                  ${optionalString (cfg.localAddress != null) ''
                    LOCAL_ADDRESS=${cfg.localAddress}
                  ''}
                  ${optionalString (cfg.localAddress6 != null) ''
                    LOCAL_ADDRESS6=${cfg.localAddress6}
                  ''}
                ''}
                ${optionalString (cfg.networkNamespace != null) ''
                  NETWORK_NAMESPACE_PATH=${cfg.networkNamespace}
                ''}
                PRIVATE_USERS=${toString cfg.privateUsers}
                INTERFACES="${toString cfg.interfaces}"
                MACVLANS="${toString cfg.macvlans}"
                ${optionalString cfg.autoStart ''
                  AUTO_START=1
                ''}
                EXTRA_NSPAWN_FLAGS="${
                  mkBindFlags cfg.bindMounts
                  + optionalString (cfg.extraFlags != [ ]) (" " + concatStringsSep " " cfg.extraFlags)
                }"
              '';
            }
          ) config.containers;

        # Generate /etc/hosts entries for the containers.
        networking.extraHosts = concatStrings (
          mapAttrsToList (
            name: cfg:
            optionalString (cfg.localAddress != null) ''
              ${head (splitString "/" cfg.localAddress)} ${name}.containers
            ''
          ) config.containers
        );

        networking.dhcpcd.denyInterfaces = [
          "ve-*"
          "vb-*"
        ];

        services.udev.extraRules = optionalString config.networking.networkmanager.enable ''
          # Don't manage interfaces created by nixos-container.
          ENV{INTERFACE}=="v[eb]-*", ENV{NM_UNMANAGED}="1"
        '';

        environment.systemPackages = [
          nixos-container
        ];

        boot.kernelModules = [
          "bridge"
          "macvlan"
          "tap"
          "tun"
        ];
      }
    ))
  ];

  meta.buildDocsInSandbox = false;
}