# at 16.09-beta 19 kB view raw
# NixOS module implementing lightweight containers on top of
# systemd-nspawn.  It declares the `boot.isContainer`,
# `boot.enableContainers` and `containers.<name>` options and, when
# containers are enabled, generates the `container@` systemd template
# unit, one `container@<name>` unit per declarative container, and an
# /etc/containers/<name>.conf environment file for each of them.
{ config, lib, pkgs, ... }:

with lib;

let

  # The container's init script, a small wrapper around the regular
  # NixOS stage-2 init script.
  #
  # `cfg` is the container's option set (or `{}` for the generic
  # template unit).  The generated script configures the container side
  # of the veth pair(s) from the environment passed in by
  # systemd-nspawn and then execs its first argument.
  containerInit = (cfg:
    let
      # Shell fragment that brings up one extra veth interface inside
      # the container and applies whichever addresses/routes are set
      # on it.  Missing or null attributes are simply skipped.
      renderExtraVeth = (name: veth:
        ''
        echo "Bringing ${name} up"
        ip link set dev ${name} up
        ${optionalString ((veth.localAddress or null) != null) ''
          echo "Setting ip for ${name}"
          ip addr add ${veth.localAddress} dev ${name}
        ''}
        ${optionalString ((veth.localAddress6 or null) != null) ''
          echo "Setting ip6 for ${name}"
          ip -6 addr add ${veth.localAddress6} dev ${name}
        ''}
        ${optionalString ((veth.hostAddress or null) != null) ''
          echo "Setting route to host for ${name}"
          ip route add ${veth.hostAddress} dev ${name}
        ''}
        ${optionalString ((veth.hostAddress6 or null) != null) ''
          echo "Setting route6 to host for ${name}"
          ip -6 route add ${veth.hostAddress6} dev ${name}
        ''}
        ''
      );
    in
      pkgs.writeScript "container-init"
      ''
        #! ${pkgs.stdenv.shell} -e

        # Initialise the container side of the veth pair.
        if [ "$PRIVATE_NETWORK" = 1 ]; then

          ip link set host0 name eth0
          ip link set dev eth0 up

          if [ -n "$LOCAL_ADDRESS" ]; then
            ip addr add $LOCAL_ADDRESS dev eth0
          fi
          if [ -n "$LOCAL_ADDRESS6" ]; then
            ip -6 addr add $LOCAL_ADDRESS6 dev eth0
          fi
          if [ -n "$HOST_ADDRESS" ]; then
            ip route add $HOST_ADDRESS dev eth0
            ip route add default via $HOST_ADDRESS
          fi
          if [ -n "$HOST_ADDRESS6" ]; then
            ip -6 route add $HOST_ADDRESS6 dev eth0
            ip -6 route add default via $HOST_ADDRESS6
          fi

          ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth (cfg.extraVeths or {}))}
          ip a
          ip r
        fi

        # Start the regular stage-2 init script (passed as "$1").
        exec "$1"
      ''
    );

  # systemd-nspawn flag that creates one extra veth pair called `name`.
  # The per-veth settings are not needed here; they are applied later
  # by the init and postStart scripts.
  nspawnExtraVethArgs = (name: veth: "--network-veth-extra=${name}");

  # Body of the unit's `script`: prepares the container root directory
  # and per-container Nix profile/gcroot directories, assembles the
  # networking flags and finally execs systemd-nspawn.
  startScript = (cfg:
    ''
      mkdir -p -m 0755 "$root/etc" "$root/var/lib"
      mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
      if ! [ -e "$root/etc/os-release" ]; then
        touch "$root/etc/os-release"
      fi

      if ! [ -e "$root/etc/machine-id" ]; then
        touch "$root/etc/machine-id"
      fi

      mkdir -p -m 0755 \
        "/nix/var/nix/profiles/per-container/$INSTANCE" \
        "/nix/var/nix/gcroots/per-container/$INSTANCE"

      cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

      if [ "$PRIVATE_NETWORK" = 1 ]; then
        extraFlags+=" --network-veth"
        if [ -n "$HOST_BRIDGE" ]; then
          extraFlags+=" --network-bridge=$HOST_BRIDGE"
        fi
      fi

      ${if (cfg.extraVeths or null) != null then
          ''extraFlags+=" ${concatStringsSep " " (mapAttrsToList nspawnExtraVethArgs (cfg.extraVeths or {}))}"''
        else
          ''# No extra veth pairs to create''
      }

      for iface in $INTERFACES; do
        extraFlags+=" --network-interface=$iface"
      done

      for iface in $MACVLANS; do
        extraFlags+=" --network-macvlan=$iface"
      done

      # If the host is 64-bit and the container is 32-bit, add a
      # --personality flag.
      ${optionalString (config.nixpkgs.system == "x86_64-linux") ''
        if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
          extraFlags+=" --personality=x86"
        fi
      ''}

      # Run systemd-nspawn without startup notification (we'll
      # wait for the container systemd to signal readiness).
      EXIT_ON_REBOOT=1 \
      exec ${config.systemd.package}/bin/systemd-nspawn \
        --keep-unit \
        -M "$INSTANCE" -D "$root" $extraFlags \
        $EXTRA_NSPAWN_FLAGS \
        --notify-ready=yes \
        --bind-ro=/nix/store \
        --bind-ro=/nix/var/nix/db \
        --bind-ro=/nix/var/nix/daemon-socket \
        --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
        --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
        --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
        --setenv HOST_BRIDGE="$HOST_BRIDGE" \
        --setenv HOST_ADDRESS="$HOST_ADDRESS" \
        --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
        --setenv HOST_ADDRESS6="$HOST_ADDRESS6" \
        --setenv LOCAL_ADDRESS6="$LOCAL_ADDRESS6" \
        --setenv PATH="$PATH" \
        ${containerInit cfg} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
    ''
  );

  # Body of the unit's `preStart`: best-effort removal of a stale
  # machined registration and of host-side interfaces left over from a
  # previous run.  Every command is allowed to fail.
  preStartScript = (cfg:
    ''
      # Clean up existing machined registration and interfaces.
      machinectl terminate "$INSTANCE" 2> /dev/null || true

      if [ "$PRIVATE_NETWORK" = 1 ]; then
        ip link del dev "ve-$INSTANCE" 2> /dev/null || true
        ip link del dev "vb-$INSTANCE" 2> /dev/null || true
      fi

      ${concatStringsSep "\n" (
        mapAttrsToList (name: veth:
          ''ip link del dev ${name} 2> /dev/null || true ''
        ) (cfg.extraVeths or {})
      )}
    ''
  );

  # Body of the unit's `postStart`: configures the host side of the
  # veth pair(s) and records the container leader PID for preStop.
  postStartScript = (cfg:
    let
      # Emits `<ipcmd> add <addr> dev $ifaceHost`.
      #
      # If the option named by `attribute` is set in `cfg`, its value
      # is baked into the script; otherwise the command is guarded at
      # run time on the shell variable `variable` (which comes from the
      # container's environment file).
      #
      # Note: the attribute must be selected dynamically with
      # `cfg.${attribute}`; a plain `cfg.attribute` would look up an
      # attribute literally called "attribute" and never find it.
      ipcall = (cfg: ipcmd: variable: attribute:
        if (cfg.${attribute} or null) == null then
          ''
            if [ -n "${variable}" ]; then
              ${ipcmd} add ${variable} dev $ifaceHost
            fi
          ''
        else
          ''${ipcmd} add ${cfg.${attribute}} dev $ifaceHost''
      );

      # Host-side setup for one extra veth: either enslave it to the
      # configured bridge, or assign the host addresses and add routes
      # to the container addresses.
      renderExtraVeth = (name: veth:
        if (veth.hostBridge or null) != null then
          ''
            # Add ${name} to bridge ${veth.hostBridge}
            ip link set dev ${name} master ${veth.hostBridge} up
          ''
        else
          ''
            # Set IPs and routes for ${name}
            ${optionalString ((veth.hostAddress or null) != null) ''
              ip addr add ${veth.hostAddress} dev ${name}
            ''}
            ${optionalString ((veth.hostAddress6 or null) != null) ''
              ip -6 addr add ${veth.hostAddress6} dev ${name}
            ''}
            ${optionalString ((veth.localAddress or null) != null) ''
              ip route add ${veth.localAddress} dev ${name}
            ''}
            ${optionalString ((veth.localAddress6 or null) != null) ''
              ip -6 route add ${veth.localAddress6} dev ${name}
            ''}
          ''
      );
    in
      ''
        if [ "$PRIVATE_NETWORK" = 1 ]; then
          if [ -z "$HOST_BRIDGE" ]; then
            ifaceHost=ve-$INSTANCE
            ip link set dev $ifaceHost up

            ${ipcall cfg "ip addr" "$HOST_ADDRESS" "hostAddress"}
            ${ipcall cfg "ip -6 addr" "$HOST_ADDRESS6" "hostAddress6"}
            ${ipcall cfg "ip route" "$LOCAL_ADDRESS" "localAddress"}
            ${ipcall cfg "ip -6 route" "$LOCAL_ADDRESS6" "localAddress6"}
          fi
          ${concatStringsSep "\n" (mapAttrsToList renderExtraVeth (cfg.extraVeths or {}))}
        fi

        # Get the leader PID so that we can signal it in
        # preStop. We can't use machinectl there because D-Bus
        # might be shutting down. FIXME: in systemd 219 we can
        # just signal systemd-nspawn to do a clean shutdown.
        machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
      ''
  );

  # Platform the declarative containers are evaluated for.
  system = config.nixpkgs.system;

  # Submodule describing a single bind mount.  `mountPoint` defaults
  # to the attribute name of the entry.
  bindMountOpts = { name, config, ... }: {

    options = {
      mountPoint = mkOption {
        example = "/mnt/usb";
        type = types.str;
        description = "Mount point on the container file system.";
      };
      hostPath = mkOption {
        default = null;
        example = "/home/alice";
        type = types.nullOr types.str;
        description = "Location of the host path to be mounted.";
      };
      isReadOnly = mkOption {
        default = true;
        example = true;
        type = types.bool;
        description = "Determine whether the mounted path will be accessed in read-only mode.";
      };
    };

    config = {
      mountPoint = mkDefault name;
    };

  };

  # Renders one bind mount as a systemd-nspawn flag (with a leading
  # space): `--bind-ro=` or `--bind=`, followed by `host:mount` when a
  # host path is given and by the mount point alone otherwise.
  mkBindFlag = d:
    let
      flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
      mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
    in flagPrefix + mountstr;

  # Concatenates the flags for every bind mount in the attribute set.
  mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);

  # Addressing options shared by the container's primary veth pair and
  # by each entry of `extraVeths`.
  networkOptions = {
    hostBridge = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "br0";
      description = ''
        Put the host-side of the veth-pair into the named bridge.
        Only one of hostAddress* or hostBridge can be given.
      '';
    };

    hostAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.1";
      description = ''
        The IPv4 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    hostAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::1";
      description = ''
        The IPv6 address assigned to the host interface.
        (Not used when hostBridge is set.)
      '';
    };

    localAddress = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "10.231.136.2";
      description = ''
        The IPv4 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /32 and routing is
        set up from localAddress to hostAddress and back.
      '';
    };

    localAddress6 = mkOption {
      type = types.nullOr types.str;
      default = null;
      example = "fc00::2";
      description = ''
        The IPv6 address assigned to the interface in the container.
        If a hostBridge is used, this should be given with netmask to access
        the whole network. Otherwise the default netmask is /128 and routing is
        set up from localAddress6 to hostAddress6 and back.
      '';
    };

  };

in

{
  options = {

    boot.isContainer = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether this NixOS machine is a lightweight container running
        in another NixOS system.
      '';
    };

    boot.enableContainers = mkOption {
      type = types.bool;
      default = !config.boot.isContainer;
      description = ''
        Whether to enable support for nixos containers.
      '';
    };

    containers = mkOption {
      type = types.attrsOf (types.submodule (
        { config, options, name, ... }:
        {
          options = {

            config = mkOption {
              description = ''
                A specification of the desired configuration of this
                container, as a NixOS module.
              '';
              # Custom type: merging the definitions evaluates a full
              # NixOS configuration for the container, with a few
              # container-specific settings prepended.
              type = lib.mkOptionType {
                name = "Toplevel NixOS config";
                merge = loc: defs: (import ../../lib/eval-config.nix {
                  inherit system;
                  modules =
                    let extraConfig =
                      { boot.isContainer = true;
                        networking.hostName = mkDefault name;
                        networking.useDHCP = false;
                      };
                    in [ extraConfig ] ++ (map (x: x.value) defs);
                  prefix = [ "containers" name ];
                }).config;
              };
            };

            path = mkOption {
              type = types.path;
              example = "/nix/var/nix/profiles/containers/webserver";
              description = ''
                As an alternative to specifying
                <option>config</option>, you can specify the path to
                the evaluated NixOS system configuration, typically a
                symlink to a system profile.
              '';
            };

            privateNetwork = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Whether to give the container its own private virtual
                Ethernet interface.  The interface is called
                <literal>eth0</literal>, and is hooked up to the interface
                <literal>ve-<replaceable>container-name</replaceable></literal>
                on the host.  If this option is not set, then the
                container shares the network interfaces of the host,
                and can bind to any port on any interface.
              '';
            };

            interfaces = mkOption {
              type = types.listOf types.str;
              default = [];
              example = [ "eth1" "eth2" ];
              description = ''
                The list of interfaces to be moved into the container.
              '';
            };

            extraVeths = mkOption {
              type = types.attrsOf types.optionSet;
              default = {};
              options = networkOptions;
              description = ''
                Extra veth-pairs to be created for the container
              '';
            };

            autoStart = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Whether the container is automatically started at boot-time.
              '';
            };

            bindMounts = mkOption {
              type = types.loaOf types.optionSet;
              options = [ bindMountOpts ];
              default = {};
              example = { "/home" = { hostPath = "/home/alice";
                                      isReadOnly = false; };
                        };

              description =
                ''
                  An extra list of directories that is bound to the container.
                '';
            };

          } // networkOptions;

          # When `config` is given, derive `path` from the evaluated
          # system's top-level build output.
          config = mkMerge
            [
              (mkIf options.config.isDefined {
                path = config.config.system.build.toplevel;
              })
            ];
        }));

      default = {};
      example = literalExample
        ''
          { webserver =
              { path = "/nix/var/nix/profiles/webserver";
              };
            database =
              { config =
                  { config, pkgs, ... }:
                  { services.postgresql.enable = true;
                    services.postgresql.package = pkgs.postgresql92;
                  };
              };
          }
        '';
      description = ''
        A set of NixOS system configurations to be run as lightweight
        containers.  Each container appears as a service
        <literal>container-<replaceable>name</replaceable></literal>
        on the host system, allowing it to be started and stopped via
        <command>systemctl</command> .
      '';
    };

  };


  config = mkIf (config.boot.enableContainers) (let

    # Generic unit definition.  It is used as-is for the `container@`
    # template and as the base of every declarative container unit.
    unit = {
      description = "Container '%i'";

      unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ];

      path = [ pkgs.iproute ];

      environment.INSTANCE = "%i";
      environment.root = "/var/lib/containers/%i";

      preStart = preStartScript {};

      script = startScript {};

      postStart = postStartScript {};

      # Signal the leader PID recorded by postStart, then drop the
      # PID file.
      preStop =
        ''
          pid="$(cat /run/containers/$INSTANCE.pid)"
          if [ -n "$pid" ]; then
            kill -RTMIN+4 "$pid"
          fi
          rm -f "/run/containers/$INSTANCE.pid"
        '';

      restartIfChanged = false;

      serviceConfig = {
        ExecReload = pkgs.writeScript "reload-container"
          ''
            #! ${pkgs.stdenv.shell} -e
            ${pkgs.nixos-container}/bin/nixos-container run "$INSTANCE" -- \
              bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
          '';

        SyslogIdentifier = "container %i";

        EnvironmentFile = "-/etc/containers/%i.conf";

        Type = "notify";

        # Note that on reboot, systemd-nspawn returns 133, so this
        # unit will be restarted. On poweroff, it returns 0, so the
        # unit won't be restarted.
        RestartForceExitStatus = "133";
        SuccessExitStatus = "133";

        Restart = "on-failure";

        # Hack: we don't want to kill systemd-nspawn, since preStop
        # already signals the container's leader process (RTMIN+4)
        # to shut down the container cleanly.  But systemd requires
        # sending a signal (at least if we want remaining processes
        # to be killed after the timeout).  So send an ignored signal.
        KillMode = "mixed";
        KillSignal = "WINCH";

        DevicePolicy = "closed";
      };
    };
  in {
    systemd.services = listToAttrs (filter (x: x.value != null) (
      # The generic container template used by imperative containers
      [{ name = "container@"; value = unit; }]
      # declarative containers
      ++ (mapAttrsToList (name: cfg: nameValuePair "container@${name}" (
        unit // {
          preStart = preStartScript cfg;
          script = startScript cfg;
          postStart = postStartScript cfg;
        } // (
          if cfg.autoStart then
            {
              wantedBy = [ "multi-user.target" ];
              wants = [ "network.target" ];
              after = [ "network.target" ];
              restartTriggers = [ cfg.path ];
              reloadIfChanged = true;
            }
          else {})
      )) config.containers)
    ));

    # Generate a configuration file in /etc/containers for each
    # container so that the container@ service unit can get the
    # container configuration (it is loaded via EnvironmentFile).
    environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf"
      { text =
          ''
            SYSTEM_PATH=${cfg.path}
            ${optionalString cfg.privateNetwork ''
              PRIVATE_NETWORK=1
              ${optionalString (cfg.hostBridge != null) ''
                HOST_BRIDGE=${cfg.hostBridge}
              ''}
              ${optionalString (cfg.hostAddress != null) ''
                HOST_ADDRESS=${cfg.hostAddress}
              ''}
              ${optionalString (cfg.hostAddress6 != null) ''
                HOST_ADDRESS6=${cfg.hostAddress6}
              ''}
              ${optionalString (cfg.localAddress != null) ''
                LOCAL_ADDRESS=${cfg.localAddress}
              ''}
              ${optionalString (cfg.localAddress6 != null) ''
                LOCAL_ADDRESS6=${cfg.localAddress6}
              ''}
            ''}
            INTERFACES="${toString cfg.interfaces}"
            ${optionalString cfg.autoStart ''
              AUTO_START=1
            ''}
            EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts}"
          '';
      }) config.containers;

    # Generate /etc/hosts entries for the containers.
    networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
      ''
        ${cfg.localAddress} ${name}.containers
      '') config.containers);

    networking.dhcpcd.denyInterfaces = [ "ve-*" ];

    environment.systemPackages = [ pkgs.nixos-container ];
  });
}