# NixOS module that runs NixOS system configurations as lightweight
# containers using systemd-nspawn.  It declares the `containers`
# option, the `container@` template service, the per-container
# /etc/containers/<name>.conf files, and installs the
# `nixos-container` command.
{ config, lib, pkgs, ... }:

with lib;

let

  # The `nixos-container` command line tool, generated from the Perl
  # script next to this file, plus its bash completion.
  nixos-container = pkgs.substituteAll {
    name = "nixos-container";
    dir = "bin";
    isExecutable = true;
    src = ./nixos-container.pl;
    perl = "${pkgs.perl}/bin/perl -I${pkgs.perlPackages.FileSlurp}/lib/perl5/site_perl";
    su = "${pkgs.shadow.su}/bin/su";
    inherit (pkgs) utillinux;

    postInstall = ''
      t=$out/etc/bash_completion.d
      mkdir -p $t
      cp ${./nixos-container-completion.sh} $t/nixos-container
    '';
  };

  # The container's init script, a small wrapper around the regular
  # NixOS stage-2 init script.  It receives the path of the real init
  # as "$1" and reads PRIVATE_NETWORK, HOST_ADDRESS and LOCAL_ADDRESS
  # from the environment (passed by systemd-nspawn via --setenv).
  containerInit = pkgs.writeScript "container-init"
    ''
      #! ${pkgs.stdenv.shell} -e

      # Initialise the container side of the veth pair.
      if [ "$PRIVATE_NETWORK" = 1 ]; then
        ip link set host0 name eth0
        ip link set dev eth0 up
        if [ -n "$HOST_ADDRESS" ]; then
          ip route add $HOST_ADDRESS dev eth0
          ip route add default via $HOST_ADDRESS
        fi
        if [ -n "$LOCAL_ADDRESS" ]; then
          ip addr add $LOCAL_ADDRESS dev eth0
        fi
      fi

      # Start the regular stage 2 script, passing the bind-mounted
      # notification socket from the host to allow the container
      # systemd to signal readiness to the host systemd.
      NOTIFY_SOCKET=/var/lib/private/host-notify exec "$1"
    '';

  # Platform of the host; containers defined via `config` are
  # evaluated for the same platform.
  system = config.nixpkgs.system;

  # Option declarations for a single entry of `bindMounts`.  The
  # attribute name is used as the default mount point.
  bindMountOpts = { name, config, ... }: {

    options = {
      mountPoint = mkOption {
        example = "/mnt/usb";
        type = types.str;
        description = "Mount point on the container file system.";
      };
      hostPath = mkOption {
        default = null;
        example = "/home/alice";
        type = types.nullOr types.str;
        description = "Location of the host path to be mounted.";
      };
      isReadOnly = mkOption {
        default = true;
        example = true;
        type = types.bool;
        description = "Determine whether the mounted path will be accessed in read-only mode.";
      };
    };

    config = {
      mountPoint = mkDefault name;
    };

  };

  # Render one bind mount as a systemd-nspawn flag (with a leading
  # space): " --bind-ro=..." for read-only mounts, " --bind=..."
  # otherwise.  Without a hostPath only the mount point is passed.
  mkBindFlag = d:
    let
      flagPrefix = if d.isReadOnly then " --bind-ro=" else " --bind=";
      mountstr = if d.hostPath != null then "${d.hostPath}:${d.mountPoint}" else "${d.mountPoint}";
    in flagPrefix + mountstr;

  # Concatenate the flags for all bind mounts of a container.
  mkBindFlags = bs: concatMapStrings mkBindFlag (lib.attrValues bs);

in

{
  options = {

    boot.isContainer = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether this NixOS machine is a lightweight container running
        in another NixOS system.
      '';
    };

    boot.enableContainers = mkOption {
      type = types.bool;
      default = !config.boot.isContainer;
      description = ''
        Whether to enable support for nixos containers.
      '';
    };

    containers = mkOption {
      type = types.attrsOf (types.submodule (
        { config, options, name, ... }:
        {
          options = {

            config = mkOption {
              description = ''
                A specification of the desired configuration of this
                container, as a NixOS module.
              '';
            };

            path = mkOption {
              type = types.path;
              example = "/nix/var/nix/profiles/containers/webserver";
              description = ''
                As an alternative to specifying
                <option>config</option>, you can specify the path to
                the evaluated NixOS system configuration, typically a
                symlink to a system profile.
              '';
            };

            privateNetwork = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Whether to give the container its own private virtual
                Ethernet interface.  The interface is called
                <literal>eth0</literal>, and is hooked up to the interface
                <literal>ve-<replaceable>container-name</replaceable></literal>
                on the host.  If this option is not set, then the
                container shares the network interfaces of the host,
                and can bind to any port on any interface.
              '';
            };

            hostAddress = mkOption {
              type = types.nullOr types.str;
              default = null;
              example = "10.231.136.1";
              description = ''
                The IPv4 address assigned to the host interface.
              '';
            };

            localAddress = mkOption {
              type = types.nullOr types.str;
              default = null;
              example = "10.231.136.2";
              description = ''
                The IPv4 address assigned to <literal>eth0</literal>
                in the container.
              '';
            };

            interfaces = mkOption {
              # types.str rather than the deprecated types.string, in
              # line with every other string option in this module.
              type = types.listOf types.str;
              default = [];
              example = [ "eth1" "eth2" ];
              description = ''
                The list of interfaces to be moved into the container.
              '';
            };

            autoStart = mkOption {
              type = types.bool;
              default = false;
              description = ''
                Whether the container is automatically started at boot-time.
              '';
            };

            bindMounts = mkOption {
              type = types.loaOf types.optionSet;
              options = [ bindMountOpts ];
              default = {};
              example = { "/home" = { hostPath = "/home/alice";
                                      isReadOnly = false; };
                        };

              description =
                ''
                  An extra list of directories that is bound to the container.
                '';
            };

          };

          # When `config` is given, evaluate it as a full NixOS system
          # and use the resulting toplevel derivation as `path`.
          config = mkMerge
            [ (mkIf options.config.isDefined {
                path = (import ../../lib/eval-config.nix {
                  inherit system;
                  modules =
                    let extraConfig =
                      { boot.isContainer = true;
                        networking.hostName = mkDefault name;
                        networking.useDHCP = false;
                      };
                    in [ extraConfig config.config ];
                  prefix = [ "containers" name ];
                }).config.system.build.toplevel;
              })
            ];
        }));

      default = {};
      example = literalExample
        ''
          { webserver =
              { path = "/nix/var/nix/profiles/webserver";
              };
            database =
              { config =
                  { config, pkgs, ... }:
                  { services.postgresql.enable = true;
                    services.postgresql.package = pkgs.postgresql92;
                  };
              };
          }
        '';
      description = ''
        A set of NixOS system configurations to be run as lightweight
        containers.  Each container appears as a service
        <literal>container-<replaceable>name</replaceable></literal>
        on the host system, allowing it to be started and stopped via
        <command>systemctl</command> .
      '';
    };

  };


  config = mkIf (config.boot.enableContainers) {

    # Template unit; the instance name (%i) is the container name.
    # Per-container settings come from /etc/containers/%i.conf (see
    # EnvironmentFile below).
    systemd.services."container@" =
      { description = "Container '%i'";

        unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ];

        path = [ pkgs.iproute ];

        environment.INSTANCE = "%i";
        environment.root = "/var/lib/containers/%i";

        preStart =
          ''
            # Clean up existing machined registration and interfaces.
            machinectl terminate "$INSTANCE" 2> /dev/null || true

            if [ "$PRIVATE_NETWORK" = 1 ]; then
              ip link del dev "ve-$INSTANCE" 2> /dev/null || true
            fi
          '';

        script =
          ''
            mkdir -p -m 0755 "$root/etc" "$root/var/lib"
            mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
            if ! [ -e "$root/etc/os-release" ]; then
              touch "$root/etc/os-release"
            fi

            mkdir -p -m 0755 \
              "/nix/var/nix/profiles/per-container/$INSTANCE" \
              "/nix/var/nix/gcroots/per-container/$INSTANCE"

            cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

            if [ "$PRIVATE_NETWORK" = 1 ]; then
              extraFlags+=" --network-veth"
            fi

            for iface in $INTERFACES; do
              extraFlags+=" --network-interface=$iface"
            done

            # NOTE(review): MACVLANS is not written by this module's
            # generated .conf files; presumably it is supplied by
            # other means -- confirm.
            for iface in $MACVLANS; do
              extraFlags+=" --network-macvlan=$iface"
            done

            # If the host is 64-bit and the container is 32-bit, add a
            # --personality flag.
            ${optionalString (config.nixpkgs.system == "x86_64-linux") ''
              if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
                extraFlags+=" --personality=x86"
              fi
            ''}

            # Run systemd-nspawn without startup notification (we'll
            # wait for the container systemd to signal readiness).
            EXIT_ON_REBOOT=1 NOTIFY_SOCKET= \
            exec ${config.systemd.package}/bin/systemd-nspawn \
              --keep-unit \
              -M "$INSTANCE" -D "$root" $extraFlags \
              $EXTRA_NSPAWN_FLAGS \
              --bind-ro=/nix/store \
              --bind-ro=/nix/var/nix/db \
              --bind-ro=/nix/var/nix/daemon-socket \
              --bind=/run/systemd/notify:/var/lib/private/host-notify \
              --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
              --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
              --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
              --setenv HOST_ADDRESS="$HOST_ADDRESS" \
              --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
              --setenv PATH="$PATH" \
              ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
          '';

        postStart =
          ''
            # Configure the host side of the veth pair.
            if [ "$PRIVATE_NETWORK" = 1 ]; then
              ifaceHost=ve-$INSTANCE
              ip link set dev $ifaceHost up
              if [ -n "$HOST_ADDRESS" ]; then
                ip addr add $HOST_ADDRESS dev $ifaceHost
              fi
              if [ -n "$LOCAL_ADDRESS" ]; then
                ip route add $LOCAL_ADDRESS dev $ifaceHost
              fi
            fi

            # Get the leader PID so that we can signal it in
            # preStop. We can't use machinectl there because D-Bus
            # might be shutting down. FIXME: in systemd 219 we can
            # just signal systemd-nspawn to do a clean shutdown.
            machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
          '';

        preStop =
          ''
            # Tolerate a missing PID file so that stopping does not
            # fail before the cleanup below has run.
            pid="$(cat "/run/containers/$INSTANCE.pid" 2> /dev/null || true)"
            if [ -n "$pid" ]; then
              kill -RTMIN+4 "$pid"
            fi
            rm -f "/run/containers/$INSTANCE.pid"
          '';

        restartIfChanged = false;
        #reloadIfChanged = true; # FIXME

        serviceConfig = {
          # Reload by activating the container's current system
          # configuration from inside the container.
          ExecReload = pkgs.writeScript "reload-container"
            ''
              #! ${pkgs.stdenv.shell} -e
              ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
                bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
            '';

          SyslogIdentifier = "container %i";

          # The leading "-" makes a missing file non-fatal.
          EnvironmentFile = "-/etc/containers/%i.conf";

          Type = "notify";

          NotifyAccess = "all";

          # Note that on reboot, systemd-nspawn returns 133, so this
          # unit will be restarted. On poweroff, it returns 0, so the
          # unit won't be restarted.
          RestartForceExitStatus = "133";
          SuccessExitStatus = "133";

          Restart = "on-failure";

          # Hack: we don't want to kill systemd-nspawn, since preStop
          # already signals the container's leader process with
          # SIGRTMIN+4 to shut the container down cleanly.  But
          # systemd requires sending a signal (at least if we want
          # remaining processes to be killed after the timeout).  So
          # send an ignored signal.
          KillMode = "mixed";
          KillSignal = "WINCH";
        };
      };

    # Generate a configuration file in /etc/containers for each
    # container so that container@.service can get the container
    # configuration.
    environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf"
      { text =
          ''
            SYSTEM_PATH=${cfg.path}
            ${optionalString cfg.privateNetwork ''
              PRIVATE_NETWORK=1
              ${optionalString (cfg.hostAddress != null) ''
                HOST_ADDRESS=${cfg.hostAddress}
              ''}
              ${optionalString (cfg.localAddress != null) ''
                LOCAL_ADDRESS=${cfg.localAddress}
              ''}
            ''}
            INTERFACES="${toString cfg.interfaces}"
            ${optionalString cfg.autoStart ''
              AUTO_START=1
            ''}
            EXTRA_NSPAWN_FLAGS="${mkBindFlags cfg.bindMounts}"
          '';
      }) config.containers;

    # Generate /etc/hosts entries for the containers.
    networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
      ''
        ${cfg.localAddress} ${name}.containers
      '') config.containers);

    # Keep dhcpcd away from the host side of the containers' veth pairs.
    networking.dhcpcd.denyInterfaces = [ "ve-*" ];

    environment.systemPackages = [ nixos-container ];

    # Start containers at boot time.
    systemd.services.all-containers =
      { description = "All Containers";

        wantedBy = [ "multi-user.target" ];

        unitConfig.ConditionDirectoryNotEmpty = "/etc/containers";

        serviceConfig.Type = "oneshot";

        # Start every container whose .conf sets AUTO_START=1; keep
        # going after a failure but report it through the exit status.
        script =
          ''
            res=0
            shopt -s nullglob
            for i in /etc/containers/*.conf; do
              AUTO_START=
              source "$i"
              if [ "$AUTO_START" = 1 ]; then
                systemctl start "container@$(basename "$i" .conf).service" || res=1
              fi
            done
            exit $res
          ''; # */
      };

  };
}