1{ config, lib, pkgs, ... }:
2
3with lib;
4
5let
6
# The `nixos-container` command: built by running pkgs.substituteAll
# over ./nixos-container.pl and installed as an executable under bin/.
nixos-container = pkgs.substituteAll {
  name = "nixos-container";
  src = ./nixos-container.pl;
  dir = "bin";
  isExecutable = true;

  # Extra attributes handed to substituteAll (presumably consumed as
  # substitution variables by nixos-container.pl -- confirm against
  # the script).
  perl = "${pkgs.perl}/bin/perl -I${pkgs.perlPackages.FileSlurp}/lib/perl5/site_perl";
  su = "${pkgs.shadow.su}/bin/su";
  utillinux = pkgs.utillinux;

  # Also install the bash completion script next to the executable.
  postInstall = ''
    t=$out/etc/bash_completion.d
    mkdir -p $t
    cp ${./nixos-container-completion.sh} $t/nixos-container
  '';
};
22
# The container's init script, a small wrapper around the regular
# NixOS stage-2 init script.  systemd-nspawn runs it inside the
# container with the path of the real init script as "$1"; it first
# configures the container end of the veth pair (when
# PRIVATE_NETWORK=1) and then execs the real init.
containerInit = pkgs.writeScript "container-init"
  ''
    #! ${pkgs.stdenv.shell} -e

    # Initialise the container side of the veth pair.
    if [ "$PRIVATE_NETWORK" = 1 ]; then
      ip link set host0 name eth0
      ip link set dev eth0 up
      if [ -n "$HOST_ADDRESS" ]; then
        ip route add $HOST_ADDRESS dev eth0
        ip route add default via $HOST_ADDRESS
      fi
      if [ -n "$LOCAL_ADDRESS" ]; then
        ip addr add $LOCAL_ADDRESS dev eth0
      fi
    fi

    # Start the regular stage 2 script, passing the bind-mounted
    # notification socket from the host to allow the container
    # systemd to signal readiness to the host systemd.
    NOTIFY_SOCKET=/var/lib/private/host-notify exec "$1"
  '';
47
# Platform of the host.  Bound here because inside the `containers`
# submodule the name `config` refers to the submodule's own config,
# while the container evaluation still needs the host's system.
inherit (config.nixpkgs) system;
49
50in
51
52{
53 options = {
54
# Guest-side flag.  This module sets it to true in the extra
# configuration it adds to every declaratively defined container.
boot.isContainer = mkOption {
  description = ''
    Whether this NixOS machine is a lightweight container running
    in another NixOS system.
  '';
  type = types.bool;
  default = false;
};
63
# Host-side switch that gates the whole `config` section of this
# module.  On by default unless this system is itself a container.
boot.enableContainers = mkOption {
  description = ''
    Whether to enable support for nixos containers.
  '';
  type = types.bool;
  default = !config.boot.isContainer;
};
71
# Declarative container definitions, keyed by container name.  Each
# entry either carries a NixOS module (`config`) that is evaluated
# here, or points at an already built system (`path`).
containers = mkOption {
  type = types.attrsOf (types.submodule (
    { config, options, name, ... }:
    {
      options = {

        config = mkOption {
          description = ''
            A specification of the desired configuration of this
            container, as a NixOS module.
          '';
        };

        path = mkOption {
          type = types.path;
          example = "/nix/var/nix/profiles/containers/webserver";
          description = ''
            As an alternative to specifying
            <option>config</option>, you can specify the path to
            the evaluated NixOS system configuration, typically a
            symlink to a system profile.
          '';
        };

        privateNetwork = mkOption {
          type = types.bool;
          default = false;
          description = ''
            Whether to give the container its own private virtual
            Ethernet interface.  The interface is called
            <literal>eth0</literal>, and is hooked up to the interface
            <literal>ve-<replaceable>container-name</replaceable></literal>
            on the host.  If this option is not set, then the
            container shares the network interfaces of the host,
            and can bind to any port on any interface.
          '';
        };

        hostAddress = mkOption {
          type = types.nullOr types.string;
          default = null;
          example = "10.231.136.1";
          description = ''
            The IPv4 address assigned to the host interface.
          '';
        };

        localAddress = mkOption {
          type = types.nullOr types.string;
          default = null;
          example = "10.231.136.2";
          description = ''
            The IPv4 address assigned to <literal>eth0</literal>
            in the container.
          '';
        };

        interfaces = mkOption {
          type = types.listOf types.string;
          default = [];
          example = [ "eth1" "eth2" ];
          description = ''
            The list of interfaces to be moved into the container.
          '';
        };

        autoStart = mkOption {
          type = types.bool;
          default = false;
          description = ''
            Whether the container is automatically started at boot-time.
          '';
        };
      };

      # When `config` is defined, evaluate it together with a few
      # container-specific settings and use the resulting
      # system.build.toplevel as this container's `path`.
      config = mkMerge
        [ (mkIf options.config.isDefined {
            path = (import ../../lib/eval-config.nix {
              # `system` comes from the enclosing module's let block.
              inherit system;
              modules =
                let extraConfig =
                  { boot.isContainer = true;
                    # mkDefault so the container's own module can
                    # still choose a different host name.
                    networking.hostName = mkDefault name;
                    networking.useDHCP = false;
                  };
                in [ extraConfig config.config ];
              prefix = [ "containers" name ];
            }).config.system.build.toplevel;
          })
        ];
    }));

  default = {};
  example = literalExample
    ''
      { webserver =
          { path = "/nix/var/nix/profiles/webserver";
          };
        database =
          { config =
              { config, pkgs, ... }:
              { services.postgresql.enable = true;
                services.postgresql.package = pkgs.postgresql92;
              };
          };
      }
    '';
  description = ''
    A set of NixOS system configurations to be run as lightweight
    containers.  Each container appears as a service
    <literal>container-<replaceable>name</replaceable></literal>
    on the host system, allowing it to be started and stopped via
    <command>systemctl</command>.
  '';
};
187
188 };
189
190
191 config = mkIf (config.boot.enableContainers) {
192
# Template unit that runs one container per instance ("%i") with
# systemd-nspawn.  Per-container settings (SYSTEM_PATH,
# PRIVATE_NETWORK, HOST_ADDRESS, LOCAL_ADDRESS, INTERFACES, ...) are
# read from /etc/containers/%i.conf through EnvironmentFile.
systemd.services."container@" =
  { description = "Container '%i'";

    unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ];

    # The scripts below call `ip`.
    path = [ pkgs.iproute ];

    environment.INSTANCE = "%i";
    environment.root = "/var/lib/containers/%i";

    preStart =
      ''
        # Clean up existing machined registration and interfaces.
        machinectl terminate "$INSTANCE" 2> /dev/null || true

        if [ "$PRIVATE_NETWORK" = 1 ]; then
          ip link del dev "ve-$INSTANCE" 2> /dev/null || true
        fi
      '';

    script =
      ''
        mkdir -p -m 0755 "$root/etc" "$root/var/lib"
        mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
        if ! [ -e "$root/etc/os-release" ]; then
          touch "$root/etc/os-release"
        fi

        mkdir -p -m 0755 \
          "/nix/var/nix/profiles/per-container/$INSTANCE" \
          "/nix/var/nix/gcroots/per-container/$INSTANCE"

        cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"

        if [ "$PRIVATE_NETWORK" = 1 ]; then
          extraFlags+=" --network-veth"
        fi

        for iface in $INTERFACES; do
          extraFlags+=" --network-interface=$iface"
        done

        for iface in $MACVLANS; do
          extraFlags+=" --network-macvlan=$iface"
        done

        # If the host is 64-bit and the container is 32-bit, add a
        # --personality flag.
        ${optionalString (config.nixpkgs.system == "x86_64-linux") ''
          if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
            extraFlags+=" --personality=x86"
          fi
        ''}

        # Run systemd-nspawn without startup notification (we'll
        # wait for the container systemd to signal readiness).
        EXIT_ON_REBOOT=1 NOTIFY_SOCKET= \
        exec ${config.systemd.package}/bin/systemd-nspawn \
          --keep-unit \
          -M "$INSTANCE" -D "$root" $extraFlags \
          --bind-ro=/nix/store \
          --bind-ro=/nix/var/nix/db \
          --bind-ro=/nix/var/nix/daemon-socket \
          --bind=/run/systemd/notify:/var/lib/private/host-notify \
          --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
          --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
          --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
          --setenv HOST_ADDRESS="$HOST_ADDRESS" \
          --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
          --setenv PATH="$PATH" \
          ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
      '';

    postStart =
      ''
        if [ "$PRIVATE_NETWORK" = 1 ]; then
          ifaceHost=ve-$INSTANCE
          ip link set dev $ifaceHost up
          if [ -n "$HOST_ADDRESS" ]; then
            ip addr add $HOST_ADDRESS dev $ifaceHost
          fi
          if [ -n "$LOCAL_ADDRESS" ]; then
            ip route add $LOCAL_ADDRESS dev $ifaceHost
          fi
        fi

        # Get the leader PID so that we can signal it in
        # preStop. We can't use machinectl there because D-Bus
        # might be shutting down. FIXME: in systemd 219 we can
        # just signal systemd-nspawn to do a clean shutdown.
        machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
      '';

    preStop =
      ''
        pid="$(cat /run/containers/$INSTANCE.pid)"
        if [ -n "$pid" ]; then
          kill -RTMIN+4 "$pid"
        fi
        rm -f "/run/containers/$INSTANCE.pid"
      '';

    restartIfChanged = false;
    #reloadIfChanged = true; # FIXME

    serviceConfig = {
      # Reloading switches the running container to its current
      # system configuration instead of restarting it.
      ExecReload = pkgs.writeScript "reload-container"
        ''
          #! ${pkgs.stdenv.shell} -e
          ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
            bash --login -c "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/bin/switch-to-configuration test"
        '';

      SyslogIdentifier = "container %i";

      # The leading "-" makes a missing file non-fatal.
      EnvironmentFile = "-/etc/containers/%i.conf";

      # Readiness is reported by the container's systemd through the
      # host notification socket bind-mounted in `script`.
      Type = "notify";

      NotifyAccess = "all";

      # Note that on reboot, systemd-nspawn returns 133, so this
      # unit will be restarted. On poweroff, it returns 0, so the
      # unit won't be restarted.
      RestartForceExitStatus = "133";
      SuccessExitStatus = "133";

      Restart = "on-failure";

      # Hack: we don't want to kill systemd-nspawn, since preStop
      # already asks the container to shut down cleanly by sending
      # SIGRTMIN+4 to its leader PID.  But systemd requires sending
      # a signal (at least if we want remaining processes to be
      # killed after the timeout).  So send an ignored signal.
      KillMode = "mixed";
      KillSignal = "WINCH";
    };
  };
336
# Write one /etc/containers/<name>.conf per declared container.  The
# "container@" service loads it through EnvironmentFile, and the
# "all-containers" service sources it to look for AUTO_START.
environment.etc =
  let
    # Build the (file name, contents) pair for a single container.
    containerConf = name: cfg:
      nameValuePair "containers/${name}.conf" {
        text =
          ''
            SYSTEM_PATH=${cfg.path}
            ${optionalString cfg.privateNetwork ''
              PRIVATE_NETWORK=1
              ${optionalString (cfg.hostAddress != null) ''
                HOST_ADDRESS=${cfg.hostAddress}
              ''}
              ${optionalString (cfg.localAddress != null) ''
                LOCAL_ADDRESS=${cfg.localAddress}
              ''}
            ''}
            INTERFACES="${toString cfg.interfaces}"
            ${optionalString cfg.autoStart ''
              AUTO_START=1
            ''}
          '';
      };
  in
    mapAttrs' containerConf config.containers;
359
# Generate /etc/hosts entries for the containers: each container that
# has a localAddress is listed as "<name>.containers".
networking.extraHosts =
  let
    # One hosts line for a container, or "" when it has no address.
    hostsLine = name: cfg:
      optionalString (cfg.localAddress != null)
        "${cfg.localAddress} ${name}.containers\n";
  in
    concatStrings (mapAttrsToList hostsLine config.containers);
365
# Make the nixos-container tool available system-wide.
environment.systemPackages = [ nixos-container ];

# Keep dhcpcd off the host ends of the container veth pairs, which
# are named "ve-<container>".
networking.dhcpcd.denyInterfaces = [ "ve-*" ];
369
# Start containers at boot time: walk /etc/containers/*.conf and
# start the "container@<name>" unit of every container whose file
# sets AUTO_START=1.  The service fails if any of those starts fails.
systemd.services.all-containers = {
  description = "All Containers";

  wantedBy = [ "multi-user.target" ];

  # Skipped entirely while /etc/containers has no entries.
  unitConfig = {
    ConditionDirectoryNotEmpty = "/etc/containers";
  };

  serviceConfig = {
    Type = "oneshot";
  };

  # The trailing "# */" presumably just closes the "/*" that some
  # editors see in the glob below -- it has no effect on the build.
  script =
    ''
      res=0
      shopt -s nullglob
      for i in /etc/containers/*.conf; do
        AUTO_START=
        source "$i"
        if [ "$AUTO_START" = 1 ]; then
          systemctl start "container@$(basename "$i" .conf).service" || res=1
        fi
      done
      exit $res
    ''; # */
};
394
395 };
396}