1{ config, lib, pkgs, ... }:
2
3with lib;
4
5let
6
7 nixos-container = pkgs.substituteAll {
8 name = "nixos-container";
9 dir = "bin";
10 isExecutable = true;
11 src = ./nixos-container.pl;
12 perl = "${pkgs.perl}/bin/perl -I${pkgs.perlPackages.FileSlurp}/lib/perl5/site_perl";
13 su = "${pkgs.shadow.su}/bin/su";
14 inherit (pkgs) utillinux;
15 };
16
17 # The container's init script, a small wrapper around the regular
18 # NixOS stage-2 init script.
19 containerInit = pkgs.writeScript "container-init"
20 ''
21 #! ${pkgs.stdenv.shell} -e
22
23 # Initialise the container side of the veth pair.
24 if [ "$PRIVATE_NETWORK" = 1 ]; then
25 ip link set host0 name eth0
26 ip link set dev eth0 up
27 if [ -n "$HOST_ADDRESS" ]; then
28 ip route add $HOST_ADDRESS dev eth0
29 ip route add default via $HOST_ADDRESS
30 fi
31 if [ -n "$LOCAL_ADDRESS" ]; then
32 ip addr add $LOCAL_ADDRESS dev eth0
33 fi
34 fi
35
36 # Start the regular stage 1 script, passing the bind-mounted
37 # notification socket from the host to allow the container
38 # systemd to signal readiness to the host systemd.
39 NOTIFY_SOCKET=/var/lib/private/host-notify exec "$1"
40 '';
41
42 system = config.nixpkgs.system;
43
44in
45
46{
47 options = {
48
49 boot.isContainer = mkOption {
50 type = types.bool;
51 default = false;
52 description = ''
53 Whether this NixOS machine is a lightweight container running
54 in another NixOS system.
55 '';
56 };
57
58 boot.enableContainers = mkOption {
59 type = types.bool;
60 default = !config.boot.isContainer;
61 description = ''
62 Whether to enable support for nixos containers.
63 '';
64 };
65
66 containers = mkOption {
67 type = types.attrsOf (types.submodule (
68 { config, options, name, ... }:
69 {
70 options = {
71
72 config = mkOption {
73 description = ''
74 A specification of the desired configuration of this
75 container, as a NixOS module.
76 '';
77 };
78
79 path = mkOption {
80 type = types.path;
81 example = "/nix/var/nix/profiles/containers/webserver";
82 description = ''
83 As an alternative to specifying
84 <option>config</option>, you can specify the path to
85 the evaluated NixOS system configuration, typically a
86 symlink to a system profile.
87 '';
88 };
89
90 privateNetwork = mkOption {
91 type = types.bool;
92 default = false;
93 description = ''
94 Whether to give the container its own private virtual
95 Ethernet interface. The interface is called
96 <literal>eth0</literal>, and is hooked up to the interface
97 <literal>ve-<replaceable>container-name</replaceable></literal>
98 on the host. If this option is not set, then the
99 container shares the network interfaces of the host,
100 and can bind to any port on any interface.
101 '';
102 };
103
104 hostAddress = mkOption {
105 type = types.nullOr types.string;
106 default = null;
107 example = "10.231.136.1";
108 description = ''
109 The IPv4 address assigned to the host interface.
110 '';
111 };
112
113 localAddress = mkOption {
114 type = types.nullOr types.string;
115 default = null;
116 example = "10.231.136.2";
117 description = ''
118 The IPv4 address assigned to <literal>eth0</literal>
119 in the container.
120 '';
121 };
122
123 interfaces = mkOption {
124 type = types.listOf types.string;
125 default = [];
126 example = [ "eth1" "eth2" ];
127 description = ''
128 The list of interfaces to be moved into the container.
129 '';
130 };
131
132 autoStart = mkOption {
133 type = types.bool;
134 default = false;
135 description = ''
136 Wether the container is automatically started at boot-time.
137 '';
138 };
139 };
140
141 config = mkMerge
142 [ (mkIf options.config.isDefined {
143 path = (import ../../lib/eval-config.nix {
144 inherit system;
145 modules =
146 let extraConfig =
147 { boot.isContainer = true;
148 networking.hostName = mkDefault name;
149 networking.useDHCP = false;
150 };
151 in [ extraConfig config.config ];
152 prefix = [ "containers" name ];
153 }).config.system.build.toplevel;
154 })
155 ];
156 }));
157
158 default = {};
159 example = literalExample
160 ''
161 { webserver =
162 { path = "/nix/var/nix/profiles/webserver";
163 };
164 database =
165 { config =
166 { config, pkgs, ... }:
167 { services.postgresql.enable = true;
168 services.postgresql.package = pkgs.postgresql92;
169 };
170 };
171 }
172 '';
173 description = ''
174 A set of NixOS system configurations to be run as lightweight
175 containers. Each container appears as a service
176 <literal>container-<replaceable>name</replaceable></literal>
177 on the host system, allowing it to be started and stopped via
178 <command>systemctl</command> .
179 '';
180 };
181
182 };
183
184
185 config = mkIf (config.boot.enableContainers) {
186
187 systemd.services."container@" =
188 { description = "Container '%i'";
189
190 unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ];
191
192 path = [ pkgs.iproute ];
193
194 environment.INSTANCE = "%i";
195 environment.root = "/var/lib/containers/%i";
196
197 preStart =
198 ''
199 # Clean up existing machined registration and interfaces.
200 machinectl terminate "$INSTANCE" 2> /dev/null || true
201
202 if [ "$PRIVATE_NETWORK" = 1 ]; then
203 ip link del dev "ve-$INSTANCE" 2> /dev/null || true
204 fi
205
206
207 if [ "$PRIVATE_NETWORK" = 1 ]; then
208 ip link del dev "ve-$INSTANCE" 2> /dev/null || true
209 fi
210 '';
211
212 script =
213 ''
214 mkdir -p -m 0755 "$root/etc" "$root/var/lib"
215 mkdir -p -m 0700 "$root/var/lib/private" "$root/root" /run/containers
216 if ! [ -e "$root/etc/os-release" ]; then
217 touch "$root/etc/os-release"
218 fi
219
220 mkdir -p -m 0755 \
221 "/nix/var/nix/profiles/per-container/$INSTANCE" \
222 "/nix/var/nix/gcroots/per-container/$INSTANCE"
223
224 cp --remove-destination /etc/resolv.conf "$root/etc/resolv.conf"
225
226 if [ "$PRIVATE_NETWORK" = 1 ]; then
227 extraFlags+=" --network-veth"
228 fi
229
230 for iface in $INTERFACES; do
231 extraFlags+=" --network-interface=$iface"
232 done
233
234 for iface in $MACVLANS; do
235 extraFlags+=" --network-macvlan=$iface"
236 done
237
238 # If the host is 64-bit and the container is 32-bit, add a
239 # --personality flag.
240 ${optionalString (config.nixpkgs.system == "x86_64-linux") ''
241 if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then
242 extraFlags+=" --personality=x86"
243 fi
244 ''}
245
246 # Run systemd-nspawn without startup notification (we'll
247 # wait for the container systemd to signal readiness).
248 EXIT_ON_REBOOT=1 NOTIFY_SOCKET= \
249 exec ${config.systemd.package}/bin/systemd-nspawn \
250 --keep-unit \
251 -M "$INSTANCE" -D "$root" $extraFlags \
252 --bind-ro=/nix/store \
253 --bind-ro=/nix/var/nix/db \
254 --bind-ro=/nix/var/nix/daemon-socket \
255 --bind=/run/systemd/notify:/var/lib/private/host-notify \
256 --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \
257 --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \
258 --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \
259 --setenv HOST_ADDRESS="$HOST_ADDRESS" \
260 --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \
261 --setenv PATH="$PATH" \
262 ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init"
263 '';
264
265 postStart =
266 ''
267 if [ "$PRIVATE_NETWORK" = 1 ]; then
268 ifaceHost=ve-$INSTANCE
269 ip link set dev $ifaceHost up
270 if [ -n "$HOST_ADDRESS" ]; then
271 ip addr add $HOST_ADDRESS dev $ifaceHost
272 fi
273 if [ -n "$LOCAL_ADDRESS" ]; then
274 ip route add $LOCAL_ADDRESS dev $ifaceHost
275 fi
276 fi
277
278 # Get the leader PID so that we can signal it in
279 # preStop. We can't use machinectl there because D-Bus
280 # might be shutting down. FIXME: in systemd 219 we can
281 # just signal systemd-nspawn to do a clean shutdown.
282 machinectl show "$INSTANCE" | sed 's/Leader=\(.*\)/\1/;t;d' > "/run/containers/$INSTANCE.pid"
283 '';
284
285 preStop =
286 ''
287 pid="$(cat /run/containers/$INSTANCE.pid)"
288 if [ -n "$pid" ]; then
289 kill -RTMIN+4 "$pid"
290 fi
291 rm -f "/run/containers/$INSTANCE.pid"
292 '';
293
294 restartIfChanged = false;
295 #reloadIfChanged = true; # FIXME
296
297 serviceConfig = {
298 ExecReload = pkgs.writeScript "reload-container"
299 ''
300 #! ${pkgs.stdenv.shell} -e
301 ${nixos-container}/bin/nixos-container run "$INSTANCE" -- \
302 bash --login -c "/nix/var/nix/profiles/system/bin/switch-to-configuration test"
303 '';
304
305 SyslogIdentifier = "container %i";
306
307 EnvironmentFile = "-/etc/containers/%i.conf";
308
309 Type = "notify";
310
311 NotifyAccess = "all";
312
313 # Note that on reboot, systemd-nspawn returns 133, so this
314 # unit will be restarted. On poweroff, it returns 0, so the
315 # unit won't be restarted.
316 RestartForceExitStatus = "133";
317 SuccessExitStatus = "133";
318
319 Restart = "on-failure";
320
321 # Hack: we don't want to kill systemd-nspawn, since we call
322 # "machinectl poweroff" in preStop to shut down the
323 # container cleanly. But systemd requires sending a signal
324 # (at least if we want remaining processes to be killed
325 # after the timeout). So send an ignored signal.
326 KillMode = "mixed";
327 KillSignal = "WINCH";
328 };
329 };
330
331 # Generate a configuration file in /etc/containers for each
332 # container so that container@.target can get the container
333 # configuration.
334 environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf"
335 { text =
336 ''
337 SYSTEM_PATH=${cfg.path}
338 ${optionalString cfg.privateNetwork ''
339 PRIVATE_NETWORK=1
340 ${optionalString (cfg.hostAddress != null) ''
341 HOST_ADDRESS=${cfg.hostAddress}
342 ''}
343 ${optionalString (cfg.localAddress != null) ''
344 LOCAL_ADDRESS=${cfg.localAddress}
345 ''}
346 ''}
347 INTERFACES="${toString cfg.interfaces}"
348 ${optionalString cfg.autoStart ''
349 AUTO_START=1
350 ''}
351 '';
352 }) config.containers;
353
354 # Generate /etc/hosts entries for the containers.
355 networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null)
356 ''
357 ${cfg.localAddress} ${name}.containers
358 '') config.containers);
359
360 networking.dhcpcd.denyInterfaces = [ "ve-*" ];
361
362 environment.systemPackages = [ nixos-container ];
363
364 # Start containers at boot time.
365 systemd.services.all-containers =
366 { description = "All Containers";
367
368 wantedBy = [ "multi-user.target" ];
369
370 unitConfig.ConditionDirectoryNotEmpty = "/etc/containers";
371
372 serviceConfig.Type = "oneshot";
373
374 script =
375 ''
376 res=0
377 shopt -s nullglob
378 for i in /etc/containers/*.conf; do
379 AUTO_START=
380 source "$i"
381 if [ "$AUTO_START" = 1 ]; then
382 systemctl start "container@$(basename "$i" .conf).service" || res=1
383 fi
384 done
385 exit $res
386 ''; # */
387 };
388
389 };
390}