1# This module creates a virtual machine from the NixOS configuration.
2# Building the `config.system.build.vm' attribute gives you a command
3# that starts a KVM/QEMU VM running the NixOS configuration defined in
4# `config'. The Nix store is shared read-only with the host, which
5# makes (re)building VMs very efficient. However, it also means you
6# can't reconfigure the guest inside the guest - you need to rebuild
7# the VM in the host. On the other hand, the root filesystem is a
8# read/writable disk image persistent across VM reboots.
9
10{ config, lib, pkgs, ... }:
11
12with lib;
13with import ../../lib/qemu-flags.nix { inherit pkgs; };
14
15let
16
17 qemu = config.system.build.qemu or pkgs.qemu_test;
18
19 vmName =
20 if config.networking.hostName == ""
21 then "noname"
22 else config.networking.hostName;
23
24 cfg = config.virtualisation;
25
26 qemuGraphics = lib.optionalString (!cfg.graphics) "-nographic";
27
28 consoles = lib.concatMapStringsSep " " (c: "console=${c}") cfg.qemu.consoles;
29
30 # XXX: This is very ugly and in the future we really should use attribute
31 # sets to build ALL of the QEMU flags instead of this mixed mess of Nix
32 # expressions and shell script stuff.
33 mkDiskIfaceDriveFlag = idx: driveArgs: let
34 inherit (cfg.qemu) diskInterface;
35 # The drive identifier created by incrementing the index by one using the
36 # shell.
37 drvId = "drive$((${idx} + 1))";
38 # NOTE: DO NOT shell escape, because this may contain shell variables.
39 commonArgs = "index=${idx},id=${drvId},${driveArgs}";
40 isSCSI = diskInterface == "scsi";
41 devArgs = "${diskInterface}-hd,drive=${drvId}";
42 args = "-drive ${commonArgs},if=none -device lsi53c895a -device ${devArgs}";
43 in if isSCSI then args else "-drive ${commonArgs},if=${diskInterface}";
44
45 # Shell script to start the VM.
46 startVM =
47 ''
48 #! ${pkgs.runtimeShell}
49
50 NIX_DISK_IMAGE=$(readlink -f ''${NIX_DISK_IMAGE:-${config.virtualisation.diskImage}})
51
52 if ! test -e "$NIX_DISK_IMAGE"; then
53 ${qemu}/bin/qemu-img create -f qcow2 "$NIX_DISK_IMAGE" \
54 ${toString config.virtualisation.diskSize}M || exit 1
55 fi
56
57 # Create a directory for storing temporary data of the running VM.
58 if [ -z "$TMPDIR" -o -z "$USE_TMPDIR" ]; then
59 TMPDIR=$(mktemp -d nix-vm.XXXXXXXXXX --tmpdir)
60 fi
61
62 # Create a directory for exchanging data with the VM.
63 mkdir -p $TMPDIR/xchg
64
65 ${if cfg.useBootLoader then ''
66 # Create a writable copy/snapshot of the boot disk.
67 # A writable boot disk can be booted from automatically.
68 ${qemu}/bin/qemu-img create -f qcow2 -b ${bootDisk}/disk.img $TMPDIR/disk.img || exit 1
69
70 ${if cfg.useEFIBoot then ''
71 # VM needs a writable flash BIOS.
72 cp ${bootDisk}/bios.bin $TMPDIR || exit 1
73 chmod 0644 $TMPDIR/bios.bin || exit 1
74 '' else ''
75 ''}
76 '' else ''
77 ''}
78
79 cd $TMPDIR
80 idx=2
81 extraDisks=""
82 ${flip concatMapStrings cfg.emptyDiskImages (size: ''
83 if ! test -e "empty$idx.qcow2"; then
84 ${qemu}/bin/qemu-img create -f qcow2 "empty$idx.qcow2" "${toString size}M"
85 fi
86 extraDisks="$extraDisks ${mkDiskIfaceDriveFlag "$idx" "file=$(pwd)/empty$idx.qcow2,werror=report"}"
87 idx=$((idx + 1))
88 '')}
89
90 # Start QEMU.
91 exec ${qemuBinary qemu} \
92 -name ${vmName} \
93 -m ${toString config.virtualisation.memorySize} \
94 -smp ${toString config.virtualisation.cores} \
95 -device virtio-rng-pci \
96 ${concatStringsSep " " config.virtualisation.qemu.networkingOptions} \
97 -virtfs local,path=/nix/store,security_model=none,mount_tag=store \
98 -virtfs local,path=$TMPDIR/xchg,security_model=none,mount_tag=xchg \
99 -virtfs local,path=''${SHARED_DIR:-$TMPDIR/xchg},security_model=none,mount_tag=shared \
100 ${if cfg.useBootLoader then ''
101 ${mkDiskIfaceDriveFlag "0" "file=$NIX_DISK_IMAGE,cache=writeback,werror=report"} \
102 ${mkDiskIfaceDriveFlag "1" "file=$TMPDIR/disk.img,media=disk"} \
103 ${if cfg.useEFIBoot then ''
104 -pflash $TMPDIR/bios.bin \
105 '' else ''
106 ''}
107 '' else ''
108 ${mkDiskIfaceDriveFlag "0" "file=$NIX_DISK_IMAGE,cache=writeback,werror=report"} \
109 -kernel ${config.system.build.toplevel}/kernel \
110 -initrd ${config.system.build.toplevel}/initrd \
111 -append "$(cat ${config.system.build.toplevel}/kernel-params) init=${config.system.build.toplevel}/init regInfo=${regInfo}/registration ${consoles} $QEMU_KERNEL_PARAMS" \
112 ''} \
113 $extraDisks \
114 ${qemuGraphics} \
115 ${toString config.virtualisation.qemu.options} \
116 $QEMU_OPTS \
117 "$@"
118 '';
119
120
121 regInfo = pkgs.closureInfo { rootPaths = config.virtualisation.pathsInNixDB; };
122
123
124 # Generate a hard disk image containing a /boot partition and GRUB
125 # in the MBR. Used when the `useBootLoader' option is set.
126 # FIXME: use nixos/lib/make-disk-image.nix.
127 bootDisk =
128 pkgs.vmTools.runInLinuxVM (
129 pkgs.runCommand "nixos-boot-disk"
130 { preVM =
131 ''
132 mkdir $out
133 diskImage=$out/disk.img
134 bootFlash=$out/bios.bin
135 ${qemu}/bin/qemu-img create -f qcow2 $diskImage "40M"
136 ${if cfg.useEFIBoot then ''
137 cp ${pkgs.OVMF-CSM.fd}/FV/OVMF.fd $bootFlash
138 chmod 0644 $bootFlash
139 '' else ''
140 ''}
141 '';
142 buildInputs = [ pkgs.utillinux ];
143 QEMU_OPTS = if cfg.useEFIBoot
144 then "-pflash $out/bios.bin -nographic -serial pty"
145 else "-nographic -serial pty";
146 }
147 ''
148 # Create a /boot EFI partition with 40M and arbitrary but fixed GUIDs for reproducibility
149 ${pkgs.gptfdisk}/bin/sgdisk \
150 --set-alignment=1 --new=1:34:2047 --change-name=1:BIOSBootPartition --typecode=1:ef02 \
151 --set-alignment=512 --largest-new=2 --change-name=2:EFISystem --typecode=2:ef00 \
152 --attributes=1:set:1 \
153 --attributes=2:set:2 \
154 --disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C1 \
155 --partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \
156 --partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \
157 --hybrid 2 \
158 --recompute-chs /dev/vda
159 ${pkgs.dosfstools}/bin/mkfs.fat -F16 /dev/vda2
160 export MTOOLS_SKIP_CHECK=1
161 ${pkgs.mtools}/bin/mlabel -i /dev/vda2 ::boot
162
163 # Mount /boot; load necessary modules first.
164 ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/nls/nls_cp437.ko.xz || true
165 ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/nls/nls_iso8859-1.ko.xz || true
166 ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/fat/fat.ko.xz || true
167 ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/fat/vfat.ko.xz || true
168 ${pkgs.kmod}/bin/insmod ${pkgs.linux}/lib/modules/*/kernel/fs/efivarfs/efivarfs.ko.xz || true
169 mkdir /boot
170 mount /dev/vda2 /boot
171
172 # This is needed for GRUB 0.97, which doesn't know about virtio devices.
173 mkdir /boot/grub
174 echo '(hd0) /dev/vda' > /boot/grub/device.map
175
176 # Install GRUB and generate the GRUB boot menu.
177 touch /etc/NIXOS
178 mkdir -p /nix/var/nix/profiles
179 ${config.system.build.toplevel}/bin/switch-to-configuration boot
180
181 umount /boot
182 '' # */
183 );
184
185in
186
187{
188 imports = [ ../profiles/qemu-guest.nix ];
189
190 options = {
191
192 virtualisation.memorySize =
193 mkOption {
194 default = 384;
195 description =
196 ''
197 Memory size (M) of virtual machine.
198 '';
199 };
200
201 virtualisation.diskSize =
202 mkOption {
203 default = 512;
204 description =
205 ''
206 Disk size (M) of virtual machine.
207 '';
208 };
209
210 virtualisation.diskImage =
211 mkOption {
212 default = "./${vmName}.qcow2";
213 description =
214 ''
215 Path to the disk image containing the root filesystem.
216 The image will be created on startup if it does not
217 exist.
218 '';
219 };
220
221 virtualisation.bootDevice =
222 mkOption {
223 type = types.str;
224 example = "/dev/vda";
225 description =
226 ''
227 The disk to be used for the root filesystem.
228 '';
229 };
230
231 virtualisation.emptyDiskImages =
232 mkOption {
233 default = [];
234 type = types.listOf types.int;
235 description =
236 ''
237 Additional disk images to provide to the VM. The value is
238 a list of size in megabytes of each disk. These disks are
239 writeable by the VM.
240 '';
241 };
242
243 virtualisation.graphics =
244 mkOption {
245 default = true;
246 description =
247 ''
248 Whether to run QEMU with a graphics window, or in nographic mode.
249 Serial console will be enabled on both settings, but this will
250 change the preferred console.
251 '';
252 };
253
254 virtualisation.cores =
255 mkOption {
256 default = 1;
257 type = types.int;
258 description =
259 ''
260 Specify the number of cores the guest is permitted to use.
261 The number can be higher than the available cores on the
262 host system.
263 '';
264 };
265
266 virtualisation.pathsInNixDB =
267 mkOption {
268 default = [];
269 description =
270 ''
271 The list of paths whose closure is registered in the Nix
272 database in the VM. All other paths in the host Nix store
273 appear in the guest Nix store as well, but are considered
274 garbage (because they are not registered in the Nix
275 database in the guest).
276 '';
277 };
278
279 virtualisation.vlans =
280 mkOption {
281 default = [ 1 ];
282 example = [ 1 2 ];
283 description =
284 ''
285 Virtual networks to which the VM is connected. Each
286 number <replaceable>N</replaceable> in this list causes
287 the VM to have a virtual Ethernet interface attached to a
288 separate virtual network on which it will be assigned IP
289 address
290 <literal>192.168.<replaceable>N</replaceable>.<replaceable>M</replaceable></literal>,
291 where <replaceable>M</replaceable> is the index of this VM
292 in the list of VMs.
293 '';
294 };
295
296 virtualisation.writableStore =
297 mkOption {
298 default = true; # FIXME
299 description =
300 ''
301 If enabled, the Nix store in the VM is made writable by
302 layering an overlay filesystem on top of the host's Nix
303 store.
304 '';
305 };
306
307 virtualisation.writableStoreUseTmpfs =
308 mkOption {
309 default = true;
310 description =
311 ''
312 Use a tmpfs for the writable store instead of writing to the VM's
313 own filesystem.
314 '';
315 };
316
317 networking.primaryIPAddress =
318 mkOption {
319 default = "";
320 internal = true;
321 description = "Primary IP address used in /etc/hosts.";
322 };
323
324 virtualisation.qemu = {
325 options =
326 mkOption {
327 type = types.listOf types.unspecified;
328 default = [];
329 example = [ "-vga std" ];
330 description = "Options passed to QEMU.";
331 };
332
333 consoles = mkOption {
334 type = types.listOf types.str;
335 default = let
336 consoles = [ "${qemuSerialDevice},115200n8" "tty0" ];
337 in if cfg.graphics then consoles else reverseList consoles;
338 example = [ "console=tty1" ];
339 description = ''
340 The output console devices to pass to the kernel command line via the
341 <literal>console</literal> parameter, the primary console is the last
342 item of this list.
343
344 By default it enables both serial console and
345 <literal>tty0</literal>. The preferred console (last one) is based on
346 the value of <option>virtualisation.graphics</option>.
347 '';
348 };
349
350 networkingOptions =
351 mkOption {
352 default = [
353 "-net nic,netdev=user.0,model=virtio"
354 "-netdev user,id=user.0\${QEMU_NET_OPTS:+,$QEMU_NET_OPTS}"
355 ];
356 type = types.listOf types.str;
357 description = ''
358 Networking-related command-line options that should be passed to qemu.
359 The default is to use userspace networking (slirp).
360
361 If you override this option, be advised to keep
362 ''${QEMU_NET_OPTS:+,$QEMU_NET_OPTS} (as seen in the default)
363 to keep the default runtime behaviour.
364 '';
365 };
366
367 diskInterface =
368 mkOption {
369 default = "virtio";
370 example = "scsi";
371 type = types.enum [ "virtio" "scsi" "ide" ];
372 description = "The interface used for the virtual hard disks.";
373 };
374
375 guestAgent.enable =
376 mkOption {
377 default = true;
378 type = types.bool;
379 description = ''
380 Enable the Qemu guest agent.
381 '';
382 };
383 };
384
385 virtualisation.useBootLoader =
386 mkOption {
387 default = false;
388 description =
389 ''
390 If enabled, the virtual machine will be booted using the
391 regular boot loader (i.e., GRUB 1 or 2). This allows
392 testing of the boot loader. If
393 disabled (the default), the VM directly boots the NixOS
394 kernel and initial ramdisk, bypassing the boot loader
395 altogether.
396 '';
397 };
398
399 virtualisation.useEFIBoot =
400 mkOption {
401 default = false;
402 description =
403 ''
404 If enabled, the virtual machine will provide a EFI boot
405 manager.
406 useEFIBoot is ignored if useBootLoader == false.
407 '';
408 };
409
410 };
411
412 config = {
413
414 boot.loader.grub.device = mkVMOverride cfg.bootDevice;
415
416 boot.initrd.extraUtilsCommands =
417 ''
418 # We need mke2fs in the initrd.
419 copy_bin_and_libs ${pkgs.e2fsprogs}/bin/mke2fs
420 '';
421
422 boot.initrd.postDeviceCommands =
423 ''
424 # If the disk image appears to be empty, run mke2fs to
425 # initialise.
426 FSTYPE=$(blkid -o value -s TYPE ${cfg.bootDevice} || true)
427 if test -z "$FSTYPE"; then
428 mke2fs -t ext4 ${cfg.bootDevice}
429 fi
430 '';
431
432 boot.initrd.postMountCommands =
433 ''
434 # Mark this as a NixOS machine.
435 mkdir -p $targetRoot/etc
436 echo -n > $targetRoot/etc/NIXOS
437
438 # Fix the permissions on /tmp.
439 chmod 1777 $targetRoot/tmp
440
441 mkdir -p $targetRoot/boot
442
443 ${optionalString cfg.writableStore ''
444 echo "mounting overlay filesystem on /nix/store..."
445 mkdir -p 0755 $targetRoot/nix/.rw-store/store $targetRoot/nix/.rw-store/work $targetRoot/nix/store
446 mount -t overlay overlay $targetRoot/nix/store \
447 -o lowerdir=$targetRoot/nix/.ro-store,upperdir=$targetRoot/nix/.rw-store/store,workdir=$targetRoot/nix/.rw-store/work || fail
448 ''}
449 '';
450
451 # After booting, register the closure of the paths in
452 # `virtualisation.pathsInNixDB' in the Nix database in the VM. This
453 # allows Nix operations to work in the VM. The path to the
454 # registration file is passed through the kernel command line to
455 # allow `system.build.toplevel' to be included. (If we had a direct
456 # reference to ${regInfo} here, then we would get a cyclic
457 # dependency.)
458 boot.postBootCommands =
459 ''
460 if [[ "$(cat /proc/cmdline)" =~ regInfo=([^ ]*) ]]; then
461 ${config.nix.package.out}/bin/nix-store --load-db < ''${BASH_REMATCH[1]}
462 fi
463 '';
464
465 boot.initrd.availableKernelModules =
466 optional cfg.writableStore "overlay"
467 ++ optional (cfg.qemu.diskInterface == "scsi") "sym53c8xx";
468
469 virtualisation.bootDevice =
470 mkDefault (if cfg.qemu.diskInterface == "scsi" then "/dev/sda" else "/dev/vda");
471
472 virtualisation.pathsInNixDB = [ config.system.build.toplevel ];
473
474 # FIXME: Consolidate this one day.
475 virtualisation.qemu.options = mkMerge [
476 (mkIf (pkgs.stdenv.isi686 || pkgs.stdenv.isx86_64) [ "-vga std" "-usb" "-device usb-tablet,bus=usb-bus.0" ])
477 (mkIf (pkgs.stdenv.isAarch32 || pkgs.stdenv.isAarch64) [ "-device virtio-gpu-pci" "-device usb-ehci,id=usb0" "-device usb-kbd" "-device usb-tablet" ])
478 ];
479
480 # Mount the host filesystem via 9P, and bind-mount the Nix store
481 # of the host into our own filesystem. We use mkVMOverride to
482 # allow this module to be applied to "normal" NixOS system
483 # configuration, where the regular value for the `fileSystems'
484 # attribute should be disregarded for the purpose of building a VM
485 # test image (since those filesystems don't exist in the VM).
486 fileSystems = mkVMOverride (
487 { "/".device = cfg.bootDevice;
488 ${if cfg.writableStore then "/nix/.ro-store" else "/nix/store"} =
489 { device = "store";
490 fsType = "9p";
491 options = [ "trans=virtio" "version=9p2000.L" "cache=loose" ];
492 neededForBoot = true;
493 };
494 "/tmp" = mkIf config.boot.tmpOnTmpfs
495 { device = "tmpfs";
496 fsType = "tmpfs";
497 neededForBoot = true;
498 # Sync with systemd's tmp.mount;
499 options = [ "mode=1777" "strictatime" "nosuid" "nodev" ];
500 };
501 "/tmp/xchg" =
502 { device = "xchg";
503 fsType = "9p";
504 options = [ "trans=virtio" "version=9p2000.L" "cache=loose" ];
505 neededForBoot = true;
506 };
507 "/tmp/shared" =
508 { device = "shared";
509 fsType = "9p";
510 options = [ "trans=virtio" "version=9p2000.L" ];
511 neededForBoot = true;
512 };
513 } // optionalAttrs (cfg.writableStore && cfg.writableStoreUseTmpfs)
514 { "/nix/.rw-store" =
515 { fsType = "tmpfs";
516 options = [ "mode=0755" ];
517 neededForBoot = true;
518 };
519 } // optionalAttrs cfg.useBootLoader
520 { "/boot" =
521 { device = "/dev/vdb2";
522 fsType = "vfat";
523 options = [ "ro" ];
524 noCheck = true; # fsck fails on a r/o filesystem
525 };
526 });
527
528 swapDevices = mkVMOverride [ ];
529 boot.initrd.luks.devices = mkVMOverride {};
530
531 # Don't run ntpd in the guest. It should get the correct time from KVM.
532 services.timesyncd.enable = false;
533
534 services.qemuGuest.enable = cfg.qemu.guestAgent.enable;
535
536 system.build.vm = pkgs.runCommand "nixos-vm" { preferLocalBuild = true; }
537 ''
538 mkdir -p $out/bin
539 ln -s ${config.system.build.toplevel} $out/system
540 ln -s ${pkgs.writeScript "run-nixos-vm" startVM} $out/bin/run-${vmName}-vm
541 '';
542
543 # When building a regular system configuration, override whatever
544 # video driver the host uses.
545 services.xserver.videoDrivers = mkVMOverride [ "modesetting" ];
546 services.xserver.defaultDepth = mkVMOverride 0;
547 services.xserver.resolutions = mkVMOverride [ { x = 1024; y = 768; } ];
548 services.xserver.monitorSection =
549 ''
550 # Set a higher refresh rate so that resolutions > 800x600 work.
551 HorizSync 30-140
552 VertRefresh 50-160
553 '';
554
555 # Wireless won't work in the VM.
556 networking.wireless.enable = mkVMOverride false;
557 networking.connman.enable = mkVMOverride false;
558
559 # Speed up booting by not waiting for ARP.
560 networking.dhcpcd.extraConfig = "noarp";
561
562 networking.usePredictableInterfaceNames = false;
563
564 system.requiredKernelConfig = with config.lib.kernelConfig;
565 [ (isEnabled "VIRTIO_BLK")
566 (isEnabled "VIRTIO_PCI")
567 (isEnabled "VIRTIO_NET")
568 (isEnabled "EXT4_FS")
569 (isYes "BLK_DEV")
570 (isYes "PCI")
571 (isYes "EXPERIMENTAL")
572 (isYes "NETDEVICES")
573 (isYes "NET_CORE")
574 (isYes "INET")
575 (isYes "NETWORK_FILESYSTEMS")
576 ] ++ optional (!cfg.graphics) [
577 (isYes "SERIAL_8250_CONSOLE")
578 (isYes "SERIAL_8250")
579 ];
580
581 };
582}