at 24.11-pre 5.0 kB view raw
# NixOS module: miscellaneous kernel-level security toggles.
#
# Declares the `security.*` options below and translates each of them into
# the corresponding sysctl setting or kernel command-line parameter.
{ config, lib, ... }:

let
  # Pull in exactly the library names this module uses instead of opening
  # the whole of `lib` with a file-wide `with`.
  inherit (lib) mkDefault mkIf mkMerge mkOption types;

  cfg = config.security;
in
{
  meta = {
    maintainers = [ lib.maintainers.joachifm ];
  };

  imports = [
    # Backward compatibility for the old "virtualization" spelling.
    (lib.mkRenamedOptionModule [ "security" "virtualization" "flushL1DataCache" ] [ "security" "virtualisation" "flushL1DataCache" ])
  ];

  options = {
    security.allowUserNamespaces = mkOption {
      type = types.bool;
      default = true;
      description = ''
        Whether to allow creation of user namespaces.

        The motivation for disabling user namespaces is the potential
        presence of code paths where the kernel's permission checking
        logic fails to account for namespacing, instead permitting a
        namespaced process to act outside the namespace with the same
        privileges as it would have inside it. This is particularly
        damaging in the common case of running as root within the namespace.

        When user namespace creation is disallowed, attempting to create a
        user namespace fails with "no space left on device" (ENOSPC).
        root may re-enable user namespace creation at runtime.
      '';
    };

    security.unprivilegedUsernsClone = mkOption {
      type = types.bool;
      default = false;
      description = ''
        When disabled, unprivileged users will not be able to create new namespaces.
        By default unprivileged user namespaces are disabled.
        This option only works in a hardened profile.
      '';
    };

    security.protectKernelImage = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether to prevent replacing the running kernel image.
      '';
    };

    security.allowSimultaneousMultithreading = mkOption {
      type = types.bool;
      default = true;
      description = ''
        Whether to allow SMT/hyperthreading. Disabling SMT means that only
        physical CPU cores will be usable at runtime, potentially at
        significant performance cost.

        The primary motivation for disabling SMT is to mitigate the risk of
        leaking data between threads running on the same CPU core (due to
        e.g., shared caches). This attack vector is unproven.

        Disabling SMT is a supplement to the L1 data cache flushing mitigation
        (see [](#opt-security.virtualisation.flushL1DataCache))
        versus malicious VM guests (SMT could "bring back" previously flushed
        data).
      '';
    };

    security.forcePageTableIsolation = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Whether to force-enable the Page Table Isolation (PTI) Linux kernel
        feature even on CPU models that claim to be safe from Meltdown.

        This hardening feature is most beneficial to systems that run untrusted
        workloads that rely on address space isolation for security.
      '';
    };

    security.virtualisation.flushL1DataCache = mkOption {
      type = types.nullOr (types.enum [ "never" "cond" "always" ]);
      default = null;
      description = ''
        Whether the hypervisor should flush the L1 data cache before
        entering guests.
        See also [](#opt-security.allowSimultaneousMultithreading).

        - `null`: uses the kernel default
        - `"never"`: disables L1 data cache flushing entirely.
          May be appropriate if all guests are trusted.
        - `"cond"`: flushes L1 data cache only for pre-determined
          code paths. May leak information about the host address space
          layout.
        - `"always"`: flushes L1 data cache every time the hypervisor
          enters the guest. May incur significant performance cost.
      '';
    };
  };

  # Each fragment is guarded by its own option, so a fragment contributes
  # nothing unless that option deviates from the permissive setting.
  config = mkMerge [
    (mkIf (!cfg.allowUserNamespaces) {
      # Setting the number of allowed user namespaces to 0 effectively disables
      # the feature at runtime. Note that root may raise the limit again
      # at any time.
      boot.kernel.sysctl."user.max_user_namespaces" = 0;

      assertions = [
        {
          # Compare against `false` rather than using the setting directly as
          # the left operand of `->`: upstream NixOS also accepts the string
          # "relaxed" for `nix.settings.sandbox`, and `->` aborts evaluation
          # on a non-boolean operand. Any non-`false` value counts as
          # "sandbox enabled", so it produces the assertion message below.
          assertion = (config.nix.settings.sandbox != false) -> cfg.allowUserNamespaces;
          message = "`nix.settings.sandbox = true` conflicts with `!security.allowUserNamespaces`.";
        }
      ];
    })

    (mkIf cfg.unprivilegedUsernsClone {
      # mkDefault keeps this overridable by an explicit sysctl definition.
      boot.kernel.sysctl."kernel.unprivileged_userns_clone" = mkDefault true;
    })

    (mkIf cfg.protectKernelImage {
      # Disable hibernation (allows replacing the running kernel)
      boot.kernelParams = [ "nohibernate" ];
      # Prevent replacing the running kernel image w/o reboot
      boot.kernel.sysctl."kernel.kexec_load_disabled" = mkDefault true;
    })

    (mkIf (!cfg.allowSimultaneousMultithreading) {
      boot.kernelParams = [ "nosmt" ];
    })

    (mkIf cfg.forcePageTableIsolation {
      boot.kernelParams = [ "pti=on" ];
    })

    # `null` means "leave the kernel default alone": no parameter is emitted.
    (mkIf (cfg.virtualisation.flushL1DataCache != null) {
      boot.kernelParams = [ "kvm-intel.vmentry_l1d_flush=${cfg.virtualisation.flushL1DataCache}" ];
    })
  ];
}