{
  config,
  lib,
  pkgs,
  ...
}:

{
  imports = [
    (lib.mkRenamedOptionModule
      [ "virtualisation" "containers" "cdi" "dynamic" "nvidia" "enable" ]
      [ "hardware" "nvidia-container-toolkit" "enable" ]
    )
  ];

  options =
    let
      mountType = {
        options = {
          hostPath = lib.mkOption {
            type = lib.types.str;
            description = "Host path.";
          };
          containerPath = lib.mkOption {
            type = lib.types.str;
            description = "Container path.";
          };
          mountOptions = lib.mkOption {
            default = [
              "ro"
              "nosuid"
              "nodev"
              "bind"
            ];
            type = lib.types.listOf lib.types.str;
            description = "Mount options.";
          };
        };
      };
    in
    {

      hardware.nvidia-container-toolkit = {
        enable = lib.mkOption {
          default = false;
          type = lib.types.bool;
          description = ''
            Enable dynamic CDI configuration for Nvidia devices by running
            nvidia-container-toolkit on boot.
          '';
        };

        suppressNvidiaDriverAssertion = lib.mkOption {
          default = false;
          type = lib.types.bool;
          description = ''
            Suppress the assertion that the Nvidia driver must be installed.
            Useful in WSL, where the drivers are mounted from Windows rather than provided by NixOS.
          '';
        };

        mounts = lib.mkOption {
          type = lib.types.listOf (lib.types.submodule mountType);
          default = [ ];
          description = "Mounts to be added to every container under the Nvidia CDI profile.";
        };

        mount-nvidia-executables = lib.mkOption {
          default = true;
          type = lib.types.bool;
          description = ''
            Mount the executables nvidia-smi, nvidia-cuda-mps-control, nvidia-cuda-mps-server,
            nvidia-debugdump, nvidia-powerd and nvidia-ctk into containers.
          '';
        };

        device-name-strategy = lib.mkOption {
          default = "index";
          type = lib.types.enum [
            "index"
            "uuid"
            "type-index"
          ];
          description = ''
            Specify the strategy for generating device names,
            passed to `nvidia-ctk cdi generate`. This will affect how
            you reference the device using `nvidia.com/gpu=` in
            the container runtime.
          '';
        };

        mount-nvidia-docker-1-directories = lib.mkOption {
          default = true;
          type = lib.types.bool;
          description = ''
            Mount the nvidia-docker-1 directories /usr/local/nvidia/lib and
            /usr/local/nvidia/lib64 into containers.
          '';
        };

        package = lib.mkPackageOption pkgs "nvidia-container-toolkit" { };
      };

    };

  config = lib.mkIf config.hardware.nvidia-container-toolkit.enable {
    assertions = [
      {
        assertion =
          config.hardware.nvidia.datacenter.enable
          || lib.elem "nvidia" config.services.xserver.videoDrivers
          || config.hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion;
        message = ''`nvidia-container-toolkit` requires nvidia drivers: set `hardware.nvidia.datacenter.enable`, add "nvidia" to `services.xserver.videoDrivers`, or set `hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion` if the driver is provided by another NixOS module (e.g. from NixOS-WSL)'';
      }
    ];

    virtualisation.docker = {
      daemon.settings = lib.mkIf (lib.versionAtLeast config.virtualisation.docker.package.version "25") {
        features.cdi = true;
      };

      rootless.daemon.settings =
        lib.mkIf
          (
            config.virtualisation.docker.rootless.enable
            && (lib.versionAtLeast config.virtualisation.docker.package.version "25")
          )
          {
            features.cdi = true;
          };
    };

    hardware = {
      graphics.enable = lib.mkIf (!config.hardware.nvidia.datacenter.enable) true;

      nvidia-container-toolkit.mounts =
        let
          nvidia-driver = config.hardware.nvidia.package;
        in
        (lib.mkMerge [
          [
            {
              hostPath = pkgs.addDriverRunpath.driverLink;
              containerPath = pkgs.addDriverRunpath.driverLink;
            }
            {
              hostPath = "${lib.getLib nvidia-driver}/etc";
              containerPath = "${lib.getLib nvidia-driver}/etc";
            }
            {
              hostPath = "${lib.getLib nvidia-driver}/share";
              containerPath = "${lib.getLib nvidia-driver}/share";
            }
            {
              hostPath = "${lib.getLib pkgs.glibc}/lib";
              containerPath = "${lib.getLib pkgs.glibc}/lib";
            }
            {
              hostPath = "${lib.getLib pkgs.glibc}/lib64";
              containerPath = "${lib.getLib pkgs.glibc}/lib64";
            }
          ]
          (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables [
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
              containerPath = "/usr/bin/nvidia-cuda-mps-control";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
              containerPath = "/usr/bin/nvidia-cuda-mps-server";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
              containerPath = "/usr/bin/nvidia-debugdump";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
              containerPath = "/usr/bin/nvidia-powerd";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-smi";
              containerPath = "/usr/bin/nvidia-smi";
            }
          ])
          # nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
          # e.g.
          # - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
          # - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
          (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories [
            {
              hostPath = "${lib.getLib nvidia-driver}/lib";
              containerPath = "/usr/local/nvidia/lib";
            }
            {
              hostPath = "${lib.getLib nvidia-driver}/lib";
              containerPath = "/usr/local/nvidia/lib64";
            }
          ])
        ]);
    };

    systemd.services.nvidia-container-toolkit-cdi-generator = {
      description = "Container Device Interface (CDI) generator for Nvidia devices";
      wantedBy = [ "multi-user.target" ];
      after = [ "systemd-udev-settle.service" ];
      serviceConfig = {
        RuntimeDirectory = "cdi";
        RemainAfterExit = true;
        ExecStart =
          let
            script = pkgs.callPackage ./cdi-generate.nix {
              inherit (config.hardware.nvidia-container-toolkit) mounts;
              nvidia-container-toolkit = config.hardware.nvidia-container-toolkit.package;
              nvidia-driver = config.hardware.nvidia.package;
              deviceNameStrategy = config.hardware.nvidia-container-toolkit.device-name-strategy;
            };
          in
          lib.getExe script;
        Type = "oneshot";
      };
    };

  };
}
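
# Illustrative usage sketch (not part of this module): a host configuration that
# enables the toolkit might look roughly like the commented snippet below. The
# option values are assumptions for a typical single-GPU Docker host, not
# requirements of this module.
#
#   {
#     hardware.nvidia-container-toolkit.enable = true;
#     services.xserver.videoDrivers = [ "nvidia" ];
#     virtualisation.docker.enable = true;
#   }
#
# With Docker >= 25 the module turns on the daemon's CDI feature, so a container
# can then request the generated devices by CDI name, e.g.
#   docker run --device nvidia.com/gpu=all ubuntu nvidia-smi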