{
  config,
  lib,
  pkgs,
  ...
}:

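# NixOS module for the Nvidia Container Toolkit. When enabled it generates a
# Container Device Interface (CDI) specification at boot, so that container
# runtimes can expose Nvidia GPUs to containers. Depending on the runtime and
# on `device-name-strategy`, a device can then be requested with, for example,
# `podman run --device nvidia.com/gpu=0 ...`.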
{
  imports = [
    (lib.mkRenamedOptionModule
      [ "virtualisation" "containers" "cdi" "dynamic" "nvidia" "enable" ]
      [ "hardware" "nvidia-container-toolkit" "enable" ]
    )
  ];

  options =
    let
      mountType = {
        options = {
          hostPath = lib.mkOption {
            type = lib.types.str;
            description = "Host path.";
          };
          containerPath = lib.mkOption {
            type = lib.types.str;
            description = "Container path.";
          };
          mountOptions = lib.mkOption {
            default = [
              "ro"
              "nosuid"
              "nodev"
              "bind"
            ];
            type = lib.types.listOf lib.types.str;
            description = "Mount options.";
          };
        };
      };
    in
    {

      hardware.nvidia-container-toolkit = {
        enable = lib.mkOption {
          default = false;
          type = lib.types.bool;
          description = ''
            Enable dynamic CDI configuration for Nvidia devices by running
            nvidia-container-toolkit on boot.
          '';
        };

        suppressNvidiaDriverAssertion = lib.mkOption {
          default = false;
          type = lib.types.bool;
          description = ''
            Suppress the assertion that requires the Nvidia driver to be installed.
            Useful in WSL, where the drivers are mounted from Windows rather than provided by NixOS.
          '';
        };

        mounts = lib.mkOption {
          type = lib.types.listOf (lib.types.submodule mountType);
          default = [ ];
          description = "Mounts to be added to every container under the Nvidia CDI profile.";
        };

        mount-nvidia-executables = lib.mkOption {
          default = true;
          type = lib.types.bool;
          description = ''
            Mount the executables nvidia-smi, nvidia-cuda-mps-control, nvidia-cuda-mps-server,
            nvidia-debugdump, nvidia-powerd and nvidia-ctk into containers.
          '';
        };

        device-name-strategy = lib.mkOption {
          default = "index";
          type = lib.types.enum [
            "index"
            "uuid"
            "type-index"
          ];
          description = ''
            Specify the strategy for generating device names,
            passed to `nvidia-ctk cdi generate`. This will affect how
            you reference the device using `nvidia.com/gpu=` in
            the container runtime.
          '';
        };

        mount-nvidia-docker-1-directories = lib.mkOption {
          default = true;
          type = lib.types.bool;
          description = ''
            Mount the nvidia-docker-1 directories into containers: /usr/local/nvidia/lib and
            /usr/local/nvidia/lib64.
          '';
        };

        package = lib.mkPackageOption pkgs "nvidia-container-toolkit" { };
      };

    };

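  # When enabled: assert that an Nvidia driver is configured, turn on Docker's CDI
  # support where applicable, register the default host paths to mount into CDI
  # containers, and run a boot-time service that generates the CDI specification.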
  config = lib.mkIf config.hardware.nvidia-container-toolkit.enable {
    assertions = [
      {
        assertion =
          config.hardware.nvidia.datacenter.enable
          || lib.elem "nvidia" config.services.xserver.videoDrivers
          || config.hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion;
        message = ''`nvidia-container-toolkit` requires nvidia drivers: set `hardware.nvidia.datacenter.enable`, add "nvidia" to `services.xserver.videoDrivers`, or set `hardware.nvidia-container-toolkit.suppressNvidiaDriverAssertion` if the driver is provided by another NixOS module (e.g. from NixOS-WSL)'';
      }
    ];

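    # Docker supports CDI devices from version 25 onwards, behind the `cdi` feature
    # flag in daemon.json; enable it for the system-wide and the rootless daemon
    # when the installed Docker package is new enough.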
    virtualisation.docker = {
      daemon.settings = lib.mkIf (lib.versionAtLeast config.virtualisation.docker.package.version "25") {
        features.cdi = true;
      };

      rootless.daemon.settings =
        lib.mkIf
          (
            config.virtualisation.docker.rootless.enable
            && (lib.versionAtLeast config.virtualisation.docker.package.version "25")
          )
          {
            features.cdi = true;
          };
    };

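    # Default host paths exposed to every container under the Nvidia CDI profile:
    # the driver runpath symlink, the driver's /etc and /share directories, glibc,
    # and (optionally) the driver's user-space tools and the nvidia-docker 1.0 paths.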
    hardware = {
      graphics.enable = lib.mkIf (!config.hardware.nvidia.datacenter.enable) true;

      nvidia-container-toolkit.mounts =
        let
          nvidia-driver = config.hardware.nvidia.package;
        in
        (lib.mkMerge [
          [
            {
              hostPath = pkgs.addDriverRunpath.driverLink;
              containerPath = pkgs.addDriverRunpath.driverLink;
            }
            {
              hostPath = "${lib.getLib nvidia-driver}/etc";
              containerPath = "${lib.getLib nvidia-driver}/etc";
            }
            {
              hostPath = "${lib.getLib nvidia-driver}/share";
              containerPath = "${lib.getLib nvidia-driver}/share";
            }
            {
              hostPath = "${lib.getLib pkgs.glibc}/lib";
              containerPath = "${lib.getLib pkgs.glibc}/lib";
            }
            {
              hostPath = "${lib.getLib pkgs.glibc}/lib64";
              containerPath = "${lib.getLib pkgs.glibc}/lib64";
            }
          ]
          (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-executables [
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-control";
              containerPath = "/usr/bin/nvidia-cuda-mps-control";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-cuda-mps-server";
              containerPath = "/usr/bin/nvidia-cuda-mps-server";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-debugdump";
              containerPath = "/usr/bin/nvidia-debugdump";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-powerd";
              containerPath = "/usr/bin/nvidia-powerd";
            }
            {
              hostPath = lib.getExe' nvidia-driver "nvidia-smi";
              containerPath = "/usr/bin/nvidia-smi";
            }
          ])
          # nvidia-docker 1.0 uses /usr/local/nvidia/lib{,64}
          # e.g.
          # - https://gitlab.com/nvidia/container-images/cuda/-/blob/e3ff10eab3a1424fe394899df0e0f8ca5a410f0f/dist/12.3.1/ubi9/base/Dockerfile#L44
          # - https://github.com/NVIDIA/nvidia-docker/blob/01d2c9436620d7dde4672e414698afe6da4a282f/src/nvidia/volumes.go#L104-L173
          (lib.mkIf config.hardware.nvidia-container-toolkit.mount-nvidia-docker-1-directories [
            {
              hostPath = "${lib.getLib nvidia-driver}/lib";
              containerPath = "/usr/local/nvidia/lib";
            }
            {
              hostPath = "${lib.getLib nvidia-driver}/lib";
              containerPath = "/usr/local/nvidia/lib64";
            }
          ])
        ]);
    };

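    # One-shot service that runs the CDI generator at boot, after udev has settled so
    # the Nvidia device nodes exist; RuntimeDirectory provides /run/cdi for the
    # generated specification.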
    systemd.services.nvidia-container-toolkit-cdi-generator = {
      description = "Container Device Interface (CDI) generator for Nvidia devices";
      wantedBy = [ "multi-user.target" ];
      after = [ "systemd-udev-settle.service" ];
      serviceConfig = {
        RuntimeDirectory = "cdi";
        RemainAfterExit = true;
        ExecStart =
          let
            script = pkgs.callPackage ./cdi-generate.nix {
              inherit (config.hardware.nvidia-container-toolkit) mounts;
              nvidia-container-toolkit = config.hardware.nvidia-container-toolkit.package;
              nvidia-driver = config.hardware.nvidia.package;
              deviceNameStrategy = config.hardware.nvidia-container-toolkit.device-name-strategy;
            };
          in
          lib.getExe script;
        Type = "oneshot";
      };
    };

  };

}