1{
2 config,
3 lib,
4 options,
5 pkgs,
6 ...
7}:
8let
9
  # Shorthand for the evaluated values of this module's options.
  cfg = config.services.slurm;
  # Shorthand for this module's option declarations; interpolated below to
  # build `defaultText` strings that point at other options.
  opt = options.services.slurm;
  # configuration file can be generated by https://slurm.schedmd.com/configurator.html

  # Account name used as the default of `services.slurm.user`; the static
  # user definition further down is only created while this default is kept.
  defaultUser = "slurm";
15
16 configFile = pkgs.writeTextDir "slurm.conf" ''
17 ClusterName=${cfg.clusterName}
18 StateSaveLocation=${cfg.stateSaveLocation}
19 SlurmUser=${cfg.user}
20 ${lib.optionalString (cfg.controlMachine != null) "controlMachine=${cfg.controlMachine}"}
21 ${lib.optionalString (cfg.controlAddr != null) "controlAddr=${cfg.controlAddr}"}
22 ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
23 ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
24 PlugStackConfig=${plugStackConfig}/plugstack.conf
25 ProctrackType=${cfg.procTrackType}
26 ${cfg.extraConfig}
27 '';
28
29 plugStackConfig = pkgs.writeTextDir "plugstack.conf" ''
30 ${lib.optionalString cfg.enableSrunX11 "optional ${pkgs.slurm-spank-x11}/lib/x11.so"}
31 ${cfg.extraPlugstackConfig}
32 '';
33
34 cgroupConfig = pkgs.writeTextDir "cgroup.conf" ''
35 ${cfg.extraCgroupConfig}
36 '';
37
38 mpiConf = pkgs.writeTextDir "mpi.conf" ''
39 PMIxCliTmpDirBase=${cfg.mpi.PmixCliTmpDirBase}
40 ${cfg.mpi.extraMpiConfig}
41 '';
42
43 slurmdbdConf = pkgs.writeText "slurmdbd.conf" ''
44 DbdHost=${cfg.dbdserver.dbdHost}
45 SlurmUser=${cfg.user}
46 StorageType=accounting_storage/mysql
47 StorageUser=${cfg.dbdserver.storageUser}
48 ${cfg.dbdserver.extraConfig}
49 '';
50
51 # slurm expects some additional config files to be
52 # in the same directory as slurm.conf
53 etcSlurm = pkgs.symlinkJoin {
54 name = "etc-slurm";
55 paths = [
56 configFile
57 cgroupConfig
58 plugStackConfig
59 mpiConf
60 ] ++ cfg.extraConfigPaths;
61 };
62in
63
64{
65
66 ###### interface
67
  # Maintainers recorded for this module.
  meta.maintainers = [ lib.maintainers.markuskowa ];
69
70 options = {
71
72 services.slurm = {
73
74 server = {
75 enable = lib.mkOption {
76 type = lib.types.bool;
77 default = false;
78 description = ''
79 Whether to enable the slurm control daemon.
80 Note that the standard authentication method is "munge".
81 The "munge" service needs to be provided with a password file in order for
82 slurm to work properly (see `services.munge.password`).
83 '';
84 };
85 };
86
87 dbdserver = {
88 enable = lib.mkEnableOption "SlurmDBD service";
89
90 dbdHost = lib.mkOption {
91 type = lib.types.str;
92 default = config.networking.hostName;
93 defaultText = lib.literalExpression "config.networking.hostName";
94 description = ''
95 Hostname of the machine where `slurmdbd`
96 is running (i.e. name returned by `hostname -s`).
97 '';
98 };
99
100 storageUser = lib.mkOption {
101 type = lib.types.str;
102 default = cfg.user;
103 defaultText = lib.literalExpression "config.${opt.user}";
104 description = ''
105 Database user name.
106 '';
107 };
108
109 storagePassFile = lib.mkOption {
110 type = with lib.types; nullOr str;
111 default = null;
112 description = ''
113 Path to file with database password. The content of this will be used to
114 create the password for the `StoragePass` option.
115 '';
116 };
117
118 extraConfig = lib.mkOption {
119 type = lib.types.lines;
120 default = "";
121 description = ''
122 Extra configuration for `slurmdbd.conf` See also:
123 {manpage}`slurmdbd.conf(8)`.
124 '';
125 };
126 };
127
128 client = {
129 enable = lib.mkEnableOption "slurm client daemon";
130 };
131
132 enableStools = lib.mkOption {
133 type = lib.types.bool;
134 default = false;
135 description = ''
136 Whether to provide a slurm.conf file.
137 Enable this option if you do not run a slurm daemon on this host
138 (i.e. `server.enable` and `client.enable` are `false`)
139 but you still want to run slurm commands from this host.
140 '';
141 };
142
143 package =
144 lib.mkPackageOption pkgs "slurm" {
145 example = "slurm-full";
146 }
147 // {
148 default = pkgs.slurm.override { enableX11 = !cfg.enableSrunX11; };
149 };
150
151 controlMachine = lib.mkOption {
152 type = lib.types.nullOr lib.types.str;
153 default = null;
154 example = null;
155 description = ''
156 The short hostname of the machine where SLURM control functions are
157 executed (i.e. the name returned by the command "hostname -s", use "tux001"
158 rather than "tux001.my.com").
159 '';
160 };
161
162 controlAddr = lib.mkOption {
163 type = lib.types.nullOr lib.types.str;
164 default = cfg.controlMachine;
165 defaultText = lib.literalExpression "config.${opt.controlMachine}";
166 example = null;
167 description = ''
168 Name that ControlMachine should be referred to in establishing a
169 communications path.
170 '';
171 };
172
173 clusterName = lib.mkOption {
174 type = lib.types.str;
175 default = "default";
176 example = "myCluster";
177 description = ''
178 Necessary to distinguish accounting records in a multi-cluster environment.
179 '';
180 };
181
182 nodeName = lib.mkOption {
183 type = lib.types.listOf lib.types.str;
184 default = [ ];
185 example = lib.literalExpression ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
186 description = ''
187 Name that SLURM uses to refer to a node (or base partition for BlueGene
188 systems). Typically this would be the string that "/bin/hostname -s"
189 returns. Note that now you have to write node's parameters after the name.
190 '';
191 };
192
193 partitionName = lib.mkOption {
194 type = lib.types.listOf lib.types.str;
195 default = [ ];
196 example = lib.literalExpression ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
197 description = ''
198 Name by which the partition may be referenced. Note that now you have
199 to write the partition's parameters after the name.
200 '';
201 };
202
203 enableSrunX11 = lib.mkOption {
204 default = false;
205 type = lib.types.bool;
206 description = ''
207 If enabled srun will accept the option "--x11" to allow for X11 forwarding
208 from within an interactive session or a batch job. This activates the
209 slurm-spank-x11 module. Note that this option also enables
210 {option}`services.openssh.forwardX11` on the client.
211
212 This option requires slurm to be compiled without native X11 support.
213 The default behavior is to re-compile the slurm package with native X11
214 support disabled if this option is set to true.
215
216 To use the native X11 support add `PrologFlags=X11` in {option}`extraConfig`.
217 Note that this method will only work RSA SSH host keys.
218 '';
219 };
220
221 procTrackType = lib.mkOption {
222 type = lib.types.str;
223 default = "proctrack/linuxproc";
224 description = ''
225 Plugin to be used for process tracking on a job step basis.
226 The slurmd daemon uses this mechanism to identify all processes
227 which are children of processes it spawns for a user job step.
228 '';
229 };
230
231 stateSaveLocation = lib.mkOption {
232 type = lib.types.str;
233 default = "/var/spool/slurmctld";
234 description = ''
235 Directory into which the Slurm controller, slurmctld, saves its state.
236 '';
237 };
238
239 user = lib.mkOption {
240 type = lib.types.str;
241 default = defaultUser;
242 description = ''
243 Set this option when you want to run the slurmctld daemon
244 as something else than the default slurm user "slurm".
245 Note that the UID of this user needs to be the same
246 on all nodes.
247 '';
248 };
249
250 extraConfig = lib.mkOption {
251 default = "";
252 type = lib.types.lines;
253 description = ''
254 Extra configuration options that will be added verbatim at
255 the end of the slurm configuration file.
256 '';
257 };
258
259 mpi = {
260 PmixCliTmpDirBase = lib.mkOption {
261 default = "/tmp/pmix";
262 type = lib.types.str;
263 description = ''
264 Base path for PMIx temporary files.
265 '';
266 };
267
268 extraMpiConfig = lib.mkOption {
269 default = "";
270 type = lib.types.lines;
271 description = ''
272 Extra configuration for that will be added to `mpi.conf`.
273 '';
274 };
275 };
276
277 extraPlugstackConfig = lib.mkOption {
278 default = "";
279 type = lib.types.lines;
280 description = ''
281 Extra configuration that will be added to the end of `plugstack.conf`.
282 '';
283 };
284
285 extraCgroupConfig = lib.mkOption {
286 default = "";
287 type = lib.types.lines;
288 description = ''
289 Extra configuration for `cgroup.conf`. This file is
290 used when `procTrackType=proctrack/cgroup`.
291 '';
292 };
293
294 extraConfigPaths = lib.mkOption {
295 type = with lib.types; listOf path;
296 default = [ ];
297 description = ''
298 Slurm expects config files for plugins in the same path
299 as `slurm.conf`. Add extra nix store
300 paths that should be merged into same directory as
301 `slurm.conf`.
302 '';
303 };
304
305 etcSlurm = lib.mkOption {
306 type = lib.types.path;
307 internal = true;
308 default = etcSlurm;
309 defaultText = lib.literalMD ''
310 Directory created from generated config files and
311 `config.${opt.extraConfigPaths}`.
312 '';
313 description = ''
314 Path to directory with slurm config files. This option is set by default from the
315 Slurm module and is meant to make the Slurm config file available to other modules.
316 '';
317 };
318
319 };
320
321 };
322
323 imports = [
324 (lib.mkRemovedOptionModule [ "services" "slurm" "dbdserver" "storagePass" ] ''
325 This option has been removed so that the database password is not exposed via the nix store.
326 Use services.slurm.dbdserver.storagePassFile to provide the database password.
327 '')
328 (lib.mkRemovedOptionModule [ "services" "slurm" "dbdserver" "configFile" ] ''
329 This option has been removed. Use services.slurm.dbdserver.storagePassFile
330 and services.slurm.dbdserver.extraConfig instead.
331 '')
332 ];
333
334 ###### implementation
335
336 config =
337 let
338 wrappedSlurm = pkgs.stdenv.mkDerivation {
339 name = "wrappedSlurm";
340
341 builder = pkgs.writeText "builder.sh" ''
342 mkdir -p $out/bin
343 find ${lib.getBin cfg.package}/bin -type f -executable | while read EXE
344 do
345 exename="$(basename $EXE)"
346 wrappername="$out/bin/$exename"
347 cat > "$wrappername" <<EOT
348 #!/bin/sh
349 if [ -z "$SLURM_CONF" ]
350 then
351 SLURM_CONF="${cfg.etcSlurm}/slurm.conf" "$EXE" "\$@"
352 else
353 "$EXE" "\$0"
354 fi
355 EOT
356 chmod +x "$wrappername"
357 done
358
359 mkdir -p $out/share
360 ln -s ${lib.getBin cfg.package}/share/man $out/share/man
361 '';
362 };
363
364 in
365 lib.mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable || cfg.dbdserver.enable) {
366
367 environment.systemPackages = [ wrappedSlurm ];
368
369 services.munge.enable = lib.mkDefault true;
370
371 # use a static uid as default to ensure it is the same on all nodes
372 users.users.slurm = lib.mkIf (cfg.user == defaultUser) {
373 name = defaultUser;
374 group = "slurm";
375 uid = config.ids.uids.slurm;
376 };
377
378 users.groups.slurm.gid = config.ids.uids.slurm;
379
380 systemd.services.slurmd = lib.mkIf (cfg.client.enable) {
381 path =
382 with pkgs;
383 [
384 wrappedSlurm
385 coreutils
386 ]
387 ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
388
389 wantedBy = [ "multi-user.target" ];
390 after = [
391 "systemd-tmpfiles-clean.service"
392 "munge.service"
393 "network-online.target"
394 "remote-fs.target"
395 ];
396 wants = [ "network-online.target" ];
397
398 serviceConfig = {
399 Type = "forking";
400 KillMode = "process";
401 ExecStart = "${wrappedSlurm}/bin/slurmd";
402 PIDFile = "/run/slurmd.pid";
403 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
404 LimitMEMLOCK = "infinity";
405 Delegate = "Yes";
406 };
407 };
408
409 systemd.tmpfiles.rules = lib.optionals cfg.client.enable [
410 "d /var/spool/slurmd 755 root root -"
411 "d ${cfg.mpi.PmixCliTmpDirBase} 755 root root -"
412 ];
413
414 services.openssh.settings.X11Forwarding = lib.mkIf cfg.client.enable (lib.mkDefault true);
415
416 systemd.services.slurmctld = lib.mkIf (cfg.server.enable) {
417 path =
418 with pkgs;
419 [
420 wrappedSlurm
421 munge
422 coreutils
423 ]
424 ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
425
426 wantedBy = [ "multi-user.target" ];
427 after = [
428 "network.target"
429 "munged.service"
430 ];
431 requires = [ "munged.service" ];
432
433 serviceConfig = {
434 Type = "forking";
435 ExecStart = "${wrappedSlurm}/bin/slurmctld";
436 PIDFile = "/run/slurmctld.pid";
437 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
438 };
439
440 preStart = ''
441 mkdir -p ${cfg.stateSaveLocation}
442 chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
443 '';
444 };
445
446 systemd.services.slurmdbd =
447 let
448 # slurm strips the last component off the path
449 configPath = "$RUNTIME_DIRECTORY/slurmdbd.conf";
450 in
451 lib.mkIf (cfg.dbdserver.enable) {
452 path = with pkgs; [
453 wrappedSlurm
454 munge
455 coreutils
456 ];
457
458 wantedBy = [ "multi-user.target" ];
459 after = [
460 "network.target"
461 "munged.service"
462 "mysql.service"
463 ];
464 requires = [
465 "munged.service"
466 "mysql.service"
467 ];
468
469 preStart = ''
470 install -m 600 -o ${cfg.user} -T ${slurmdbdConf} ${configPath}
471 ${lib.optionalString (cfg.dbdserver.storagePassFile != null) ''
472 echo "StoragePass=$(cat ${cfg.dbdserver.storagePassFile})" \
473 >> ${configPath}
474 ''}
475 '';
476
477 script = ''
478 export SLURM_CONF=${configPath}
479 exec ${cfg.package}/bin/slurmdbd -D
480 '';
481
482 serviceConfig = {
483 RuntimeDirectory = "slurmdbd";
484 Type = "simple";
485 PIDFile = "/run/slurmdbd.pid";
486 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
487 };
488 };
489
490 };
491
492}