1{ config, lib, options, pkgs, ... }:
2
3with lib;
4
5let
6
7 cfg = config.services.slurm;
8 opt = options.services.slurm;
  # The configuration file can be generated with https://slurm.schedmd.com/configurator.html
10
11 defaultUser = "slurm";
12
13 configFile = pkgs.writeTextDir "slurm.conf"
14 ''
15 ClusterName=${cfg.clusterName}
16 StateSaveLocation=${cfg.stateSaveLocation}
17 SlurmUser=${cfg.user}
18 ${optionalString (cfg.controlMachine != null) "controlMachine=${cfg.controlMachine}"}
19 ${optionalString (cfg.controlAddr != null) "controlAddr=${cfg.controlAddr}"}
20 ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
21 ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
22 PlugStackConfig=${plugStackConfig}/plugstack.conf
23 ProctrackType=${cfg.procTrackType}
24 ${cfg.extraConfig}
25 '';
26
27 plugStackConfig = pkgs.writeTextDir "plugstack.conf"
28 ''
29 ${optionalString cfg.enableSrunX11 "optional ${pkgs.slurm-spank-x11}/lib/x11.so"}
30 ${cfg.extraPlugstackConfig}
31 '';
32
33 cgroupConfig = pkgs.writeTextDir "cgroup.conf"
34 ''
35 ${cfg.extraCgroupConfig}
36 '';
37
38 slurmdbdConf = pkgs.writeText "slurmdbd.conf"
39 ''
40 DbdHost=${cfg.dbdserver.dbdHost}
41 SlurmUser=${cfg.user}
42 StorageType=accounting_storage/mysql
43 StorageUser=${cfg.dbdserver.storageUser}
44 ${cfg.dbdserver.extraConfig}
45 '';
46
47 # slurm expects some additional config files to be
48 # in the same directory as slurm.conf
49 etcSlurm = pkgs.symlinkJoin {
50 name = "etc-slurm";
51 paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
52 };
53in
54
55{
56
57 ###### interface
58
59 meta.maintainers = [ maintainers.markuskowa ];
60
61 options = {
62
63 services.slurm = {
64
65 server = {
66 enable = mkOption {
67 type = types.bool;
68 default = false;
69 description = lib.mdDoc ''
70 Whether to enable the slurm control daemon.
71 Note that the standard authentication method is "munge".
72 The "munge" service needs to be provided with a password file in order for
73 slurm to work properly (see `services.munge.password`).
74 '';
75 };
76 };
77
78 dbdserver = {
79 enable = mkEnableOption (lib.mdDoc "SlurmDBD service");
80
81 dbdHost = mkOption {
82 type = types.str;
83 default = config.networking.hostName;
84 defaultText = literalExpression "config.networking.hostName";
85 description = lib.mdDoc ''
86 Hostname of the machine where `slurmdbd`
87 is running (i.e. name returned by `hostname -s`).
88 '';
89 };
90
91 storageUser = mkOption {
92 type = types.str;
93 default = cfg.user;
94 defaultText = literalExpression "config.${opt.user}";
95 description = lib.mdDoc ''
96 Database user name.
97 '';
98 };
99
100 storagePassFile = mkOption {
101 type = with types; nullOr str;
102 default = null;
103 description = lib.mdDoc ''
104 Path to file with database password. The content of this will be used to
105 create the password for the `StoragePass` option.
106 '';
107 };
108
109 extraConfig = mkOption {
110 type = types.lines;
111 default = "";
112 description = lib.mdDoc ''
113 Extra configuration for `slurmdbd.conf` See also:
114 {manpage}`slurmdbd.conf(8)`.
115 '';
116 };
117 };
118
119 client = {
120 enable = mkEnableOption (lib.mdDoc "slurm client daemon");
121 };
122
123 enableStools = mkOption {
124 type = types.bool;
125 default = false;
126 description = lib.mdDoc ''
127 Whether to provide a slurm.conf file.
128 Enable this option if you do not run a slurm daemon on this host
129 (i.e. `server.enable` and `client.enable` are `false`)
130 but you still want to run slurm commands from this host.
131 '';
132 };
133
134 package = mkOption {
135 type = types.package;
136 default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
137 defaultText = literalExpression "pkgs.slurm";
138 example = literalExpression "pkgs.slurm-full";
139 description = lib.mdDoc ''
140 The package to use for slurm binaries.
141 '';
142 };
143
144 controlMachine = mkOption {
145 type = types.nullOr types.str;
146 default = null;
147 example = null;
148 description = lib.mdDoc ''
149 The short hostname of the machine where SLURM control functions are
150 executed (i.e. the name returned by the command "hostname -s", use "tux001"
151 rather than "tux001.my.com").
152 '';
153 };
154
155 controlAddr = mkOption {
156 type = types.nullOr types.str;
157 default = cfg.controlMachine;
158 defaultText = literalExpression "config.${opt.controlMachine}";
159 example = null;
160 description = lib.mdDoc ''
161 Name that ControlMachine should be referred to in establishing a
162 communications path.
163 '';
164 };
165
166 clusterName = mkOption {
167 type = types.str;
168 default = "default";
169 example = "myCluster";
170 description = lib.mdDoc ''
171 Necessary to distinguish accounting records in a multi-cluster environment.
172 '';
173 };
174
175 nodeName = mkOption {
176 type = types.listOf types.str;
177 default = [];
178 example = literalExpression ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
179 description = lib.mdDoc ''
180 Name that SLURM uses to refer to a node (or base partition for BlueGene
181 systems). Typically this would be the string that "/bin/hostname -s"
182 returns. Note that now you have to write node's parameters after the name.
183 '';
184 };
185
186 partitionName = mkOption {
187 type = types.listOf types.str;
188 default = [];
189 example = literalExpression ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
190 description = lib.mdDoc ''
191 Name by which the partition may be referenced. Note that now you have
192 to write the partition's parameters after the name.
193 '';
194 };
195
196 enableSrunX11 = mkOption {
197 default = false;
198 type = types.bool;
199 description = lib.mdDoc ''
200 If enabled srun will accept the option "--x11" to allow for X11 forwarding
201 from within an interactive session or a batch job. This activates the
202 slurm-spank-x11 module. Note that this option also enables
203 {option}`services.openssh.forwardX11` on the client.
204
205 This option requires slurm to be compiled without native X11 support.
206 The default behavior is to re-compile the slurm package with native X11
207 support disabled if this option is set to true.
208
209 To use the native X11 support add `PrologFlags=X11` in {option}`extraConfig`.
210 Note that this method will only work RSA SSH host keys.
211 '';
212 };
213
214 procTrackType = mkOption {
215 type = types.str;
216 default = "proctrack/linuxproc";
217 description = lib.mdDoc ''
218 Plugin to be used for process tracking on a job step basis.
219 The slurmd daemon uses this mechanism to identify all processes
220 which are children of processes it spawns for a user job step.
221 '';
222 };
223
224 stateSaveLocation = mkOption {
225 type = types.str;
226 default = "/var/spool/slurmctld";
227 description = lib.mdDoc ''
228 Directory into which the Slurm controller, slurmctld, saves its state.
229 '';
230 };
231
232 user = mkOption {
233 type = types.str;
234 default = defaultUser;
235 description = lib.mdDoc ''
236 Set this option when you want to run the slurmctld daemon
237 as something else than the default slurm user "slurm".
238 Note that the UID of this user needs to be the same
239 on all nodes.
240 '';
241 };
242
243 extraConfig = mkOption {
244 default = "";
245 type = types.lines;
246 description = lib.mdDoc ''
247 Extra configuration options that will be added verbatim at
248 the end of the slurm configuration file.
249 '';
250 };
251
252 extraPlugstackConfig = mkOption {
253 default = "";
254 type = types.lines;
255 description = lib.mdDoc ''
256 Extra configuration that will be added to the end of `plugstack.conf`.
257 '';
258 };
259
260 extraCgroupConfig = mkOption {
261 default = "";
262 type = types.lines;
263 description = lib.mdDoc ''
264 Extra configuration for `cgroup.conf`. This file is
265 used when `procTrackType=proctrack/cgroup`.
266 '';
267 };
268
269 extraConfigPaths = mkOption {
270 type = with types; listOf path;
271 default = [];
272 description = lib.mdDoc ''
273 Slurm expects config files for plugins in the same path
274 as `slurm.conf`. Add extra nix store
275 paths that should be merged into same directory as
276 `slurm.conf`.
277 '';
278 };
279
280 etcSlurm = mkOption {
281 type = types.path;
282 internal = true;
283 default = etcSlurm;
284 defaultText = literalMD ''
285 Directory created from generated config files and
286 `config.${opt.extraConfigPaths}`.
287 '';
288 description = lib.mdDoc ''
289 Path to directory with slurm config files. This option is set by default from the
290 Slurm module and is meant to make the Slurm config file available to other modules.
291 '';
292 };
293
294 };
295
296 };
297
298 imports = [
299 (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "storagePass" ] ''
300 This option has been removed so that the database password is not exposed via the nix store.
301 Use services.slurm.dbdserver.storagePassFile to provide the database password.
302 '')
303 (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "configFile" ] ''
304 This option has been removed. Use services.slurm.dbdserver.storagePassFile
305 and services.slurm.dbdserver.extraConfig instead.
306 '')
307 ];
308
309 ###### implementation
310
311 config =
312 let
313 wrappedSlurm = pkgs.stdenv.mkDerivation {
314 name = "wrappedSlurm";
315
316 builder = pkgs.writeText "builder.sh" ''
317 source $stdenv/setup
318 mkdir -p $out/bin
319 find ${getBin cfg.package}/bin -type f -executable | while read EXE
320 do
321 exename="$(basename $EXE)"
322 wrappername="$out/bin/$exename"
323 cat > "$wrappername" <<EOT
324 #!/bin/sh
325 if [ -z "$SLURM_CONF" ]
326 then
327 SLURM_CONF="${cfg.etcSlurm}/slurm.conf" "$EXE" "\$@"
328 else
329 "$EXE" "\$0"
330 fi
331 EOT
332 chmod +x "$wrappername"
333 done
334
335 mkdir -p $out/share
336 ln -s ${getBin cfg.package}/share/man $out/share/man
337 '';
338 };
339
340 in mkIf ( cfg.enableStools ||
341 cfg.client.enable ||
342 cfg.server.enable ||
343 cfg.dbdserver.enable ) {
344
345 environment.systemPackages = [ wrappedSlurm ];
346
347 services.munge.enable = mkDefault true;
348
349 # use a static uid as default to ensure it is the same on all nodes
350 users.users.slurm = mkIf (cfg.user == defaultUser) {
351 name = defaultUser;
352 group = "slurm";
353 uid = config.ids.uids.slurm;
354 };
355
356 users.groups.slurm.gid = config.ids.uids.slurm;
357
358 systemd.services.slurmd = mkIf (cfg.client.enable) {
359 path = with pkgs; [ wrappedSlurm coreutils ]
360 ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
361
362 wantedBy = [ "multi-user.target" ];
363 after = [
364 "systemd-tmpfiles-clean.service"
365 "munge.service"
366 "network-online.target"
367 "remote-fs.target"
368 ];
369 wants = [ "network-online.target" ];
370
371 serviceConfig = {
372 Type = "forking";
373 KillMode = "process";
374 ExecStart = "${wrappedSlurm}/bin/slurmd";
375 PIDFile = "/run/slurmd.pid";
376 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
377 LimitMEMLOCK = "infinity";
378 Delegate="Yes";
379 };
380 };
381
382 systemd.tmpfiles.rules = mkIf cfg.client.enable [
383 "d /var/spool/slurmd 755 root root -"
384 ];
385
386 services.openssh.settings.X11Forwarding = mkIf cfg.client.enable (mkDefault true);
387
388 systemd.services.slurmctld = mkIf (cfg.server.enable) {
389 path = with pkgs; [ wrappedSlurm munge coreutils ]
390 ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
391
392 wantedBy = [ "multi-user.target" ];
393 after = [ "network.target" "munged.service" ];
394 requires = [ "munged.service" ];
395
396 serviceConfig = {
397 Type = "forking";
398 ExecStart = "${wrappedSlurm}/bin/slurmctld";
399 PIDFile = "/run/slurmctld.pid";
400 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
401 };
402
403 preStart = ''
404 mkdir -p ${cfg.stateSaveLocation}
405 chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
406 '';
407 };
408
409 systemd.services.slurmdbd = let
410 # slurm strips the last component off the path
411 configPath = "$RUNTIME_DIRECTORY/slurmdbd.conf";
412 in mkIf (cfg.dbdserver.enable) {
413 path = with pkgs; [ wrappedSlurm munge coreutils ];
414
415 wantedBy = [ "multi-user.target" ];
416 after = [ "network.target" "munged.service" "mysql.service" ];
417 requires = [ "munged.service" "mysql.service" ];
418
419 preStart = ''
420 install -m 600 -o ${cfg.user} -T ${slurmdbdConf} ${configPath}
421 ${optionalString (cfg.dbdserver.storagePassFile != null) ''
422 echo "StoragePass=$(cat ${cfg.dbdserver.storagePassFile})" \
423 >> ${configPath}
424 ''}
425 '';
426
427 script = ''
428 export SLURM_CONF=${configPath}
429 exec ${cfg.package}/bin/slurmdbd -D
430 '';
431
432 serviceConfig = {
433 RuntimeDirectory = "slurmdbd";
434 Type = "simple";
435 PIDFile = "/run/slurmdbd.pid";
436 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
437 };
438 };
439
440 };
441
442}