1{ config, lib, pkgs, ... }:
2
3with lib;
4
5let
6
7 cfg = config.services.slurm;
8 # configuration file can be generated by http://slurm.schedmd.com/configurator.html
9
10 defaultUser = "slurm";
11
12 configFile = pkgs.writeTextDir "slurm.conf"
13 ''
14 ClusterName=${cfg.clusterName}
15 StateSaveLocation=${cfg.stateSaveLocation}
16 SlurmUser=${cfg.user}
17 ${optionalString (cfg.controlMachine != null) "controlMachine=${cfg.controlMachine}"}
18 ${optionalString (cfg.controlAddr != null) "controlAddr=${cfg.controlAddr}"}
19 ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
20 ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
21 PlugStackConfig=${plugStackConfig}/plugstack.conf
22 ProctrackType=${cfg.procTrackType}
23 ${cfg.extraConfig}
24 '';
25
26 plugStackConfig = pkgs.writeTextDir "plugstack.conf"
27 ''
28 ${optionalString cfg.enableSrunX11 "optional ${pkgs.slurm-spank-x11}/lib/x11.so"}
29 ${cfg.extraPlugstackConfig}
30 '';
31
32 cgroupConfig = pkgs.writeTextDir "cgroup.conf"
33 ''
34 ${cfg.extraCgroupConfig}
35 '';
36
37 slurmdbdConf = pkgs.writeText "slurmdbd.conf"
38 ''
39 DbdHost=${cfg.dbdserver.dbdHost}
40 SlurmUser=${cfg.user}
41 StorageType=accounting_storage/mysql
42 StorageUser=${cfg.dbdserver.storageUser}
43 ${cfg.dbdserver.extraConfig}
44 '';
45
46 # slurm expects some additional config files to be
47 # in the same directory as slurm.conf
48 etcSlurm = pkgs.symlinkJoin {
49 name = "etc-slurm";
50 paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
51 };
52in
53
54{
55
56 ###### interface
57
58 meta.maintainers = [ maintainers.markuskowa ];
59
60 options = {
61
62 services.slurm = {
63
64 server = {
65 enable = mkOption {
66 type = types.bool;
67 default = false;
68 description = ''
69 Whether to enable the slurm control daemon.
70 Note that the standard authentication method is "munge".
71 The "munge" service needs to be provided with a password file in order for
72 slurm to work properly (see <literal>services.munge.password</literal>).
73 '';
74 };
75 };
76
77 dbdserver = {
78 enable = mkEnableOption "SlurmDBD service";
79
80 dbdHost = mkOption {
81 type = types.str;
82 default = config.networking.hostName;
83 description = ''
84 Hostname of the machine where <literal>slurmdbd</literal>
85 is running (i.e. name returned by <literal>hostname -s</literal>).
86 '';
87 };
88
89 storageUser = mkOption {
90 type = types.str;
91 default = cfg.user;
92 description = ''
93 Database user name.
94 '';
95 };
96
97 storagePassFile = mkOption {
98 type = with types; nullOr str;
99 default = null;
100 description = ''
101 Path to file with database password. The content of this will be used to
102 create the password for the <literal>StoragePass</literal> option.
103 '';
104 };
105
106 extraConfig = mkOption {
107 type = types.lines;
108 default = "";
109 description = ''
110 Extra configuration for <literal>slurmdbd.conf</literal> See also:
111 <citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
112 <manvolnum>8</manvolnum></citerefentry>.
113 '';
114 };
115 };
116
117 client = {
118 enable = mkEnableOption "slurm client daemon";
119 };
120
121 enableStools = mkOption {
122 type = types.bool;
123 default = false;
124 description = ''
125 Whether to provide a slurm.conf file.
126 Enable this option if you do not run a slurm daemon on this host
127 (i.e. <literal>server.enable</literal> and <literal>client.enable</literal> are <literal>false</literal>)
128 but you still want to run slurm commands from this host.
129 '';
130 };
131
132 package = mkOption {
133 type = types.package;
134 default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
135 defaultText = literalExpression "pkgs.slurm";
136 example = literalExpression "pkgs.slurm-full";
137 description = ''
138 The package to use for slurm binaries.
139 '';
140 };
141
142 controlMachine = mkOption {
143 type = types.nullOr types.str;
144 default = null;
145 example = null;
146 description = ''
147 The short hostname of the machine where SLURM control functions are
148 executed (i.e. the name returned by the command "hostname -s", use "tux001"
149 rather than "tux001.my.com").
150 '';
151 };
152
153 controlAddr = mkOption {
154 type = types.nullOr types.str;
155 default = cfg.controlMachine;
156 example = null;
157 description = ''
158 Name that ControlMachine should be referred to in establishing a
159 communications path.
160 '';
161 };
162
163 clusterName = mkOption {
164 type = types.str;
165 default = "default";
166 example = "myCluster";
167 description = ''
168 Necessary to distinguish accounting records in a multi-cluster environment.
169 '';
170 };
171
172 nodeName = mkOption {
173 type = types.listOf types.str;
174 default = [];
175 example = literalExpression ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
176 description = ''
177 Name that SLURM uses to refer to a node (or base partition for BlueGene
178 systems). Typically this would be the string that "/bin/hostname -s"
179 returns. Note that now you have to write node's parameters after the name.
180 '';
181 };
182
183 partitionName = mkOption {
184 type = types.listOf types.str;
185 default = [];
186 example = literalExpression ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
187 description = ''
188 Name by which the partition may be referenced. Note that now you have
189 to write the partition's parameters after the name.
190 '';
191 };
192
193 enableSrunX11 = mkOption {
194 default = false;
195 type = types.bool;
196 description = ''
197 If enabled srun will accept the option "--x11" to allow for X11 forwarding
198 from within an interactive session or a batch job. This activates the
199 slurm-spank-x11 module. Note that this option also enables
200 <option>services.openssh.forwardX11</option> on the client.
201
202 This option requires slurm to be compiled without native X11 support.
203 The default behavior is to re-compile the slurm package with native X11
204 support disabled if this option is set to true.
205
206 To use the native X11 support add <literal>PrologFlags=X11</literal> in <option>extraConfig</option>.
207 Note that this method will only work RSA SSH host keys.
208 '';
209 };
210
211 procTrackType = mkOption {
212 type = types.str;
213 default = "proctrack/linuxproc";
214 description = ''
215 Plugin to be used for process tracking on a job step basis.
216 The slurmd daemon uses this mechanism to identify all processes
217 which are children of processes it spawns for a user job step.
218 '';
219 };
220
221 stateSaveLocation = mkOption {
222 type = types.str;
223 default = "/var/spool/slurmctld";
224 description = ''
225 Directory into which the Slurm controller, slurmctld, saves its state.
226 '';
227 };
228
229 user = mkOption {
230 type = types.str;
231 default = defaultUser;
232 description = ''
233 Set this option when you want to run the slurmctld daemon
234 as something else than the default slurm user "slurm".
235 Note that the UID of this user needs to be the same
236 on all nodes.
237 '';
238 };
239
240 extraConfig = mkOption {
241 default = "";
242 type = types.lines;
243 description = ''
244 Extra configuration options that will be added verbatim at
245 the end of the slurm configuration file.
246 '';
247 };
248
249 extraPlugstackConfig = mkOption {
250 default = "";
251 type = types.lines;
252 description = ''
253 Extra configuration that will be added to the end of <literal>plugstack.conf</literal>.
254 '';
255 };
256
257 extraCgroupConfig = mkOption {
258 default = "";
259 type = types.lines;
260 description = ''
261 Extra configuration for <literal>cgroup.conf</literal>. This file is
262 used when <literal>procTrackType=proctrack/cgroup</literal>.
263 '';
264 };
265
266 extraConfigPaths = mkOption {
267 type = with types; listOf path;
268 default = [];
269 description = ''
270 Slurm expects config files for plugins in the same path
271 as <literal>slurm.conf</literal>. Add extra nix store
272 paths that should be merged into same directory as
273 <literal>slurm.conf</literal>.
274 '';
275 };
276
277 etcSlurm = mkOption {
278 type = types.path;
279 internal = true;
280 default = etcSlurm;
281 description = ''
282 Path to directory with slurm config files. This option is set by default from the
283 Slurm module and is meant to make the Slurm config file available to other modules.
284 '';
285 };
286
287 };
288
289 };
290
291 imports = [
292 (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "storagePass" ] ''
293 This option has been removed so that the database password is not exposed via the nix store.
294 Use services.slurm.dbdserver.storagePassFile to provide the database password.
295 '')
296 (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "configFile" ] ''
297 This option has been removed. Use services.slurm.dbdserver.storagePassFile
298 and services.slurm.dbdserver.extraConfig instead.
299 '')
300 ];
301
302 ###### implementation
303
304 config =
305 let
306 wrappedSlurm = pkgs.stdenv.mkDerivation {
307 name = "wrappedSlurm";
308
309 builder = pkgs.writeText "builder.sh" ''
310 source $stdenv/setup
311 mkdir -p $out/bin
312 find ${getBin cfg.package}/bin -type f -executable | while read EXE
313 do
314 exename="$(basename $EXE)"
315 wrappername="$out/bin/$exename"
316 cat > "$wrappername" <<EOT
317 #!/bin/sh
318 if [ -z "$SLURM_CONF" ]
319 then
320 SLURM_CONF="${cfg.etcSlurm}/slurm.conf" "$EXE" "\$@"
321 else
322 "$EXE" "\$0"
323 fi
324 EOT
325 chmod +x "$wrappername"
326 done
327
328 mkdir -p $out/share
329 ln -s ${getBin cfg.package}/share/man $out/share/man
330 '';
331 };
332
333 in mkIf ( cfg.enableStools ||
334 cfg.client.enable ||
335 cfg.server.enable ||
336 cfg.dbdserver.enable ) {
337
338 environment.systemPackages = [ wrappedSlurm ];
339
340 services.munge.enable = mkDefault true;
341
342 # use a static uid as default to ensure it is the same on all nodes
343 users.users.slurm = mkIf (cfg.user == defaultUser) {
344 name = defaultUser;
345 group = "slurm";
346 uid = config.ids.uids.slurm;
347 };
348
349 users.groups.slurm.gid = config.ids.uids.slurm;
350
351 systemd.services.slurmd = mkIf (cfg.client.enable) {
352 path = with pkgs; [ wrappedSlurm coreutils ]
353 ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
354
355 wantedBy = [ "multi-user.target" ];
356 after = [ "systemd-tmpfiles-clean.service" ];
357
358 serviceConfig = {
359 Type = "forking";
360 KillMode = "process";
361 ExecStart = "${wrappedSlurm}/bin/slurmd";
362 PIDFile = "/run/slurmd.pid";
363 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
364 LimitMEMLOCK = "infinity";
365 };
366
367 preStart = ''
368 mkdir -p /var/spool
369 '';
370 };
371
372 services.openssh.forwardX11 = mkIf cfg.client.enable (mkDefault true);
373
374 systemd.services.slurmctld = mkIf (cfg.server.enable) {
375 path = with pkgs; [ wrappedSlurm munge coreutils ]
376 ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;
377
378 wantedBy = [ "multi-user.target" ];
379 after = [ "network.target" "munged.service" ];
380 requires = [ "munged.service" ];
381
382 serviceConfig = {
383 Type = "forking";
384 ExecStart = "${wrappedSlurm}/bin/slurmctld";
385 PIDFile = "/run/slurmctld.pid";
386 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
387 };
388
389 preStart = ''
390 mkdir -p ${cfg.stateSaveLocation}
391 chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
392 '';
393 };
394
395 systemd.services.slurmdbd = let
396 # slurm strips the last component off the path
397 configPath = "$RUNTIME_DIRECTORY/slurmdbd.conf";
398 in mkIf (cfg.dbdserver.enable) {
399 path = with pkgs; [ wrappedSlurm munge coreutils ];
400
401 wantedBy = [ "multi-user.target" ];
402 after = [ "network.target" "munged.service" "mysql.service" ];
403 requires = [ "munged.service" "mysql.service" ];
404
405 preStart = ''
406 install -m 600 -o ${cfg.user} -T ${slurmdbdConf} ${configPath}
407 ${optionalString (cfg.dbdserver.storagePassFile != null) ''
408 echo "StoragePass=$(cat ${cfg.dbdserver.storagePassFile})" \
409 >> ${configPath}
410 ''}
411 '';
412
413 script = ''
414 export SLURM_CONF=${configPath}
415 exec ${cfg.package}/bin/slurmdbd -D
416 '';
417
418 serviceConfig = {
419 RuntimeDirectory = "slurmdbd";
420 Type = "simple";
421 PIDFile = "/run/slurmdbd.pid";
422 ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
423 };
424 };
425
426 };
427
428}