# NOTE(review): "at 25.11-pre 15 kB view raw" is web-blob-viewer residue, not part of
# the module; kept here as a comment so the file parses as Nix.
{
  config,
  lib,
  options,
  pkgs,
  ...
}:
let

  cfg = config.services.slurm;
  opt = options.services.slurm;
  # configuration file can be generated by https://slurm.schedmd.com/configurator.html

  defaultUser = "slurm";

  configFile = pkgs.writeTextDir "slurm.conf" ''
    ClusterName=${cfg.clusterName}
    StateSaveLocation=${cfg.stateSaveLocation}
    SlurmUser=${cfg.user}
    ${lib.optionalString (cfg.controlMachine != null) "controlMachine=${cfg.controlMachine}"}
    ${lib.optionalString (cfg.controlAddr != null) "controlAddr=${cfg.controlAddr}"}
    ${lib.concatMapStrings (x: "NodeName=${x}\n") cfg.nodeName}
    ${lib.concatMapStrings (x: "PartitionName=${x}\n") cfg.partitionName}
    PlugStackConfig=${plugStackConfig}/plugstack.conf
    ProctrackType=${cfg.procTrackType}
    ${cfg.extraConfig}
  '';

  plugStackConfig = pkgs.writeTextDir "plugstack.conf" ''
    ${lib.optionalString cfg.enableSrunX11 "optional ${pkgs.slurm-spank-x11}/lib/x11.so"}
    ${cfg.extraPlugstackConfig}
  '';

  cgroupConfig = pkgs.writeTextDir "cgroup.conf" ''
    ${cfg.extraCgroupConfig}
  '';

  mpiConf = pkgs.writeTextDir "mpi.conf" ''
    PMIxCliTmpDirBase=${cfg.mpi.PmixCliTmpDirBase}
    ${cfg.mpi.extraMpiConfig}
  '';

  # Installed with mode 600 into the slurmdbd runtime directory at service
  # start so the (secret) StoragePass can be appended outside the nix store.
  slurmdbdConf = pkgs.writeText "slurmdbd.conf" ''
    DbdHost=${cfg.dbdserver.dbdHost}
    SlurmUser=${cfg.user}
    StorageType=accounting_storage/mysql
    StorageUser=${cfg.dbdserver.storageUser}
    ${cfg.dbdserver.extraConfig}
  '';

  # slurm expects some additional config files to be
  # in the same directory as slurm.conf
  etcSlurm = pkgs.symlinkJoin {
    name = "etc-slurm";
    paths = [
      configFile
      cgroupConfig
      plugStackConfig
      mpiConf
    ] ++ cfg.extraConfigPaths;
  };
in

{

  ###### interface

  meta.maintainers = [ lib.maintainers.markuskowa ];

  options = {

    services.slurm = {

      server = {
        enable = lib.mkOption {
          type = lib.types.bool;
          default = false;
          description = ''
            Whether to enable the slurm control daemon.
            Note that the standard authentication method is "munge".
            The "munge" service needs to be provided with a password file in order for
            slurm to work properly (see `services.munge.password`).
          '';
        };
      };

      dbdserver = {
        enable = lib.mkEnableOption "SlurmDBD service";

        dbdHost = lib.mkOption {
          type = lib.types.str;
          default = config.networking.hostName;
          defaultText = lib.literalExpression "config.networking.hostName";
          description = ''
            Hostname of the machine where `slurmdbd`
            is running (i.e. name returned by `hostname -s`).
          '';
        };

        storageUser = lib.mkOption {
          type = lib.types.str;
          default = cfg.user;
          defaultText = lib.literalExpression "config.${opt.user}";
          description = ''
            Database user name.
          '';
        };

        storagePassFile = lib.mkOption {
          type = with lib.types; nullOr str;
          default = null;
          description = ''
            Path to file with database password. The content of this will be used to
            create the password for the `StoragePass` option.
          '';
        };

        extraConfig = lib.mkOption {
          type = lib.types.lines;
          default = "";
          description = ''
            Extra configuration for `slurmdbd.conf`. See also:
            {manpage}`slurmdbd.conf(8)`.
          '';
        };
      };

      client = {
        enable = lib.mkEnableOption "slurm client daemon";
      };

      enableStools = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = ''
          Whether to provide a slurm.conf file.
          Enable this option if you do not run a slurm daemon on this host
          (i.e. `server.enable` and `client.enable` are `false`)
          but you still want to run slurm commands from this host.
        '';
      };

      package =
        lib.mkPackageOption pkgs "slurm" {
          example = "slurm-full";
        }
        // {
          # Native X11 support must be compiled out when the spank-x11
          # plugin is used (see enableSrunX11 below).
          default = pkgs.slurm.override { enableX11 = !cfg.enableSrunX11; };
        };

      controlMachine = lib.mkOption {
        type = lib.types.nullOr lib.types.str;
        default = null;
        example = null;
        description = ''
          The short hostname of the machine where SLURM control functions are
          executed (i.e. the name returned by the command "hostname -s", use "tux001"
          rather than "tux001.my.com").
        '';
      };

      controlAddr = lib.mkOption {
        type = lib.types.nullOr lib.types.str;
        default = cfg.controlMachine;
        defaultText = lib.literalExpression "config.${opt.controlMachine}";
        example = null;
        description = ''
          Name that ControlMachine should be referred to in establishing a
          communications path.
        '';
      };

      clusterName = lib.mkOption {
        type = lib.types.str;
        default = "default";
        example = "myCluster";
        description = ''
          Necessary to distinguish accounting records in a multi-cluster environment.
        '';
      };

      nodeName = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        default = [ ];
        example = lib.literalExpression ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
        description = ''
          Name that SLURM uses to refer to a node (or base partition for BlueGene
          systems). Typically this would be the string that "/bin/hostname -s"
          returns. Note that now you have to write node's parameters after the name.
        '';
      };

      partitionName = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        default = [ ];
        example = lib.literalExpression ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
        description = ''
          Name by which the partition may be referenced. Note that now you have
          to write the partition's parameters after the name.
        '';
      };

      enableSrunX11 = lib.mkOption {
        default = false;
        type = lib.types.bool;
        description = ''
          If enabled srun will accept the option "--x11" to allow for X11 forwarding
          from within an interactive session or a batch job. This activates the
          slurm-spank-x11 module. Note that this option also enables
          {option}`services.openssh.forwardX11` on the client.

          This option requires slurm to be compiled without native X11 support.
          The default behavior is to re-compile the slurm package with native X11
          support disabled if this option is set to true.

          To use the native X11 support add `PrologFlags=X11` in {option}`extraConfig`.
          Note that this method will only work with RSA SSH host keys.
        '';
      };

      procTrackType = lib.mkOption {
        type = lib.types.str;
        default = "proctrack/linuxproc";
        description = ''
          Plugin to be used for process tracking on a job step basis.
          The slurmd daemon uses this mechanism to identify all processes
          which are children of processes it spawns for a user job step.
        '';
      };

      stateSaveLocation = lib.mkOption {
        type = lib.types.str;
        default = "/var/spool/slurmctld";
        description = ''
          Directory into which the Slurm controller, slurmctld, saves its state.
        '';
      };

      user = lib.mkOption {
        type = lib.types.str;
        default = defaultUser;
        description = ''
          Set this option when you want to run the slurmctld daemon
          as something else than the default slurm user "slurm".
          Note that the UID of this user needs to be the same
          on all nodes.
        '';
      };

      extraConfig = lib.mkOption {
        default = "";
        type = lib.types.lines;
        description = ''
          Extra configuration options that will be added verbatim at
          the end of the slurm configuration file.
        '';
      };

      mpi = {
        PmixCliTmpDirBase = lib.mkOption {
          default = "/tmp/pmix";
          type = lib.types.str;
          description = ''
            Base path for PMIx temporary files.
          '';
        };

        extraMpiConfig = lib.mkOption {
          default = "";
          type = lib.types.lines;
          description = ''
            Extra configuration that will be added to `mpi.conf`.
          '';
        };
      };

      extraPlugstackConfig = lib.mkOption {
        default = "";
        type = lib.types.lines;
        description = ''
          Extra configuration that will be added to the end of `plugstack.conf`.
        '';
      };

      extraCgroupConfig = lib.mkOption {
        default = "";
        type = lib.types.lines;
        description = ''
          Extra configuration for `cgroup.conf`. This file is
          used when `procTrackType=proctrack/cgroup`.
        '';
      };

      extraConfigPaths = lib.mkOption {
        type = with lib.types; listOf path;
        default = [ ];
        description = ''
          Slurm expects config files for plugins in the same path
          as `slurm.conf`. Add extra nix store
          paths that should be merged into same directory as
          `slurm.conf`.
        '';
      };

      etcSlurm = lib.mkOption {
        type = lib.types.path;
        internal = true;
        default = etcSlurm;
        defaultText = lib.literalMD ''
          Directory created from generated config files and
          `config.${opt.extraConfigPaths}`.
        '';
        description = ''
          Path to directory with slurm config files. This option is set by default from the
          Slurm module and is meant to make the Slurm config file available to other modules.
        '';
      };

    };

  };

  imports = [
    (lib.mkRemovedOptionModule [ "services" "slurm" "dbdserver" "storagePass" ] ''
      This option has been removed so that the database password is not exposed via the nix store.
      Use services.slurm.dbdserver.storagePassFile to provide the database password.
    '')
    (lib.mkRemovedOptionModule [ "services" "slurm" "dbdserver" "configFile" ] ''
      This option has been removed. Use services.slurm.dbdserver.storagePassFile
      and services.slurm.dbdserver.extraConfig instead.
    '')
  ];

  ###### implementation

  config =
    let
      # Wrap every slurm binary so it picks up the generated slurm.conf,
      # unless the caller already points SLURM_CONF somewhere else.
      wrappedSlurm = pkgs.stdenv.mkDerivation {
        name = "wrappedSlurm";

        builder = pkgs.writeText "builder.sh" ''
          mkdir -p $out/bin
          find ${lib.getBin cfg.package}/bin -type f -executable | while read EXE
          do
            exename="$(basename $EXE)"
            wrappername="$out/bin/$exename"
            # FIX: "\$SLURM_CONF" is escaped so the *generated wrapper* tests
            # the runtime environment. Previously the unescaped variable was
            # expanded (to "") while this builder ran, so the test was always
            # true and a user-supplied SLURM_CONF was silently overridden.
            # FIX: the else branch now forwards "\$@"; it used to pass "\$0"
            # (the script name) instead of the actual arguments.
            cat > "$wrappername" <<EOT
          #!/bin/sh
          if [ -z "\$SLURM_CONF" ]
          then
            SLURM_CONF="${cfg.etcSlurm}/slurm.conf" "$EXE" "\$@"
          else
            "$EXE" "\$@"
          fi
          EOT
            chmod +x "$wrappername"
          done

          mkdir -p $out/share
          ln -s ${lib.getBin cfg.package}/share/man $out/share/man
        '';
      };

    in
    lib.mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable || cfg.dbdserver.enable) {

      environment.systemPackages = [ wrappedSlurm ];

      # Munge is the standard slurm authentication method; see server.enable docs.
      services.munge.enable = lib.mkDefault true;

      # use a static uid as default to ensure it is the same on all nodes
      users.users.slurm = lib.mkIf (cfg.user == defaultUser) {
        name = defaultUser;
        group = "slurm";
        uid = config.ids.uids.slurm;
      };

      # FIX: read the gid from the gid table; the previous
      # config.ids.uids.slurm happened to carry the same number but was the
      # wrong table.
      users.groups.slurm.gid = config.ids.gids.slurm;

      systemd.services.slurmd = lib.mkIf (cfg.client.enable) {
        path =
          with pkgs;
          [
            wrappedSlurm
            coreutils
          ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        after = [
          "systemd-tmpfiles-clean.service"
          # FIX: the munge unit is "munged.service" (as slurmctld/slurmdbd
          # below already order against); "munge.service" does not exist.
          "munged.service"
          "network-online.target"
          "remote-fs.target"
        ];
        wants = [ "network-online.target" ];

        serviceConfig = {
          Type = "forking";
          KillMode = "process";
          ExecStart = "${wrappedSlurm}/bin/slurmd";
          PIDFile = "/run/slurmd.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
          LimitMEMLOCK = "infinity";
          Delegate = "Yes";
        };
      };

      systemd.tmpfiles.rules = lib.optionals cfg.client.enable [
        "d /var/spool/slurmd 755 root root -"
        "d ${cfg.mpi.PmixCliTmpDirBase} 755 root root -"
      ];

      services.openssh.settings.X11Forwarding = lib.mkIf cfg.client.enable (lib.mkDefault true);

      systemd.services.slurmctld = lib.mkIf (cfg.server.enable) {
        path =
          with pkgs;
          [
            wrappedSlurm
            munge
            coreutils
          ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        after = [
          "network.target"
          "munged.service"
        ];
        requires = [ "munged.service" ];

        serviceConfig = {
          Type = "forking";
          ExecStart = "${wrappedSlurm}/bin/slurmctld";
          PIDFile = "/run/slurmctld.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
        };

        # The controller needs its (writable) state directory owned by the
        # slurm user before it starts.
        preStart = ''
          mkdir -p ${cfg.stateSaveLocation}
          chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
        '';
      };

      systemd.services.slurmdbd =
        let
          # slurm strips the last component off the path
          configPath = "$RUNTIME_DIRECTORY/slurmdbd.conf";
        in
        lib.mkIf (cfg.dbdserver.enable) {
          path = with pkgs; [
            wrappedSlurm
            munge
            coreutils
          ];

          wantedBy = [ "multi-user.target" ];
          after = [
            "network.target"
            "munged.service"
            "mysql.service"
          ];
          requires = [
            "munged.service"
            "mysql.service"
          ];

          # Copy the store config into the runtime dir (mode 600) and append
          # the secret StoragePass there, keeping it out of the nix store.
          preStart = ''
            install -m 600 -o ${cfg.user} -T ${slurmdbdConf} ${configPath}
            ${lib.optionalString (cfg.dbdserver.storagePassFile != null) ''
              echo "StoragePass=$(cat ${cfg.dbdserver.storagePassFile})" \
                >> ${configPath}
            ''}
          '';

          script = ''
            export SLURM_CONF=${configPath}
            exec ${cfg.package}/bin/slurmdbd -D
          '';

          serviceConfig = {
            RuntimeDirectory = "slurmdbd";
            Type = "simple";
            PIDFile = "/run/slurmdbd.pid";
            ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
          };
        };

    };
}