# NixOS service module for the SLURM workload manager (slurmctld / slurmd / slurmdbd).
{
  config,
  lib,
  options,
  pkgs,
  ...
}:
let

  cfg = config.services.slurm;
  opt = options.services.slurm;
  # configuration file can be generated by https://slurm.schedmd.com/configurator.html

  defaultUser = "slurm";

  configFile = pkgs.writeTextDir "slurm.conf" ''
    ClusterName=${cfg.clusterName}
    StateSaveLocation=${cfg.stateSaveLocation}
    SlurmUser=${cfg.user}
    ${lib.optionalString (cfg.controlMachine != null) "controlMachine=${cfg.controlMachine}"}
    ${lib.optionalString (cfg.controlAddr != null) "controlAddr=${cfg.controlAddr}"}
    ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
    ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
    PlugStackConfig=${plugStackConfig}/plugstack.conf
    ProctrackType=${cfg.procTrackType}
    ${cfg.extraConfig}
  '';

  plugStackConfig = pkgs.writeTextDir "plugstack.conf" ''
    ${lib.optionalString cfg.enableSrunX11 "optional ${pkgs.slurm-spank-x11}/lib/x11.so"}
    ${cfg.extraPlugstackConfig}
  '';

  cgroupConfig = pkgs.writeTextDir "cgroup.conf" ''
    ${cfg.extraCgroupConfig}
  '';

  mpiConf = pkgs.writeTextDir "mpi.conf" ''
    PMIxCliTmpDirBase=${cfg.mpi.PmixCliTmpDirBase}
    ${cfg.mpi.extraMpiConfig}
  '';

  slurmdbdConf = pkgs.writeText "slurmdbd.conf" ''
    DbdHost=${cfg.dbdserver.dbdHost}
    SlurmUser=${cfg.user}
    StorageType=accounting_storage/mysql
    StorageUser=${cfg.dbdserver.storageUser}
    ${cfg.dbdserver.extraConfig}
  '';

  # slurm expects some additional config files to be
  # in the same directory as slurm.conf
  etcSlurm = pkgs.symlinkJoin {
    name = "etc-slurm";
    paths = [
      configFile
      cgroupConfig
      plugStackConfig
      mpiConf
    ]
    ++ cfg.extraConfigPaths;
  };
in

{

  ###### interface

  meta.maintainers = [ lib.maintainers.markuskowa ];

  options = {

    services.slurm = {

      server = {
        enable = lib.mkOption {
          type = lib.types.bool;
          default = false;
          description = ''
            Whether to enable the slurm control daemon.
            Note that the standard authentication method is "munge".
            The "munge" service needs to be provided with a password file in order for
            slurm to work properly (see `services.munge.password`).
          '';
        };
      };

      dbdserver = {
        enable = lib.mkEnableOption "SlurmDBD service";

        dbdHost = lib.mkOption {
          type = lib.types.str;
          default = config.networking.hostName;
          defaultText = lib.literalExpression "config.networking.hostName";
          description = ''
            Hostname of the machine where `slurmdbd`
            is running (i.e. name returned by `hostname -s`).
          '';
        };

        storageUser = lib.mkOption {
          type = lib.types.str;
          default = cfg.user;
          defaultText = lib.literalExpression "config.${opt.user}";
          description = ''
            Database user name.
          '';
        };

        storagePassFile = lib.mkOption {
          type = with lib.types; nullOr str;
          default = null;
          description = ''
            Path to file with database password. The content of this will be used to
            create the password for the `StoragePass` option.
          '';
        };

        extraConfig = lib.mkOption {
          type = lib.types.lines;
          default = "";
          description = ''
            Extra configuration for `slurmdbd.conf` See also:
            {manpage}`slurmdbd.conf(8)`.
          '';
        };
      };

      client = {
        enable = lib.mkEnableOption "slurm client daemon";
      };

      enableStools = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = ''
          Whether to provide a slurm.conf file.
          Enable this option if you do not run a slurm daemon on this host
          (i.e. `server.enable` and `client.enable` are `false`)
          but you still want to run slurm commands from this host.
        '';
      };

      package =
        lib.mkPackageOption pkgs "slurm" {
          example = "slurm-full";
        }
        // {
          # The spank x11 plugin requires a slurm built without its native
          # X11 support, so the default package is rebuilt accordingly.
          default = pkgs.slurm.override { enableX11 = !cfg.enableSrunX11; };
        };

      controlMachine = lib.mkOption {
        type = lib.types.nullOr lib.types.str;
        default = null;
        example = null;
        description = ''
          The short hostname of the machine where SLURM control functions are
          executed (i.e. the name returned by the command "hostname -s", use "tux001"
          rather than "tux001.my.com").
        '';
      };

      controlAddr = lib.mkOption {
        type = lib.types.nullOr lib.types.str;
        default = cfg.controlMachine;
        defaultText = lib.literalExpression "config.${opt.controlMachine}";
        example = null;
        description = ''
          Name that ControlMachine should be referred to in establishing a
          communications path.
        '';
      };

      clusterName = lib.mkOption {
        type = lib.types.str;
        default = "default";
        example = "myCluster";
        description = ''
          Necessary to distinguish accounting records in a multi-cluster environment.
        '';
      };

      nodeName = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        default = [ ];
        example = lib.literalExpression ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
        description = ''
          Name that SLURM uses to refer to a node (or base partition for BlueGene
          systems). Typically this would be the string that "/bin/hostname -s"
          returns. Note that now you have to write node's parameters after the name.
        '';
      };

      partitionName = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        default = [ ];
        example = lib.literalExpression ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
        description = ''
          Name by which the partition may be referenced. Note that now you have
          to write the partition's parameters after the name.
        '';
      };

      enableSrunX11 = lib.mkOption {
        default = false;
        type = lib.types.bool;
        description = ''
          If enabled srun will accept the option "--x11" to allow for X11 forwarding
          from within an interactive session or a batch job. This activates the
          slurm-spank-x11 module. Note that this option also enables
          {option}`services.openssh.forwardX11` on the client.

          This option requires slurm to be compiled without native X11 support.
          The default behavior is to re-compile the slurm package with native X11
          support disabled if this option is set to true.

          To use the native X11 support add `PrologFlags=X11` in {option}`extraConfig`.
          Note that this method will only work with RSA SSH host keys.
        '';
      };

      procTrackType = lib.mkOption {
        type = lib.types.str;
        default = "proctrack/linuxproc";
        description = ''
          Plugin to be used for process tracking on a job step basis.
          The slurmd daemon uses this mechanism to identify all processes
          which are children of processes it spawns for a user job step.
        '';
      };

      stateSaveLocation = lib.mkOption {
        type = lib.types.str;
        default = "/var/spool/slurmctld";
        description = ''
          Directory into which the Slurm controller, slurmctld, saves its state.
        '';
      };

      user = lib.mkOption {
        type = lib.types.str;
        default = defaultUser;
        description = ''
          Set this option when you want to run the slurmctld daemon
          as something else than the default slurm user "slurm".
          Note that the UID of this user needs to be the same
          on all nodes.
        '';
      };

      extraConfig = lib.mkOption {
        default = "";
        type = lib.types.lines;
        description = ''
          Extra configuration options that will be added verbatim at
          the end of the slurm configuration file.
        '';
      };

      mpi = {
        PmixCliTmpDirBase = lib.mkOption {
          default = "/tmp/pmix";
          type = lib.types.str;
          description = ''
            Base path for PMIx temporary files.
          '';
        };

        extraMpiConfig = lib.mkOption {
          default = "";
          type = lib.types.lines;
          description = ''
            Extra configuration that will be added to `mpi.conf`.
          '';
        };
      };

      extraPlugstackConfig = lib.mkOption {
        default = "";
        type = lib.types.lines;
        description = ''
          Extra configuration that will be added to the end of `plugstack.conf`.
        '';
      };

      extraCgroupConfig = lib.mkOption {
        default = "";
        type = lib.types.lines;
        description = ''
          Extra configuration for `cgroup.conf`. This file is
          used when `procTrackType=proctrack/cgroup`.
        '';
      };

      extraConfigPaths = lib.mkOption {
        type = with lib.types; listOf path;
        default = [ ];
        description = ''
          Slurm expects config files for plugins in the same path
          as `slurm.conf`. Add extra nix store
          paths that should be merged into same directory as
          `slurm.conf`.
        '';
      };

      etcSlurm = lib.mkOption {
        type = lib.types.path;
        internal = true;
        default = etcSlurm;
        defaultText = lib.literalMD ''
          Directory created from generated config files and
          `config.${opt.extraConfigPaths}`.
        '';
        description = ''
          Path to directory with slurm config files. This option is set by default from the
          Slurm module and is meant to make the Slurm config file available to other modules.
        '';
      };

    };

  };

  imports = [
    (lib.mkRemovedOptionModule [ "services" "slurm" "dbdserver" "storagePass" ] ''
      This option has been removed so that the database password is not exposed via the nix store.
      Use services.slurm.dbdserver.storagePassFile to provide the database password.
    '')
    (lib.mkRemovedOptionModule [ "services" "slurm" "dbdserver" "configFile" ] ''
      This option has been removed. Use services.slurm.dbdserver.storagePassFile
      and services.slurm.dbdserver.extraConfig instead.
    '')
  ];

  ###### implementation

  config =
    let
      # Wraps every slurm executable so that SLURM_CONF points at the
      # generated config directory unless the caller already exported one.
      wrappedSlurm = pkgs.stdenv.mkDerivation {
        name = "wrappedSlurm";

        builder = pkgs.writeText "builder.sh" ''
          # A custom builder bypasses the default stdenv one, so set up PATH
          # (coreutils, findutils) explicitly.
          source $stdenv/setup
          mkdir -p $out/bin
          find ${lib.getBin cfg.package}/bin -type f -executable | while read EXE
          do
            exename="$(basename $EXE)"
            wrappername="$out/bin/$exename"
            # In the heredoc below, $EXE is expanded at build time to the store
            # path of the wrapped binary; \$SLURM_CONF and \$@ are escaped so
            # they are evaluated at runtime by the generated wrapper.
            # Fixes: the condition used to read an unescaped $SLURM_CONF
            # (expanded to "" at build time, clobbering a user-set value), and
            # the else branch passed "\$0" instead of the actual arguments.
            cat > "$wrappername" <<EOT
          #!/bin/sh
          if [ -z "\$SLURM_CONF" ]
          then
            SLURM_CONF="${cfg.etcSlurm}/slurm.conf" "$EXE" "\$@"
          else
            "$EXE" "\$@"
          fi
          EOT
            chmod +x "$wrappername"
          done

          mkdir -p $out/share
          ln -s ${lib.getBin cfg.package}/share/man $out/share/man
        '';
      };

    in
    lib.mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable || cfg.dbdserver.enable) {

      environment.systemPackages = [ wrappedSlurm ];

      services.munge.enable = lib.mkDefault true;

      # use a static uid as default to ensure it is the same on all nodes
      users.users.slurm = lib.mkIf (cfg.user == defaultUser) {
        name = defaultUser;
        group = "slurm";
        uid = config.ids.uids.slurm;
      };

      # NOTE(review): reuses the static uid as the gid so that numeric ids
      # match across nodes — confirm ids.gids.slurm is not intended here.
      users.groups.slurm.gid = config.ids.uids.slurm;

      systemd.services.slurmd = lib.mkIf (cfg.client.enable) {
        path =
          with pkgs;
          [
            wrappedSlurm
            coreutils
          ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        after = [
          "systemd-tmpfiles-clean.service"
          # The NixOS munge module defines "munged.service"; "munge.service"
          # never exists, so the ordering was a no-op (slurmctld/slurmdbd
          # below already used the correct unit name).
          "munged.service"
          "network-online.target"
          "remote-fs.target"
        ];
        wants = [ "network-online.target" ];

        serviceConfig = {
          Type = "forking";
          KillMode = "process";
          ExecStart = "${wrappedSlurm}/bin/slurmd";
          PIDFile = "/run/slurmd.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
          LimitMEMLOCK = "infinity";
          Delegate = "Yes";
        };
      };

      systemd.tmpfiles.rules = lib.optionals cfg.client.enable [
        "d /var/spool/slurmd 755 root root -"
        "d ${cfg.mpi.PmixCliTmpDirBase} 755 root root -"
      ];

      services.openssh.settings.X11Forwarding = lib.mkIf cfg.client.enable (lib.mkDefault true);

      systemd.services.slurmctld = lib.mkIf (cfg.server.enable) {
        path =
          with pkgs;
          [
            wrappedSlurm
            munge
            coreutils
          ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        after = [
          "network.target"
          "munged.service"
        ];
        requires = [ "munged.service" ];

        serviceConfig = {
          Type = "forking";
          ExecStart = "${wrappedSlurm}/bin/slurmctld";
          PIDFile = "/run/slurmctld.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
        };

        preStart = ''
          mkdir -p ${cfg.stateSaveLocation}
          chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
        '';
      };

      systemd.services.slurmdbd =
        let
          # slurm strips the last component off the path
          configPath = "$RUNTIME_DIRECTORY/slurmdbd.conf";
        in
        lib.mkIf (cfg.dbdserver.enable) {
          path = with pkgs; [
            wrappedSlurm
            munge
            coreutils
          ];

          wantedBy = [ "multi-user.target" ];
          after = [
            "network.target"
            "munged.service"
            "mysql.service"
          ];
          requires = [
            "munged.service"
            "mysql.service"
          ];

          # The config (with the secret appended from storagePassFile) is
          # materialized in the private runtime directory, mode 600, so the
          # password never enters the world-readable nix store.
          preStart = ''
            install -m 600 -o ${cfg.user} -T ${slurmdbdConf} ${configPath}
            ${lib.optionalString (cfg.dbdserver.storagePassFile != null) ''
              echo "StoragePass=$(cat ${cfg.dbdserver.storagePassFile})" \
                >> ${configPath}
            ''}
          '';

          script = ''
            export SLURM_CONF=${configPath}
            exec ${cfg.package}/bin/slurmdbd -D
          '';

          serviceConfig = {
            RuntimeDirectory = "slurmdbd";
            Type = "simple";
            PIDFile = "/run/slurmdbd.pid";
            ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
          };
        };

    };

}