# Snapshot taken from a web source viewer at revision 23.11-pre (original file size: 14 kB).
# NixOS module for the Slurm workload manager.
#
# Declares the `services.slurm.*` options, generates the Slurm configuration
# directory (slurm.conf, cgroup.conf, plugstack.conf plus any extra paths) and
# defines the systemd units `slurmd`, `slurmctld` and `slurmdbd`.
{ config, lib, options, pkgs, ... }:

with lib;

let

  cfg = config.services.slurm;
  opt = options.services.slurm;
  # configuration file can be generated by http://slurm.schedmd.com/configurator.html

  defaultUser = "slurm";

  # slurm.conf, wrapped in a directory so it can be merged into `etcSlurm`.
  # `concatMapStrings` is used (rather than `toString (map ...)`) so that the
  # generated NodeName=/PartitionName= lines are not joined by stray spaces.
  configFile = pkgs.writeTextDir "slurm.conf"
    ''
      ClusterName=${cfg.clusterName}
      StateSaveLocation=${cfg.stateSaveLocation}
      SlurmUser=${cfg.user}
      ${optionalString (cfg.controlMachine != null) "controlMachine=${cfg.controlMachine}"}
      ${optionalString (cfg.controlAddr != null) "controlAddr=${cfg.controlAddr}"}
      ${concatMapStrings (x: "NodeName=${x}\n") cfg.nodeName}
      ${concatMapStrings (x: "PartitionName=${x}\n") cfg.partitionName}
      PlugStackConfig=${plugStackConfig}/plugstack.conf
      ProctrackType=${cfg.procTrackType}
      ${cfg.extraConfig}
    '';

  # plugstack.conf; loads the spank X11 plugin only when enableSrunX11 is set.
  plugStackConfig = pkgs.writeTextDir "plugstack.conf"
    ''
      ${optionalString cfg.enableSrunX11 "optional ${pkgs.slurm-spank-x11}/lib/x11.so"}
      ${cfg.extraPlugstackConfig}
    '';

  # cgroup.conf; consists solely of the user-supplied extraCgroupConfig.
  cgroupConfig = pkgs.writeTextDir "cgroup.conf"
    ''
      ${cfg.extraCgroupConfig}
    '';

  # Static part of slurmdbd.conf. The database password is deliberately not
  # part of this store file; the slurmdbd unit's preStart appends it to a
  # runtime copy.
  slurmdbdConf = pkgs.writeText "slurmdbd.conf"
    ''
      DbdHost=${cfg.dbdserver.dbdHost}
      SlurmUser=${cfg.user}
      StorageType=accounting_storage/mysql
      StorageUser=${cfg.dbdserver.storageUser}
      ${cfg.dbdserver.extraConfig}
    '';

  # slurm expects some additional config files to be
  # in the same directory as slurm.conf
  etcSlurm = pkgs.symlinkJoin {
    name = "etc-slurm";
    paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
  };
in

{

  ###### interface

  meta.maintainers = [ maintainers.markuskowa ];

  options = {

    services.slurm = {

      server = {
        enable = mkOption {
          type = types.bool;
          default = false;
          description = lib.mdDoc ''
            Whether to enable the slurm control daemon.
            Note that the standard authentication method is "munge".
            The "munge" service needs to be provided with a password file in order for
            slurm to work properly (see `services.munge.password`).
          '';
        };
      };

      dbdserver = {
        enable = mkEnableOption (lib.mdDoc "SlurmDBD service");

        dbdHost = mkOption {
          type = types.str;
          default = config.networking.hostName;
          defaultText = literalExpression "config.networking.hostName";
          description = lib.mdDoc ''
            Hostname of the machine where `slurmdbd`
            is running (i.e. name returned by `hostname -s`).
          '';
        };

        storageUser = mkOption {
          type = types.str;
          default = cfg.user;
          defaultText = literalExpression "config.${opt.user}";
          description = lib.mdDoc ''
            Database user name.
          '';
        };

        storagePassFile = mkOption {
          type = with types; nullOr str;
          default = null;
          description = lib.mdDoc ''
            Path to file with database password. The content of this will be used to
            create the password for the `StoragePass` option.
          '';
        };

        extraConfig = mkOption {
          type = types.lines;
          default = "";
          description = lib.mdDoc ''
            Extra configuration for `slurmdbd.conf`. See also:
            {manpage}`slurmdbd.conf(8)`.
          '';
        };
      };

      client = {
        enable = mkEnableOption (lib.mdDoc "slurm client daemon");
      };

      enableStools = mkOption {
        type = types.bool;
        default = false;
        description = lib.mdDoc ''
          Whether to provide a slurm.conf file.
          Enable this option if you do not run a slurm daemon on this host
          (i.e. `server.enable` and `client.enable` are `false`)
          but you still want to run slurm commands from this host.
        '';
      };

      package = mkOption {
        type = types.package;
        # Native X11 support is switched off when the spank X11 plugin is used.
        default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
        defaultText = literalExpression "pkgs.slurm";
        example = literalExpression "pkgs.slurm-full";
        description = lib.mdDoc ''
          The package to use for slurm binaries.
        '';
      };

      controlMachine = mkOption {
        type = types.nullOr types.str;
        default = null;
        example = null;
        description = lib.mdDoc ''
          The short hostname of the machine where SLURM control functions are
          executed (i.e. the name returned by the command "hostname -s", use "tux001"
          rather than "tux001.my.com").
        '';
      };

      controlAddr = mkOption {
        type = types.nullOr types.str;
        default = cfg.controlMachine;
        defaultText = literalExpression "config.${opt.controlMachine}";
        example = null;
        description = lib.mdDoc ''
          Name that ControlMachine should be referred to in establishing a
          communications path.
        '';
      };

      clusterName = mkOption {
        type = types.str;
        default = "default";
        example = "myCluster";
        description = lib.mdDoc ''
          Necessary to distinguish accounting records in a multi-cluster environment.
        '';
      };

      nodeName = mkOption {
        type = types.listOf types.str;
        default = [];
        example = literalExpression ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
        description = lib.mdDoc ''
          Name that SLURM uses to refer to a node (or base partition for BlueGene
          systems). Typically this would be the string that "/bin/hostname -s"
          returns. Note that now you have to write node's parameters after the name.
        '';
      };

      partitionName = mkOption {
        type = types.listOf types.str;
        default = [];
        example = literalExpression ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
        description = lib.mdDoc ''
          Name by which the partition may be referenced. Note that now you have
          to write the partition's parameters after the name.
        '';
      };

      enableSrunX11 = mkOption {
        default = false;
        type = types.bool;
        description = lib.mdDoc ''
          If enabled srun will accept the option "--x11" to allow for X11 forwarding
          from within an interactive session or a batch job. This activates the
          slurm-spank-x11 module. Note that this option also enables
          {option}`services.openssh.settings.X11Forwarding` on the client.

          This option requires slurm to be compiled without native X11 support.
          The default behavior is to re-compile the slurm package with native X11
          support disabled if this option is set to true.

          To use the native X11 support add `PrologFlags=X11` in {option}`extraConfig`.
          Note that this method will only work with RSA SSH host keys.
        '';
      };

      procTrackType = mkOption {
        type = types.str;
        default = "proctrack/linuxproc";
        description = lib.mdDoc ''
          Plugin to be used for process tracking on a job step basis.
          The slurmd daemon uses this mechanism to identify all processes
          which are children of processes it spawns for a user job step.
        '';
      };

      stateSaveLocation = mkOption {
        type = types.str;
        default = "/var/spool/slurmctld";
        description = lib.mdDoc ''
          Directory into which the Slurm controller, slurmctld, saves its state.
        '';
      };

      user = mkOption {
        type = types.str;
        default = defaultUser;
        description = lib.mdDoc ''
          Set this option when you want to run the slurmctld daemon
          as something else than the default slurm user "slurm".
          Note that the UID of this user needs to be the same
          on all nodes.
        '';
      };

      extraConfig = mkOption {
        default = "";
        type = types.lines;
        description = lib.mdDoc ''
          Extra configuration options that will be added verbatim at
          the end of the slurm configuration file.
        '';
      };

      extraPlugstackConfig = mkOption {
        default = "";
        type = types.lines;
        description = lib.mdDoc ''
          Extra configuration that will be added to the end of `plugstack.conf`.
        '';
      };

      extraCgroupConfig = mkOption {
        default = "";
        type = types.lines;
        description = lib.mdDoc ''
          Extra configuration for `cgroup.conf`. This file is
          used when `procTrackType=proctrack/cgroup`.
        '';
      };

      extraConfigPaths = mkOption {
        type = with types; listOf path;
        default = [];
        description = lib.mdDoc ''
          Slurm expects config files for plugins in the same path
          as `slurm.conf`. Add extra nix store
          paths that should be merged into same directory as
          `slurm.conf`.
        '';
      };

      etcSlurm = mkOption {
        type = types.path;
        internal = true;
        default = etcSlurm;
        defaultText = literalMD ''
          Directory created from generated config files and
          `config.${opt.extraConfigPaths}`.
        '';
        description = lib.mdDoc ''
          Path to directory with slurm config files. This option is set by default from the
          Slurm module and is meant to make the Slurm config file available to other modules.
        '';
      };

    };

  };

  imports = [
    (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "storagePass" ] ''
      This option has been removed so that the database password is not exposed via the nix store.
      Use services.slurm.dbdserver.storagePassFile to provide the database password.
    '')
    (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "configFile" ] ''
      This option has been removed. Use services.slurm.dbdserver.storagePassFile
      and services.slurm.dbdserver.extraConfig instead.
    '')
  ];

  ###### implementation

  config =
    let
      # Derivation holding one /bin/sh wrapper per executable of cfg.package.
      # Each wrapper points SLURM_CONF at the generated slurm.conf unless the
      # caller has already set SLURM_CONF.
      #
      # The wrapper text is produced by an unquoted heredoc, so anything that
      # must be evaluated when the wrapper RUNS (not when it is built) has to
      # be written as \$...: "$EXE" is intentionally expanded at build time,
      # whereas \$SLURM_CONF and \$@ are left for the wrapper itself.
      # The heredoc body and its EOT terminator sit at the string's minimum
      # indentation so that they end up in column 0 of the builder script.
      wrappedSlurm = pkgs.stdenv.mkDerivation {
        name = "wrappedSlurm";

        builder = pkgs.writeText "builder.sh" ''
          source $stdenv/setup
          mkdir -p $out/bin
          find ${getBin cfg.package}/bin -type f -executable | while read EXE
          do
            exename="$(basename $EXE)"
            wrappername="$out/bin/$exename"
            cat > "$wrappername" <<EOT
          #!/bin/sh
          if [ -z "\$SLURM_CONF" ]
          then
            SLURM_CONF="${cfg.etcSlurm}/slurm.conf" "$EXE" "\$@"
          else
            "$EXE" "\$@"
          fi
          EOT
            chmod +x "$wrappername"
          done

          mkdir -p $out/share
          ln -s ${getBin cfg.package}/share/man $out/share/man
        '';
      };

    in mkIf ( cfg.enableStools ||
              cfg.client.enable ||
              cfg.server.enable ||
              cfg.dbdserver.enable ) {

      environment.systemPackages = [ wrappedSlurm ];

      services.munge.enable = mkDefault true;

      # use a static uid as default to ensure it is the same on all nodes
      users.users.slurm = mkIf (cfg.user == defaultUser) {
        name = defaultUser;
        group = "slurm";
        uid = config.ids.uids.slurm;
      };

      # NOTE(review): the gid is taken from ids.uids rather than ids.gids;
      # presumably both hold the same number -- confirm before changing.
      users.groups.slurm.gid = config.ids.uids.slurm;

      systemd.services.slurmd = mkIf (cfg.client.enable) {
        path = with pkgs; [ wrappedSlurm coreutils ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        # "munged.service" matches the unit name used by slurmctld and
        # slurmdbd below.
        after = [
          "systemd-tmpfiles-clean.service"
          "munged.service"
          "network-online.target"
          "remote-fs.target"
        ];
        wants = [ "network-online.target" ];

        serviceConfig = {
          Type = "forking";
          KillMode = "process";
          ExecStart = "${wrappedSlurm}/bin/slurmd";
          PIDFile = "/run/slurmd.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
          LimitMEMLOCK = "infinity";
          Delegate="Yes";
        };
      };

      systemd.tmpfiles.rules = mkIf cfg.client.enable [
        "d /var/spool/slurmd 755 root root -"
      ];

      services.openssh.settings.X11Forwarding = mkIf cfg.client.enable (mkDefault true);

      systemd.services.slurmctld = mkIf (cfg.server.enable) {
        path = with pkgs; [ wrappedSlurm munge coreutils ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" "munged.service" ];
        requires = [ "munged.service" ];

        serviceConfig = {
          Type = "forking";
          ExecStart = "${wrappedSlurm}/bin/slurmctld";
          PIDFile = "/run/slurmctld.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
        };

        # Create the state directory and hand it to the configured user.
        preStart = ''
          mkdir -p ${cfg.stateSaveLocation}
          chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
        '';
      };

      systemd.services.slurmdbd = let
        # slurm strips the last component off the path
        configPath = "$RUNTIME_DIRECTORY/slurmdbd.conf";
      in mkIf (cfg.dbdserver.enable) {
        path = with pkgs; [ wrappedSlurm munge coreutils ];

        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" "munged.service" "mysql.service" ];
        requires = [ "munged.service" "mysql.service" ];

        # Copy the static config into the runtime directory (mode 600) and,
        # if a password file is configured, append StoragePass to that copy.
        preStart = ''
          install -m 600 -o ${cfg.user} -T ${slurmdbdConf} ${configPath}
          ${optionalString (cfg.dbdserver.storagePassFile != null) ''
            echo "StoragePass=$(cat ${cfg.dbdserver.storagePassFile})" \
              >> ${configPath}
          ''}
        '';

        # Runs the unwrapped binary with SLURM_CONF pointing at the runtime copy.
        script = ''
          export SLURM_CONF=${configPath}
          exec ${cfg.package}/bin/slurmdbd -D
        '';

        serviceConfig = {
          RuntimeDirectory = "slurmdbd";
          Type = "simple";
          PIDFile = "/run/slurmdbd.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
        };
      };

    };

}