{ config, lib, pkgs, ... }:

with lib;

let

  cfg = config.services.slurm;
  # configuration file can be generated by http://slurm.schedmd.com/configurator.html

  defaultUser = "slurm";

  configFile = pkgs.writeTextDir "slurm.conf"
    ''
      ClusterName=${cfg.clusterName}
      StateSaveLocation=${cfg.stateSaveLocation}
      SlurmUser=${cfg.user}
      ${optionalString (cfg.controlMachine != null) "controlMachine=${cfg.controlMachine}"}
      ${optionalString (cfg.controlAddr != null) "controlAddr=${cfg.controlAddr}"}
      ${toString (map (x: "NodeName=${x}\n") cfg.nodeName)}
      ${toString (map (x: "PartitionName=${x}\n") cfg.partitionName)}
      PlugStackConfig=${plugStackConfig}/plugstack.conf
      ProctrackType=${cfg.procTrackType}
      ${cfg.extraConfig}
    '';

  plugStackConfig = pkgs.writeTextDir "plugstack.conf"
    ''
      ${optionalString cfg.enableSrunX11 "optional ${pkgs.slurm-spank-x11}/lib/x11.so"}
      ${cfg.extraPlugstackConfig}
    '';

  cgroupConfig = pkgs.writeTextDir "cgroup.conf"
    ''
      ${cfg.extraCgroupConfig}
    '';

  slurmdbdConf = pkgs.writeText "slurmdbd.conf"
    ''
      DbdHost=${cfg.dbdserver.dbdHost}
      SlurmUser=${cfg.user}
      StorageType=accounting_storage/mysql
      StorageUser=${cfg.dbdserver.storageUser}
      ${cfg.dbdserver.extraConfig}
    '';

  # slurm expects some additional config files to be
  # in the same directory as slurm.conf
  etcSlurm = pkgs.symlinkJoin {
    name = "etc-slurm";
    paths = [ configFile cgroupConfig plugStackConfig ] ++ cfg.extraConfigPaths;
  };
in

{

  ###### interface

  meta.maintainers = [ maintainers.markuskowa ];

  options = {

    services.slurm = {

      server = {
        enable = mkOption {
          type = types.bool;
          default = false;
          description = ''
            Whether to enable the slurm control daemon.
            Note that the standard authentication method is "munge".
            The "munge" service needs to be provided with a password file in order for
            slurm to work properly (see <literal>services.munge.password</literal>).
          '';
        };
      };

      dbdserver = {
        enable = mkEnableOption "SlurmDBD service";

        dbdHost = mkOption {
          type = types.str;
          default = config.networking.hostName;
          description = ''
            Hostname of the machine where <literal>slurmdbd</literal>
            is running (i.e. the name returned by <literal>hostname -s</literal>).
          '';
        };

        storageUser = mkOption {
          type = types.str;
          default = cfg.user;
          description = ''
            Database user name.
          '';
        };

        storagePassFile = mkOption {
          type = with types; nullOr str;
          default = null;
          description = ''
            Path to a file containing the database password. Its content is used
            to set the <literal>StoragePass</literal> option.
          '';
        };

        extraConfig = mkOption {
          type = types.lines;
          default = "";
          description = ''
            Extra configuration for <literal>slurmdbd.conf</literal>. See also:
            <citerefentry><refentrytitle>slurmdbd.conf</refentrytitle>
            <manvolnum>8</manvolnum></citerefentry>.
          '';
        };
      };

      client = {
        enable = mkEnableOption "slurm client daemon";
      };

      enableStools = mkOption {
        type = types.bool;
        default = false;
        description = ''
          Whether to provide a slurm.conf file.
          Enable this option if you do not run a slurm daemon on this host
          (i.e. <literal>server.enable</literal> and <literal>client.enable</literal> are <literal>false</literal>)
          but you still want to run slurm commands from this host.
        '';
      };
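
      # A sketch (not part of the module) of how the options above might be
      # combined on a submit-only host: the binaries and slurm.conf come from
      # enableStools while both daemons stay disabled. The hostname "control01",
      # the cluster name and the node/partition strings are purely illustrative.
      #
      #   services.slurm = {
      #     enableStools = true;
      #     controlMachine = "control01";
      #     clusterName = "mycluster";
      #     nodeName = [ "node[01-02] CPUs=4 State=UNKNOWN" ];
      #     partitionName = [ "debug Nodes=node[01-02] Default=YES MaxTime=INFINITE State=UP" ];
      #   };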

      package = mkOption {
        type = types.package;
        default = pkgs.slurm.override { enableX11 = ! cfg.enableSrunX11; };
        defaultText = literalExpression "pkgs.slurm";
        example = literalExpression "pkgs.slurm-full";
        description = ''
          The package to use for slurm binaries.
        '';
      };

      controlMachine = mkOption {
        type = types.nullOr types.str;
        default = null;
        example = null;
        description = ''
          The short hostname of the machine where SLURM control functions are
          executed (i.e. the name returned by the command "hostname -s"; use "tux001"
          rather than "tux001.my.com").
        '';
      };

      controlAddr = mkOption {
        type = types.nullOr types.str;
        default = cfg.controlMachine;
        example = null;
        description = ''
          Name that ControlMachine should be referred to in establishing a
          communications path.
        '';
      };

      clusterName = mkOption {
        type = types.str;
        default = "default";
        example = "myCluster";
        description = ''
          Necessary to distinguish accounting records in a multi-cluster environment.
        '';
      };

      nodeName = mkOption {
        type = types.listOf types.str;
        default = [];
        example = literalExpression ''[ "linux[1-32] CPUs=1 State=UNKNOWN" ];'';
        description = ''
          Name that SLURM uses to refer to a node (or base partition for BlueGene
          systems). Typically this would be the string that "/bin/hostname -s"
          returns. Note that the node's parameters have to be written after the name.
        '';
      };

      partitionName = mkOption {
        type = types.listOf types.str;
        default = [];
        example = literalExpression ''[ "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP" ];'';
        description = ''
          Name by which the partition may be referenced. Note that the partition's
          parameters have to be written after the name.
        '';
      };

      enableSrunX11 = mkOption {
        default = false;
        type = types.bool;
        description = ''
          If enabled, srun will accept the option "--x11" to allow for X11 forwarding
          from within an interactive session or a batch job. This activates the
          slurm-spank-x11 module. Note that this option also enables
          <option>services.openssh.forwardX11</option> on the client.

          This option requires slurm to be compiled without native X11 support.
          The default behavior is to re-compile the slurm package with native X11
          support disabled if this option is set to true.

          To use the native X11 support, add <literal>PrologFlags=X11</literal> in <option>extraConfig</option>.
          Note that this method will only work with RSA SSH host keys.
        '';
      };

      procTrackType = mkOption {
        type = types.str;
        default = "proctrack/linuxproc";
        description = ''
          Plugin to be used for process tracking on a job step basis.
          The slurmd daemon uses this mechanism to identify all processes
          which are children of processes it spawns for a user job step.
        '';
      };
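
      # A minimal sketch of switching to cgroup based process tracking using
      # procTrackType together with extraCgroupConfig (defined further below);
      # ConstrainCores is only one example cgroup.conf entry, not something
      # this module requires.
      #
      #   services.slurm = {
      #     procTrackType = "proctrack/cgroup";
      #     extraCgroupConfig = ''
      #       ConstrainCores=yes
      #     '';
      #   };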

      stateSaveLocation = mkOption {
        type = types.str;
        default = "/var/spool/slurmctld";
        description = ''
          Directory into which the Slurm controller, slurmctld, saves its state.
        '';
      };

      user = mkOption {
        type = types.str;
        default = defaultUser;
        description = ''
          Set this option when you want to run the slurmctld daemon
          as something other than the default slurm user "slurm".
          Note that the UID of this user needs to be the same
          on all nodes.
        '';
      };

      extraConfig = mkOption {
        default = "";
        type = types.lines;
        description = ''
          Extra configuration options that will be added verbatim at
          the end of the slurm configuration file.
        '';
      };

      extraPlugstackConfig = mkOption {
        default = "";
        type = types.lines;
        description = ''
          Extra configuration that will be added to the end of <literal>plugstack.conf</literal>.
        '';
      };

      extraCgroupConfig = mkOption {
        default = "";
        type = types.lines;
        description = ''
          Extra configuration for <literal>cgroup.conf</literal>. This file is
          used when <literal>procTrackType=proctrack/cgroup</literal>.
        '';
      };

      extraConfigPaths = mkOption {
        type = with types; listOf path;
        default = [];
        description = ''
          Slurm expects config files for plugins in the same path
          as <literal>slurm.conf</literal>. Add extra nix store
          paths that should be merged into the same directory as
          <literal>slurm.conf</literal>.
        '';
      };

      etcSlurm = mkOption {
        type = types.path;
        internal = true;
        default = etcSlurm;
        description = ''
          Path to the directory with the slurm config files. This option is set by
          default by the Slurm module and is meant to make the Slurm config file
          available to other modules.
        '';
      };

    };

  };

  imports = [
    (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "storagePass" ] ''
      This option has been removed so that the database password is not exposed via the nix store.
      Use services.slurm.dbdserver.storagePassFile to provide the database password.
    '')
    (mkRemovedOptionModule [ "services" "slurm" "dbdserver" "configFile" ] ''
      This option has been removed. Use services.slurm.dbdserver.storagePassFile
      and services.slurm.dbdserver.extraConfig instead.
    '')
  ];
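
  # A sketch of wiring up slurmdbd with the password kept out of the Nix store,
  # as the removal notes above suggest; the secret path
  # /run/keys/slurmdbd-storage-pass and the StorageHost value are hypothetical
  # and depend on how secrets and MySQL are provisioned on the host.
  #
  #   services.slurm.dbdserver = {
  #     enable = true;
  #     storagePassFile = "/run/keys/slurmdbd-storage-pass";
  #     extraConfig = ''
  #       StorageHost=localhost
  #     '';
  #   };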

  ###### implementation

  config =
    let
      wrappedSlurm = pkgs.stdenv.mkDerivation {
        name = "wrappedSlurm";

        builder = pkgs.writeText "builder.sh" ''
          source $stdenv/setup
          mkdir -p $out/bin
          find ${getBin cfg.package}/bin -type f -executable | while read EXE
          do
            exename="$(basename $EXE)"
            wrappername="$out/bin/$exename"
            # The \$ escapes keep SLURM_CONF and the argument list unexpanded
            # while the heredoc is written at build time; only $EXE and the
            # Nix store paths end up hard-coded in the wrapper.
            cat > "$wrappername" <<EOT
          #!/bin/sh
          if [ -z "\$SLURM_CONF" ]
          then
            SLURM_CONF="${cfg.etcSlurm}/slurm.conf" "$EXE" "\$@"
          else
            "$EXE" "\$@"
          fi
          EOT
            chmod +x "$wrappername"
          done

          mkdir -p $out/share
          ln -s ${getBin cfg.package}/share/man $out/share/man
        '';
      };

    in mkIf ( cfg.enableStools ||
              cfg.client.enable ||
              cfg.server.enable ||
              cfg.dbdserver.enable ) {

      environment.systemPackages = [ wrappedSlurm ];

      services.munge.enable = mkDefault true;

      # use a static uid as default to ensure it is the same on all nodes
      users.users.slurm = mkIf (cfg.user == defaultUser) {
        name = defaultUser;
        group = "slurm";
        uid = config.ids.uids.slurm;
      };

      users.groups.slurm.gid = config.ids.uids.slurm;

      systemd.services.slurmd = mkIf (cfg.client.enable) {
        path = with pkgs; [ wrappedSlurm coreutils ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        after = [ "systemd-tmpfiles-clean.service" ];

        serviceConfig = {
          Type = "forking";
          KillMode = "process";
          ExecStart = "${wrappedSlurm}/bin/slurmd";
          PIDFile = "/run/slurmd.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
          LimitMEMLOCK = "infinity";
        };

        preStart = ''
          mkdir -p /var/spool
        '';
      };

      services.openssh.forwardX11 = mkIf cfg.client.enable (mkDefault true);

      systemd.services.slurmctld = mkIf (cfg.server.enable) {
        path = with pkgs; [ wrappedSlurm munge coreutils ]
          ++ lib.optional cfg.enableSrunX11 slurm-spank-x11;

        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" "munged.service" ];
        requires = [ "munged.service" ];

        serviceConfig = {
          Type = "forking";
          ExecStart = "${wrappedSlurm}/bin/slurmctld";
          PIDFile = "/run/slurmctld.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
        };

        preStart = ''
          mkdir -p ${cfg.stateSaveLocation}
          chown -R ${cfg.user}:slurm ${cfg.stateSaveLocation}
        '';
      };

      systemd.services.slurmdbd = let
        # slurm strips the last component off the path
        configPath = "$RUNTIME_DIRECTORY/slurmdbd.conf";
      in mkIf (cfg.dbdserver.enable) {
        path = with pkgs; [ wrappedSlurm munge coreutils ];

        wantedBy = [ "multi-user.target" ];
        after = [ "network.target" "munged.service" "mysql.service" ];
        requires = [ "munged.service" "mysql.service" ];

        preStart = ''
          install -m 600 -o ${cfg.user} -T ${slurmdbdConf} ${configPath}
          ${optionalString (cfg.dbdserver.storagePassFile != null) ''
            echo "StoragePass=$(cat ${cfg.dbdserver.storagePassFile})" \
              >> ${configPath}
          ''}
        '';

        script = ''
          export SLURM_CONF=${configPath}
          exec ${cfg.package}/bin/slurmdbd -D
        '';

        serviceConfig = {
          RuntimeDirectory = "slurmdbd";
          Type = "simple";
          PIDFile = "/run/slurmdbd.pid";
          ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID";
        };
      };

    };

}
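
# A sketch of how another NixOS module might consume the internal etcSlurm
# option described above, e.g. to point additional tooling at the generated
# configuration; the use of environment.variables here is only an illustration,
# since the wrapped binaries already set SLURM_CONF themselves.
#
#   environment.variables.SLURM_CONF =
#     "${config.services.slurm.etcSlurm}/slurm.conf";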