Merge pull request #41377 from markuskowa/slurm-ext-pr

nixos/slurm: Improve slurm configuration options and features.

Changed files
+95 -20
nixos
modules
services
computing
slurm
tests
pkgs
servers
computing
slurm
+56 -9
nixos/modules/services/computing/slurm/slurm.nix
···
cfg = config.services.slurm;
# configuration file can be generated by http://slurm.schedmd.com/configurator.html
-
configFile = pkgs.writeText "slurm.conf"
+
configFile = pkgs.writeTextDir "slurm.conf"
''
${optionalString (cfg.controlMachine != null) ''controlMachine=${cfg.controlMachine}''}
${optionalString (cfg.controlAddr != null) ''controlAddr=${cfg.controlAddr}''}
···
${cfg.extraConfig}
'';
-
plugStackConfig = pkgs.writeText "plugstack.conf"
+
plugStackConfig = pkgs.writeTextDir "plugstack.conf"
''
${optionalString cfg.enableSrunX11 ''optional ${pkgs.slurm-spank-x11}/lib/x11.so''}
+
${cfg.extraPlugstackConfig}
'';
+
+
+
cgroupConfig = pkgs.writeTextDir "cgroup.conf"
+
''
+
${cfg.extraCgroupConfig}
+
'';
+
+
# slurm expects some additional config files to be
+
# in the same directory as slurm.conf
+
etcSlurm = pkgs.symlinkJoin {
+
name = "etc-slurm";
+
paths = [ configFile cgroupConfig plugStackConfig ];
+
};
+
in
{
···
client = {
enable = mkEnableOption "slurm client daemon";
+
};
+
enableStools = mkOption {
+
type = types.bool;
+
default = false;
+
description = ''
+
Whether to provide a slurm.conf file.
+
Enable this option if you do not run a slurm daemon on this host
+
(i.e. <literal>server.enable</literal> and <literal>client.enable</literal> are <literal>false</literal>)
+
but you still want to run slurm commands from this host.
+
'';
};
package = mkOption {
···
example = "debug Nodes=linux[1-32] Default=YES MaxTime=INFINITE State=UP";
description = ''
Name by which the partition may be referenced. Note that now you have
-
to write patrition's parameters after the name.
+
to write the partition's parameters after the name.
'';
};
···
description = ''
If enabled srun will accept the option "--x11" to allow for X11 forwarding
from within an interactive session or a batch job. This activates the
-
slurm-spank-x11 module. Note that this requires 'services.openssh.forwardX11'
-
to be enabled on the compute nodes.
+
slurm-spank-x11 module. Note that this option also enables
+
'services.openssh.forwardX11' on the client.
+
+
This option requires slurm to be compiled without native X11 support.
'';
};
···
the end of the slurm configuration file.
'';
};
+
+
extraPlugstackConfig = mkOption {
+
default = "";
+
type = types.lines;
+
description = ''
+
Extra configuration that will be added to the end of <literal>plugstack.conf</literal>.
+
'';
+
};
+
+
extraCgroupConfig = mkOption {
+
default = "";
+
type = types.lines;
+
description = ''
+
Extra configuration for <literal>cgroup.conf</literal>. This file is
+
used when <literal>procTrackType=proctrack/cgroup</literal>.
+
'';
+
};
};
};
···
wrappedSlurm = pkgs.stdenv.mkDerivation {
name = "wrappedSlurm";
-
propagatedBuildInputs = [ cfg.package configFile ];
-
builder = pkgs.writeText "builder.sh" ''
source $stdenv/setup
mkdir -p $out/bin
···
#!/bin/sh
if [ -z "$SLURM_CONF" ]
then
-
SLURM_CONF="${configFile}" "$EXE" "\$@"
+
SLURM_CONF="${etcSlurm}/slurm.conf" "$EXE" "\$@"
else
"$EXE" "\$0"
fi
EOT
chmod +x "$wrappername"
done
+
+
mkdir -p $out/share
+
ln -s ${getBin cfg.package}/share/man $out/share/man
'';
};
-
in mkIf (cfg.client.enable || cfg.server.enable) {
+
in mkIf (cfg.enableStools || cfg.client.enable || cfg.server.enable) {
environment.systemPackages = [ wrappedSlurm ];
···
mkdir -p /var/spool
'';
};
+
+
services.openssh.forwardX11 = mkIf cfg.client.enable (mkDefault true);
systemd.services.slurmctld = mkIf (cfg.server.enable) {
path = with pkgs; [ wrappedSlurm munge coreutils ]
+18 -5
nixos/tests/slurm.nix
···
import ./make-test.nix ({ pkgs, ... }:
let mungekey = "mungeverryweakkeybuteasytointegratoinatest";
slurmconfig = {
-
client.enable = true;
controlMachine = "control";
nodeName = ''
control
···
# TODO slurmd port and slurmctld port should be configurable and
# automatically allowed by the firewall.
networking.firewall.enable = false;
-
services.slurm = slurmconfig;
+
services.slurm = {
+
client.enable = true;
+
} // slurmconfig;
};
in {
+
control =
{ config, pkgs, ...}:
{
···
server.enable = true;
} // slurmconfig;
};
+
+
submit =
+
{ config, pkgs, ...}:
+
{
+
networking.firewall.enable = false;
+
services.slurm = {
+
enableStools = true;
+
} // slurmconfig;
+
};
+
node1 = computeNode;
node2 = computeNode;
node3 = computeNode;
};
+
testScript =
''
startAll;
# Set up authentication across the cluster
-
foreach my $node (($control,$node1,$node2,$node3))
+
foreach my $node (($submit,$control,$node1,$node2,$node3))
{
$node->waitForUnit("default.target");
···
};
subtest "can_start_slurmd", sub {
-
foreach my $node (($control,$node1,$node2,$node3))
+
foreach my $node (($node1,$node2,$node3))
{
$node->succeed("systemctl restart slurmd.service");
$node->waitForUnit("slurmd");
···
subtest "run_distributed_command", sub {
# Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
# The output must contain the 3 different names
-
$control->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
+
$submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
};
'';
})
+21 -6
pkgs/servers/computing/slurm/default.nix
···
-
{ stdenv, fetchurl, pkgconfig, libtool, curl, python, munge, perl, pam, openssl
+
{ stdenv, fetchurl, pkgconfig, libtool, curl
+
, python, munge, perl, pam, openssl
, ncurses, mysql, gtk2, lua, hwloc, numactl
+
, readline, freeipmi, libssh2, xorg
+
# enable internal X11 support via libssh2
+
, enableX11 ? true
}:
stdenv.mkDerivation rec {
···
outputs = [ "out" "dev" ];
+
prePatch = stdenv.lib.optional enableX11 ''
+
substituteInPlace src/common/x11_util.c \
+
--replace '"/usr/bin/xauth"' '"${xorg.xauth}/bin/xauth"'
+
'';
+
# nixos test fails to start slurmd with 'undefined symbol: slurm_job_preempt_mode'
# https://groups.google.com/forum/#!topic/slurm-devel/QHOajQ84_Es
# this doesn't fix tests completely at least makes slurmd to launch
···
nativeBuildInputs = [ pkgconfig libtool ];
buildInputs = [
-
curl python munge perl pam openssl mysql.connector-c ncurses gtk2 lua hwloc numactl
-
];
+
curl python munge perl pam openssl
+
mysql.connector-c ncurses gtk2
+
lua hwloc numactl readline freeipmi
+
] ++ stdenv.lib.optionals enableX11 [ libssh2 xorg.xauth ];
-
configureFlags =
+
configureFlags = with stdenv.lib;
[ "--with-munge=${munge}"
"--with-ssl=${openssl.dev}"
+
"--with-hwloc=${hwloc.dev}"
+
"--with-freeipmi=${freeipmi}"
"--sysconfdir=/etc/slurm"
-
] ++ stdenv.lib.optional (gtk2 == null) "--disable-gtktest";
+
] ++ (optional (gtk2 == null) "--disable-gtktest")
+
++ (optional enableX11 "--with-libssh2=${libssh2.dev}");
+
preConfigure = ''
patchShebangs ./doc/html/shtml2html.py
···
description = "Simple Linux Utility for Resource Management";
platforms = platforms.linux;
license = licenses.gpl2;
-
maintainers = [ maintainers.jagajaga ];
+
maintainers = with maintainers; [ jagajaga markuskowa ];
};
}