at 18.09-beta 2.3 kB view raw
# NixOS VM test for Slurm.
#
# Topology: one controller ("control"), one submit host ("submit") and three
# compute nodes ("node1".."node3"). The test script installs the same munge key
# on every machine, restarts munged, slurmctld and slurmd, and then checks that
# `srun -N 3 hostname` reports three distinct host names.
import ./make-test.nix ({ ... }:
let
    # Munge key written verbatim to /etc/munge/munge.key on every machine by
    # the test script below. It only needs to be identical cluster-wide.
    mungekey = "mungeverryweakkeybuteasytointegratoinatest";

    # Slurm settings merged into `services.slurm` on every machine.
    # NOTE(review): the multi-line nodeName value presumably relies on how the
    # slurm module renders it into the generated configuration; confirm
    # against the module before reformatting it.
    slurmconfig = {
      controlMachine = "control";
      nodeName = ''
        control
        NodeName=node[1-3] CPUs=1 State=UNKNOWN
      '';
      partitionName = "debug Nodes=node[1-3] Default=YES MaxTime=INFINITE State=UP";
    };
in {
  name = "slurm";

  nodes =
    let
    # Configuration shared by node1, node2 and node3.
    computeNode =
      { ...}:
      {
      # TODO slurmd port and slurmctld port should be configurations and
      # automatically allowed by the firewall.
      networking.firewall.enable = false;
      services.slurm = {
        client.enable = true;
      } // slurmconfig;
      };
    in {

    # Controller machine (server.enable).
    control =
      { ...}:
      {
      networking.firewall.enable = false;
      services.slurm = {
        server.enable = true;
      } // slurmconfig;
      };

    # Submit host: only enableStools is set; jobs are launched from here.
    submit =
      { ...}:
      {
      networking.firewall.enable = false;
      services.slurm = {
        enableStools = true;
      } // slurmconfig;
      };

    node1 = computeNode;
    node2 = computeNode;
    node3 = computeNode;
  };


  testScript =
  ''
  startAll;

  # Set up authentication across the cluster
  foreach my $node (($submit,$control,$node1,$node2,$node3))
  {
    $node->waitForUnit("default.target");

    # -p keeps this step idempotent: a plain mkdir would abort the whole
    # test if the directory already existed.
    $node->succeed("mkdir -p /etc/munge");
    $node->succeed("echo '${mungekey}' > /etc/munge/munge.key");
    $node->succeed("chmod 0400 /etc/munge/munge.key");
    $node->succeed("chown munge:munge /etc/munge/munge.key");
    $node->succeed("systemctl restart munged");
  }

  # Restart the services since they have probably failed due to the munge init
  # failure

  subtest "can_start_slurmctld", sub {
    $control->succeed("systemctl restart slurmctld");
    $control->waitForUnit("slurmctld.service");
  };

  subtest "can_start_slurmd", sub {
    foreach my $node (($node1,$node2,$node3))
    {
      $node->succeed("systemctl restart slurmd.service");
      $node->waitForUnit("slurmd");
    }
  };

  # Test that the cluster works and can distribute jobs;

  subtest "run_distributed_command", sub {
    # Run `hostname` on 3 nodes of the partition (so on all the 3 nodes).
    # The output must contain the 3 different names
    $submit->succeed("srun -N 3 hostname | sort | uniq | wc -l | xargs test 3 -eq");
  };
  '';
})