1import ./make-test-python.nix ({ pkgs, lib, ... }: rec {
# Name under which this test is registered.
name = "pacemaker";

# Maintainers of this test, looked up in the nixpkgs maintainer list.
meta.maintainers = with pkgs.lib.maintainers; [ astro ];
6
# Three identically configured cluster members: node1, node2, node3.
nodes =
  let
    # Names of all cluster members, read back from the finished `nodes`
    # set (reachable here because the enclosing attribute set is `rec`).
    nodeNames = builtins.attrNames nodes;

    # First IPv4 address configured on eth1 for the named node; this is the
    # address corosync is told to use for that node's ring.
    ringAddr = name:
      (builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address;

    # Machine configuration shared by every member. `i` is used as the last
    # octet of the node's address in 192.168.0.0/24.
    node = i: {
      networking.interfaces.eth1.ipv4.addresses = [ {
        address = "192.168.0.${toString i}";
        prefixLength = 24;
      } ];

      services.corosync = {
        enable = true;
        clusterName = "zentralwerk-network";
        # One nodelist entry per member. `lib.imap1` supplies a 1-based
        # index, which serves as the corosync nodeid. (`lib.imap` is only a
        # deprecated alias of `imap1`; the index is named `nodeid` so it no
        # longer shadows the `i` argument of `node`.)
        nodelist = lib.imap1 (nodeid: name: {
          inherit nodeid name;
          ring_addrs = [ (ringAddr name) ];
        }) nodeNames;
      };

      # Shared cluster key, identical on all nodes.
      environment.etc."corosync/authkey" = {
        source = builtins.toFile "authkey"
          # minimum length: 128 bytes
          "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
        mode = "0400";
      };

      services.pacemaker.enable = true;

      # Unit used as the pacemaker-managed resource. It declares no
      # `wantedBy`, so it is not pulled in at boot by this definition.
      systemd.services.ha-cat = {
        description = "Highly available netcat";
        serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
      };
    };
  in {
    node1 = node 1;
    node2 = node 2;
    node3 = node 3;
  };
46
# Configures one pacemaker resource, crashes the node it is running on, and
# waits until pacemaker reports the resource running on a different node.
testScript =
  let
    # CIB <resources> section: a single systemd-class primitive named "cat"
    # wrapping the ha-cat unit, with stop/start/monitor operations.
    # The "stop-cat" operation is declared as name="stop"; it was previously
    # a second "start" entry, leaving the stop operation unconfigured.
    resources = builtins.toFile "cib-resources.xml" ''
      <resources>
        <primitive id="cat" class="systemd" type="ha-cat">
          <operations>
            <op id="stop-cat" name="stop" interval="0" timeout="1s"/>
            <op id="start-cat" name="start" interval="0" timeout="1s"/>
            <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
          </operations>
        </primitive>
      </resources>
    '';
  in ''
    import re
    import time


    def locate_cat(via):
        """Ask pacemaker, through machine `via`, where resource `cat` runs.

        Returns the matching entry of `machines`, or None while the resource
        is not reported as running anywhere. Raises if pacemaker names a node
        that is not one of the test machines.
        """
        output = via.succeed("crm_resource -r cat --locate")
        match = re.search("is running on: (.+)", output)
        if match is None:
            return None
        for machine in machines:
            if machine.name == match.group(1):
                return machine
        raise Exception(f"cat is reported on unknown node: {match.group(1)}")


    start_all()

    ${lib.concatMapStrings (node: ''
      ${node}.wait_until_succeeds("corosync-quorumtool")
      ${node}.wait_for_unit("pacemaker.service")
    '') (builtins.attrNames nodes)}

    # No STONITH device
    node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
    # Configure the cat resource
    node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")

    # wait until the service is started
    while True:
        current_node = locate_cat(node1)
        if current_node is not None:
            break
        time.sleep(1)

    current_node.log("Service running here!")
    current_node.crash()

    # pick another node that's still up
    check_node = next(machine for machine in machines if machine.booted)
    # find where the service has been started next
    while True:
        next_node = locate_cat(check_node)
        # pacemaker keeps reporting the old node until it detects the crash
        if next_node is not None and next_node is not current_node:
            break
        time.sleep(1)

    next_node.log("Service migrated here!")
  '';
110})