at 23.05-pre 3.6 kB view raw
# NixOS VM test "pacemaker".
#
# Boots three machines that run corosync and pacemaker, loads a single
# systemd-class resource ("cat", backed by the ha-cat unit defined below)
# into the cluster configuration, crashes the machine the resource is
# reported on, and then waits until the resource is reported on a
# different machine.
import ./make-test-python.nix ({ pkgs, lib, ... }: rec {
  name = "pacemaker";
  meta = with pkgs.lib.maintainers; {
    maintainers = [ astro ];
  };

  nodes =
    let
      # Configuration template shared by all cluster members. `i` is used
      # only as the last octet of the machine's eth1 address.
      node = i: {
        networking.interfaces.eth1.ipv4.addresses = [ {
          address = "192.168.0.${toString i}";
          prefixLength = 24;
        } ];

        services.corosync = {
          enable = true;
          clusterName = "zentralwerk-network";
          # One entry per attribute of `nodes` (this is why the outer set
          # is `rec`). imap1 numbers the entries starting at 1; the ring
          # address is the first eth1 address configured for that node.
          nodelist = lib.imap1 (nodeIndex: nodeName: {
            nodeid = nodeIndex;
            name = nodeName;
            ring_addrs = [
              (builtins.head nodes.${nodeName}.networking.interfaces.eth1.ipv4.addresses).address
            ];
          }) (builtins.attrNames nodes);
        };
        environment.etc."corosync/authkey" = {
          source = builtins.toFile "authkey"
            # minimum length: 128 bytes
            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
          mode = "0400";
        };

        services.pacemaker.enable = true;

        # used for pacemaker resource
        systemd.services.ha-cat = {
          description = "Highly available netcat";
          serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
        };
      };
    in {
      node1 = node 1;
      node2 = node 2;
      node3 = node 3;
    };

  # sets up pacemaker with resources configuration, then crashes a
  # node and waits for service restart on another node
  testScript =
    let
      # Resources section of the CIB, loaded with cibadmin below. It
      # declares one primitive of class "systemd" and type "ha-cat" with
      # explicit stop, start and monitor operations.
      resources = builtins.toFile "cib-resources.xml" ''
        <resources>
          <primitive id="cat" class="systemd" type="ha-cat">
            <operations>
              <op id="stop-cat" name="stop" interval="0" timeout="1s"/>
              <op id="start-cat" name="start" interval="0" timeout="1s"/>
              <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
            </operations>
          </primitive>
        </resources>
      '';
    in ''
      import re
      import time

      start_all()

      ${lib.concatMapStrings (node: ''
        ${node}.wait_until_succeeds("corosync-quorumtool")
        ${node}.wait_for_unit("pacemaker.service")
      '') (builtins.attrNames nodes)}

      # No STONITH device
      node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
      # Configure the cat resource
      node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")

      # wait until the service is started
      while True:
          output = node1.succeed("crm_resource -r cat --locate")
          match = re.search("is running on: (.+)", output)
          if match:
              # map the reported host name back to its machine object
              for machine in machines:
                  if machine.name == match.group(1):
                      current_node = machine
              # leave the polling loop (not just the for loop above)
              break
          time.sleep(1)

      current_node.log("Service running here!")
      current_node.crash()

      # pick another node that's still up
      for machine in machines:
          if machine.booted:
              check_node = machine
      # find where the service has been started next
      while True:
          output = check_node.succeed("crm_resource -r cat --locate")
          match = re.search("is running on: (.+)", output)
          # output will remain the old current_node until the crash is detected by pacemaker
          if match and match.group(1) != current_node.name:
              for machine in machines:
                  if machine.name == match.group(1):
                      next_node = machine
              # leave the polling loop (not just the for loop above)
              break
          time.sleep(1)

      next_node.log("Service migrated here!")
    '';
})