# (web viewer header: at 25.11-pre · 3.8 kB · view raw)
# NixOS VM test for a three-node Corosync/Pacemaker cluster.
#
# Every node joins the same Corosync cluster and runs Pacemaker. The test
# script configures one systemd-class resource ("cat"), crashes the node that
# is running it, and then waits until Pacemaker reports the resource as
# running on a different node.
import ./make-test-python.nix (
  { pkgs, lib, ... }:
  rec {
    name = "pacemaker";
    meta = with pkgs.lib.maintainers; {
      maintainers = [ astro ];
    };

    nodes =
      let
        # Configuration of cluster member number `i`; `i` becomes the last
        # octet of the node's address on eth1.
        node = i: {
          networking.interfaces.eth1.ipv4.addresses = [
            {
              address = "192.168.0.${toString i}";
              prefixLength = 24;
            }
          ];

          services.corosync = {
            enable = true;
            clusterName = "zentralwerk-network";
            # One nodelist entry per test machine. Node IDs are 1-based
            # (imap1) and each ring address is taken from the first eth1
            # address of the corresponding entry in `nodes` (hence `rec`).
            nodelist = lib.imap1 (nodeid: name: {
              inherit nodeid name;
              ring_addrs = [
                (builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address
              ];
            }) (builtins.attrNames nodes);
          };
          # Shared cluster key; identical on all nodes because the same
          # store file is used everywhere.
          environment.etc."corosync/authkey" = {
            source =
              builtins.toFile "authkey"
                # minimum length: 128 bytes
                "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
            mode = "0400";
          };

          services.pacemaker.enable = true;

          # used for pacemaker resource
          # (no wantedBy: the unit is only started through the "cat"
          # primitive configured by the test script)
          systemd.services.ha-cat = {
            description = "Highly available netcat";
            serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
          };
        };
      in
      {
        node1 = node 1;
        node2 = node 2;
        node3 = node 3;
      };

    # sets up pacemaker with resources configuration, then crashes a
    # node and waits for service restart on another node
    testScript =
      let
        # CIB <resources> section defining the "cat" primitive, backed by the
        # ha-cat systemd unit. The stop operation must be named "stop";
        # declaring it as a second "start" leaves the stop action
        # unconfigured.
        resources = builtins.toFile "cib-resources.xml" ''
          <resources>
            <primitive id="cat" class="systemd" type="ha-cat">
              <operations>
                <op id="stop-cat" name="stop" interval="0" timeout="1s"/>
                <op id="start-cat" name="start" interval="0" timeout="1s"/>
                <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
              </operations>
            </primitive>
          </resources>
        '';
      in
      ''
        import re
        import time

        start_all()

        ${lib.concatMapStrings (node: ''
          ${node}.wait_until_succeeds("corosync-quorumtool")
          ${node}.wait_for_unit("pacemaker.service")
        '') (builtins.attrNames nodes)}

        # No STONITH device
        node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
        # Configure the cat resource
        node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")

        # wait until the service is started
        while True:
            output = node1.succeed("crm_resource -r cat --locate")
            match = re.search("is running on: (.+)", output)
            if match:
                # map the reported node name back to its machine object
                for machine in machines:
                    if machine.name == match.group(1):
                        current_node = machine
                break
            time.sleep(1)

        current_node.log("Service running here!")
        current_node.crash()

        # pick another node that's still up
        for machine in machines:
            if machine.booted:
                check_node = machine
        # find where the service has been started next
        while True:
            output = check_node.succeed("crm_resource -r cat --locate")
            match = re.search("is running on: (.+)", output)
            # output will remain the old current_node until the crash is detected by pacemaker
            if match and match.group(1) != current_node.name:
                for machine in machines:
                    if machine.name == match.group(1):
                        next_node = machine
                break
            time.sleep(1)

        next_node.log("Service migrated here!")
      '';
  }
)