# This performs a full 'end-to-end' test of a multi-node CockroachDB cluster
# using the built-in 'cockroach workload' command, to simulate a semi-realistic
# test load. It generally takes anywhere from 3-5 minutes to run and 1-2GB of
# RAM (though each of 3 workers gets 2GB allocated)
#
# CockroachDB requires synchronized system clocks within a small error window
# (~500ms by default) on each node in order to maintain a multi-node cluster.
# Cluster joins that are outside this window will fail, and nodes that skew
# outside the window after joining will promptly get kicked out.
#
# To accommodate this, we use QEMU/virtio infrastructure and load the 'ptp_kvm'
# driver inside a guest. This driver allows the host machine to pass its clock
# through to the guest as a hardware clock that appears as a Precision Time
# Protocol (PTP) Clock device, generally /dev/ptp0. PTP devices can be measured
# and used as hardware reference clocks (similar to an on-board GPS clock) by
# NTP software. In our case, we use Chrony to synchronize to the reference
# clock.
#
# This test is currently NOT enabled as a continuously-checked NixOS test.
# Ideally, this test would be run by Hydra and Borg on all relevant changes,
# except:
#
#   - Not every build machine is compatible with the ptp_kvm driver.
#     Virtualized EC2 instances, for example, do not support loading the ptp_kvm
#     driver into guests. However, bare metal builders (e.g. Packet) do seem to
#     work just fine. In practice, this means x86_64-linux builds would fail
#     randomly, depending on which build machine got the job. (This is probably
#     worth some investigation; I imagine it's based on ptp_kvm's usage of paravirt
#     support which may not be available in 'nested' environments.)
#
#   - ptp_kvm is not supported on aarch64, otherwise it seems likely Cockroach
#     could be tested there, as well. This seems to be due to the usage of
#     the TSC in ptp_kvm, which isn't supported (easily) on AArch64. (And:
#     testing stuff, not just making sure it builds, is important to ensure
#     aarch64 support remains viable.)
#
# For future developers who are reading this message, are daring and would want
# to fix this, some options are:
#
#   - Just test a single node cluster instead (boring and less thorough).
#   - Move all CI to bare metal packet builders, and we can at least do x86_64-linux.
#   - Get virtualized clocking working in aarch64, somehow.
#   - Add a 4th node that acts as an NTP service and uses no PTP clocks for
#     references, at the client level. This bloats the node and memory
#     requirements, but would probably allow both aarch64/x86_64 to work.
#

{ lib, ... }:

let
  # Creates a node. If 'joinNode' parameter, a string containing an IP address,
  # is non-null, then the CockroachDB server will attempt to join/connect to
  # the cluster node specified at that address.
  #
  # Arguments (curried, in order):
  #   locality - value assigned to services.cockroachdb.locality.
  #   myAddr   - value assigned to services.cockroachdb.listen.address.
  #   joinNode - address of the node to join, or null to not set
  #              services.cockroachdb.join at all.
  #
  # The result is a NixOS module function, suitable as an entry of 'nodes'.
  makeNode =
    locality: myAddr: joinNode:
    {
      nodes,
      pkgs,
      lib,
      config,
      ...
    }:
    {
      # Bank/TPC-C benchmarks take some memory to complete
      virtualisation.memorySize = 2048;

      # Install the KVM PTP "Virtualized Clock" driver. This allows a /dev/ptp0
      # device to appear as a reference clock, synchronized to the host clock.
      # Because CockroachDB *requires* a time-synchronization mechanism for
      # the system time in a cluster scenario, this is necessary to work.
      boot.kernelModules = [ "ptp_kvm" ];

      # Enable and configure Chrony, using the given virtualized clock passed
      # through by KVM. The server list is forced empty so that the PHC
      # refclock configured below is the only time source.
      services.chrony.enable = true;
      services.chrony.servers = lib.mkForce [ ];
      services.chrony.extraConfig = ''
        refclock PHC /dev/ptp0 poll 2 prefer require refid KVM
        makestep 0.1 3
      '';

      # Enable CockroachDB. In order to ensure that Chrony has performed its
      # first synchronization at boot-time (which may take ~10 seconds) before
      # starting CockroachDB, we block the ExecStartPre directive using the
      # 'waitsync' command. This ensures Cockroach doesn't have its system time
      # leap forward out of nowhere during startup/execution.
      #
      # Note that the default threshold for NTP-based skew in CockroachDB is
      # ~500ms, so making sure it's started *after* accurate time
      # synchronization is extremely important.
      services.cockroachdb.enable = true;
      services.cockroachdb.insecure = true;
      services.cockroachdb.openPorts = true;
      services.cockroachdb.locality = locality;
      services.cockroachdb.listen.address = myAddr;
      # Only set 'join' when a node to join was given; the first node passes null.
      services.cockroachdb.join = lib.mkIf (joinNode != null) joinNode;

      # Only start chronyd when the PTP clock device is actually present.
      systemd.services.chronyd.unitConfig.ConditionPathExists = "/dev/ptp0";

      # Hold startup until Chrony has performed its first measurement (which
      # will probably result in a full timeskip, thanks to makestep)
      #
      # NOTE(review): 'waitsync' is called without a max-tries argument, which
      # per the chronyc documentation appears to mean "retry without limit" --
      # confirm that a unit/test timeout bounds this if /dev/ptp0 never syncs.
      systemd.services.cockroachdb.preStart = ''
        ${pkgs.chrony}/bin/chronyc waitsync
      '';
    };
in
{
  name = "cockroachdb";
  meta.maintainers = with lib.maintainers; [ thoughtpolice ];

  # Three-node cluster: node1 bootstraps (no join target), node2 and node3
  # join node1 at 192.168.1.1.
  nodes = {
    node1 = makeNode "country=us,region=east,dc=1" "192.168.1.1" null;
    node2 = makeNode "country=us,region=west,dc=2b" "192.168.1.2" "192.168.1.1";
    node3 = makeNode "country=eu,region=west,dc=2" "192.168.1.3" "192.168.1.1";
  };

  # NOTE: All the nodes must start in order and you must NOT use startAll, because
  # there's otherwise no way to guarantee that node1 will start before the others try
  # to join it.
  testScript = ''
    for node in node1, node2, node3:
        node.start()
        node.wait_for_unit("cockroachdb")
    node1.succeed(
        "cockroach sql --host=192.168.1.1 --insecure -e 'SHOW ALL CLUSTER SETTINGS' 2>&1",
        "cockroach workload init bank 'postgresql://root@192.168.1.1:26257?sslmode=disable'",
        "cockroach workload run bank --duration=1m 'postgresql://root@192.168.1.1:26257?sslmode=disable'",
    )
  '';
}