at master 3.6 kB view raw
{ pkgs, lib, ... }:
{
  name = "netbird";

  meta.maintainers = with pkgs.lib.maintainers; [
    nazarewk
  ];

  # A single VM running two NetBird client instances:
  # the default one and an extra one named `custom` on its own port.
  nodes.node =
    { ... }:
    {
      services.netbird = {
        enable = true;
        clients.custom.port = 51819;
      };
    };

  /*
    Historically, waiting for the NetBird client daemon to initialize has helped catch a number of bugs
    in the service, so we try to keep that check here in as much detail as makes sense.

    Initially `netbird status` returns "Disconnected" messages:
      OS: linux/amd64
      Daemon version: 0.54.0
      CLI version: 0.54.0
      Profile: default
      Management: Disconnected, reason: rpc error: code = FailedPrecondition desc = failed connecting to Management Service : context deadline exceeded
      Signal: Disconnected
      Relays: 0/0 Available
      Nameservers: 0/0 Available
      FQDN:
      NetBird IP: N/A
      Interface type: N/A
      Quantum resistance: false
      Lazy connection: false
      Networks: -
      Forwarding rules: 0
      Peers count: 0/0 Connected

    After a while it should start returning the "NeedsLogin" help message.

    As of ~0.53.0+, instead of returning "NeedsLogin", `netbird status` briefly (for under 2 seconds)
    crashes at ~30 second intervals with:

      Error: status failed: failed connecting to Management Service : context deadline exceeded

    This might be related to the following log line:

      2025-08-11T15:03:25Z ERRO shared/management/client/grpc.go:65: failed creating connection to Management Service: context deadline exceeded
  */
  # TODO: confirm the whole solution is working end-to-end when netbird server is implemented
  testScript = ''
    import textwrap
    import time

    start_all()


    def run_with_debug(node, cmd, check=True, display=True, **kwargs):
        """Run `cmd` on `node` with stderr merged into stdout and report the outcome.

        Builds a report containing the command, its exit code, how long it took
        and its indented output. With `check` set, a non-zero exit code fails an
        assertion carrying that report; with `display` set, the report is printed.
        Extra keyword arguments are forwarded to `node.execute`.

        Returns the `(ret, output)` pair produced by `node.execute`.
        """
        cmd = f"{cmd} 2>&1"
        started_at = time.time()
        ret, output = node.execute(cmd, **kwargs)
        duration = time.time() - started_at
        indented = textwrap.indent(output, "... ")
        # `cmd`, `ret` and `duration` keep their names: the `=` specifier prints them.
        report = f">>> {cmd=} {ret=} {duration=:.2f}:\n{indented}"
        if check:
            assert ret == 0, report
        if display:
            print(report)
        return ret, output


    def wait_until_rcode(node, cmd, rcode=0, retries=30, **kwargs):
        """Re-run `cmd` on `node` via `retry` until it exits with `rcode`.

        `retries` is handed to `retry` as its second argument. Unless the caller
        overrides it, `check` is disabled so that unexpected exit codes lead to
        another attempt instead of an assertion failure.

        Returns the output captured by the most recent attempt.
        """
        kwargs.setdefault("check", False)
        output = None

        def attempt(_last_try):
            # Remember the latest output so the caller can inspect it afterwards.
            nonlocal output
            ret, output = run_with_debug(node, cmd, **kwargs)
            return ret == rcode

        with node.nested(f"waiting for {cmd=} to exit with {rcode=}"):
            retry(attempt, retries)
        return output


    instances = ["netbird", "netbird-custom"]

    # First make sure every daemon is up and has created its control socket ...
    for name in instances:
        node.wait_for_unit(f"{name}.service")
        node.wait_for_file(f"/var/run/{name}/sock")

    # ... then expect each client to report the initial "Disconnected" status.
    for name in instances:
        wait_until_rcode(node, f"{name} status |& grep -C20 Disconnected", 0, retries=5)
  ''
  # The status used to turn into `NeedsLogin`, but recently started crashing instead.
  # Leaving the snippets in here, in case some update goes back to the old behavior and can be tested again.
  + lib.optionalString false ''
    for name in instances:
        #wait_until_rcode(node, f"{name} status |& grep -C20 NeedsLogin", 0, retries=20)
        output = wait_until_rcode(node, f"{name} status", 1, retries=61)
        msg = "Error: status failed: failed connecting to Management Service : context deadline exceeded"
        assert output.strip() == msg, f"expected {msg=}, got {output=} instead"
        wait_until_rcode(node, f"{name} status |& grep -C20 Disconnected", 0, retries=10)
  '';
}