1{ pkgs, lib, ... }:
2{
# Name of this NixOS test.
name = "netbird";

# Maintainer of this test, referenced by its full attribute path.
meta.maintainers = [ pkgs.lib.maintainers.nazarewk ];
8
# The only machine of this test: it enables the NetBird service and declares
# an additional client named `custom` with its own `port` setting.
nodes.node =
  { ... }:
  {
    services.netbird = {
      enable = true;
      clients.custom.port = 51819;
    };
  };
17
18 /*
Historically, waiting for the NetBird client daemon to initialize helped catch a number of bugs in the service,
so we try to keep that wait here in as much detail as makes sense.
21
Initially `netbird status` returns a "Disconnected" message:
23 OS: linux/amd64
24 Daemon version: 0.54.0
25 CLI version: 0.54.0
26 Profile: default
27 Management: Disconnected, reason: rpc error: code = FailedPrecondition desc = failed connecting to Management Service : context deadline exceeded
28 Signal: Disconnected
29 Relays: 0/0 Available
30 Nameservers: 0/0 Available
31 FQDN:
32 NetBird IP: N/A
33 Interface type: N/A
34 Quantum resistance: false
35 Lazy connection: false
36 Networks: -
37 Forwarding rules: 0
38 Peers count: 0/0 Connected
39
After a while it should start returning the "NeedsLogin" help message.
41
As of ~0.53.0+, at ~30 second intervals `netbird status` does not return "NeedsLogin"; instead it briefly (for under 2 seconds) crashes with:
43
44 Error: status failed: failed connecting to Management Service : context deadline exceeded
45
46 This might be related to the following log line:
47
48 2025-08-11T15:03:25Z ERRO shared/management/client/grpc.go:65: failed creating connection to Management Service: context deadline exceeded
49 */
50 # TODO: confirm the whole solution is working end-to-end when netbird server is implemented
51 testScript = ''
52 import textwrap
53 import time
54
55 start_all()
56
def run_with_debug(node, cmd, check=True, display=True, **kwargs):
    """Execute `cmd` on `node` with stderr folded into stdout and report on it.

    A report is built from the final command line, its exit code, the elapsed
    wall-clock time and the output (each output line prefixed with "... ").

    Args:
        node: object exposing `execute(cmd, **kwargs)` that returns an
            `(exit code, output)` pair.
        cmd: shell command; " 2>&1" is appended before it is executed.
        check: when true, a non-zero exit code fails an assertion whose
            message is the report.
        display: when true, the report is printed (only reached if the
            check did not fail).
        **kwargs: forwarded unchanged to `node.execute`.

    Returns:
        The `(exit code, output)` pair obtained from `node.execute`.
    """
    # NOTE: `cmd`, `ret` and `duration` keep their names on purpose, the
    # `=` specifier in the report below prints the variable names as well.
    cmd = f"{cmd} 2>&1"

    started_at = time.time()
    ret, output = node.execute(cmd, **kwargs)
    duration = time.time() - started_at

    indented_output = textwrap.indent(output, "... ")
    txt = f">>> {cmd=} {ret=} {duration=:.2f}:\n{indented_output}"

    # Fail first, so a failing command is reported through the assertion only.
    assert not check or ret == 0, txt
    if display:
        print(txt)
    return ret, output
68
def wait_until_rcode(node, cmd, rcode=0, retries=30, **kwargs):
    """Keep running `cmd` on `node` until it exits with `rcode`.

    Every attempt goes through `run_with_debug`; unless the caller passes
    `check` explicitly it is set to False, so an unexpected exit code does
    not fail an assertion inside a single attempt.

    Args:
        node: machine to run on; also provides the `nested` log context.
        cmd: shell command to run on each attempt.
        rcode: exit code that counts as success.
        retries: passed as the second argument to `retry`
            (presumably the NixOS test driver helper; how it counts
            attempts is not visible here, TODO confirm).
        **kwargs: forwarded to `run_with_debug`.

    Returns:
        The output of the most recent attempt.
    """
    kwargs.setdefault("check", False)
    last_output = None

    def attempt(_last_try):
        # Remember the output so it can be returned once polling stops.
        nonlocal last_output
        ret, last_output = run_with_debug(node, cmd, **kwargs)
        return ret == rcode

    with node.nested(f"waiting for {cmd=} to exit with {rcode=}"):
        retry(attempt, retries)
    return last_output
80
# Commands/units under test; presumably the default client and the one
# declared as `clients.custom` in the node configuration (verify against
# the NetBird module).
instances = ["netbird", "netbird-custom"]

# Phase 1: every daemon must be running and must have created its socket.
for name in instances:
    node.wait_for_unit(f"{name}.service")
    node.wait_for_file(f"/var/run/{name}/sock")

# Phase 2: only afterwards poll each CLI until its status output
# contains "Disconnected".
for name in instances:
    wait_until_rcode(
        node,
        f"{name} status |& grep -C20 Disconnected",
        rcode=0,
        retries=5,
    )
89 ''
# The status used to turn into `NeedsLogin`, but recently started crashing instead.
# Leaving the snippets here in case some update goes back to the old behavior and it can be tested again.
92 + lib.optionalString false ''
# Exact error text expected while the daemon cannot reach the Management Service.
# NOTE: `msg` and `output` keep their names, the assertion message prints them via `=`.
msg = "Error: status failed: failed connecting to Management Service : context deadline exceeded"

for name in instances:
    # Older behavior, kept for reference:
    #wait_until_rcode(node, f"{name} status |& grep -C20 NeedsLogin", 0, retries=20)

    # Wait for `status` to exit with code 1 and compare its whole output.
    output = wait_until_rcode(node, f"{name} status", rcode=1, retries=61)
    assert output.strip() == msg, f"expected {msg=}, got {output=} instead"

    # Afterwards the output is expected to contain "Disconnected" again.
    wait_until_rcode(
        node,
        f"{name} status |& grep -C20 Disconnected",
        rcode=0,
        retries=10,
    )
99 '';
100}