# NixOS VM test for kthxbye.
#
# A single machine ("server") runs Prometheus, Alertmanager and kthxbye.
# Prometheus is given one rule, `node_up`, whose expression is `up != 0`.
# The test script creates a 20 second silence for that alert, waits 21 seconds,
# and checks that the silence is still listed by amtool.
import ./make-test-python.nix (
  { lib, pkgs, ... }:
  {
    name = "kthxbye";

    meta = with lib.maintainers; {
      maintainers = [ nukaduka ];
    };

    nodes.server =
      { ... }:
      {
        # Provides the `amtool` CLI used by the test script below.
        environment.systemPackages = with pkgs; [ prometheus-alertmanager ];

        services.prometheus = {
          enable = true;

          # Short intervals so the rule is evaluated well within the test's
          # sleep windows.
          globalConfig = {
            scrape_interval = "5s";
            scrape_timeout = "5s";
            evaluation_interval = "5s";
          };

          # Prometheus scrapes itself.
          scrapeConfigs = [
            {
              job_name = "prometheus";
              scrape_interval = "5s";
              static_configs = [
                {
                  targets = [ "localhost:9090" ];
                }
              ];
            }
          ];

          # `up != 0` matches while the scrape target is up, so this alert is
          # presumably meant to fire for as long as the test runs.
          rules = [
            ''
              groups:
                - name: test
                  rules:
                    - alert: node_up
                      expr: up != 0
                      for: 5s
                      labels:
                        severity: bottom of the barrel
                      annotations:
                        summary: node is fine
            ''
          ];

          # Send alerts to the local Alertmanager instance.
          alertmanagers = [
            {
              static_configs = [
                {
                  targets = [
                    "localhost:9093"
                  ];
                }
              ];
            }
          ];

          alertmanager = {
            enable = true;
            openFirewall = true;
            configuration.route = {
              receiver = "test";
              group_wait = "5s";
              group_interval = "5s";
              group_by = [ "..." ];
            };
            # NOTE(review): nothing in this file listens on port 1234; the
            # webhook looks like a placeholder receiver -- confirm.
            configuration.receivers = [
              {
                name = "test";
                webhook_configs = [
                  {
                    url = "http://localhost:1234";
                  }
                ];
              }
            ];
          };
        };

        # kthxbye runs every 5s with a 30s expiry window, so the 20s silence
        # created by the test falls inside that window from the start.
        services.kthxbye = {
          enable = true;
          openFirewall = true;
          extendIfExpiringIn = "30s";
          logJSON = true;
          maxDuration = "15m";
          interval = "5s";
        };
      };

    # The restart and status calls go through `succeed` so that a non-zero
    # exit status fails the test instead of being discarded.
    # NOTE(review): the silence comment starts with "ACK!", presumably the
    # prefix kthxbye looks for when extending silences -- confirm upstream.
    testScript = ''
      with subtest("start the server"):
          start_all()
          server.wait_for_unit("prometheus.service")
          server.wait_for_unit("alertmanager.service")
          server.wait_for_unit("kthxbye.service")

          server.sleep(2) # wait for units to settle
          server.succeed("systemctl restart kthxbye.service") # make sure kthxbye comes up after alertmanager
          server.sleep(2)

      with subtest("set up test silence which expires in 20s"):
          server.succeed('amtool --alertmanager.url "http://localhost:9093" silence add alertname="node_up" -a "nixosTest" -d "20s" -c "ACK! this server is fine!!"')

      with subtest("wait for 21 seconds and check if the silence is still active"):
          server.sleep(21)
          server.log(server.succeed("systemctl --no-pager status kthxbye.service"))
          server.succeed("amtool --alertmanager.url 'http://localhost:9093' silence | grep 'ACK'")
    '';
  }
)