# NixOS VM test for the kthxbye service.
#
# A single machine runs Prometheus, Alertmanager and kthxbye. The test script
# creates a 20 second silence through amtool, waits 21 seconds, and then
# checks that the silence is still listed, i.e. that it outlived its original
# duration.
import ./make-test-python.nix ({ lib, pkgs, ... }:
{
  name = "kthxbye";

  meta = with lib.maintainers; {
    maintainers = [ nukaduka ];
  };

  nodes.server = { ... }: {
    # The test script calls `amtool`; this is the only package installed for it.
    environment.systemPackages = with pkgs; [ prometheus-alertmanager ];

    services.prometheus = {
      enable = true;

      # Short intervals keep the total runtime of the test low.
      globalConfig = {
        scrape_interval = "5s";
        scrape_timeout = "5s";
        evaluation_interval = "5s";
      };

      # Prometheus scrapes itself so that the `up` series exists.
      scrapeConfigs = [
        {
          job_name = "prometheus";
          scrape_interval = "5s";
          static_configs = [
            {
              targets = [ "localhost:9090" ];
            }
          ];
        }
      ];

      # `up != 0` holds whenever the scrape above succeeds, so this alert is
      # expected to fire for the whole test and gives the silence something
      # to match.
      rules = [
        ''
          groups:
            - name: test
              rules:
                - alert: node_up
                  expr: up != 0
                  for: 5s
                  labels:
                    severity: bottom of the barrel
                  annotations:
                    summary: node is fine
        ''
      ];

      # Deliver alerts to the local Alertmanager configured below.
      alertmanagers = [
        {
          static_configs = [
            {
              targets = [
                "localhost:9093"
              ];
            }
          ];
        }
      ];

      alertmanager = {
        enable = true;
        openFirewall = true;
        configuration.route = {
          receiver = "test";
          group_wait = "5s";
          group_interval = "5s";
          group_by = [ "..." ];
        };
        # Nothing in this test listens on port 1234; the receiver only has to
        # exist so that the route above has a target.
        configuration.receivers = [
          {
            name = "test";
            webhook_configs = [
              {
                url = "http://localhost:1234";
              }
            ];
          }
        ];
      };
    };

    # NOTE(review): option semantics are defined by the kthxbye module, which
    # is not visible here. The 20s test silence is shorter than
    # `extendIfExpiringIn`, so it presumably qualifies for extension on the
    # first poll -- confirm against the module documentation.
    services.kthxbye = {
      enable = true;
      openFirewall = true;
      extendIfExpiringIn = "30s";
      logJSON = true;
      maxDuration = "15m";
      interval = "5s";
    };
  };

  testScript = ''
    with subtest("start the server"):
        start_all()
        server.wait_for_unit("prometheus.service")
        server.wait_for_unit("alertmanager.service")
        server.wait_for_unit("kthxbye.service")

        # An active unit is not necessarily accepting connections yet, so wait
        # for the listening sockets instead of sleeping for a fixed time.
        server.wait_for_open_port(9090)
        server.wait_for_open_port(9093)

        # Make sure kthxbye comes up after alertmanager. Use succeed() so that
        # a failed restart aborts the test instead of being silently ignored.
        server.succeed("systemctl restart kthxbye.service")
        server.wait_for_unit("kthxbye.service")

    with subtest("wait for the test alert to reach alertmanager"):
        # Creating the silence only once the alert exists keeps the timing of
        # the final check independent of how fast Prometheus evaluates rules.
        server.wait_until_succeeds('amtool --alertmanager.url "http://localhost:9093" alert query alertname="node_up" | grep "node_up"')

    with subtest("set up test silence which expires in 20s"):
        server.succeed('amtool --alertmanager.url "http://localhost:9093" silence add alertname="node_up" -a "nixosTest" -d "20s" -c "ACK! this server is fine!!"')

    with subtest("wait for 21 seconds and check if the silence is still active"):
        server.sleep(21)
        # Fail early with a clear cause if kthxbye is no longer running.
        server.succeed("systemctl is-active kthxbye.service")
        server.succeed("amtool --alertmanager.url 'http://localhost:9093' silence | grep 'ACK'")
  '';
})