1{ pkgs, ... }:
2
3{
4 name = "prometheus-alertmanager";
5
6 nodes = {
7 prometheus =
8 { config, pkgs, ... }:
9 {
10 environment.systemPackages = [ pkgs.jq ];
11
12 networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
13
14 services.prometheus = {
15 enable = true;
16 globalConfig.scrape_interval = "2s";
17
18 alertmanagers = [
19 {
20 scheme = "http";
21 static_configs = [
22 { targets = [ "alertmanager:${toString config.services.prometheus.alertmanager.port}" ]; }
23 ];
24 }
25 ];
26
27 rules = [
28 ''
29 groups:
30 - name: test
31 rules:
32 - alert: InstanceDown
33 expr: up == 0
34 for: 5s
35 labels:
36 severity: page
37 annotations:
38 summary: "Instance {{ $labels.instance }} down"
39 ''
40 ];
41
42 scrapeConfigs = [
43 {
44 job_name = "alertmanager";
45 static_configs = [
46 { targets = [ "alertmanager:${toString config.services.prometheus.alertmanager.port}" ]; }
47 ];
48 }
49 {
50 job_name = "node";
51 static_configs = [
52 { targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; }
53 ];
54 }
55 ];
56 };
57 };
58
59 alertmanager =
60 { config, pkgs, ... }:
61 {
62 services.prometheus.alertmanager = {
63 enable = true;
64 openFirewall = true;
65
66 configuration = {
67 global = {
68 resolve_timeout = "1m";
69 };
70
71 route = {
72 # Root route node
73 receiver = "test";
74 group_by = [ "..." ];
75 continue = false;
76 group_wait = "1s";
77 group_interval = "15s";
78 repeat_interval = "24h";
79 };
80
81 receivers = [
82 {
83 name = "test";
84 webhook_configs = [
85 {
86 url = "http://logger:6725";
87 send_resolved = true;
88 max_alerts = 0;
89 }
90 ];
91 }
92 ];
93 };
94 };
95 };
96
97 logger =
98 { config, pkgs, ... }:
99 {
100 networking.firewall.allowedTCPPorts = [ 6725 ];
101
102 services.prometheus.alertmanagerWebhookLogger.enable = true;
103 };
104 };
105
106 testScript = ''
107 alertmanager.wait_for_unit("alertmanager")
108 alertmanager.wait_for_open_port(9093)
109 alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready")
110 #alertmanager.wait_until_succeeds("journalctl -o cat -u alertmanager.service | grep 'version=${pkgs.prometheus-alertmanager.version}'")
111
112 logger.wait_for_unit("alertmanager-webhook-logger")
113 logger.wait_for_open_port(6725)
114
115 prometheus.wait_for_unit("prometheus")
116 prometheus.wait_for_open_port(9090)
117
118 prometheus.wait_until_succeeds(
119 "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | "
120 + "jq '.data.result[0].value[1]' | grep '\"1\"'"
121 )
122
123 prometheus.wait_until_succeeds(
124 "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | "
125 + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'"
126 )
127
128 prometheus.wait_until_succeeds(
129 "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | "
130 + "jq '.data.result[0].value[1]' | grep '\"1\"'"
131 )
132
133 prometheus.wait_until_succeeds(
134 "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | "
135 + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
136 )
137
138 logger.wait_until_succeeds(
139 "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'"
140 )
141
142 logger.log(logger.succeed("systemd-analyze security alertmanager-webhook-logger.service | grep -v '✓'"))
143
144 alertmanager.log(alertmanager.succeed("systemd-analyze security alertmanager.service | grep -v '✓'"))
145 '';
146}