# Largely based on the implementation of <nixos/tests/prometheus/alertmanager.nix>.
2{ lib, pkgs, ... }:
3{
4 name = "victoriametrics-vmalert";
5 meta = with lib.maintainers; {
6 maintainers = [
7 yorickvp
8 ryan4yin
9 ];
10 };
11
12 nodes = {
13 victoriametrics =
14 { config, pkgs, ... }:
15 {
16 environment.systemPackages = [ pkgs.jq ];
17 networking.firewall.allowedTCPPorts = [ 8428 ];
18 services.victoriametrics = {
19 enable = true;
20 prometheusConfig = {
21 global = {
22 scrape_interval = "2s";
23 };
24 scrape_configs = [
25 {
26 job_name = "alertmanager";
27 static_configs = [
28 {
29 targets = [
30 "alertmanager:${toString config.services.prometheus.alertmanager.port}"
31 ];
32 }
33 ];
34 }
35 {
36 job_name = "node";
37 static_configs = [
38 {
39 targets = [
40 "node:${toString config.services.prometheus.exporters.node.port}"
41 ];
42 }
43 ];
44 }
45 ];
46 };
47 };
48
49 services.vmalert.instances."" = {
50 enable = true;
51 settings = {
52 "datasource.url" = "http://localhost:8428"; # victoriametrics' api
53 "notifier.url" = [
54 "http://alertmanager:${toString config.services.prometheus.alertmanager.port}"
55 ]; # alertmanager's api
56 rule = [
57 (pkgs.writeText "instance-down.yml" ''
58 groups:
59 - name: test
60 rules:
61 - alert: InstanceDown
62 expr: up == 0
63 for: 5s
64 labels:
65 severity: page
66 annotations:
67 summary: "Instance {{ $labels.instance }} down"
68 '')
69 ];
70 };
71 };
72 };
73
74 alertmanager = {
75 services.prometheus.alertmanager = {
76 enable = true;
77 openFirewall = true;
78
79 configuration = {
80 global = {
81 resolve_timeout = "1m";
82 };
83
84 route = {
85 # Root route node
86 receiver = "test";
87 group_by = [ "..." ];
88 continue = false;
89 group_wait = "1s";
90 group_interval = "15s";
91 repeat_interval = "24h";
92 };
93
94 receivers = [
95 {
96 name = "test";
97 webhook_configs = [
98 {
99 url = "http://logger:6725";
100 send_resolved = true;
101 max_alerts = 0;
102 }
103 ];
104 }
105 ];
106 };
107 };
108 };
109
110 logger = {
111 networking.firewall.allowedTCPPorts = [ 6725 ];
112
113 services.prometheus.alertmanagerWebhookLogger.enable = true;
114 };
115 };
116
117 testScript = ''
118 alertmanager.wait_for_unit("alertmanager")
119 alertmanager.wait_for_open_port(9093)
120 alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready")
121
122 logger.wait_for_unit("alertmanager-webhook-logger")
123 logger.wait_for_open_port(6725)
124
125 victoriametrics.wait_for_unit("victoriametrics")
126 victoriametrics.wait_for_unit("vmalert")
127 victoriametrics.wait_for_open_port(8428)
128
129 victoriametrics.wait_until_succeeds(
130 "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | "
131 + "jq '.data.result[0].value[1]' | grep '\"1\"'"
132 )
133
134 victoriametrics.wait_until_succeeds(
135 "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | "
136 + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'"
137 )
138
139 victoriametrics.wait_until_succeeds(
140 "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | "
141 + "jq '.data.result[0].value[1]' | grep '\"1\"'"
142 )
143
144 victoriametrics.wait_until_succeeds(
145 "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | "
146 + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
147 )
148
149 logger.wait_until_succeeds(
150 "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'"
151 )
152
153 logger.log(logger.succeed("systemd-analyze security alertmanager-webhook-logger.service | grep -v '✓'"))
154
155 alertmanager.log(alertmanager.succeed("systemd-analyze security alertmanager.service | grep -v '✓'"))
156 '';
157}