# NixOS VM test "prometheus-pair": two identically configured Prometheus
# servers that scrape themselves and each other. The test script checks that
# both come up, answer queries about both instances, and get through a TSDB
# compaction cycle without logging errors.
{ pkgs, ... }:

let
  # NixOS module shared by both nodes; the two machines differ only by the
  # attribute name (and therefore host name) they are instantiated under.
  prometheusNode =
    { config, pkgs, ... }:
    let
      # Port of the local Prometheus, rendered once for the scrape targets.
      port = toString config.services.prometheus.port;
    in
    {
      # jq is used by the test script to pick fields out of HTTP API replies.
      environment.systemPackages = [ pkgs.jq ];

      # Open the Prometheus port so the peer node can scrape this instance.
      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];

      services.prometheus = {
        enable = true;
        globalConfig.scrape_interval = "2s";
        extraFlags = [
          # NOTE(review): presumably shortened so the compaction subtest sees
          # block writes within the test's runtime - confirm.
          "--storage.tsdb.min-block-duration=15s"
        ];
        scrapeConfigs = [
          {
            job_name = "prometheus";
            static_configs = [
              {
                # Each server scrapes itself and its peer.
                targets = [
                  "prometheus1:${port}"
                  "prometheus2:${port}"
                ];
              }
            ];
          }
        ];
      };
    };
in
{
  name = "prometheus-pair";

  nodes = {
    prometheus1 = prometheusNode;
    prometheus2 = prometheusNode;
  };

  testScript = ''
    # Wait for both servers to start, log the expected version and report healthy.
    for machine in prometheus1, prometheus2:
        machine.wait_for_unit("prometheus")
        machine.wait_for_open_port(9090)
        machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'version=${pkgs.prometheus.version}'")
        machine.wait_until_succeeds("curl -sSf http://localhost:9090/-/healthy")

    # Both servers are ready - run some queries against each of them.
    # The backslash-escaped braces are passed through to curl as written
    # (presumably to keep curl from treating them as URL globs).
    for machine in prometheus1, prometheus2:
        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\",version=\"${pkgs.prometheus.version}\"\}' | "
            + "jq '.data.result[0].value[1]' | grep '\"1\"'"
        )

        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\"\}' | "
            + "jq '.data.result[0].value[1]' | grep '\"1\"'"
        )

        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
            + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus.version}\"'"
        )

        # Two instances share the same version, so the sum must be 2.
        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
            + "jq '.data.result[0].value[1]' | grep '\"2\"'"
        )

        # jq -e fails on a missing series (null) as well as on "0", so an
        # empty query result cannot satisfy this check.
        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_head_series_created_total\{instance=\"prometheus1:9090\"\}' | "
            + "jq -e '.data.result[0].value[1] | tonumber > 0'"
        )

    with subtest("Compaction verification"):
        for machine in prometheus1, prometheus2:
            # Stop waiting as soon as a block is written or an error is logged;
            # the pattern matches the level=ERROR check at the end of the script.
            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep -E '(level=ERROR|write block)'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Head GC completed'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Creating checkpoint'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'WAL checkpoint complete'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'compact blocks'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Deleting obsolete block'")

            # At least one compaction must have been counted; a missing
            # series (null) is rejected by jq -e instead of passing silently.
            machine.wait_until_succeeds(
                "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_total\{instance=\"prometheus1:9090\"\}' | "
                + "jq -e '.data.result[0].value[1] | tonumber > 0'"
            )

            machine.wait_until_succeeds(
                "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_failed_total\{instance=\"prometheus1:9090\"\}' | "
                + "jq '.data.result[0].value[1]' | grep '\"0\"'"
            )

    # Neither server may have logged an error during the whole run.
    for machine in prometheus1, prometheus2:
        machine.fail("journalctl -o cat -u prometheus.service | grep 'level=ERROR'")

    # Record the hardening settings that systemd-analyze does not mark as passed.
    prometheus1.log(prometheus1.succeed("systemd-analyze security prometheus.service | grep -v '✓'"))
  '';
}