# NixOS VM test: switching to a configuration that changes the Prometheus
# config must reload (not restart) Prometheus, and the reload must have
# completed before switch-to-configuration reports that it has finished.
{
  name = "prometheus-config-reload";

  nodes = {
    prometheus =
      { config, pkgs, ... }:
      {
        # jq is used by the test script to pull the YAML config out of the
        # Prometheus HTTP API response.
        environment.systemPackages = [ pkgs.jq ];

        networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];

        services.prometheus = {
          enable = true;
          # Behaviour under test: with this set, the test script expects a
          # configuration change to trigger a reload instead of a restart.
          enableReload = true;
          globalConfig.scrape_interval = "2s";
          scrapeConfigs = [
            {
              job_name = "prometheus";
              static_configs = [ { targets = [ "prometheus:${toString config.services.prometheus.port}" ]; } ];
            }
          ];
        };

        specialisation = {
          "prometheus-config-change" = {
            configuration = {
              # yq is used by the test script to query the YAML config that
              # Prometheus reports after the switch.
              environment.systemPackages = [ pkgs.yq ];

              # This configuration just adds a new prometheus job named "node"
              # that targets the node_exporter port on a host called "node".
              # The test script only checks that the job shows up in the
              # loaded configuration.
              services.prometheus = {
                scrapeConfigs = [
                  {
                    job_name = "node";
                    static_configs = [
                      { targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; }
                    ];
                  }
                ];
              };
            };
          };
        };
      };
  };

  testScript = ''
    import json

    prometheus.wait_for_unit("prometheus")
    prometheus.wait_for_open_port(9090)

    # Check if switching to a NixOS configuration that changes the prometheus
    # configuration reloads (instead of restarts) prometheus before the switch
    # finishes successfully:
    with subtest("config change reloads prometheus"):
        # We check if prometheus has finished reloading by looking for the message
        # "Completed loading of configuration file" in the journal between the start
        # and finish of switching to the new NixOS configuration.
        #
        # To mark the start we record the journal cursor before starting the switch:
        cursor_before_switching = json.loads(
            prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
        )["__CURSOR"]

        # Now we switch:
        prometheus_config_change = prometheus.succeed(
            "readlink /run/current-system/specialisation/prometheus-config-change"
        ).strip()
        prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")

        # Next we retrieve all logs since the start of switching:
        logs_after_starting_switching = prometheus.succeed(
            f"journalctl --after-cursor='{cursor_before_switching}' "
            "-o json --output-fields=MESSAGE"
        )

        # Finally we check if the message "Completed loading of configuration file"
        # occurs before the "finished switching to system configuration" message:
        finished_switching_msg = (
            "finished switching to system configuration " + prometheus_config_change
        )
        reloaded_before_switching_finished = False
        finished_switching = False
        for log_line in logs_after_starting_switching.splitlines():
            # Skip blank lines so that a missing "finished switching" message
            # ends in the assertions below instead of a JSON decode error.
            if not log_line.strip():
                continue
            msg = json.loads(log_line).get("MESSAGE")
            # journald serialises oversized fields as null and non-UTF-8
            # payloads as byte arrays; neither can match the text we look for.
            if not isinstance(msg, str):
                continue
            if "Completed loading of configuration file" in msg:
                reloaded_before_switching_finished = True
            if msg == finished_switching_msg:
                finished_switching = True
                break

        assert (
            reloaded_before_switching_finished
        ), "prometheus did not log a completed config reload before the switch finished"
        assert (
            finished_switching
        ), "did not find the finished-switching message in the journal"

        # Check if the reloaded config includes the new node job:
        prometheus.succeed(
            """
              curl -sf http://127.0.0.1:9090/api/v1/status/config \
                | jq -r .data.yaml \
                | yq '.scrape_configs | any(.job_name == "node")' \
                | grep true
            """
        )
  '';
}