at master 3.8 kB view raw
# NixOS VM test: switching to a NixOS configuration that only changes the
# Prometheus configuration must reload (instead of restart) Prometheus, and
# the reload must complete before the switch reports success.
{
  name = "prometheus-config-reload";

  nodes = {
    prometheus =
      { config, pkgs, ... }:
      {
        # jq is used by the test script to pull the YAML config out of the
        # Prometheus status API response.
        environment.systemPackages = [ pkgs.jq ];

        networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];

        services.prometheus = {
          enable = true;
          enableReload = true;
          globalConfig.scrape_interval = "2s";
          scrapeConfigs = [
            {
              job_name = "prometheus";
              static_configs = [ { targets = [ "prometheus:${toString config.services.prometheus.port}" ]; } ];
            }
          ];
        };

        specialisation = {
          "prometheus-config-change" = {
            configuration = {
              # yq is used by the test script to query the reloaded YAML config.
              environment.systemPackages = [ pkgs.yq ];

              # This configuration just adds a new prometheus job ("node")
              # targeting the node_exporter port on a host named "node".
              # No such machine is defined in this test; the test script only
              # inspects the resulting Prometheus configuration.
              services.prometheus = {
                scrapeConfigs = [
                  {
                    job_name = "node";
                    static_configs = [
                      { targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; }
                    ];
                  }
                ];
              };
            };
          };
        };
      };
  };

  testScript = ''
    prometheus.wait_for_unit("prometheus")
    prometheus.wait_for_open_port(9090)

    # Check if switching to a NixOS configuration that changes the prometheus
    # configuration reloads (instead of restarts) prometheus before the switch
    # finishes successfully:
    with subtest("config change reloads prometheus"):
        import json
        # We check if prometheus has finished reloading by looking for the message
        # "Completed loading of configuration file" in the journal between the start
        # and finish of switching to the new NixOS configuration.
        #
        # To mark the start we record the journal cursor before starting the switch:
        cursor_before_switching = json.loads(
            prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
        )["__CURSOR"]

        # Now we switch:
        prometheus_config_change = prometheus.succeed(
            "readlink /run/current-system/specialisation/prometheus-config-change"
        ).strip()
        prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")

        # Next we retrieve all logs since the start of switching:
        logs_after_starting_switching = prometheus.succeed(
            """
              journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE
            """.format(
                cursor_before_switching=cursor_before_switching
            )
        )

        # Finally we check if the message "Completed loading of configuration file"
        # occurs before the "finished switching to system configuration" message:
        finished_switching_msg = (
            "finished switching to system configuration " + prometheus_config_change
        )
        reloaded_before_switching_finished = False
        finished_switching = False
        for log_line in logs_after_starting_switching.splitlines():
            # The journalctl output ends with a newline; skip blank lines so a
            # missing "finished switching" message surfaces as the assertion
            # below rather than as a JSON decoding error.
            if not log_line.strip():
                continue
            # journalctl may emit MESSAGE as a byte array or null, and an entry
            # may have no MESSAGE at all; only string messages can match.
            msg = json.loads(log_line).get("MESSAGE")
            if not isinstance(msg, str):
                continue
            if "Completed loading of configuration file" in msg:
                reloaded_before_switching_finished = True
            if msg == finished_switching_msg:
                finished_switching = True
                break

        assert reloaded_before_switching_finished, (
            "prometheus did not log a completed configuration reload "
            "before the switch finished"
        )
        assert finished_switching, (
            "journal does not contain the message: " + finished_switching_msg
        )

        # Check if the reloaded config includes the new node job:
        prometheus.succeed(
            """
              curl -sf http://127.0.0.1:9090/api/v1/status/config \
                | jq -r .data.yaml \
                | yq '.scrape_configs | any(.job_name == "node")' \
                | grep true
            """
        )
  '';
}