# NixOS VM test "prometheus-pair": boots two machines that each run Prometheus
# and scrape both themselves and each other, then checks the HTTP query API and
# the service journal on both machines.
{ pkgs, ... }:

let
  # NixOS module shared by both nodes; the two machines are configured
  # identically and differ only in their host name (the `nodes` attribute name).
  prometheusNode =
    { config, pkgs, ... }:
    {
      # jq is used by testScript to pick fields out of the JSON query responses.
      environment.systemPackages = [ pkgs.jq ];

      # Open the Prometheus port so that the peer node can scrape this one.
      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];

      services.prometheus = {
        enable = true;
        globalConfig.scrape_interval = "2s";
        # NOTE(review): presumably lowered so that blocks are written and
        # compacted within the test run (see the "Compaction verification"
        # subtest below) - confirm against the Prometheus TSDB flag docs.
        extraFlags = [
          "--storage.tsdb.min-block-duration=15s"
        ];
        scrapeConfigs = [
          {
            job_name = "prometheus";
            static_configs = [
              {
                # Every node scrapes both instances, itself included.
                targets = [
                  "prometheus1:${toString config.services.prometheus.port}"
                  "prometheus2:${toString config.services.prometheus.port}"
                ];
              }
            ];
          }
        ];
      };
    };
in
{
  name = "prometheus-pair";

  nodes = {
    prometheus1 = prometheusNode;
    prometheus2 = prometheusNode;
  };

  # NOTE(review): the script hard-codes port 9090 while the node configuration
  # uses config.services.prometheus.port; this assumes the option is left at
  # 9090 - confirm against the module default.
  testScript = ''
    # Wait until both servers are up, report the packaged version and are healthy.
    for machine in prometheus1, prometheus2:
        machine.wait_for_unit("prometheus")
        machine.wait_for_open_port(9090)
        machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'version=${pkgs.prometheus.version}'")
        machine.wait_until_succeeds("curl -sSf http://localhost:9090/-/healthy")

    # Prometheii ready - run some queries
    for machine in prometheus1, prometheus2:
        # The braces are backslash-escaped in the command that reaches curl.
        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\",version=\"${pkgs.prometheus.version}\"\}' | "
            + "jq '.data.result[0].value[1]' | grep '\"1\"'"
        )

        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\"\}' | "
            + "jq '.data.result[0].value[1]' | grep '\"1\"'"
        )

        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
            + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus.version}\"'"
        )

        # Expect a sum of 2: one build_info series per scraped instance.
        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
            + "jq '.data.result[0].value[1]' | grep '\"2\"'"
        )

        machine.wait_until_succeeds(
            "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_head_series_created_total\{instance=\"prometheus1:9090\"\}' | "
            + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
        )

    with subtest("Compaction verification"):
        for machine in prometheus1, prometheus2:
            # Stop waiting once a block has been written or an error has been
            # logged. The error pattern matches the level=ERROR check at the end
            # of this script, which is what fails the test on logged errors.
            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep -E '(level=ERROR|write block)'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Head GC completed'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Creating checkpoint'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'WAL checkpoint complete'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'compact blocks'")

            machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'Deleting obsolete block'")

            machine.wait_until_succeeds(
                "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_total\{instance=\"prometheus1:9090\"\}' | "
                + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
            )

            machine.wait_until_succeeds(
                "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_tsdb_compactions_failed_total\{instance=\"prometheus1:9090\"\}' | "
                + "jq '.data.result[0].value[1]' | grep '\"0\"'"
            )

    # Neither server may have logged an error during the run.
    for machine in prometheus1, prometheus2:
        machine.fail("journalctl -o cat -u prometheus.service | grep 'level=ERROR'")

    # Log the systemd-analyze security report for the unit, with the lines that
    # carry a check mark filtered out. The grep pattern must be non-empty: two
    # adjacent single quotes would close this Nix string, and an empty pattern
    # under -v selects nothing and makes succeed() fail.
    prometheus1.log(prometheus1.succeed("systemd-analyze security prometheus.service | grep -v '✓'"))
  '';
}