# at master 4.4 kB view raw
# NixOS VM test "prometheus-alertmanager".
#
# Three machines are defined:
#   * prometheus   - scrapes `alertmanager` and a `node` target, evaluates the
#                    InstanceDown rule and forwards alerts to `alertmanager`.
#   * alertmanager - routes every alert to a single webhook receiver.
#   * logger       - runs alertmanager-webhook-logger on port 6725, which is
#                    the webhook endpoint configured on `alertmanager`.
#
# No machine named `node` is defined here, and the test script expects the
# `node` scrape job to report a target that is not up; that is what makes the
# InstanceDown alert fire and eventually show up in the logger's journal.
{ pkgs, ... }:

{
  name = "prometheus-alertmanager";

  nodes = {
    prometheus =
      { config, pkgs, ... }:
      {
        # jq is used by the test script to pick values out of the query API's
        # JSON responses.
        environment.systemPackages = [ pkgs.jq ];

        networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];

        services.prometheus = {
          enable = true;
          globalConfig.scrape_interval = "2s";

          # Where Prometheus sends firing alerts.
          alertmanagers = [
            {
              scheme = "http";
              static_configs = [
                { targets = [ "alertmanager:${toString config.services.prometheus.alertmanager.port}" ]; }
              ];
            }
          ];

          # Single alerting rule: any scrape target with `up == 0` for 5s.
          rules = [
            ''
              groups:
                - name: test
                  rules:
                    - alert: InstanceDown
                      expr: up == 0
                      for: 5s
                      labels:
                        severity: page
                      annotations:
                        summary: "Instance {{ $labels.instance }} down"
            ''
          ];

          scrapeConfigs = [
            {
              job_name = "alertmanager";
              static_configs = [
                { targets = [ "alertmanager:${toString config.services.prometheus.alertmanager.port}" ]; }
              ];
            }
            {
              # Target host `node` is not one of this test's machines; the
              # test script asserts that this job has one target not up.
              job_name = "node";
              static_configs = [
                { targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; }
              ];
            }
          ];
        };
      };

    alertmanager =
      { config, pkgs, ... }:
      {
        services.prometheus.alertmanager = {
          enable = true;
          openFirewall = true;

          configuration = {
            global = {
              resolve_timeout = "1m";
            };

            route = {
              # Root route node
              receiver = "test";
              group_by = [ "..." ];
              continue = false;
              group_wait = "1s";
              group_interval = "15s";
              repeat_interval = "24h";
            };

            receivers = [
              {
                name = "test";
                webhook_configs = [
                  {
                    # Port 6725 is the one opened on the `logger` machine.
                    url = "http://logger:6725";
                    send_resolved = true;
                    max_alerts = 0;
                  }
                ];
              }
            ];
          };
        };
      };

    logger =
      { config, pkgs, ... }:
      {
        networking.firewall.allowedTCPPorts = [ 6725 ];

        services.prometheus.alertmanagerWebhookLogger.enable = true;
      };
  };

  testScript = ''
    alertmanager.wait_for_unit("alertmanager")
    alertmanager.wait_for_open_port(9093)
    alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready")
    #alertmanager.wait_until_succeeds("journalctl -o cat -u alertmanager.service | grep 'version=${pkgs.prometheus-alertmanager.version}'")

    logger.wait_for_unit("alertmanager-webhook-logger")
    logger.wait_for_open_port(6725)

    prometheus.wait_for_unit("prometheus")
    prometheus.wait_for_open_port(9090)

    # The alertmanager scrape job reports exactly one target that is up.
    prometheus.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | "
        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
    )

    # The scraped build info carries the version of the packaged alertmanager.
    prometheus.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | "
        + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'"
    )

    # The node scrape job reports exactly one target that is not up.
    prometheus.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | "
        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
    )

    # The webhook notification counter is something other than "0".
    prometheus.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | "
        + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
    )

    # The alert reached the webhook logger.
    logger.wait_until_succeeds(
        "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'"
    )

    # Log the hardening report minus the lines carrying a check mark, i.e. the
    # findings that remain. An empty grep pattern here would select no lines,
    # exit non-zero and make succeed() raise.
    logger.log(logger.succeed("systemd-analyze security alertmanager-webhook-logger.service | grep -v '✓'"))

    alertmanager.log(alertmanager.succeed("systemd-analyze security alertmanager.service | grep -v '✓'"))
  '';
}