# (code-viewer page header, not part of the module) at master 4.8 kB view raw
# Primarily reference the implementation of <nixos/tests/prometheus/alertmanager.nix>
#
# NixOS VM test for vmalert. Three machines are declared:
#   - victoriametrics: runs VictoriaMetrics (scraping) and vmalert (rule evaluation)
#   - alertmanager:    receives alerts from vmalert and forwards them to a webhook
#   - logger:          runs alertmanager-webhook-logger, which the test greps for the alert
{ lib, pkgs, ... }:
{
  name = "victoriametrics-vmalert";
  meta = with lib.maintainers; {
    maintainers = [
      yorickvp
      ryan4yin
    ];
  };

  nodes = {
    victoriametrics =
      { config, pkgs, ... }:
      {
        # jq is used by the testScript to pick fields out of the query API responses.
        environment.systemPackages = [ pkgs.jq ];
        networking.firewall.allowedTCPPorts = [ 8428 ];
        services.victoriametrics = {
          enable = true;
          prometheusConfig = {
            global = {
              scrape_interval = "2s";
            };
            scrape_configs = [
              {
                job_name = "alertmanager";
                static_configs = [
                  {
                    targets = [
                      # The port is read from this node's own option value; the
                      # testScript below talks to alertmanager on 9093.
                      "alertmanager:${toString config.services.prometheus.alertmanager.port}"
                    ];
                  }
                ];
              }
              {
                # No machine named "node" is declared in `nodes`, so this target
                # stays down; the testScript relies on that (up != 1) to get the
                # InstanceDown alert fired.
                job_name = "node";
                static_configs = [
                  {
                    targets = [
                      "node:${toString config.services.prometheus.exporters.node.port}"
                    ];
                  }
                ];
              }
            ];
          };
        };

        # The testScript waits for a unit called "vmalert" for this instance.
        services.vmalert.instances."" = {
          enable = true;
          settings = {
            "datasource.url" = "http://localhost:8428"; # victoriametrics' api
            "notifier.url" = [
              "http://alertmanager:${toString config.services.prometheus.alertmanager.port}"
            ]; # alertmanager's api
            rule = [
              (pkgs.writeText "instance-down.yml" ''
                groups:
                  - name: test
                    rules:
                      - alert: InstanceDown
                        expr: up == 0
                        for: 5s
                        labels:
                          severity: page
                        annotations:
                          summary: "Instance {{ $labels.instance }} down"
              '')
            ];
          };
        };
      };

    alertmanager = {
      services.prometheus.alertmanager = {
        enable = true;
        openFirewall = true;

        configuration = {
          global = {
            resolve_timeout = "1m";
          };

          route = {
            # Root route node
            receiver = "test";
            group_by = [ "..." ];
            continue = false;
            group_wait = "1s";
            group_interval = "15s";
            repeat_interval = "24h";
          };

          receivers = [
            {
              name = "test";
              webhook_configs = [
                {
                  # Points at the webhook logger on the `logger` machine.
                  url = "http://logger:6725";
                  send_resolved = true;
                  max_alerts = 0;
                }
              ];
            }
          ];
        };
      };
    };

    logger = {
      networking.firewall.allowedTCPPorts = [ 6725 ];

      services.prometheus.alertmanagerWebhookLogger.enable = true;
    };
  };

  # NOTE(review): the backslashes before the braces in the query URLs are
  # presumably there to keep curl's URL globbing from interpreting {...}; confirm
  # before changing them.
  testScript = ''
    alertmanager.wait_for_unit("alertmanager")
    alertmanager.wait_for_open_port(9093)
    alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready")

    logger.wait_for_unit("alertmanager-webhook-logger")
    logger.wait_for_open_port(6725)

    victoriametrics.wait_for_unit("victoriametrics")
    victoriametrics.wait_for_unit("vmalert")
    victoriametrics.wait_for_open_port(8428)

    # The alertmanager target is scraped and reported as up.
    victoriametrics.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | "
        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
    )

    # The scraped build info matches the packaged alertmanager version.
    victoriametrics.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | "
        + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'"
    )

    # Exactly one "node" target is reported as not up.
    victoriametrics.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | "
        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
    )

    # The webhook notification counter is no longer "0".
    victoriametrics.wait_until_succeeds(
        "curl -sf 'http://127.0.0.1:8428/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | "
        + "jq '.data.result[0].value[1]' | grep -v '\"0\"'"
    )

    # The alert reached the webhook logger.
    logger.wait_until_succeeds(
        "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'"
    )

    # Log the hardening checks that are not marked as passing. The pattern must
    # be non-empty: with an empty pattern, grep -v selects no lines and exits 1,
    # which makes succeed() fail unconditionally.
    logger.log(logger.succeed("systemd-analyze security alertmanager-webhook-logger.service | grep -v '✓'"))

    alertmanager.log(alertmanager.succeed("systemd-analyze security alertmanager.service | grep -v '✓'"))
  '';
}