at 25.11-pre 3.1 kB view raw
# NixOS VM test for kthxbye.
#
# A single machine runs Prometheus (scraping itself), Alertmanager and kthxbye.
# The test script creates a 20 second silence through amtool, waits 21 seconds
# and then checks that the silence is still listed, i.e. that something (the
# kthxbye service under test) extended it before it expired.
import ./make-test-python.nix (
  { lib, pkgs, ... }:
  {
    name = "kthxbye";

    meta = with lib.maintainers; {
      maintainers = [ nukaduka ];
    };

    nodes.server =
      { ... }:
      {
        # Puts the amtool CLI used by the test script on the machine's PATH.
        environment.systemPackages = with pkgs; [ prometheus-alertmanager ];

        services.prometheus = {
          enable = true;

          # Short intervals keep the total runtime of the test low.
          globalConfig = {
            scrape_interval = "5s";
            scrape_timeout = "5s";
            evaluation_interval = "5s";
          };

          # Prometheus scrapes its own metrics endpoint.
          scrapeConfigs = [
            {
              job_name = "prometheus";
              scrape_interval = "5s";
              static_configs = [
                {
                  targets = [ "localhost:9090" ];
                }
              ];
            }
          ];

          # `up != 0` holds for every healthy target, so this alert fires
          # whenever the self-scrape succeeds, giving the silence created by
          # the test script an alert to match.
          rules = [
            ''
              groups:
                - name: test
                  rules:
                    - alert: node_up
                      expr: up != 0
                      for: 5s
                      labels:
                        severity: bottom of the barrel
                      annotations:
                        summary: node is fine
            ''
          ];

          # Deliver alerts to the Alertmanager instance enabled below.
          alertmanagers = [
            {
              static_configs = [
                {
                  targets = [
                    "localhost:9093"
                  ];
                }
              ];
            }
          ];

          alertmanager = {
            enable = true;
            openFirewall = true;
            configuration.route = {
              receiver = "test";
              group_wait = "5s";
              group_interval = "5s";
              # "..." is Alertmanager's special value for grouping by all labels.
              group_by = [ "..." ];
            };
            configuration.receivers = [
              {
                name = "test";
                webhook_configs = [
                  {
                    # NOTE(review): nothing in this test listens on port 1234;
                    # presumably the receiver only exists because a route
                    # needs one - confirm.
                    url = "http://localhost:1234";
                  }
                ];
              }
            ];
          };
        };

        # The service under test. With a 5s interval and a 30s
        # extendIfExpiringIn, the 20s silence created by the test script is
        # inside the extension window from the moment it is created.
        services.kthxbye = {
          enable = true;
          openFirewall = true;
          extendIfExpiringIn = "30s";
          logJSON = true;
          maxDuration = "15m";
          interval = "5s";
        };
      };

    testScript = ''
      with subtest("start the server"):
          start_all()
          server.wait_for_unit("prometheus.service")
          server.wait_for_unit("alertmanager.service")
          server.wait_for_unit("kthxbye.service")

          # An active unit does not guarantee a listening socket, so wait for
          # the Alertmanager API port instead of sleeping for a fixed time.
          server.wait_for_open_port(9093)
          server.systemctl("restart kthxbye.service")  # make sure kthxbye comes up after alertmanager
          server.wait_for_unit("kthxbye.service")

      with subtest("set up test silence which expires in 20s"):
          # NOTE(review): the "ACK!" comment prefix is presumably what marks
          # the silence as eligible for extension by kthxbye - confirm
          # against the kthxbye documentation.
          server.succeed('amtool --alertmanager.url "http://localhost:9093" silence add alertname="node_up" -a "nixosTest" -d "20s" -c "ACK! this server is fine!!"')

      with subtest("wait for 21 seconds and check if the silence is still active"):
          server.sleep(21)
          # The result is not asserted on; only the amtool query below decides
          # whether the test passes.
          server.systemctl("status kthxbye.service")
          server.succeed("amtool --alertmanager.url 'http://localhost:9093' silence | grep 'ACK'")
    '';
  }
)