# Source viewed at ref "23.05-pre" (2.8 kB).
# NixOS VM test for kthxbye.
#
# A single node runs Prometheus (scraping itself), Alertmanager and kthxbye.
# Prometheus raises an always-true `node_up` alert, the test script silences
# it for 20 seconds with a comment starting with "ACK!", and then verifies
# that the silence is still listed after 21 seconds, i.e. that something
# (expected: kthxbye) extended it before it expired.
import ./make-test-python.nix ({ lib, pkgs, ... }:
{
  name = "kthxbye";

  meta = with lib.maintainers; {
    maintainers = [ nukaduka ];
  };

  nodes.server = { ... }: {
    # The test script drives Alertmanager through the `amtool` CLI.
    environment.systemPackages = with pkgs; [ prometheus-alertmanager ];

    services.prometheus = {
      enable = true;

      # Short intervals so the alert fires within a few seconds of boot.
      globalConfig = {
        scrape_interval = "5s";
        scrape_timeout = "5s";
        evaluation_interval = "5s";
      };

      # Prometheus scrapes itself; this is what produces the `up` series
      # the alert rule below is evaluated against.
      scrapeConfigs = [
        {
          job_name = "prometheus";
          scrape_interval = "5s";
          static_configs = [
            {
              targets = [ "localhost:9090" ];
            }
          ];
        }
      ];

      # `up != 0` holds whenever the self-scrape succeeds, so `node_up`
      # is firing for the whole duration of the test.
      rules = [
        ''
          groups:
            - name: test
              rules:
                - alert: node_up
                  expr: up != 0
                  for: 5s
                  labels:
                    severity: bottom of the barrel
                  annotations:
                    summary: node is fine
        ''
      ];

      # Forward alerts to the local Alertmanager instance.
      alertmanagers = [
        {
          static_configs = [
            {
              targets = [
                "localhost:9093"
              ];
            }
          ];
        }
      ];

      alertmanager = {
        enable = true;
        openFirewall = true;
        configuration.route = {
          receiver = "test";
          group_wait = "5s";
          group_interval = "5s";
          group_by = [ "..." ];
        };
        # Nothing in this test listens on port 1234; the receiver only
        # exists because the route needs one to point at.
        configuration.receivers = [
          {
            name = "test";
            webhook_configs = [
              {
                url = "http://localhost:1234";
              }
            ];
          }
        ];
      };
    };

    # Poll every 5s and extend silences that expire within 30s, so the 20s
    # silence created by the test script is eligible immediately.
    services.kthxbye = {
      enable = true;
      openFirewall = true;
      extendIfExpiringIn = "30s";
      logJSON = true;
      maxDuration = "15m";
      interval = "5s";
    };
  };

  testScript = ''
    with subtest("start the server"):
        start_all()
        server.wait_for_unit("prometheus.service")
        server.wait_for_unit("alertmanager.service")
        server.wait_for_unit("kthxbye.service")

        server.sleep(2)  # wait for units to settle
        # make sure kthxbye comes up after alertmanager; unlike systemctl(),
        # succeed() fails the test right away if the restart itself fails
        server.succeed("systemctl restart kthxbye.service")
        server.wait_for_unit("kthxbye.service")
        server.sleep(2)

    with subtest("wait until the node_up alert has reached alertmanager"):
        # kthxbye only extends silences that match firing alerts, so do not
        # start the 20s countdown before the alert is actually there
        server.wait_until_succeeds('amtool --alertmanager.url "http://localhost:9093" alert query alertname="node_up" | grep -q "node_up"')

    with subtest("set up test silence which expires in 20s"):
        server.succeed('amtool --alertmanager.url "http://localhost:9093" silence add alertname="node_up" -a "nixosTest" -d "20s" -c "ACK! this server is fine!!"')

    with subtest("wait for 21 seconds and check if the silence is still active"):
        server.sleep(21)
        # informational only: put the unit status into the test log
        _, kthxbye_status = server.systemctl("status kthxbye.service")
        print(kthxbye_status)
        server.succeed("amtool --alertmanager.url 'http://localhost:9093' silence | grep 'ACK'")
  '';
})