1{ lib, ... }:
2{
# Name under which this VM test is registered.
name = "nagios";

# Maintainers of this test, referenced by their full attribute path
# rather than through a `with` scope.
meta.maintainers = [ lib.maintainers.symphorien ];
7
# Single test VM: runs sshd (the monitored service) and a Nagios instance
# whose notifications are redirected into a file the test script can read.
nodes.machine =
  { pkgs, ... }:
  let
    # Notification sink used instead of e-mail: appends its arguments as one
    # line to /tmp/notifications. `set -x` traces the command for debugging.
    writer = pkgs.writeShellScript "write" ''
      set -x
      echo "$@" >> /tmp/notifications
    '';
  in
  {
    # tested service
    services.sshd.enable = true;

    # nagios
    services.nagios = {
      enable = true;

      # Make state transitions faster: interval_length is the number of
      # seconds per Nagios "time unit", so check_interval/retry_interval of 1
      # below mean 5 seconds.
      extraConfig.interval_length = "5";

      objectDefs =
        # Stock object files shipped with the nagios package. The templates
        # and commands referenced below (generic-host, generic-service,
        # check_ssh) are expected to come from these files.
        (map (x: "${pkgs.nagios}/etc/objects/${x}.cfg") [
          "templates"
          "timeperiods"
          "commands"
        ])
        ++ [
          (pkgs.writeText "objects.cfg" ''
            # notifications are written to /tmp/notifications
            # Nagios macros must be delimited by a dollar sign on both sides.
            define command {
              command_name notify-host-by-file
              command_line ${writer} "$HOSTNAME$ is $HOSTSTATE$"
            }
            define command {
              command_name notify-service-by-file
              command_line ${writer} "$SERVICEDESC$ is $SERVICESTATE$"
            }

            # nagios boilerplate
            define contact {
              contact_name                   alice
              alias                          alice
              host_notifications_enabled     1
              service_notifications_enabled  1
              service_notification_period    24x7
              host_notification_period       24x7
              service_notification_options   w,u,c,r,f,s
              host_notification_options      d,u,r,f,s
              service_notification_commands  notify-service-by-file
              host_notification_commands     notify-host-by-file
              email                          foo@example.com
            }
            define contactgroup {
              contactgroup_name  admins
              alias              Admins
              members            alice
            }
            define hostgroup{
              hostgroup_name  allhosts
              alias           All hosts
            }

            # monitored objects
            define host {
              use                  generic-host
              host_name            localhost
              alias                localhost
              address              localhost
              hostgroups           allhosts
              contact_groups       admins
              # make state transitions faster.
              max_check_attempts   2
              check_interval       1
              retry_interval       1
            }
            define service {
              use                  generic-service
              host_name            localhost
              service_description  ssh
              check_command        check_ssh
              # make state transitions faster.
              max_check_attempts   2
              check_interval       1
              retry_interval       1
            }
          '')
        ];
    };
  };
93
# Python script for the NixOS test driver: checks that Nagios notices sshd
# going down and coming back, by reading the notification file on the VM.
testScript = ''
  with subtest("ensure sshd starts"):
      machine.wait_for_unit("sshd.service")


  with subtest("ensure nagios starts"):
      machine.wait_for_file("/var/log/nagios/current")


  def assert_notify(text):
      """Wait for /tmp/notifications to appear and require that its
      stripped content is exactly `text`."""
      machine.wait_for_file("/tmp/notifications")
      real = machine.succeed("cat /tmp/notifications").strip()
      print(f"got {real!r}, expected {text!r}")
      assert text == real


  with subtest("ensure we get a notification when sshd is down"):
      machine.succeed("systemctl stop sshd")
      assert_notify("ssh is CRITICAL")


  with subtest("ensure tests can succeed"):
      # Drop the previous notification *before* bringing sshd back. Doing it
      # afterwards could delete a recovery notification that arrived in
      # between, leaving assert_notify waiting until it times out.
      machine.succeed("rm /tmp/notifications")
      machine.succeed("systemctl start sshd")
      assert_notify("ssh is OK")
'';
120}