1{
2 config,
3 lib,
4 pkgs,
5 ...
6}:
7let
8
9 cfg = config.hardware.rasdaemon;
10
11in
12{
13 options.hardware.rasdaemon = {
14
15 enable = lib.mkEnableOption "RAS logging daemon";
16
17 package = lib.mkPackageOption pkgs "rasdaemon" { };
18
19 record = lib.mkOption {
20 type = lib.types.bool;
21 default = true;
22 description = "record events via sqlite3, required for ras-mc-ctl";
23 };
24
25 mainboard = lib.mkOption {
26 type = lib.types.lines;
27 default = "";
28 description = "Custom mainboard description, see {manpage}`ras-mc-ctl(8)` for more details.";
29 example = ''
30 vendor = ASRock
31 model = B450M Pro4
32
33 # it should default to such values from
34 # /sys/class/dmi/id/board_[vendor|name]
35 # alternatively one can supply a script
36 # that returns the same format as above
37
38 script = <path to script>
39 '';
40 };
41
42 # TODO, accept `rasdaemon.labels = " ";` or `rasdaemon.labels = { dell = " "; asrock = " "; };'
43
44 labels = lib.mkOption {
45 type = lib.types.lines;
46 default = "";
47 description = "Additional memory module label descriptions to be placed in /etc/ras/dimm_labels.d/labels";
48 example = ''
49 # vendor and model may be shown by 'ras-mc-ctl --mainboard'
50 vendor: ASRock
51 product: To Be Filled By O.E.M.
52 model: B450M Pro4
53 # these labels are names for the motherboard slots
54 # the numbers may be shown by `ras-mc-ctl --error-count`
55 # they are mc:csrow:channel
56 DDR4_A1: 0.2.0; DDR4_B1: 0.2.1;
57 DDR4_A2: 0.3.0; DDR4_B2: 0.3.1;
58 '';
59 };
60
61 config = lib.mkOption {
62 type = lib.types.lines;
63 default = "";
64 description = ''
65 rasdaemon configuration, currently only used for CE PFA
66 for details, read rasdaemon.outPath/etc/sysconfig/rasdaemon's comments
67 '';
68 example = ''
69 # defaults from included config
70 PAGE_CE_REFRESH_CYCLE="24h"
71 PAGE_CE_THRESHOLD="50"
72 PAGE_CE_ACTION="soft"
73 '';
74 };
75
76 extraModules = lib.mkOption {
77 type = lib.types.listOf lib.types.str;
78 default = [ ];
79 description = "extra kernel modules to load";
80 example = [ "i7core_edac" ];
81 };
82
83 testing = lib.mkEnableOption "error injection infrastructure";
84 };
85
86 config = lib.mkIf cfg.enable {
87
88 environment.etc = {
89 "ras/mainboard" = {
90 enable = cfg.mainboard != "";
91 text = cfg.mainboard;
92 };
93 # TODO, handle multiple cfg.labels.brand = " ";
94 "ras/dimm_labels.d/labels" = {
95 enable = cfg.labels != "";
96 text = cfg.labels;
97 };
98 "sysconfig/rasdaemon" = {
99 enable = cfg.config != "";
100 text = cfg.config;
101 };
102 };
103 environment.systemPackages =
104 [ cfg.package ]
105 ++ lib.optionals (cfg.testing) (
106 with pkgs.error-inject;
107 [
108 edac-inject
109 mce-inject
110 aer-inject
111 ]
112 );
113
114 boot.initrd.kernelModules =
115 cfg.extraModules
116 ++ lib.optionals (cfg.testing) [
117 # edac_core and amd64_edac should get loaded automatically
118 # i7core_edac may not be, and may not be required, but should load successfully
119 "edac_core"
120 "amd64_edac"
121 "i7core_edac"
122 "mce-inject"
123 "aer-inject"
124 ];
125
126 boot.kernelPatches = lib.optionals (cfg.testing) [
127 {
128 name = "rasdaemon-tests";
129 patch = null;
130 extraConfig = ''
131 EDAC_DEBUG y
132 X86_MCE_INJECT y
133
134 PCIEPORTBUS y
135 PCIEAER y
136 PCIEAER_INJECT y
137 '';
138 }
139 ];
140
141 # i tried to set up a group for this
142 # but rasdaemon needs higher permissions?
143 # `rasdaemon: Can't locate a mounted debugfs`
144
145 # most of this taken from src/misc/
146 systemd.services = {
147 rasdaemon = {
148 description = "the RAS logging daemon";
149 documentation = [ "man:rasdaemon(1)" ];
150 wantedBy = [ "multi-user.target" ];
151
152 serviceConfig = {
153 StateDirectory = lib.optionalString (cfg.record) "rasdaemon";
154
155 ExecStart =
156 "${cfg.package}/bin/rasdaemon --foreground" + lib.optionalString (cfg.record) " --record";
157 ExecStop = "${cfg.package}/bin/rasdaemon --disable";
158 Restart = "on-abort";
159
160 # src/misc/rasdaemon.service.in shows this:
161 # ExecStartPost = ${cfg.package}/bin/rasdaemon --enable
162 # but that results in unpredictable existence of the database
163 # and everything seems to be enabled without this...
164 };
165 };
166 ras-mc-ctl = lib.mkIf (cfg.labels != "") {
167 description = "register DIMM labels on startup";
168 documentation = [ "man:ras-mc-ctl(8)" ];
169 wantedBy = [ "multi-user.target" ];
170 serviceConfig = {
171 Type = "oneshot";
172 ExecStart = "${cfg.package}/bin/ras-mc-ctl --register-labels";
173 RemainAfterExit = true;
174 };
175 };
176 };
177 };
178
179 meta.maintainers = [ lib.maintainers.evils ];
180
181}