at 25.11-pre 5.0 kB view raw
# NixOS module for rasdaemon, the RAS (Reliability, Availability,
# Serviceability) logging daemon.
#
# Declares the `hardware.rasdaemon.*` options and, when enabled:
#   * writes the optional mainboard / DIMM label / daemon configuration
#     files under /etc,
#   * installs the package (plus error-injection tools when `testing` is set),
#   * loads the requested kernel modules from the initrd,
#   * defines the `rasdaemon` service and, when labels are supplied, a
#     one-shot `ras-mc-ctl` service that registers them.
{
  config,
  lib,
  pkgs,
  ...
}:
let

  cfg = config.hardware.rasdaemon;

  # True when the user supplied daemon settings; drives both the /etc file
  # and the service's EnvironmentFile so the two never disagree.
  hasDaemonConfig = cfg.config != "";

in
{
  options.hardware.rasdaemon = {

    enable = lib.mkEnableOption "RAS logging daemon";

    package = lib.mkPackageOption pkgs "rasdaemon" { };

    # Adds `--record` to the daemon command line and provisions a
    # systemd StateDirectory named "rasdaemon".
    record = lib.mkOption {
      type = lib.types.bool;
      default = true;
      description = "record events via sqlite3, required for ras-mc-ctl";
    };

    # Written verbatim to /etc/ras/mainboard when non-empty.
    mainboard = lib.mkOption {
      type = lib.types.lines;
      default = "";
      description = "Custom mainboard description, see {manpage}`ras-mc-ctl(8)` for more details.";
      example = ''
        vendor = ASRock
        model = B450M Pro4

        # it should default to such values from
        # /sys/class/dmi/id/board_[vendor|name]
        # alternatively one can supply a script
        # that returns the same format as above

        script = <path to script>
      '';
    };

    # TODO, accept `rasdaemon.labels = " ";` or `rasdaemon.labels = { dell = " "; asrock = " "; };`

    # Written verbatim to /etc/ras/dimm_labels.d/labels when non-empty;
    # a non-empty value also enables the `ras-mc-ctl` one-shot service.
    labels = lib.mkOption {
      type = lib.types.lines;
      default = "";
      description = "Additional memory module label descriptions to be placed in /etc/ras/dimm_labels.d/labels";
      example = ''
        # vendor and model may be shown by 'ras-mc-ctl --mainboard'
        vendor: ASRock
        product: To Be Filled By O.E.M.
        model: B450M Pro4
        # these labels are names for the motherboard slots
        # the numbers may be shown by `ras-mc-ctl --error-count`
        # they are mc:csrow:channel
        DDR4_A1: 0.2.0; DDR4_B1: 0.2.1;
        DDR4_A2: 0.3.0; DDR4_B2: 0.3.1;
      '';
    };

    # Written verbatim to /etc/sysconfig/rasdaemon when non-empty and handed
    # to the daemon as a systemd EnvironmentFile.
    config = lib.mkOption {
      type = lib.types.lines;
      default = "";
      description = ''
        rasdaemon configuration, currently only used for CE PFA
        for details, read rasdaemon.outPath/etc/sysconfig/rasdaemon's comments
      '';
      example = ''
        # defaults from included config
        PAGE_CE_REFRESH_CYCLE="24h"
        PAGE_CE_THRESHOLD="50"
        PAGE_CE_ACTION="soft"
      '';
    };

    extraModules = lib.mkOption {
      type = lib.types.listOf lib.types.str;
      default = [ ];
      description = "extra kernel modules to load";
      example = [ "i7core_edac" ];
    };

    # Pulls in the injection tools, extra kernel modules and the kernel
    # config fragment defined below.
    testing = lib.mkEnableOption "error injection infrastructure";
  };

  config = lib.mkIf cfg.enable {

    # Each file is only materialised when its option is non-empty.
    environment.etc = {
      "ras/mainboard" = {
        enable = cfg.mainboard != "";
        text = cfg.mainboard;
      };
      # TODO, handle multiple cfg.labels.brand = " ";
      "ras/dimm_labels.d/labels" = {
        enable = cfg.labels != "";
        text = cfg.labels;
      };
      "sysconfig/rasdaemon" = {
        enable = hasDaemonConfig;
        text = cfg.config;
      };
    };

    environment.systemPackages =
      [ cfg.package ]
      # Explicit attribute selection instead of `with`, so the origin of
      # each tool is visible at the use site.
      ++ lib.optionals cfg.testing (
        builtins.attrValues {
          inherit (pkgs.error-inject)
            edac-inject
            mce-inject
            aer-inject
            ;
        }
      );

    boot.initrd.kernelModules =
      cfg.extraModules
      ++ lib.optionals cfg.testing [
        # edac_core and amd64_edac should get loaded automatically
        # i7core_edac may not be, and may not be required, but should load successfully
        "edac_core"
        "amd64_edac"
        "i7core_edac"
        "mce-inject"
        "aer-inject"
      ];

    # No actual patch is applied (`patch = null`); the entry only adds
    # kernel configuration options needed for error injection.
    boot.kernelPatches = lib.optionals cfg.testing [
      {
        name = "rasdaemon-tests";
        patch = null;
        extraConfig = ''
          EDAC_DEBUG y
          X86_MCE_INJECT y

          PCIEPORTBUS y
          PCIEAER y
          PCIEAER_INJECT y
        '';
      }
    ];

    # i tried to set up a group for this
    # but rasdaemon needs higher permissions?
    # `rasdaemon: Can't locate a mounted debugfs`

    # most of this taken from src/misc/
    systemd.services = {
      rasdaemon = {
        description = "the RAS logging daemon";
        documentation = [ "man:rasdaemon(1)" ];
        wantedBy = [ "multi-user.target" ];

        # Changing hardware.rasdaemon.config only rewrites a file in /etc,
        # which would not restart the unit on its own.
        restartTriggers = lib.optional hasDaemonConfig config.environment.etc."sysconfig/rasdaemon".source;

        serviceConfig = {
          StateDirectory = lib.optionalString cfg.record "rasdaemon";

          ExecStart = "${cfg.package}/bin/rasdaemon --foreground" + lib.optionalString cfg.record " --record";
          ExecStop = "${cfg.package}/bin/rasdaemon --disable";
          Restart = "on-abort";

          # src/misc/rasdaemon.service.in shows this:
          # ExecStartPost = ${cfg.package}/bin/rasdaemon --enable
          # but that results in unpredictable existence of the database
          # and everything seems to be enabled without this...
        }
        # Without this the file generated from hardware.rasdaemon.config is
        # never handed to the daemon by this unit.
        # NOTE(review): presumably mirrors the EnvironmentFile= line in
        # upstream's src/misc/rasdaemon.service.in - confirm against the
        # packaged version.
        // lib.optionalAttrs hasDaemonConfig {
          EnvironmentFile = "/etc/sysconfig/rasdaemon";
        };
      };

      # Only defined when labels were supplied.
      ras-mc-ctl = lib.mkIf (cfg.labels != "") {
        description = "register DIMM labels on startup";
        documentation = [ "man:ras-mc-ctl(8)" ];
        wantedBy = [ "multi-user.target" ];
        serviceConfig = {
          Type = "oneshot";
          ExecStart = "${cfg.package}/bin/ras-mc-ctl --register-labels";
          RemainAfterExit = true;
        };
      };
    };
  };

  meta.maintainers = [ lib.maintainers.evils ];

}