# (repository web-viewer residue, not part of the module: "at master 10 kB view raw")
# NixOS module adding per-service `confinement.*` options to
# `systemd.services`. For every service with `confinement.enable = true` it:
#   * roots the service at /run/confinement/<path-safe name> and sets the
#     sandboxing-related `serviceConfig` defaults below, and
#   * builds a drop-in (`<name>.service.d/confinement.conf`) that
#     bind-mounts the store-path closure of `confinement.packages` (plus an
#     optional /bin/sh) read-only into that root.
{
  config,
  pkgs,
  lib,
  utils,
  ...
}:

let
  # The per-service submodule below shadows `config` with the service's own
  # configuration, so keep a handle on the system-wide configuration.
  toplevelConfig = config;
  inherit (lib) types;
  inherit (utils.systemdUtils.lib) mkPathSafeName;
in
{
  options.systemd.services = lib.mkOption {
    type = types.attrsOf (
      types.submodule (
        # `name` is the service's attribute name; `config` is this one
        # service's configuration (not the top-level one).
        { name, config, ... }:
        {
          options.confinement.enable = lib.mkOption {
            type = types.bool;
            default = false;
            description = ''
              If set, all the required runtime store paths for this service are
              bind-mounted into a `tmpfs`-based
              {manpage}`chroot(2)`.
            '';
          };

          options.confinement.fullUnit = lib.mkOption {
            type = types.bool;
            default = false;
            description = ''
              Whether to include the full closure of the systemd unit file into the
              chroot, instead of just the dependencies for the executables.

              ::: {.warning}
              While it may be tempting to just enable this option to
              make things work quickly, please be aware that this might add paths
              to the closure of the chroot that you didn't anticipate. It's better
              to use {option}`confinement.packages` to **explicitly** add additional store paths to the
              chroot.
              :::
            '';
          };

          options.confinement.packages = lib.mkOption {
            type = types.listOf (types.either types.str types.package);
            default = [ ];
            description =
              let
                # Renders an option reference such as
                # {option}`serviceConfig.ExecStart` for the text below.
                mkScOption = optName: "{option}`serviceConfig.${optName}`";
              in
              ''
                Additional packages or strings with context to add to the closure of
                the chroot. By default, this includes all the packages from the
                ${
                  lib.concatMapStringsSep ", " mkScOption [
                    "ExecReload"
                    "ExecStartPost"
                    "ExecStartPre"
                    "ExecStop"
                    "ExecStopPost"
                  ]
                } and ${mkScOption "ExecStart"} options. If you want to have all the
                dependencies of this systemd unit, you can use
                {option}`confinement.fullUnit`.

                ::: {.note}
                The store paths listed in {option}`path` are
                **not** included in the closure as
                well as paths from other options except those listed
                above.
                :::
              '';
          };

          options.confinement.binSh = lib.mkOption {
            type = types.nullOr types.path;
            default = toplevelConfig.environment.binsh;
            defaultText = lib.literalExpression "config.environment.binsh";
            example = lib.literalExpression ''"''${pkgs.dash}/bin/dash"'';
            description = ''
              The program to make available as {file}`/bin/sh` inside
              the chroot. If this is set to `null`, no
              {file}`/bin/sh` is provided at all.

              This is useful for some applications, which for example use the
              {manpage}`system(3)` library function to execute commands.
            '';
          };

          options.confinement.mode = lib.mkOption {
            type = types.enum [
              "full-apivfs"
              "chroot-only"
            ];
            default = "full-apivfs";
            description = ''
              The value `full-apivfs` (the default) sets up
              private {file}`/dev`, {file}`/proc`,
              {file}`/sys`, {file}`/tmp` and {file}`/var/tmp` file systems
              in a separate user name space.

              If this is set to `chroot-only`, only the file
              system name space is set up along with the call to
              {manpage}`chroot(2)`.

              In all cases, unless `serviceConfig.PrivateTmp=true` is set,
              both {file}`/tmp` and {file}`/var/tmp` paths are added to `InaccessiblePaths=`.
              This is to overcome options like `DynamicUser=true`
              implying `PrivateTmp=true` without letting it being turned off.
              Beware however that giving processes the `CAP_SYS_ADMIN` and `@mount` privileges
              can let them undo the effects of `InaccessiblePaths=`.

              ::: {.note}
              This doesn't cover network namespaces and is solely for
              file system level isolation.
              :::
            '';
          };

          # Settings contributed to the service itself; only active when
          # confinement is enabled for it.
          config =
            let
              inherit (config.confinement) binSh fullUnit;
              # true in "full-apivfs" mode, false in "chroot-only" mode.
              # Wrapped in mkDefault, i.e. defined at default priority, so a
              # service's own explicit definition of these options wins.
              wantsAPIVFS = lib.mkDefault (config.confinement.mode == "full-apivfs");
            in
            lib.mkIf config.confinement.enable {
              serviceConfig = {
                ReadOnlyPaths = [ "+/" ];
                RuntimeDirectory = [ "confinement/${mkPathSafeName name}" ];
                RootDirectory = "/run/confinement/${mkPathSafeName name}";
                InaccessiblePaths = [
                  "-+/run/confinement/${mkPathSafeName name}"
                ];
                PrivateMounts = lib.mkDefault true;

                # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt
                # to change some of these to default to true.
                #
                # If we run in chroot-only mode, having something like PrivateDevices
                # set to true by default will mount /dev within the chroot, whereas
                # with "chroot-only" it's expected that there are no /dev, /proc and
                # /sys file systems available.
                #
                # However, if this suddenly becomes true, the attack surface will
                # increase, so let's explicitly set these options to true/false
                # depending on the mode.
                MountAPIVFS = wantsAPIVFS;
                PrivateDevices = wantsAPIVFS;
                PrivateTmp = wantsAPIVFS;
                PrivateUsers = wantsAPIVFS;
                ProtectControlGroups = wantsAPIVFS;
                ProtectKernelModules = wantsAPIVFS;
                ProtectKernelTunables = wantsAPIVFS;
              };
              # Entries added automatically on top of whatever the user put
              # into `confinement.packages`.
              confinement.packages =
                let
                  execOpts = [
                    "ExecReload"
                    "ExecStart"
                    "ExecStartPost"
                    "ExecStartPre"
                    "ExecStop"
                    "ExecStopPost"
                  ];
                  # Values of every Exec* option that is set. `flatten`
                  # lets a single value and a list of values be handled
                  # alike; unset options contribute nothing.
                  execPkgs = lib.concatMap (
                    opt:
                    let
                      isSet = config.serviceConfig ? ${opt};
                    in
                    lib.flatten (lib.optional isSet config.serviceConfig.${opt})
                  ) execOpts;
                  unitAttrs = toplevelConfig.systemd.units."${name}.service";
                  # The whole unit serialised into one string. Presumably
                  # only its string context (the store paths it refers to)
                  # matters for the closure, not the JSON text itself.
                  allPkgs = lib.singleton (builtins.toJSON unitAttrs);
                  unitPkgs = if fullUnit then allPkgs else execPkgs;
                in
                unitPkgs ++ lib.optional (binSh != null) binSh;
            };
        }
      )
    );
  };

  # One assertion per confined service: RootDirectoryStartOnly must not be
  # enabled together with confinement.
  config.assertions = lib.concatLists (
    lib.mapAttrsToList (
      name: cfg:
      let
        # Common prefix of the assertion message for option `optName`.
        whatOpt =
          optName:
          "The 'serviceConfig' option '${optName}' for"
          + " service '${name}' is enabled in conjunction with"
          + " 'confinement.enable'";
      in
      lib.optionals cfg.confinement.enable [
        {
          # `or false` belongs to the attribute selection, so an unset
          # RootDirectoryStartOnly counts as false before negation.
          assertion = !cfg.serviceConfig.RootDirectoryStartOnly or false;
          message =
            "${whatOpt "RootDirectoryStartOnly"}, but right now systemd"
            + " doesn't support restricting bind-mounts to 'ExecStart'."
            + " Please either define a separate service or find a way to run"
            + " commands other than ExecStart within the chroot.";
        }
      ]
    ) config.systemd.services
  );

  # For every confined service, add a package containing the
  # `confinement.conf` drop-in to `systemd.packages`.
  config.systemd.packages = lib.concatLists (
    lib.mapAttrsToList (
      name: cfg:
      let
        # Text file listing every `confinement.packages` entry, one per
        # line; used as the root of the closure computed below.
        rootPaths =
          let
            contents = lib.concatStringsSep "\n" cfg.confinement.packages;
          in
          pkgs.writeText "${mkPathSafeName name}-string-contexts.txt" contents;

        # Derivation producing the drop-in. `excludedPath` is the listing
        # file itself, which the loop at the end of the script skips so it
        # is not bind-mounted into the chroot.
        #
        # NOTE(review): `!cfg.serviceConfig.PrivateTmp` only accepts a Nix
        # boolean; a string-valued PrivateTmp would abort evaluation here.
        # Confirm whether string values need to be handled.
        #
        # The loop uses `read -r` so that lines are taken literally (no
        # backslash processing).
        chrootPaths =
          pkgs.runCommand "${mkPathSafeName name}-chroot-paths"
            {
              closureInfo = pkgs.closureInfo { inherit rootPaths; };
              serviceName = "${name}.service";
              excludedPath = rootPaths;
            }
            ''
              mkdir -p "$out/lib/systemd/system/$serviceName.d"
              serviceFile="$out/lib/systemd/system/$serviceName.d/confinement.conf"

              echo '[Service]' > "$serviceFile"

              # /bin/sh is special here, because the option value could contain a
              # symlink and we need to properly resolve it.
              ${lib.optionalString (cfg.confinement.binSh != null) ''
                binsh=${lib.escapeShellArg cfg.confinement.binSh}
                realprog="$(readlink -e "$binsh")"
                echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
              ''}

              # If DynamicUser= is enabled, PrivateTmp=true is implied (and cannot be turned off).
              # so disable them unless PrivateTmp=true is explicitely set.
              ${lib.optionalString (!cfg.serviceConfig.PrivateTmp) ''
                echo "InaccessiblePaths=-+/tmp" >> "$serviceFile"
                echo "InaccessiblePaths=-+/var/tmp" >> "$serviceFile"
              ''}

              while read -r storePath; do
                if [ -L "$storePath" ]; then
                  # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
                  # so let's just bind-mount the target to that location.
                  echo "BindReadOnlyPaths=$(readlink -e "$storePath"):$storePath"
                elif [ "$storePath" != "$excludedPath" ]; then
                  echo "BindReadOnlyPaths=$storePath"
                fi
              done < "$closureInfo/store-paths" >> "$serviceFile"
            '';
      in
      lib.optional cfg.confinement.enable chrootPaths
    ) config.systemd.services
  );
}