# Web-viewer header (not part of the module): at 23.11-pre, 8.3 kB
# NixOS module adding an opt-in `confinement` option set to every entry of
# `systemd.services`.
#
# For each service with `confinement.enable = true` this module
#   * sets chroot-related `serviceConfig` values on the service,
#   * registers assertions rejecting `serviceConfig` settings that are declared
#     incompatible with confinement, and
#   * adds a generated package to `systemd.packages` containing a
#     `confinement.conf` drop-in with one `BindReadOnlyPaths=` line per store
#     path in the closure of `confinement.packages`.
{ config, pkgs, lib, utils, ... }:

let
  inherit (lib)
    concatLists concatMap concatMapStringsSep concatStringsSep escapeShellArg
    filterAttrs flatten genAttrs literalExpression mapAttrsToList mdDoc
    mkDefault mkIf mkOption optionalString optionals types;
  inherit (utils.systemdUtils.lib) mkPathSafeName;

  # The per-service submodule below rebinds `config` to the service's own
  # configuration, so keep a handle on the system-wide one.
  systemConfig = config;

  # Only services that opted in take part in assertions / drop-in generation.
  confinedServices =
    filterAttrs (_: svc: svc.confinement.enable) config.systemd.services;

  # Joins message fragments with single spaces.
  sentence = concatStringsSep " ";

  # Assertions for one confined service: each one rejects a `serviceConfig`
  # setting that must not be combined with `confinement.enable`.
  assertionsFor = svcName: svc: let
    # Common opening of every message, naming the offending option.
    conflict = optName: sentence [
      "The 'serviceConfig' option '${optName}' for"
      "service '${svcName}' is enabled in conjunction with"
      "'confinement.enable'"
    ];
  in [
    {
      assertion = !(svc.serviceConfig.RootDirectoryStartOnly or false);
      message = sentence [
        "${conflict "RootDirectoryStartOnly"}, but right now systemd"
        "doesn't support restricting bind-mounts to 'ExecStart'."
        "Please either define a separate service or find a way to run"
        "commands other than ExecStart within the chroot."
      ];
    }
    {
      assertion = !(svc.serviceConfig.DynamicUser or false);
      message = sentence [
        "${conflict "DynamicUser"}. Please create a dedicated user via"
        "the 'users.users' option instead as this combination is"
        "currently not supported."
      ];
    }
    {
      # Passes when ProtectSystem is unset or explicitly `false`.
      assertion = (svc.serviceConfig.ProtectSystem or false) == false;
      message = sentence [
        "${conflict "ProtectSystem"}. ProtectSystem is not compatible"
        "with service confinement as it fails to remount /usr within"
        "our chroot. Please disable the option."
      ];
    }
  ];

  # Builds the package holding the `confinement.conf` drop-in for one confined
  # service.
  dropInFor = svcName: svc: let
    # One entry of `confinement.packages` per line; handed to `closureInfo`
    # as the root of the closure to bind-mount.
    closureRootsFile = pkgs.writeText
      "${mkPathSafeName svcName}-string-contexts.txt"
      (concatStringsSep "\n" svc.confinement.packages);
  in pkgs.runCommand "${mkPathSafeName svcName}-chroot-paths" {
    closureInfo = pkgs.closureInfo { rootPaths = closureRootsFile; };
    serviceName = "${svcName}.service";
    # The roots file itself is skipped by the loop in the builder script.
    excludedPath = closureRootsFile;
  } ''
    mkdir -p "$out/lib/systemd/system/$serviceName.d"
    serviceFile="$out/lib/systemd/system/$serviceName.d/confinement.conf"

    echo '[Service]' > "$serviceFile"

    # /bin/sh is special here, because the option value could contain a
    # symlink and we need to properly resolve it.
    ${optionalString (svc.confinement.binSh != null) ''
      binsh=${escapeShellArg svc.confinement.binSh}
      realprog="$(readlink -e "$binsh")"
      echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
    ''}

    while read storePath; do
      if [ -L "$storePath" ]; then
        # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
        # so let's just bind-mount the target to that location.
        echo "BindReadOnlyPaths=$(readlink -e "$storePath"):$storePath"
      elif [ "$storePath" != "$excludedPath" ]; then
        echo "BindReadOnlyPaths=$storePath"
      fi
    done < "$closureInfo/store-paths" >> "$serviceFile"
  '';
in {
  options.systemd.services = mkOption {
    type = types.attrsOf (types.submodule ({ name, config, ... }: {
      options.confinement = {
        enable = mkOption {
          type = types.bool;
          default = false;
          description = mdDoc ''
            If set, all the required runtime store paths for this service are
            bind-mounted into a `tmpfs`-based
            {manpage}`chroot(2)`.
          '';
        };

        fullUnit = mkOption {
          type = types.bool;
          default = false;
          description = mdDoc ''
            Whether to include the full closure of the systemd unit file into the
            chroot, instead of just the dependencies for the executables.

            ::: {.warning}
            While it may be tempting to just enable this option to
            make things work quickly, please be aware that this might add paths
            to the closure of the chroot that you didn't anticipate. It's better
            to use {option}`confinement.packages` to **explicitly** add additional store paths to the
            chroot.
            :::
          '';
        };

        packages = mkOption {
          type = types.listOf (types.either types.str types.package);
          default = [ ];
          description = let
            # Renders a `serviceConfig` sub-option as a manual cross-reference.
            scRef = optName: "{option}`serviceConfig.${optName}`";
            hookOptions = [
              "ExecReload"
              "ExecStartPost"
              "ExecStartPre"
              "ExecStop"
              "ExecStopPost"
            ];
          in mdDoc ''
            Additional packages or strings with context to add to the closure of
            the chroot. By default, this includes all the packages from the
            ${concatMapStringsSep ", " scRef hookOptions} and ${scRef "ExecStart"} options. If you want to have all the
            dependencies of this systemd unit, you can use
            {option}`confinement.fullUnit`.

            ::: {.note}
            The store paths listed in {option}`path` are
            **not** included in the closure as
            well as paths from other options except those listed
            above.
            :::
          '';
        };

        binSh = mkOption {
          type = types.nullOr types.path;
          default = systemConfig.environment.binsh;
          defaultText = literalExpression "config.environment.binsh";
          example = literalExpression ''"''${pkgs.dash}/bin/dash"'';
          description = mdDoc ''
            The program to make available as {file}`/bin/sh` inside
            the chroot. If this is set to `null`, no
            {file}`/bin/sh` is provided at all.

            This is useful for some applications, which for example use the
            {manpage}`system(3)` library function to execute commands.
          '';
        };

        mode = mkOption {
          type = types.enum [ "full-apivfs" "chroot-only" ];
          default = "full-apivfs";
          description = mdDoc ''
            The value `full-apivfs` (the default) sets up
            private {file}`/dev`, {file}`/proc`,
            {file}`/sys` and {file}`/tmp` file systems in a separate user
            name space.

            If this is set to `chroot-only`, only the file
            system name space is set up along with the call to
            {manpage}`chroot(2)`.

            ::: {.note}
            This doesn't cover network namespaces and is solely for
            file system level isolation.
            :::
          '';
        };
      };

      config = let
        inherit (config.confinement) binSh fullUnit;

        # `true` in "full-apivfs" mode, `false` in "chroot-only" mode; wrapped
        # in mkDefault so a service can still override individual flags.
        apiVfsDefault = mkDefault (config.confinement.mode == "full-apivfs");

        # serviceConfig keys whose values are taken as commands of the unit.
        execOptionNames = [
          "ExecReload"
          "ExecStart"
          "ExecStartPost"
          "ExecStartPre"
          "ExecStop"
          "ExecStopPost"
        ];

        # Value(s) of one Exec* option as a flat list; empty when unset.
        commandsOf = optName:
          if config.serviceConfig ? ${optName}
          then flatten [ config.serviceConfig.${optName} ]
          else [ ];

        # The generated unit serialised to a JSON string.
        # NOTE(review): presumably used because the string carries the context
        # of every store path the unit mentions - confirm.
        renderedUnit =
          builtins.toJSON systemConfig.systemd.units."${name}.service";

        closureRoots =
          if fullUnit
          then [ renderedUnit ]
          else concatMap commandsOf execOptionNames;
      in mkIf config.confinement.enable {
        serviceConfig = {
          RootDirectory = "/var/empty";
          TemporaryFileSystem = "/";
          PrivateMounts = mkDefault true;
        }
        # The flags below are pinned explicitly to the value implied by the
        # confinement mode instead of relying on their global defaults:
        # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt to
        # turn some of them on by default. In "chroot-only" mode no /dev, /proc
        # or /sys is expected inside the chroot, yet a default-enabled
        # PrivateDevices (for instance) would mount /dev there and thereby
        # increase the attack surface.
        // genAttrs [
          "MountAPIVFS"
          "PrivateDevices"
          "PrivateTmp"
          "PrivateUsers"
          "ProtectControlGroups"
          "ProtectKernelModules"
          "ProtectKernelTunables"
        ] (_: apiVfsDefault);

        confinement.packages =
          closureRoots ++ optionals (binSh != null) [ binSh ];
      };
    }));
  };

  config = {
    assertions = concatLists (mapAttrsToList assertionsFor confinedServices);

    systemd.packages = mapAttrsToList dropInFor confinedServices;
  };
}