{
  config,
  lib,
  pkgs,
  ...
}:
let
  inherit (lib) literalExpression types;

  cfg = config.services.ollama;
  ollamaPackage = cfg.package.override { inherit (cfg) acceleration; };

  staticUser = cfg.user != null && cfg.group != null;
in
{
  imports = [
    (lib.mkRemovedOptionModule [
      "services"
      "ollama"
      "listenAddress"
    ] "Use `services.ollama.host` and `services.ollama.port` instead.")
    (lib.mkRemovedOptionModule [
      "services"
      "ollama"
      "sandbox"
    ] "Set `services.ollama.user` and `services.ollama.group` instead.")
    (lib.mkRemovedOptionModule
      [
        "services"
        "ollama"
        "writablePaths"
      ]
      "The `models` directory is now always writable. To make other directories writable, use `systemd.services.ollama.serviceConfig.ReadWritePaths`."
    )
  ];

  options = {
    services.ollama = {
      enable = lib.mkEnableOption "ollama server for local large language models";
      package = lib.mkPackageOption pkgs "ollama" { };

      user = lib.mkOption {
        type = with types; nullOr str;
        default = null;
        example = "ollama";
        description = ''
          User account under which to run ollama. Defaults to [`DynamicUser`](https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#DynamicUser=)
          when set to `null`.

          The user will be created automatically if this option is set to a non-null value.
        '';
      };
      group = lib.mkOption {
        type = with types; nullOr str;
        default = cfg.user;
        defaultText = literalExpression "config.services.ollama.user";
        example = "ollama";
        description = ''
          Group under which to run ollama. Only used when `services.ollama.user` is set.

          The group will be created automatically if this option is set to a non-null value.
        '';
      };

      home = lib.mkOption {
        type = types.str;
        default = "/var/lib/ollama";
        example = "/home/foo";
        description = ''
          The home directory that the ollama service is started in.
        '';
      };
      models = lib.mkOption {
        type = types.str;
        default = "${cfg.home}/models";
        defaultText = "\${config.services.ollama.home}/models";
        example = "/path/to/ollama/models";
        description = ''
          The directory that the ollama service will read models from and download new models to.
        '';
      };

      host = lib.mkOption {
        type = types.str;
        default = "127.0.0.1";
        example = "[::]";
        description = ''
          The host address on which the ollama server's HTTP interface listens.
        '';
      };
      port = lib.mkOption {
        type = types.port;
        default = 11434;
        example = 11111;
        description = ''
          The port on which the ollama server listens.
        '';
      };

      acceleration = lib.mkOption {
        type = types.nullOr (
          types.enum [
            false
            "rocm"
            "cuda"
          ]
        );
        default = null;
        example = "rocm";
        description = ''
          What interface to use for hardware acceleration.

          - `null`: default behavior
            - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
            - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
            - otherwise defaults to `false`
          - `false`: disable GPU, only use CPU
          - `"rocm"`: supported by most modern AMD GPUs
            - may require overriding the GPU type with `services.ollama.rocmOverrideGfx`
              if rocm doesn't detect your AMD GPU
          - `"cuda"`: supported by most modern NVIDIA GPUs
        '';
      };
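      # Illustrative note: with `acceleration = "rocm"`, the `ollamaPackage`
      # binding above evaluates to roughly
      #   cfg.package.override { acceleration = "rocm"; }
      # so the accelerated build backs both the service's `ExecStart` and
      # `environment.systemPackages`.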
      rocmOverrideGfx = lib.mkOption {
        type = types.nullOr types.str;
        default = null;
        example = "10.3.0";
        description = ''
          Override what rocm will detect your GPU model as.
          For example, if you have an RX 5700 XT, try setting this to `"10.1.0"` (gfx 1010).

          This sets the value of `HSA_OVERRIDE_GFX_VERSION`. See [ollama's docs](
          https://github.com/ollama/ollama/blob/main/docs/gpu.md#amd-radeon
          ) for details.
        '';
      };

      environmentVariables = lib.mkOption {
        type = types.attrsOf types.str;
        default = { };
        example = {
          OLLAMA_LLM_LIBRARY = "cpu";
          HIP_VISIBLE_DEVICES = "0,1";
        };
        description = ''
          Set arbitrary environment variables for the ollama service.

          Be aware that these are only seen by the ollama server (systemd service),
          not normal invocations like `ollama run`.
          Since `ollama run` is mostly a shell around the ollama server, this is usually sufficient.
        '';
      };
      loadModels = lib.mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = ''
          Download these models using `ollama pull` as soon as `ollama.service` has started.

          This creates a systemd unit `ollama-model-loader.service`.

          Search for models of your choice at: <https://ollama.com/library>
        '';
      };
      openFirewall = lib.mkOption {
        type = types.bool;
        default = false;
        description = ''
          Whether to open the firewall for ollama.

          This adds `services.ollama.port` to `networking.firewall.allowedTCPPorts`.
        '';
      };
    };
  };

  config = lib.mkIf cfg.enable {
    users = lib.mkIf staticUser {
      users.${cfg.user} = {
        inherit (cfg) home;
        isSystemUser = true;
        group = cfg.group;
      };
      groups.${cfg.group} = { };
    };

    systemd.services.ollama = {
      description = "Server for local large language models";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
      environment =
        cfg.environmentVariables
        // {
          HOME = cfg.home;
          OLLAMA_MODELS = cfg.models;
          OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
        }
        // lib.optionalAttrs (cfg.rocmOverrideGfx != null) {
          HSA_OVERRIDE_GFX_VERSION = cfg.rocmOverrideGfx;
        };
      serviceConfig =
        lib.optionalAttrs staticUser {
          User = cfg.user;
          Group = cfg.group;
        }
        // {
          Type = "exec";
          DynamicUser = true;
          ExecStart = "${lib.getExe ollamaPackage} serve";
          WorkingDirectory = cfg.home;
          StateDirectory = [ "ollama" ];
          ReadWritePaths = [
            cfg.home
            cfg.models
          ];

          CapabilityBoundingSet = [ "" ];
          DeviceAllow = [
            # CUDA
            # https://docs.nvidia.com/dgx/pdf/dgx-os-5-user-guide.pdf
            "char-nvidiactl"
            "char-nvidia-caps"
            "char-nvidia-frontend"
            "char-nvidia-uvm"
            # ROCm
            "char-drm"
            "char-fb"
            "char-kfd"
            # WSL (Windows Subsystem for Linux)
            "/dev/dxg"
          ];
          DevicePolicy = "closed";
          LockPersonality = true;
          MemoryDenyWriteExecute = true;
          NoNewPrivileges = true;
          PrivateDevices = false; # hides acceleration devices
          PrivateTmp = true;
          PrivateUsers = true;
          ProcSubset = "all"; # /proc/meminfo
          ProtectClock = true;
          ProtectControlGroups = true;
          ProtectHome = true;
          ProtectHostname = true;
          ProtectKernelLogs = true;
          ProtectKernelModules = true;
          ProtectKernelTunables = true;
          ProtectProc = "invisible";
          ProtectSystem = "strict";
          RemoveIPC = true;
          RestrictNamespaces = true;
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          RestrictAddressFamilies = [
            "AF_INET"
            "AF_INET6"
            "AF_UNIX"
          ];
          SupplementaryGroups = [ "render" ]; # for rocm to access /dev/dri/renderD* devices
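          # Syscall hardening: the settings below restrict the service to the
          # native syscall ABI and the `@system-service` and `@resources`
          # filter sets, while denying the `@privileged` set.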
          SystemCallArchitectures = "native";
          SystemCallFilter = [
            "@system-service @resources"
            "~@privileged"
          ];
          UMask = "0077";
        };
    };

    systemd.services.ollama-model-loader = lib.mkIf (cfg.loadModels != [ ]) {
      description = "Download ollama models in the background";
      wantedBy = [
        "multi-user.target"
        "ollama.service"
      ];
      wants = [ "network-online.target" ];
      after = [
        "ollama.service"
        "network-online.target"
      ];
      bindsTo = [ "ollama.service" ];
      environment = config.systemd.services.ollama.environment;
      serviceConfig = {
        Type = "exec";
        DynamicUser = true;
        Restart = "on-failure";
        # bounded exponential backoff
        RestartSec = "1s";
        RestartMaxDelaySec = "2h";
        RestartSteps = "10";
      };

      script = ''
        total=${toString (builtins.length cfg.loadModels)}
        failed=0

        for model in ${lib.escapeShellArgs cfg.loadModels}; do
          '${lib.getExe ollamaPackage}' pull "$model" &
        done

        for job in $(jobs -p); do
          set +e
          wait $job
          exit_code=$?
          set -e

          if [ $exit_code != 0 ]; then
            failed=$((failed + 1))
          fi
        done

        if [ $failed != 0 ]; then
          echo "error: $failed out of $total attempted model downloads failed" >&2
          exit 1
        fi
      '';
    };

    networking.firewall = lib.mkIf cfg.openFirewall { allowedTCPPorts = [ cfg.port ]; };

    environment.systemPackages = [ ollamaPackage ];
  };

  meta.maintainers = with lib.maintainers; [
    abysssol
    onny
  ];
}
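
# A minimal usage sketch from a NixOS configuration; the attribute values
# below are illustrative assumptions, not defaults taken from this module:
#
#   services.ollama = {
#     enable = true;
#     acceleration = "rocm";
#     loadModels = [ "llama3.2" ];
#   };
#
# Setting `openFirewall = true` would additionally open the configured
# `port` (11434 by default) in the firewall.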