{
  config,
  lib,
  pkgs,
  ...
}:
let
  inherit (lib) literalExpression types;

  cfg = config.services.ollama;
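  # Propagate the configured acceleration mode into the package so the
  # matching GPU runtime (rocm/cuda) is built in.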
  ollamaPackage = cfg.package.override { inherit (cfg) acceleration; };

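  # A static account is used only when both user and group are set;
  # otherwise the service falls back to systemd's DynamicUser.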
  staticUser = cfg.user != null && cfg.group != null;
in
{
  imports = [
    (lib.mkRemovedOptionModule [
      "services"
      "ollama"
      "listenAddress"
    ] "Use `services.ollama.host` and `services.ollama.port` instead.")
    (lib.mkRemovedOptionModule [
      "services"
      "ollama"
      "sandbox"
    ] "Set `services.ollama.user` and `services.ollama.group` instead.")
    (lib.mkRemovedOptionModule
      [
        "services"
        "ollama"
        "writablePaths"
      ]
      "The `models` directory is now always writable. To make other directories writable, use `systemd.services.ollama.serviceConfig.ReadWritePaths`."
    )
  ];
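  # Hypothetical migration example for the removed `listenAddress` option:
  #   services.ollama.listenAddress = "127.0.0.1:11434";
  # becomes
  #   services.ollama.host = "127.0.0.1";
  #   services.ollama.port = 11434;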

  options = {
    services.ollama = {
      enable = lib.mkEnableOption "ollama server for local large language models";
      package = lib.mkPackageOption pkgs "ollama" { };

      user = lib.mkOption {
        type = with types; nullOr str;
        default = null;
        example = "ollama";
        description = ''
          User account under which to run ollama. Defaults to [`DynamicUser`](https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#DynamicUser=)
          when set to `null`.

          The user will automatically be created if this option is set to a non-null value.
        '';
      };
      group = lib.mkOption {
        type = with types; nullOr str;
        default = cfg.user;
        defaultText = literalExpression "config.services.ollama.user";
        example = "ollama";
        description = ''
          Group under which to run ollama. Only used when `services.ollama.user` is set.

          The group will automatically be created if this option is set to a non-null value.
        '';
      };
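      # A minimal sketch of running under a static account instead of
      # DynamicUser, e.g. so files under `home` have a fixed owner:
      #   services.ollama = {
      #     enable = true;
      #     user = "ollama";
      #     group = "ollama";
      #   };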

      home = lib.mkOption {
        type = types.str;
        default = "/var/lib/ollama";
        example = "/home/foo";
        description = ''
          The home directory that the ollama service is started in.
        '';
      };
      models = lib.mkOption {
        type = types.str;
        default = "${cfg.home}/models";
        defaultText = "\${config.services.ollama.home}/models";
        example = "/path/to/ollama/models";
        description = ''
          The directory that the ollama service will read models from and download new models to.
        '';
      };

      host = lib.mkOption {
        type = types.str;
        default = "127.0.0.1";
        example = "[::]";
        description = ''
          The host address on which the ollama server's HTTP interface listens.
        '';
      };
      port = lib.mkOption {
        type = types.port;
        default = 11434;
        example = 11111;
        description = ''
          The port on which the ollama server listens.
        '';
      };
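      # A sketch of exposing the server on the local network (addresses are
      # illustrative):
      #   services.ollama = {
      #     enable = true;
      #     host = "0.0.0.0";
      #     openFirewall = true;
      #   };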

      acceleration = lib.mkOption {
        type = types.nullOr (
          types.enum [
            false
            "rocm"
            "cuda"
          ]
        );
        default = null;
        example = "rocm";
        description = ''
          What interface to use for hardware acceleration.

          - `null`: default behavior
            - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
            - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
            - otherwise defaults to `false`
          - `false`: disable GPU, only use CPU
          - `"rocm"`: supported by most modern AMD GPUs
            - may require overriding the GPU type with `services.ollama.rocmOverrideGfx`
              if rocm doesn't detect your AMD GPU
          - `"cuda"`: supported by most modern NVIDIA GPUs
        '';
      };
      rocmOverrideGfx = lib.mkOption {
        type = types.nullOr types.str;
        default = null;
        example = "10.3.0";
        description = ''
          Override what rocm will detect your GPU model as.
          For example, if you have an RX 5700 XT, try setting this to `"10.1.0"` (gfx 1010).

          This sets the value of `HSA_OVERRIDE_GFX_VERSION`. See [ollama's docs](
          https://github.com/ollama/ollama/blob/main/docs/gpu.md#amd-radeon
          ) for details.
        '';
      };
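      # A sketch of forcing rocm on a card that isn't auto-detected (the gfx
      # version shown is illustrative; match it to your GPU):
      #   services.ollama = {
      #     enable = true;
      #     acceleration = "rocm";
      #     rocmOverrideGfx = "10.3.0";
      #   };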

      environmentVariables = lib.mkOption {
        type = types.attrsOf types.str;
        default = { };
        example = {
          OLLAMA_LLM_LIBRARY = "cpu";
          HIP_VISIBLE_DEVICES = "0,1";
        };
        description = ''
          Set arbitrary environment variables for the ollama service.

          Be aware that these are only seen by the ollama server (systemd service),
          not normal invocations like `ollama run`.
          Since `ollama run` is mostly a thin wrapper around the ollama server, this is usually sufficient.
        '';
      };
      loadModels = lib.mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = ''
          Download these models using `ollama pull` as soon as `ollama.service` has started.

          This creates a systemd unit `ollama-model-loader.service`.

          Browse available models at <https://ollama.com/library>.
        '';
      };
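      # A sketch of preloading models at boot (model names are illustrative;
      # pick any from https://ollama.com/library):
      #   services.ollama.loadModels = [
      #     "llama3.1"
      #     "mistral"
      #   ];
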
      openFirewall = lib.mkOption {
        type = types.bool;
        default = false;
        description = ''
          Whether to open the firewall for ollama.

          This adds `services.ollama.port` to `networking.firewall.allowedTCPPorts`.
        '';
      };
    };
  };

  config = lib.mkIf cfg.enable {
    users = lib.mkIf staticUser {
      users.${cfg.user} = {
        inherit (cfg) home;
        isSystemUser = true;
        group = cfg.group;
      };
      groups.${cfg.group} = { };
    };

    systemd.services.ollama = {
      description = "Server for local large language models";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
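      # Later `//` operands win on conflicts, so user-supplied variables
      # cannot clobber HOME, OLLAMA_MODELS, or OLLAMA_HOST.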
      environment =
        cfg.environmentVariables
        // {
          HOME = cfg.home;
          OLLAMA_MODELS = cfg.models;
          OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
        }
        // lib.optionalAttrs (cfg.rocmOverrideGfx != null) {
          HSA_OVERRIDE_GFX_VERSION = cfg.rocmOverrideGfx;
        };
      serviceConfig =
        lib.optionalAttrs staticUser {
          User = cfg.user;
          Group = cfg.group;
        }
        // {
          Type = "exec";
          DynamicUser = true;
          ExecStart = "${lib.getExe ollamaPackage} serve";
          WorkingDirectory = cfg.home;
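          # Creates /var/lib/ollama (the default `home`) with correct ownership.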
          StateDirectory = [ "ollama" ];
          ReadWritePaths = [
            cfg.home
            cfg.models
          ];

          CapabilityBoundingSet = [ "" ];
          DeviceAllow = [
            # CUDA
            # https://docs.nvidia.com/dgx/pdf/dgx-os-5-user-guide.pdf
            "char-nvidiactl"
            "char-nvidia-caps"
            "char-nvidia-frontend"
            "char-nvidia-uvm"
            # ROCm
            "char-drm"
            "char-fb"
            "char-kfd"
            # WSL (Windows Subsystem for Linux)
            "/dev/dxg"
          ];
          DevicePolicy = "closed";
          LockPersonality = true;
          MemoryDenyWriteExecute = true;
          NoNewPrivileges = true;
          PrivateDevices = false; # true would hide the acceleration devices
          PrivateTmp = true;
          PrivateUsers = true;
          ProcSubset = "all"; # ollama reads /proc/meminfo
          ProtectClock = true;
          ProtectControlGroups = true;
          ProtectHome = true;
          ProtectHostname = true;
          ProtectKernelLogs = true;
          ProtectKernelModules = true;
          ProtectKernelTunables = true;
          ProtectProc = "invisible";
          ProtectSystem = "strict";
          RemoveIPC = true;
          RestrictNamespaces = true;
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          RestrictAddressFamilies = [
            "AF_INET"
            "AF_INET6"
            "AF_UNIX"
          ];
          SupplementaryGroups = [ "render" ]; # for rocm to access /dev/dri/renderD* devices
          SystemCallArchitectures = "native";
          SystemCallFilter = [
            "@system-service @resources"
            "~@privileged"
          ];
          UMask = "0077";
        };
    };

    systemd.services.ollama-model-loader = lib.mkIf (cfg.loadModels != [ ]) {
      description = "Download ollama models in the background";
      wantedBy = [
        "multi-user.target"
        "ollama.service"
      ];
      wants = [ "network-online.target" ];
      after = [
        "ollama.service"
        "network-online.target"
      ];
      bindsTo = [ "ollama.service" ];
      environment = config.systemd.services.ollama.environment;
      serviceConfig = {
        Type = "exec";
        DynamicUser = true;
        Restart = "on-failure";
        # bounded exponential backoff
        RestartSec = "1s";
        RestartMaxDelaySec = "2h";
        RestartSteps = "10";
      };

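      # Pull all requested models in parallel, then wait for each job and
      # count failures, so one bad model doesn't abort the remaining pulls.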
      script = ''
        total=${toString (builtins.length cfg.loadModels)}
        failed=0

        for model in ${lib.escapeShellArgs cfg.loadModels}; do
          '${lib.getExe ollamaPackage}' pull "$model" &
        done

        for job in $(jobs -p); do
          set +e
          wait $job
          exit_code=$?
          set -e

          if [ $exit_code != 0 ]; then
            failed=$((failed + 1))
          fi
        done

        if [ $failed != 0 ]; then
          echo "error: $failed out of $total attempted model downloads failed" >&2
          exit 1
        fi
      '';
    };

    networking.firewall = lib.mkIf cfg.openFirewall { allowedTCPPorts = [ cfg.port ]; };

    environment.systemPackages = [ ollamaPackage ];
  };

  meta.maintainers = with lib.maintainers; [
    abysssol
    onny
  ];
}