{
  config,
  lib,
  pkgs,
  ...
}:
let
  inherit (lib) literalExpression types;

  cfg = config.services.ollama;
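  # Propagate the configured acceleration mode into the package so the
  # matching GPU runtime (rocm/cuda) is built in.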
  ollamaPackage = cfg.package.override { inherit (cfg) acceleration; };

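  # A static account is used only when both user and group are set;
  # otherwise the service falls back to systemd's DynamicUser.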
  staticUser = cfg.user != null && cfg.group != null;
in
{
  imports = [
    (lib.mkRemovedOptionModule [
      "services"
      "ollama"
      "listenAddress"
    ] "Use `services.ollama.host` and `services.ollama.port` instead.")
    (lib.mkRemovedOptionModule [
      "services"
      "ollama"
      "sandbox"
    ] "Set `services.ollama.user` and `services.ollama.group` instead.")
    (lib.mkRemovedOptionModule
      [
        "services"
        "ollama"
        "writablePaths"
      ]
      "The `models` directory is now always writable. To make other directories writable, use `systemd.services.ollama.serviceConfig.ReadWritePaths`."
    )
  ];
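  # Hypothetical migration example for the removed `listenAddress` option:
  #   services.ollama.listenAddress = "127.0.0.1:11434";
  # becomes
  #   services.ollama.host = "127.0.0.1";
  #   services.ollama.port = 11434;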

  options = {
    services.ollama = {
      enable = lib.mkEnableOption "ollama server for local large language models";
      package = lib.mkPackageOption pkgs "ollama" { };

      user = lib.mkOption {
        type = with types; nullOr str;
        default = null;
        example = "ollama";
        description = ''
          User account under which to run ollama. Defaults to [`DynamicUser`](https://www.freedesktop.org/software/systemd/man/latest/systemd.exec.html#DynamicUser=)
          when set to `null`.

          The user will automatically be created if this option is set to a non-null value.
        '';
      };
      group = lib.mkOption {
        type = with types; nullOr str;
        default = cfg.user;
        defaultText = literalExpression "config.services.ollama.user";
        example = "ollama";
        description = ''
          Group under which to run ollama. Only used when `services.ollama.user` is set.

          The group will automatically be created if this option is set to a non-null value.
        '';
      };
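      # A minimal sketch of running under a static account instead of
      # DynamicUser, e.g. so files under `home` have a fixed owner:
      #   services.ollama = {
      #     enable = true;
      #     user = "ollama";
      #     group = "ollama";
      #   };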

      home = lib.mkOption {
        type = types.str;
        default = "/var/lib/ollama";
        example = "/home/foo";
        description = ''
          The home directory that the ollama service is started in.
        '';
      };
      models = lib.mkOption {
        type = types.str;
        default = "${cfg.home}/models";
        defaultText = "\${config.services.ollama.home}/models";
        example = "/path/to/ollama/models";
        description = ''
          The directory that the ollama service will read models from and download new models to.
        '';
      };

      host = lib.mkOption {
        type = types.str;
        default = "127.0.0.1";
        example = "[::]";
        description = ''
          The host address on which the ollama server's HTTP interface listens.
        '';
      };
      port = lib.mkOption {
        type = types.port;
        default = 11434;
        example = 11111;
        description = ''
          The port on which the ollama server listens.
        '';
      };
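      # A sketch of exposing the server on the local network (addresses are
      # illustrative):
      #   services.ollama = {
      #     enable = true;
      #     host = "0.0.0.0";
      #     openFirewall = true;
      #   };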

      acceleration = lib.mkOption {
        type = types.nullOr (
          types.enum [
            false
            "rocm"
            "cuda"
          ]
        );
        default = null;
        example = "rocm";
        description = ''
          What interface to use for hardware acceleration.

          - `null`: default behavior
            - if `nixpkgs.config.rocmSupport` is enabled, uses `"rocm"`
            - if `nixpkgs.config.cudaSupport` is enabled, uses `"cuda"`
            - otherwise defaults to `false`
          - `false`: disable GPU, only use CPU
          - `"rocm"`: supported by most modern AMD GPUs
            - may require overriding the GPU type with `services.ollama.rocmOverrideGfx`
              if rocm doesn't detect your AMD GPU
          - `"cuda"`: supported by most modern NVIDIA GPUs
        '';
      };
      rocmOverrideGfx = lib.mkOption {
        type = types.nullOr types.str;
        default = null;
        example = "10.3.0";
        description = ''
          Override what rocm will detect your GPU model as.
          For example, if you have an RX 5700 XT, try setting this to `"10.1.0"` (gfx 1010).

          This sets the value of `HSA_OVERRIDE_GFX_VERSION`. See [ollama's docs](
          https://github.com/ollama/ollama/blob/main/docs/gpu.md#amd-radeon
          ) for details.
        '';
      };
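      # A sketch of forcing rocm on a card that isn't auto-detected (the gfx
      # version shown is illustrative; match it to your GPU):
      #   services.ollama = {
      #     enable = true;
      #     acceleration = "rocm";
      #     rocmOverrideGfx = "10.3.0";
      #   };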

      environmentVariables = lib.mkOption {
        type = types.attrsOf types.str;
        default = { };
        example = {
          OLLAMA_LLM_LIBRARY = "cpu";
          HIP_VISIBLE_DEVICES = "0,1";
        };
        description = ''
          Set arbitrary environment variables for the ollama service.

          Be aware that these are only seen by the ollama server (systemd service),
          not normal invocations like `ollama run`.
          Since `ollama run` is mostly a thin wrapper around the ollama server, this is usually sufficient.
        '';
      };
      loadModels = lib.mkOption {
        type = types.listOf types.str;
        default = [ ];
        description = ''
          Download these models using `ollama pull` as soon as `ollama.service` has started.

          This creates a systemd unit `ollama-model-loader.service`.

          Browse available models at <https://ollama.com/library>.
        '';
      };
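      # A sketch of preloading models at boot (model names are illustrative;
      # pick any from https://ollama.com/library):
      #   services.ollama.loadModels = [
      #     "llama3.1"
      #     "mistral"
      #   ];
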
      openFirewall = lib.mkOption {
        type = types.bool;
        default = false;
        description = ''
          Whether to open the firewall for ollama.

          This adds `services.ollama.port` to `networking.firewall.allowedTCPPorts`.
        '';
      };
    };
  };

  config = lib.mkIf cfg.enable {
    users = lib.mkIf staticUser {
      users.${cfg.user} = {
        inherit (cfg) home;
        isSystemUser = true;
        group = cfg.group;
      };
      groups.${cfg.group} = { };
    };

    systemd.services.ollama = {
      description = "Server for local large language models";
      wantedBy = [ "multi-user.target" ];
      after = [ "network.target" ];
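      # Later `//` operands win on conflicts, so user-supplied variables
      # cannot clobber HOME, OLLAMA_MODELS, or OLLAMA_HOST.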
      environment =
        cfg.environmentVariables
        // {
          HOME = cfg.home;
          OLLAMA_MODELS = cfg.models;
          OLLAMA_HOST = "${cfg.host}:${toString cfg.port}";
        }
        // lib.optionalAttrs (cfg.rocmOverrideGfx != null) {
          HSA_OVERRIDE_GFX_VERSION = cfg.rocmOverrideGfx;
        };
      serviceConfig =
        lib.optionalAttrs staticUser {
          User = cfg.user;
          Group = cfg.group;
        }
        // {
          Type = "exec";
          DynamicUser = true;
          ExecStart = "${lib.getExe ollamaPackage} serve";
          WorkingDirectory = cfg.home;
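          # Creates /var/lib/ollama (the default `home`) with correct ownership.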
          StateDirectory = [ "ollama" ];
          ReadWritePaths = [
            cfg.home
            cfg.models
          ];

          CapabilityBoundingSet = [ "" ];
          DeviceAllow = [
            # CUDA
            # https://docs.nvidia.com/dgx/pdf/dgx-os-5-user-guide.pdf
            "char-nvidiactl"
            "char-nvidia-caps"
            "char-nvidia-frontend"
            "char-nvidia-uvm"
            # ROCm
            "char-drm"
            "char-fb"
            "char-kfd"
            # WSL (Windows Subsystem for Linux)
            "/dev/dxg"
          ];
          DevicePolicy = "closed";
          LockPersonality = true;
          MemoryDenyWriteExecute = true;
          NoNewPrivileges = true;
          PrivateDevices = false; # true would hide the acceleration devices
          PrivateTmp = true;
          PrivateUsers = true;
          ProcSubset = "all"; # ollama reads /proc/meminfo
          ProtectClock = true;
          ProtectControlGroups = true;
          ProtectHome = true;
          ProtectHostname = true;
          ProtectKernelLogs = true;
          ProtectKernelModules = true;
          ProtectKernelTunables = true;
          ProtectProc = "invisible";
          ProtectSystem = "strict";
          RemoveIPC = true;
          RestrictNamespaces = true;
          RestrictRealtime = true;
          RestrictSUIDSGID = true;
          RestrictAddressFamilies = [
            "AF_INET"
            "AF_INET6"
            "AF_UNIX"
          ];
          SupplementaryGroups = [ "render" ]; # for rocm to access /dev/dri/renderD* devices
          SystemCallArchitectures = "native";
          SystemCallFilter = [
            "@system-service @resources"
            "~@privileged"
          ];
          UMask = "0077";
        };
    };

    systemd.services.ollama-model-loader = lib.mkIf (cfg.loadModels != [ ]) {
      description = "Download ollama models in the background";
      wantedBy = [
        "multi-user.target"
        "ollama.service"
      ];
      wants = [ "network-online.target" ];
      after = [
        "ollama.service"
        "network-online.target"
      ];
      bindsTo = [ "ollama.service" ];
      environment = config.systemd.services.ollama.environment;
      serviceConfig = {
        Type = "exec";
        DynamicUser = true;
        Restart = "on-failure";
        # bounded exponential backoff
        RestartSec = "1s";
        RestartMaxDelaySec = "2h";
        RestartSteps = "10";
      };

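      # Pull all requested models in parallel, then wait for each job and
      # count failures, so one bad model doesn't abort the remaining pulls.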
      script = ''
        total=${toString (builtins.length cfg.loadModels)}
        failed=0

        for model in ${lib.escapeShellArgs cfg.loadModels}; do
          '${lib.getExe ollamaPackage}' pull "$model" &
        done

        for job in $(jobs -p); do
          set +e
          wait $job
          exit_code=$?
          set -e

          if [ $exit_code != 0 ]; then
            failed=$((failed + 1))
          fi
        done

        if [ $failed != 0 ]; then
          echo "error: $failed out of $total attempted model downloads failed" >&2
          exit 1
        fi
      '';
    };

    networking.firewall = lib.mkIf cfg.openFirewall { allowedTCPPorts = [ cfg.port ]; };

    environment.systemPackages = [ ollamaPackage ];
  };

  meta.maintainers = with lib.maintainers; [
    abysssol
    onny
  ];
}