{ lib, config, helpers, pkgs, ... }:

with lib;
let
  cfg = config.modules.apps;
  ollama = cfg.ollama.package;
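  # Argument vector for `ollama serve`, shared by the systemd service and
  # the launchd agent below.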
  ollamaArgs = [
    "${ollama}/bin/ollama"
    "serve"
  ];

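  # Render an attribute set as the "KEY=value" strings expected by systemd's
  # Environment= setting, shell-quoting each value.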
  toEnvironmentCfg = vars: mapAttrsToList (k: v: "${k}=${escapeShellArg v}") vars;

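  # Environment for the server process; both service managers expect string
  # values, so booleans are rendered as "1"/"0" and integers via toString.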
  env = {
    OLLAMA_HOST = cfg.ollama.host;
    OLLAMA_FLASH_ATTENTION = if cfg.ollama.flashAttention then "1" else "0";
    OLLAMA_SCHED_SPREAD = if cfg.ollama.schedSpread then "1" else "0";
    OLLAMA_INTEL_GPU = if cfg.ollama.intelGpu then "1" else "0";
    OLLAMA_NEW_ENGINE = if cfg.ollama.newEngine then "1" else "0";
    OLLAMA_KV_CACHE_TYPE = cfg.ollama.kvCacheType;
    OLLAMA_CONTEXT_LENGTH = toString cfg.ollama.defaultContextLength;
    OLLAMA_MAX_LOADED_MODELS = toString cfg.ollama.maxLoadedModels;
  };
in {
  options.modules.apps.ollama = {
    enable = mkOption {
      default = false;
      description = "Whether to enable Ollama.";
      type = types.bool;
    };

    enableServer = mkOption {
      default = true;
      description = "Whether to enable Ollama's server.";
      type = types.bool;
    };

    package = mkOption {
      default = pkgs.ollama;
      description = "The Ollama package to use.";
      type = types.package;
    };

    host = mkOption {
      default = "http://0.0.0.0:11434";
      description = "Determines the host and port the server listens on.";
      type = types.str;
    };

    maxLoadedModels = mkOption {
      default = 3;
      description = "Maximum number of models that can be loaded concurrently.";
      type = types.int;
    };

    defaultContextLength = mkOption {
      default = 32768;
      description = "Default context length, in tokens, for loaded models.";
      type = types.int;
    };

    flashAttention = mkOption {
      default = true;
      description = ''
        Enables the experimental flash attention feature.
        Effect: Activates an experimental optimization for attention mechanisms.
        Scenario: Can potentially improve performance on compatible hardware but may introduce instability.
      '';
      type = types.bool;
    };

    kvCacheType = mkOption {
      default = "q8_0";
      type = types.enum [ "f16" "q8_0" "q4_0" ];
      description = ''
        Determines the K/V cache quantization type.
        Effect: Activates quantization of the K/V cache, reducing memory usage when used with flash attention.
        Scenario: Can lead to reduced VRAM usage at the cost of accuracy.
        Models with a higher Grouped Query Attention (GQA) count (e.g. Qwen 2) will see a larger negative impact.
      '';
    };

    schedSpread = mkOption {
      default = false;
      description = ''
        Allows scheduling models across all GPUs.
        Effect: Enables multi-GPU usage for model inference.
        Scenario: Beneficial in high-performance computing environments with multiple GPUs to maximize hardware utilization.
      '';
      type = types.bool;
    };

    newEngine = mkOption {
      default = true;
      description = "Whether to enable Ollama's new model engine.";
      type = types.bool;
    };

    intelGpu = mkOption {
      default = false;
      description = ''
        Enables experimental Intel GPU detection.
        Effect: Allows usage of Intel GPUs for model inference.
        Scenario: Useful for organizations leveraging Intel GPU hardware for AI workloads.
      '';
      type = types.bool;
    };
  };

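  # A minimal usage sketch (hypothetical host configuration; the parent
  # toggle `modules.apps.enable`, defined elsewhere in this module tree,
  # must be set as well):
  #
  #   modules.apps = {
  #     enable = true;
  #     ollama = {
  #       enable = true;
  #       kvCacheType = "q8_0";
  #       maxLoadedModels = 2;
  #     };
  #   };
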
  config = mkIf (cfg.enable && cfg.ollama.enable) (mkMerge [
    {
      home.packages = [ ollama ];
    }

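    # Linux: run the server as a systemd user service; systemd restarts it
    # automatically on failure.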
    (helpers.mkIfLinux {
      systemd.user.services.ollama = mkIf cfg.ollama.enableServer {
        Unit = {
          Description = "Ollama";
          Documentation = "https://github.com/jmorganca/ollama";
        };
        Install.WantedBy = [ "default.target" ];
        Service = {
          Environment = toEnvironmentCfg env;
          ExecStart = escapeShellArgs ollamaArgs;
          Restart = "on-failure";
          RestartSec = 5;
        };
      };
    })

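    # Darwin: run the server as a launchd agent; KeepAlive relaunches it
    # after a crash but not after a clean exit.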
    (helpers.mkIfDarwin {
      launchd.agents.ollama = mkIf cfg.ollama.enableServer {
        enable = true;
        config = {
          EnvironmentVariables = env;
          ProcessType = "Background";
          ProgramArguments = ollamaArgs;
          KeepAlive = {
            Crashed = true;
            SuccessfulExit = false;
          };
        };
      };
    })
  ]);
}