···
"${pkgs.ollama}/bin/ollama"
11
+
# Render an attrset of environment variables as a list of "KEY=value"
# strings, shell-escaping each value for safe use in a unit file.
toEnvironmentCfg = attrs: mapAttrsToList (name: value: "${name}=${escapeShellArg value}") attrs;
14
+
OLLAMA_HOST = cfg.ollama.host;
15
+
OLLAMA_FLASH_ATTENTION = if cfg.ollama.flashAttention then "1" else "0";
16
+
OLLAMA_SCHED_SPREAD = if cfg.ollama.schedSpread then "1" else "0";
17
+
OLLAMA_INTEL_GPU = if cfg.ollama.intelGpu then "1" else "0";
options.modules.apps.ollama = {
···
description = "Whether to enable Ollama.";
28
+
default = "http://0.0.0.0:11434";
29
+
description = "Determines the host and port to listen on";
33
+
flashAttention = mkOption {
36
+
Enables experimental flash attention feature.
37
+
Effect: Activates an experimental optimization for attention mechanisms.
38
+
Scenario: Can potentially improve performance on compatible hardware but may introduce instability.
43
+
schedSpread = mkOption {
46
+
Allows scheduling models across all GPUs.
47
+
Effect: Enables multi-GPU usage for model inference.
48
+
Scenario: Beneficial in high-performance computing environments with multiple GPUs to maximize hardware utilization.
53
+
intelGpu = mkOption {
56
+
Enables experimental Intel GPU detection.
57
+
Effect: Allows usage of Intel GPUs for model inference.
58
+
Scenario: Useful for organizations leveraging Intel GPU hardware for AI workloads.
config = mkIf (cfg.enable && cfg.ollama.enable) (mkMerge [
···
Install.WantedBy = [ "default.target" ];
77
+
Environment = toEnvironmentCfg env;
ExecStart = escapeShellArgs ollamaArgs;
···
launchd.agents.ollama = {
89
+
EnvironmentVariables = env;
ProcessType = "Background";
ProgramArguments = ollamaArgs;