···
"${pkgs.ollama}/bin/ollama"
+
toEnvironmentCfg = vars: mapAttrsToList (k: v: "${k}=${escapeShellArg v}") vars;
+
OLLAMA_HOST = cfg.ollama.host;
+
OLLAMA_FLASH_ATTENTION = if cfg.ollama.flashAttention then "1" else "0";
+
OLLAMA_SCHED_SPREAD = if cfg.ollama.schedSpread then "1" else "0";
+
OLLAMA_INTEL_GPU = if cfg.ollama.intelGpu then "1" else "0";
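# A minimal sketch of what toEnvironmentCfg yields for the env attrset above,
# assuming flashAttention = true and schedSpread = intelGpu = false. Note that
# escapeShellArg single-quotes each value and mapAttrsToList visits attribute
# names in alphabetical order:
#
#   toEnvironmentCfg env
#   => [
#        "OLLAMA_FLASH_ATTENTION='1'"
#        "OLLAMA_HOST='http://0.0.0.0:11434'"
#        "OLLAMA_INTEL_GPU='0'"
#        "OLLAMA_SCHED_SPREAD='0'"
#      ]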
options.modules.apps.ollama = {
···
description = "Whether to enable Ollama.";
+
default = "http://0.0.0.0:11434";
+
description = "Determines the host and port to listen on";
+
flashAttention = mkOption {
+
Enables experimental flash att ention feature.
+
Effect: Activates an experimental optimization for attention mechanisms.
+
Scenario: Can potentially improve performance on compatible hardware but may introduce instability.
+
schedSpread = mkOption {
+
Allows scheduling models across all GPUs.
+
Effect: Enables multi-GPU usage for model inference.
+
Scenario: Beneficial in high-performance computing environments with multiple GPUs to maximize hardware utilization.
+
Enables experimental Intel GPU detection.
+
Effect: Allows usage of Intel GPUs for model inference.
+
Scenario: Useful for organizations leveraging Intel GPU hardware for AI workloads.
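# A hypothetical user-side sketch of these options (the host/port value and the
# surrounding configuration are assumptions, not part of this module):
#
#   modules.apps = {
#     enable = true;              # parent switch checked by the mkIf below
#     ollama = {
#       enable = true;
#       host = "http://127.0.0.1:11434";
#       flashAttention = true;    # experimental; may be unstable
#       schedSpread = false;
#       intelGpu = false;
#     };
#   };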
config = mkIf (cfg.enable && cfg.ollama.enable) (mkMerge [
···
Install.WantedBy = [ "default.target" ];

Environment = toEnvironmentCfg env;
ExecStart = escapeShellArgs ollamaArgs;
···
launchd.agents.ollama = {

EnvironmentVariables = env;
ProcessType = "Background";
ProgramArguments = ollamaArgs;
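# Why the two shapes differ: systemd Environment= entries are flat "KEY=value"
# strings, so the attrset is run through toEnvironmentCfg and the command line
# is shell-escaped into a single ExecStart string, while launchd's
# EnvironmentVariables key is already a dictionary and ProgramArguments a plain
# argv list, so env and ollamaArgs pass through untouched. A rough sketch of
# the rendered systemd side (store path and trailing arguments are assumptions,
# not taken from this module):
#
#   [Service]
#   Environment=OLLAMA_FLASH_ATTENTION='1'
#   Environment=OLLAMA_HOST='http://0.0.0.0:11434'
#   ...
#   ExecStart='/nix/store/...-ollama-.../bin/ollama' ...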