# at 24.11-pre (3.0 kB, raw view)
# NixOS module for the LLaMA C++ HTTP server.
#
# Declares the `services.llama-cpp.*` options and, when the service is
# enabled, defines a hardened `llama-cpp` systemd unit that runs
# `llama-server` from the configured package. Optionally opens the listen
# port in the firewall.
{ config, lib, pkgs, utils, ... }:

let
  cfg = config.services.llama-cpp;
in {

  options = {

    services.llama-cpp = {
      enable = lib.mkEnableOption "LLaMA C++ server";

      package = lib.mkPackageOption pkgs "llama-cpp" { };

      # Passed to the server as the value of `-m`.
      model = lib.mkOption {
        type = lib.types.path;
        example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
        description = "Model path.";
      };

      # Appended verbatim (one list element per argv entry) after the
      # arguments generated from the options below.
      extraFlags = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        description = "Extra flags passed to llama-cpp-server.";
        example = ["-c" "4096" "-ngl" "32" "--numa" "numactl"];
        default = [];
      };

      host = lib.mkOption {
        type = lib.types.str;
        default = "127.0.0.1";
        example = "0.0.0.0";
        description = "IP address the LLaMA C++ server listens on.";
      };

      port = lib.mkOption {
        type = lib.types.port;
        default = 8080;
        description = "Listen port for LLaMA C++ server.";
      };

      openFirewall = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = "Open ports in the firewall for LLaMA C++ server.";
      };
    };

  };

  config = lib.mkIf cfg.enable {

    systemd.services.llama-cpp = {
      description = "LLaMA C++ server";
      after = ["network.target"];
      wantedBy = ["multi-user.target"];

      serviceConfig = {
        Type = "idle";
        KillSignal = "SIGINT";

        # Build the whole command line as an argv list and escape it in one
        # go. Interpolating `cfg.host` / `cfg.model` directly into the string
        # would let systemd split a path containing whitespace into several
        # arguments and expand any `%` specifier or `$` variable in it.
        # `cfg.model` is still coerced through string interpolation so that
        # path literals and derivations behave exactly as before.
        ExecStart = utils.escapeSystemdExecArgs (
          [
            "${cfg.package}/bin/llama-server"
            "--log-disable"
            "--host"
            cfg.host
            "--port"
            (builtins.toString cfg.port)
            "-m"
            "${cfg.model}"
          ]
          ++ cfg.extraFlags
        );
        Restart = "on-failure";
        RestartSec = 300;

        # for GPU acceleration
        PrivateDevices = false;

        # hardening
        DynamicUser = true;
        CapabilityBoundingSet = "";
        RestrictAddressFamilies = [
          "AF_INET"
          "AF_INET6"
          "AF_UNIX"
        ];
        NoNewPrivileges = true;
        PrivateMounts = true;
        PrivateTmp = true;
        PrivateUsers = true;
        ProtectClock = true;
        ProtectControlGroups = true;
        ProtectHome = true;
        ProtectKernelLogs = true;
        ProtectKernelModules = true;
        ProtectKernelTunables = true;
        ProtectSystem = "strict";
        MemoryDenyWriteExecute = true;
        LockPersonality = true;
        RemoveIPC = true;
        RestrictNamespaces = true;
        RestrictRealtime = true;
        RestrictSUIDSGID = true;
        SystemCallArchitectures = "native";
        SystemCallFilter = [
          "@system-service"
          "~@privileged"
        ];
        SystemCallErrorNumber = "EPERM";
        ProtectProc = "invisible";
        ProtectHostname = true;
        ProcSubset = "pid";
      };
    };

    # Only the TCP listen port is opened, and only on request.
    networking.firewall = lib.mkIf cfg.openFirewall {
      allowedTCPPorts = [ cfg.port ];
    };

  };

  meta.maintainers = with lib.maintainers; [ newam ];
}