at 25.11-pre 3.0 kB view raw
# NixOS module that runs the llama.cpp HTTP server (`llama-server`) as a
# systemd service, configured through `services.llama-cpp.*`.
{
  config,
  lib,
  pkgs,
  utils,
  ...
}:

let
  cfg = config.services.llama-cpp;

  # Complete argv for the server, kept as a list so that *every* element is
  # escaped by `utils.escapeSystemdExecArgs` below. Building the command line
  # by plain string interpolation would let a model path containing
  # whitespace be split into several arguments, and would leave `%` / `$` in
  # user-supplied values open to systemd specifier/variable expansion.
  serverArgs = [
    "${cfg.package}/bin/llama-server"
    "--log-disable"
    "--host"
    cfg.host
    "--port"
    (toString cfg.port)
    "-m"
    # Interpolated (rather than passed raw) so a Nix path value is coerced to
    # a string the same way the previous inline command line did.
    "${cfg.model}"
  ]
  ++ cfg.extraFlags;
in
{

  options = {

    services.llama-cpp = {
      enable = lib.mkEnableOption "LLaMA C++ server";

      package = lib.mkPackageOption pkgs "llama-cpp" { };

      # Passed to the server as `-m <model>`.
      model = lib.mkOption {
        type = lib.types.path;
        example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
        description = "Model path.";
      };

      # Appended verbatim (after escaping) to the end of the command line.
      extraFlags = lib.mkOption {
        type = lib.types.listOf lib.types.str;
        description = "Extra flags passed to llama-cpp-server.";
        example = [
          "-c"
          "4096"
          "-ngl"
          "32"
          "--numa"
          "numactl"
        ];
        default = [ ];
      };

      # Passed to the server as `--host <host>`.
      host = lib.mkOption {
        type = lib.types.str;
        default = "127.0.0.1";
        example = "0.0.0.0";
        description = "IP address the LLaMA C++ server listens on.";
      };

      # Passed to the server as `--port <port>`; also the port opened in the
      # firewall when `openFirewall` is set.
      port = lib.mkOption {
        type = lib.types.port;
        default = 8080;
        description = "Listen port for LLaMA C++ server.";
      };

      openFirewall = lib.mkOption {
        type = lib.types.bool;
        default = false;
        description = "Open ports in the firewall for LLaMA C++ server.";
      };
    };

  };

  config = lib.mkIf cfg.enable {

    systemd.services.llama-cpp = {
      description = "LLaMA C++ server";
      after = [ "network.target" ];
      wantedBy = [ "multi-user.target" ];

      serviceConfig = {
        Type = "idle";
        KillSignal = "SIGINT";
        ExecStart = utils.escapeSystemdExecArgs serverArgs;
        Restart = "on-failure";
        RestartSec = 300;

        # for GPU acceleration
        PrivateDevices = false;

        # hardening
        DynamicUser = true;
        CapabilityBoundingSet = "";
        RestrictAddressFamilies = [
          "AF_INET"
          "AF_INET6"
          "AF_UNIX"
        ];
        NoNewPrivileges = true;
        PrivateMounts = true;
        PrivateTmp = true;
        PrivateUsers = true;
        ProtectClock = true;
        ProtectControlGroups = true;
        ProtectHome = true;
        ProtectKernelLogs = true;
        ProtectKernelModules = true;
        ProtectKernelTunables = true;
        ProtectSystem = "strict";
        MemoryDenyWriteExecute = true;
        LockPersonality = true;
        RemoveIPC = true;
        RestrictNamespaces = true;
        RestrictRealtime = true;
        RestrictSUIDSGID = true;
        SystemCallArchitectures = "native";
        SystemCallFilter = [
          "@system-service"
          "~@privileged"
        ];
        SystemCallErrorNumber = "EPERM";
        ProtectProc = "invisible";
        ProtectHostname = true;
        ProcSubset = "pid";
      };
    };

    # Only touches the firewall when the user opted in via `openFirewall`.
    networking.firewall = lib.mkIf cfg.openFirewall {
      allowedTCPPorts = [ cfg.port ];
    };

  };

  meta.maintainers = with lib.maintainers; [ newam ];
}