1{ config, lib, pkgs, utils, ... }:
2
3let
4 cfg = config.services.llama-cpp;
5in {
6
# Option declarations for the llama.cpp HTTP server module.
# Every value declared here is read back through `config.services.llama-cpp`.
options.services.llama-cpp =
  let
    inherit (lib) mkEnableOption mkOption mkPackageOption types;
  in
  {
    # Master switch for the module.
    enable = mkEnableOption "LLaMA C++ server";

    # Package whose `bin/llama-server` is executed by the service.
    package = mkPackageOption pkgs "llama-cpp" { };

    # Model file handed to the server via `-m`; has no default, so it must be set.
    model = mkOption {
      description = "Model path.";
      type = types.path;
      example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
    };

    # Listen address and port, passed as `--host` / `--port`.
    host = mkOption {
      description = "IP address the LLaMA C++ server listens on.";
      type = types.str;
      default = "127.0.0.1";
      example = "0.0.0.0";
    };

    port = mkOption {
      description = "Listen port for LLaMA C++ server.";
      type = types.port;
      default = 8080;
    };

    # Additional command-line arguments appended after the built-in ones.
    extraFlags = mkOption {
      description = "Extra flags passed to llama-cpp-server.";
      type = types.listOf types.str;
      default = [ ];
      example = [
        "-c"
        "4096"
        "-ngl"
        "32"
        "--numa"
        "numactl"
      ];
    };

    # When true, the configured port is added to the firewall's allowed TCP ports.
    openFirewall = mkOption {
      description = "Open ports in the firewall for LLaMA C++ server.";
      type = types.bool;
      default = false;
    };
  };
48
# Implementation: only takes effect when `services.llama-cpp.enable` is true.
config = lib.mkIf cfg.enable {

  systemd.services.llama-cpp = {
    description = "LLaMA C++ server";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      Type = "idle";
      KillSignal = "SIGINT";
      # Every argument (not just extraFlags) goes through
      # escapeSystemdExecArgs, so a host or model path containing whitespace,
      # quotes, `%` or `$` reaches llama-server as one literal argument instead
      # of being word-split or expanded by systemd.
      ExecStart = "${cfg.package}/bin/llama-server ${
        utils.escapeSystemdExecArgs (
          [
            "--log-disable"
            "--host"
            cfg.host
            "--port"
            (builtins.toString cfg.port)
            "-m"
            cfg.model
          ]
          ++ cfg.extraFlags
        )
      }";
      Restart = "on-failure";
      RestartSec = 300;

      # for GPU acceleration
      PrivateDevices = false;

      # hardening
      DynamicUser = true;
      CapabilityBoundingSet = "";
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];
      NoNewPrivileges = true;
      PrivateMounts = true;
      PrivateTmp = true;
      PrivateUsers = true;
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectSystem = "strict";
      MemoryDenyWriteExecute = true;
      LockPersonality = true;
      RemoveIPC = true;
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      SystemCallArchitectures = "native";
      SystemCallFilter = [
        "@system-service"
        "~@privileged"
      ];
      SystemCallErrorNumber = "EPERM";
      ProtectProc = "invisible";
      ProtectHostname = true;
      ProcSubset = "pid";
    };
  };

  # Expose the listen port only when the user opted in via openFirewall.
  networking.firewall = lib.mkIf cfg.openFirewall {
    allowedTCPPorts = [ cfg.port ];
  };

};
108
# Module maintainers, referenced explicitly rather than through `with`.
meta.maintainers = [ lib.maintainers.newam ];
110}