1{
2 config,
3 lib,
4 pkgs,
5 utils,
6 ...
7}:
8
9let
10 cfg = config.services.llama-cpp;
11in
12{
13
options =
  let
    # Pull the option-building helpers into scope once instead of
    # spelling out the `lib.` prefix on every declaration.
    inherit (lib)
      mkEnableOption
      mkOption
      mkPackageOption
      types
      ;
  in
  {
    # User-facing settings of the llama-cpp service module.
    services.llama-cpp = {
      # Master switch; the `config` section of this module is gated on it.
      enable = mkEnableOption "LLaMA C++ server";

      # Package providing the server binary.
      package = mkPackageOption pkgs "llama-cpp" { };

      # Model file passed to the server with `-m`. Declared without a
      # default value.
      model = mkOption {
        description = "Model path.";
        example = "/models/mistral-instruct-7b/ggml-model-q4_0.gguf";
        type = types.path;
      };

      # Additional command-line arguments appended after the generated ones.
      extraFlags = mkOption {
        description = "Extra flags passed to llama-cpp-server.";
        type = types.listOf types.str;
        default = [ ];
        example = [
          "-c"
          "4096"
          "-ngl"
          "32"
          "--numa"
          "numactl"
        ];
      };

      # Listen address, passed to the server with `--host`.
      host = mkOption {
        description = "IP address the LLaMA C++ server listens on.";
        type = types.str;
        default = "127.0.0.1";
        example = "0.0.0.0";
      };

      # Listen port, passed to the server with `--port`; also the port that
      # is opened in the firewall when `openFirewall` is set.
      port = mkOption {
        description = "Listen port for LLaMA C++ server.";
        type = types.port;
        default = 8080;
      };

      # Opt-in firewall rule for the listen port.
      openFirewall = mkOption {
        description = "Open ports in the firewall for LLaMA C++ server.";
        type = types.bool;
        default = false;
      };
    };
  };
62
# System configuration produced by this module; only applied when
# `services.llama-cpp.enable` is set.
config = lib.mkIf cfg.enable {

  # systemd unit running the llama.cpp HTTP server.
  systemd.services.llama-cpp = {
    description = "LLaMA C++ server";
    after = [ "network.target" ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      Type = "idle";
      KillSignal = "SIGINT";

      # Build the command line as a list and escape *every* element for
      # systemd, not just `extraFlags`. With plain string interpolation a
      # model path or host value containing whitespace would be split into
      # several arguments, and `%` / `$` would be expanded by systemd.
      ExecStart = utils.escapeSystemdExecArgs (
        [
          "${cfg.package}/bin/llama-server"
          "--log-disable"
          "--host"
          cfg.host
          "--port"
          (builtins.toString cfg.port)
          "-m"
          # Interpolate explicitly so a Nix path value is coerced to a
          # string the same way as before.
          "${cfg.model}"
        ]
        ++ cfg.extraFlags
      );

      Restart = "on-failure";
      # Wait five minutes (300 s) between restart attempts.
      RestartSec = 300;

      # for GPU acceleration
      PrivateDevices = false;

      # hardening
      DynamicUser = true;
      CapabilityBoundingSet = "";
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];
      NoNewPrivileges = true;
      PrivateMounts = true;
      PrivateTmp = true;
      PrivateUsers = true;
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectSystem = "strict";
      MemoryDenyWriteExecute = true;
      LockPersonality = true;
      RemoveIPC = true;
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      SystemCallArchitectures = "native";
      SystemCallFilter = [
        "@system-service"
        "~@privileged"
      ];
      SystemCallErrorNumber = "EPERM";
      ProtectProc = "invisible";
      ProtectHostname = true;
      ProcSubset = "pid";
    };
  };

  # Open the listen port only when explicitly requested.
  networking.firewall = lib.mkIf cfg.openFirewall {
    allowedTCPPorts = [ cfg.port ];
  };

};
122
# Module maintainers, referenced by explicit attribute path rather than
# through a `with` scope.
meta.maintainers = [ lib.maintainers.newam ];
124}