1{
2 config,
3 lib,
4 pkgs,
5 ...
6}:
let
  # YAML format helper: provides the freeform type used by the `settings`
  # option as well as the generator for the configuration file below.
  settingsFormat = pkgs.formats.yaml { };

  # Shorthand for the option values declared by this module.
  cfg = config.services.llama-swap;

  # `cfg.settings` written out as "config.yaml"; handed to the service through
  # the `--config` flag.
  configFile = settingsFormat.generate "config.yaml" cfg.settings;
in
12{
options.services.llama-swap = {
  # `mkEnableOption` itself prepends "Whether to enable " and appends ".", so
  # only the noun phrase is passed (avoids "Whether to enable enable ...").
  enable = lib.mkEnableOption "the llama-swap service";

  package = lib.mkPackageOption pkgs "llama-swap" { };

  # TCP port handed to llama-swap via `--listen :<port>`.
  port = lib.mkOption {
    default = 8080;
    example = 11343;
    type = lib.types.port;
    description = ''
      Port that llama-swap listens on.
    '';
  };

  openFirewall = lib.mkOption {
    type = lib.types.bool;
    default = false;
    description = ''
      Whether to open the firewall for llama-swap.
      This adds {option}`services.llama-swap.port` to [](#opt-networking.firewall.allowedTCPPorts).
    '';
  };

  # Freeform settings: any value accepted by the YAML format is allowed and is
  # serialised into the configuration file passed to llama-swap.
  settings = lib.mkOption {
    type = lib.types.submodule { freeformType = settingsFormat.type; };
    description = ''
      llama-swap configuration. Refer to the [llama-swap example configuration](https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml)
      for details on supported values.
    '';
    # Escaping inside the indented string below: `''${` renders a literal `${`.
    # The rendered example therefore shows `${llama-server}` (interpolated by
    # Nix) and `\${PORT}` (escaped in Nix so the literal `${PORT}` reaches
    # llama-swap, which substitutes the port itself).
    example = lib.literalExpression ''
      let
        llama-cpp = pkgs.llama-cpp.override { rocmSupport = true; };
        llama-server = lib.getExe' llama-cpp "llama-server";
      in
      {
        healthCheckTimeout = 60;
        models = {
          "some-model" = {
            cmd = "''${llama-server} --port \''${PORT} -m /var/lib/llama-cpp/models/some-model.gguf -ngl 0 --no-webui";
            aliases = [
              "the-best"
            ];
          };
          "other-model" = {
            proxy = "http://127.0.0.1:5555";
            cmd = "''${llama-server} --port 5555 -m /var/lib/llama-cpp/models/other-model.gguf -ngl 0 -c 4096 -np 4 --no-webui";
            concurrencyLimit = 4;
          };
        };
      }
    '';
  };
};
# Everything below only takes effect when `services.llama-swap.enable` is set.
config = lib.mkIf cfg.enable {
  # Open the listening port only when the user opted in via `openFirewall`.
  networking.firewall.allowedTCPPorts = lib.mkIf cfg.openFirewall [ cfg.port ];

  systemd.services.llama-swap = {
    description = "Model swapping for LLaMA C++ Server (or any local OpenAPI compatible server)";
    wantedBy = [ "multi-user.target" ];
    after = [ "network.target" ];

    serviceConfig = {
      Type = "exec";
      # Command line assembled from its individual arguments; joined with
      # single spaces into one ExecStart string.
      ExecStart = lib.concatStringsSep " " [
        (lib.getExe cfg.package)
        "--listen"
        ":${toString cfg.port}"
        "--config"
        "${configFile}"
      ];
      Restart = "on-failure";
      RestartSec = 3;

      # Device nodes stay accessible so that GPU acceleration keeps working.
      PrivateDevices = false;

      # Sandboxing: run as a transient user without capabilities.
      DynamicUser = true;
      CapabilityBoundingSet = "";
      NoNewPrivileges = true;
      LockPersonality = true;
      MemoryDenyWriteExecute = true;
      RemoveIPC = true;

      # Sandboxing: private views of mounts, /tmp and users.
      PrivateMounts = true;
      PrivateTmp = true;
      PrivateUsers = true;

      # Sandboxing: protect host state.
      ProtectClock = true;
      ProtectControlGroups = true;
      ProtectHome = true;
      ProtectHostname = true;
      ProtectKernelLogs = true;
      ProtectKernelModules = true;
      ProtectKernelTunables = true;
      ProtectProc = "invisible";
      ProtectSystem = "strict";
      ProcSubset = "pid";

      # Sandboxing: restrict sockets, namespaces and scheduling.
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
        "AF_UNIX"
      ];
      RestrictNamespaces = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;

      # Sandboxing: system call filtering; denied calls fail with EPERM.
      SystemCallArchitectures = "native";
      SystemCallFilter = [
        "@system-service"
        "~@privileged"
      ];
      SystemCallErrorNumber = "EPERM";
    };
  };
};
119
# Maintainers responsible for this module, referenced by explicit attribute
# path instead of a `with` scope.
meta.maintainers = [
  lib.maintainers.jk
  lib.maintainers.podium868909
];
124}