# (source-viewer residue, not part of the module: "at master 3.7 kB view raw")
# NixOS module for llama-swap.
#
# Declares the `services.llama-swap.*` options and, when `enable` is set,
# a hardened systemd unit that starts the package with a YAML configuration
# file generated from `services.llama-swap.settings`.
{
  config,
  lib,
  pkgs,
  ...
}:
let
  cfg = config.services.llama-swap;

  # `settings` is serialised with the YAML format helper; the resulting store
  # path is passed to the daemon via `--config`.
  settingsFormat = pkgs.formats.yaml { };
  configFile = settingsFormat.generate "config.yaml" cfg.settings;
in
{
  options.services.llama-swap = {
    # mkEnableOption already prefixes the text with "Whether to enable ",
    # so the argument must not repeat the word "enable".
    enable = lib.mkEnableOption "the llama-swap service";

    package = lib.mkPackageOption pkgs "llama-swap" { };

    port = lib.mkOption {
      default = 8080;
      example = 11343;
      type = lib.types.port;
      description = ''
        Port that llama-swap listens on.
      '';
    };

    openFirewall = lib.mkOption {
      type = lib.types.bool;
      default = false;
      description = ''
        Whether to open the firewall for llama-swap.
        This adds {option}`port` to [](#opt-networking.firewall.allowedTCPPorts).
      '';
    };

    settings = lib.mkOption {
      # Freeform: any value accepted by the YAML format type may be set here.
      type = lib.types.submodule { freeformType = settingsFormat.type; };
      description = ''
        llama-swap configuration. Refer to the [llama-swap example configuration](https://github.com/mostlygeek/llama-swap/blob/main/config.example.yaml)
        for details on supported values.
      '';
      # Escaping inside the indented string below:
      #   ''${llama-server}  renders as  ${llama-server}  (a Nix interpolation
      #                      of the `let` binding, evaluated in the user's config)
      #   \''${PORT}         renders as  \${PORT}  (an escaped dollar, so the
      #                      user's string contains the literal text ${PORT})
      example = lib.literalExpression ''
        let
          llama-cpp = pkgs.llama-cpp.override { rocmSupport = true; };
          llama-server = lib.getExe' llama-cpp "llama-server";
        in
        {
          healthCheckTimeout = 60;
          models = {
            "some-model" = {
              cmd = "''${llama-server} --port \''${PORT} -m /var/lib/llama-cpp/models/some-model.gguf -ngl 0 --no-webui";
              aliases = [
                "the-best"
              ];
            };
            "other-model" = {
              proxy = "http://127.0.0.1:5555";
              cmd = "''${llama-server} --port 5555 -m /var/lib/llama-cpp/models/other-model.gguf -ngl 0 -c 4096 -np 4 --no-webui";
              concurrencyLimit = 4;
            };
          };
        };
      '';
    };
  };

  config = lib.mkIf cfg.enable {
    systemd.services.llama-swap = {
      description = "Model swapping for LLaMA C++ Server (or any local OpenAPI compatible server)";
      after = [ "network.target" ];
      wantedBy = [ "multi-user.target" ];

      serviceConfig = {
        Type = "exec";
        # Listens on all interfaces at the configured port and reads the
        # generated YAML file.
        ExecStart = "${lib.getExe cfg.package} --listen :${toString cfg.port} --config ${configFile}";
        Restart = "on-failure";
        RestartSec = 3;

        # for GPU acceleration
        PrivateDevices = false;

        # hardening
        DynamicUser = true;
        CapabilityBoundingSet = "";
        RestrictAddressFamilies = [
          "AF_INET"
          "AF_INET6"
          "AF_UNIX"
        ];
        NoNewPrivileges = true;
        PrivateMounts = true;
        PrivateTmp = true;
        PrivateUsers = true;
        ProtectClock = true;
        ProtectControlGroups = true;
        ProtectHome = true;
        ProtectKernelLogs = true;
        ProtectKernelModules = true;
        ProtectKernelTunables = true;
        ProtectSystem = "strict";
        MemoryDenyWriteExecute = true;
        LockPersonality = true;
        RemoveIPC = true;
        RestrictNamespaces = true;
        RestrictRealtime = true;
        RestrictSUIDSGID = true;
        SystemCallArchitectures = "native";
        SystemCallFilter = [
          "@system-service"
          "~@privileged"
        ];
        SystemCallErrorNumber = "EPERM";
        ProtectProc = "invisible";
        ProtectHostname = true;
        ProcSubset = "pid";
      };
    };

    # Only touches the firewall when the user opted in via `openFirewall`.
    networking.firewall = lib.mkIf cfg.openFirewall { allowedTCPPorts = [ cfg.port ]; };
  };

  meta.maintainers = with lib.maintainers; [
    jk
    podium868909
  ];
}