1{
2 config,
3 lib,
4 pkgs,
5 ...
6}:
7let
8 cfg = config.services.rke2;
9in
10{
11 imports = [ ];
12
# Option declarations for the RKE2 service.
#
# String options that default to "" and path/str options that default to
# `null` are treated as "not set": the corresponding command-line flag is only
# emitted when the value differs from that default.
options.services.rke2 = {
  enable = lib.mkEnableOption "rke2";

  package = lib.mkPackageOption pkgs "rke2" { };

  role = lib.mkOption {
    type = lib.types.enum [
      "server"
      "agent"
    ];
    description = ''
      Whether rke2 should run as a server or agent.

      If it's a server:

      - By default it also runs workloads as an agent.
      - All optional settings are allowed.

      If it's an agent:

      - `serverAddr` is required.
      - `token` or `tokenFile` is required.
      - `agentToken` or `agentTokenFile` or `disable` or `cni` are not allowed.
    '';
    default = "server";
  };

  configPath = lib.mkOption {
    type = lib.types.path;
    description = "Load configuration from FILE.";
    default = "/etc/rancher/rke2/config.yaml";
  };

  debug = lib.mkOption {
    type = lib.types.bool;
    description = "Turn on debug logs.";
    default = false;
  };

  dataDir = lib.mkOption {
    type = lib.types.path;
    description = "The folder to hold state in.";
    default = "/var/lib/rancher/rke2";
  };

  token = lib.mkOption {
    type = lib.types.str;
    description = ''
      Shared secret used to join a server or agent to a cluster.

      > **WARNING**: This option will store your token unencrypted and world-readable in the Nix store.
      If this is undesired use the `tokenFile` option instead.
    '';
    default = "";
  };

  tokenFile = lib.mkOption {
    type = lib.types.nullOr lib.types.path;
    description = "File path containing rke2 token to use when connecting to the server.";
    default = null;
  };

  disable = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    description = "Do not deploy packaged components and delete any deployed components.";
    default = [ ];
  };

  nodeName = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    description = "Node name.";
    default = null;
  };

  nodeLabel = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    description = "Registering and starting kubelet with set of labels.";
    default = [ ];
  };

  nodeTaint = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    description = "Registering kubelet with set of taints.";
    default = [ ];
  };

  nodeIP = lib.mkOption {
    type = lib.types.nullOr lib.types.str;
    description = "IPv4/IPv6 addresses to advertise for node.";
    default = null;
  };

  agentToken = lib.mkOption {
    type = lib.types.str;
    description = ''
      Shared secret used to join agents to the cluster, but not servers.

      > **WARNING**: This option will store your token unencrypted and world-readable in the Nix store.
      If this is undesired use the `agentTokenFile` option instead.
    '';
    default = "";
  };

  agentTokenFile = lib.mkOption {
    type = lib.types.nullOr lib.types.path;
    description = "File path containing rke2 agent token to use when connecting to the server.";
    default = null;
  };

  serverAddr = lib.mkOption {
    type = lib.types.str;
    description = "The rke2 server to connect to, used to join a cluster.";
    example = "https://10.0.0.10:6443";
    default = "";
  };

  selinux = lib.mkOption {
    type = lib.types.bool;
    description = "Enable SELinux in containerd.";
    default = false;
  };

  cni = lib.mkOption {
    type = lib.types.enum [
      "none"
      "canal"
      "cilium"
      "calico"
      "flannel"
    ];
    description = ''
      CNI Plugins to deploy, one of `none`, `calico`, `canal`, `cilium` or `flannel`.

      All CNI plugins get installed via a helm chart after the main components are up and running
      and can be [customized by modifying the helm chart options](https://docs.rke2.io/helm).

      [Learn more about RKE2 and CNI plugins](https://docs.rke2.io/networking/basic_network_options)

      > **WARNING**: Flannel support in RKE2 is currently experimental.
    '';
    default = "canal";
  };

  cisHardening = lib.mkOption {
    type = lib.types.bool;
    description = ''
      Enable CIS Hardening for RKE2.

      It will set the configurations and controls required to address Kubernetes benchmark controls
      from the Center for Internet Security (CIS).

      Learn more about [CIS Hardening for RKE2](https://docs.rke2.io/security/hardening_guide).

      > **NOTICE**:
      >
      > You may need to restart `systemd-sysctl` manually by running:
      >
      > ```shell
      > sudo systemctl restart systemd-sysctl
      > ```
    '';
    default = false;
  };

  extraFlags = lib.mkOption {
    type = lib.types.listOf lib.types.str;
    description = ''
      Extra flags to pass to the rke2 service/agent.

      Here you can find all the available flags:

      - [Server Configuration Reference](https://docs.rke2.io/reference/server_config)
      - [Agent Configuration Reference](https://docs.rke2.io/reference/linux_agent_config)
    '';
    example = [
      "--disable-kube-proxy"
      "--cluster-cidr=10.24.0.0/16"
    ];
    default = [ ];
  };

  environmentVars = lib.mkOption {
    type = lib.types.attrsOf lib.types.str;
    description = ''
      Environment variables for configuring the rke2 service/agent.

      Here you can find all the available environment variables:

      - [Server Configuration Reference](https://docs.rke2.io/reference/server_config)
      - [Agent Configuration Reference](https://docs.rke2.io/reference/linux_agent_config)

      Besides the options above, you can also set environment variables by creating or editing
      the following files, where `<role>` is the configured `role` (`server` or `agent`):

      - `/etc/default/rke2-<role>`
      - `/etc/sysconfig/rke2-<role>`
      - `/usr/local/lib/systemd/system/rke2-<role>.env`
    '';
    # See: https://github.com/rancher/rke2/blob/master/bundle/lib/systemd/system/rke2-server.env#L1
    default = {
      HOME = "/root";
    };
  };
};
216
217 config = lib.mkIf cfg.enable {
# Evaluation-time sanity checks. All of them only constrain the "agent" role;
# a server is allowed to use every option.
assertions =
  let
    isAgent = cfg.role == "agent";
    # NOTE(review): `builtins.pathExists` is resolved while the configuration
    # is being evaluated, so it looks at the evaluating machine's filesystem —
    # confirm this matches the intended deployment workflow.
    hasConfigFile = builtins.pathExists cfg.configPath;
  in
  [
    # An agent needs to know which server to join, either via the option or
    # via a pre-existing config file.
    {
      assertion = isAgent -> (hasConfigFile || cfg.serverAddr != "");
      message = "serverAddr or configPath (with 'server' key) should be set if role is 'agent'";
    }
    # An agent needs a join token from one of the three possible sources.
    {
      assertion = isAgent -> (hasConfigFile || cfg.tokenFile != null || cfg.token != "");
      message = "token or tokenFile or configPath (with 'token' or 'token-file' keys) should be set if role is 'agent'";
    }
    # The remaining options are server-only and must be left at their defaults
    # on an agent.
    {
      assertion = isAgent -> (cfg.agentTokenFile == null && cfg.agentToken == "");
      # The assertion fails when one of these IS set, so the message must tell
      # the user not to set them.
      message = "agentToken or agentTokenFile should NOT be set if role is 'agent'";
    }
    {
      assertion = isAgent -> cfg.disable == [ ];
      message = "disable should not be set if role is 'agent'";
    }
    {
      # "canal" is the option's default value, i.e. "not set".
      assertion = isAgent -> cfg.cni == "canal";
      message = "cni should not be set if role is 'agent'";
    }
  ];
242
# Put the selected rke2 package on the system PATH.
environment.systemPackages = [ cfg.package ];
# Tell NetworkManager to leave calico/flannel related network interfaces alone.
# The file is only installed when NetworkManager itself is enabled.
# See: https://docs.rke2.io/known_issues#networkmanager
environment.etc."NetworkManager/conf.d/rke2-canal.conf" = {
  inherit (config.networking.networkmanager) enable;
  text = ''
    [keyfile]
    unmanaged-devices=interface-name:cali*;interface-name:flannel*
  '';
};
# Kernel parameters applied only when CIS hardening is switched on.
# See: https://docs.rke2.io/security/hardening_guide#set-kernel-parameters
boot.kernel.sysctl =
  let
    cisKernelParameters = {
      "kernel.panic" = 10;
      "kernel.panic_on_oops" = 1;
      "vm.overcommit_memory" = 1;
      "vm.panic_on_oom" = 0;
    };
  in
  lib.mkIf cfg.cisHardening cisKernelParameters;
260
# The systemd unit that runs rke2. It is named after the configured role, so
# the resulting unit is either `rke2-server.service` or `rke2-agent.service`.
systemd.services."rke2-${cfg.role}" = {
  description = "Rancher Kubernetes Engine v2";
  documentation = [ "https://github.com/rancher/rke2#readme" ];
  after = [ "network-online.target" ];
  wants = [ "network-online.target" ];
  wantedBy = [ "multi-user.target" ];
  serviceConfig = {
    Type = if cfg.role == "agent" then "exec" else "notify";
    # The leading "-" makes each file optional. `%N` is a systemd specifier
    # that is expanded by systemd, not by Nix.
    EnvironmentFile = [
      "-/etc/default/%N"
      "-/etc/sysconfig/%N"
      "-/usr/local/lib/systemd/system/%N.env"
    ];
    # One `KEY=value` entry per attribute of `environmentVars`.
    Environment = lib.mapAttrsToList (k: v: "${k}=${v}") cfg.environmentVars;
    KillMode = "process";
    Delegate = "yes";
    LimitNOFILE = 1048576;
    LimitNPROC = "infinity";
    LimitCORE = "infinity";
    TasksMax = "infinity";
    TimeoutStartSec = 0;
    Restart = "always";
    RestartSec = "5s";
    ExecStartPre = [
      # There is a conflict between RKE2 and `nm-cloud-setup.service`. That service adds a routing table
      # that interferes with the CNI plugin's configuration. This script checks whether the service is
      # enabled and, if so, fails the RKE2 start.
      # See: https://github.com/rancher/rke2/issues/1053
      (pkgs.writeScript "check-nm-cloud-setup.sh" ''
        #! ${pkgs.runtimeShell}
        set -x
        ! /run/current-system/systemd/bin/systemctl is-enabled --quiet nm-cloud-setup.service
      '')
      # Best-effort kernel module loading; the "-" prefix ignores failures.
      "-${pkgs.kmod}/bin/modprobe br_netfilter"
      "-${pkgs.kmod}/bin/modprobe overlay"
    ];
    # Each flag is only emitted when its option differs from the default.
    # Server-only flags are additionally guarded by the role.
    ExecStart = "${cfg.package}/bin/rke2 '${cfg.role}' ${
      lib.escapeShellArgs (
        (lib.optional (cfg.configPath != "/etc/rancher/rke2/config.yaml") "--config=${cfg.configPath}")
        ++ (lib.optional cfg.debug "--debug")
        ++ (lib.optional (cfg.dataDir != "/var/lib/rancher/rke2") "--data-dir=${cfg.dataDir}")
        ++ (lib.optional (cfg.token != "") "--token=${cfg.token}")
        ++ (lib.optional (cfg.tokenFile != null) "--token-file=${cfg.tokenFile}")
        ++ (lib.optionals (cfg.role == "server" && cfg.disable != [ ]) (
          map (d: "--disable=${d}") cfg.disable
        ))
        ++ (lib.optional (cfg.nodeName != null) "--node-name=${cfg.nodeName}")
        ++ (lib.optionals (cfg.nodeLabel != [ ]) (map (l: "--node-label=${l}") cfg.nodeLabel))
        ++ (lib.optionals (cfg.nodeTaint != [ ]) (map (t: "--node-taint=${t}") cfg.nodeTaint))
        ++ (lib.optional (cfg.nodeIP != null) "--node-ip=${cfg.nodeIP}")
        ++ (lib.optional (cfg.role == "server" && cfg.agentToken != "") "--agent-token=${cfg.agentToken}")
        ++ (lib.optional (
          cfg.role == "server" && cfg.agentTokenFile != null
        ) "--agent-token-file=${cfg.agentTokenFile}")
        ++ (lib.optional (cfg.serverAddr != "") "--server=${cfg.serverAddr}")
        ++ (lib.optional cfg.selinux "--selinux")
        ++ (lib.optional (cfg.role == "server" && cfg.cni != "canal") "--cni=${cfg.cni}")
        # Use a real version comparison: `>=` on strings compares them
        # lexicographically, which would sort e.g. "1.100" below "1.25".
        ++ (lib.optional cfg.cisHardening "--profile=${
          if lib.versionAtLeast cfg.package.version "1.25" then "cis-1.23" else "cis-1.6"
        }")
        ++ cfg.extraFlags
      )
    }";
    ExecStopPost =
      let
        # Lists the unit's control group (the unit name is passed as $1) and
        # sends a kill signal to every containerd/kubelet process found in it.
        killProcess = pkgs.writeScript "kill-process.sh" ''
          #! ${pkgs.runtimeShell}
          /run/current-system/systemd/bin/systemd-cgls /system.slice/$1 | \
          ${pkgs.gnugrep}/bin/grep -Eo '[0-9]+ (containerd|kubelet)' | \
          ${pkgs.gawk}/bin/awk '{print $1}' | \
          ${pkgs.findutils}/bin/xargs -r ${pkgs.util-linux}/bin/kill
        '';
      in
      # "-" ignores a failing cleanup; `%n` is expanded by systemd to the full unit name.
      "-${killProcess} %n";
  };
};
337 };
338}