nixos/rke2: make tests work in test driver sandbox (#395775)

rorosen cb60a011 b222bbdb

Changed files
+198 -167
nixos
pkgs
applications
networking
cluster
+128 -113
nixos/tests/rke2/multi-node.nix
···
...
}:
let
-
pauseImage = pkgs.dockerTools.streamLayeredImage {
-
name = "test.local/pause";
+
throwSystem = throw "RKE2: Unsupported system: ${pkgs.stdenv.hostPlatform.system}";
+
coreImages =
+
{
+
aarch64-linux = rke2.images-core-linux-arm64-tar-zst;
+
x86_64-linux = rke2.images-core-linux-amd64-tar-zst;
+
}
+
.${pkgs.stdenv.hostPlatform.system} or throwSystem;
+
canalImages =
+
{
+
aarch64-linux = rke2.images-canal-linux-arm64-tar-zst;
+
x86_64-linux = rke2.images-canal-linux-amd64-tar-zst;
+
}
+
.${pkgs.stdenv.hostPlatform.system} or throwSystem;
+
helloImage = pkgs.dockerTools.buildImage {
+
name = "test.local/hello";
tag = "local";
-
contents = pkgs.buildEnv {
-
name = "rke2-pause-image-env";
+
compressor = "zstd";
+
copyToRoot = pkgs.buildEnv {
+
name = "rke2-hello-image-env";
paths = with pkgs; [
-
tini
-
bashInteractive
coreutils
socat
];
};
-
config.Entrypoint = [
-
"/bin/tini"
-
"--"
-
"/bin/sleep"
-
"inf"
-
];
};
-
# A daemonset that responds 'server' on port 8000
+
# A daemonset that responds 'hello' on port 8000
networkTestDaemonset = pkgs.writeText "test.yml" ''
apiVersion: apps/v1
kind: DaemonSet
···
spec:
containers:
- name: test
-
image: test.local/pause:local
+
image: test.local/hello:local
imagePullPolicy: Never
resources:
limits:
memory: 20Mi
-
command: ["socat", "TCP4-LISTEN:8000,fork", "EXEC:echo server"]
+
command: ["socat", "TCP4-LISTEN:8000,fork", "EXEC:echo hello"]
'';
tokenFile = pkgs.writeText "token" "p@s$w0rd";
-
agentTokenFile = pkgs.writeText "agent-token" "p@s$w0rd";
+
agentTokenFile = pkgs.writeText "agent-token" "agentP@s$w0rd";
+
# Let flannel use eth1 to enable inter-node communication in tests
+
canalConfig = pkgs.writeText "rke2-canal-config.yaml" ''
+
apiVersion: helm.cattle.io/v1
+
kind: HelmChartConfig
+
metadata:
+
name: rke2-canal
+
namespace: kube-system
+
spec:
+
valuesContent: |-
+
flannel:
+
iface: "eth1"
+
'';
in
{
name = "${rke2.name}-multi-node";
meta.maintainers = rke2.meta.maintainers;
nodes = {
-
server1 =
-
{ pkgs, ... }:
+
server =
+
{
+
config,
+
nodes,
+
pkgs,
+
...
+
}:
{
-
networking.firewall.enable = false;
-
networking.useDHCP = false;
-
networking.defaultGateway = "192.168.1.1";
-
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
-
{
-
address = "192.168.1.1";
-
prefixLength = 24;
-
}
+
# Setup image archives to be imported by rke2
+
systemd.tmpfiles.settings."10-rke2" = {
+
"/var/lib/rancher/rke2/agent/images/rke2-images-core.tar.zst" = {
+
"L+".argument = "${coreImages}";
+
};
+
"/var/lib/rancher/rke2/agent/images/rke2-images-canal.tar.zst" = {
+
"L+".argument = "${canalImages}";
+
};
+
"/var/lib/rancher/rke2/agent/images/hello.tar.zst" = {
+
"L+".argument = "${helloImage}";
+
};
+
# Copy the canal config so that rke2 can write the remaining default values to it
+
"/var/lib/rancher/rke2/server/manifests/rke2-canal-config.yaml" = {
+
"C".argument = "${canalConfig}";
+
};
+
};
+
+
# Canal CNI with VXLAN
+
networking.firewall.allowedUDPPorts = [ 8472 ];
+
networking.firewall.allowedTCPPorts = [
+
# Kubernetes API
+
6443
+
# Canal CNI health checks
+
9099
+
# RKE2 supervisor API
+
9345
];
-
virtualisation.memorySize = 1536;
-
virtualisation.diskSize = 4096;
+
# RKE2 needs more resources than the default
+
virtualisation.cores = 4;
+
virtualisation.memorySize = 4096;
+
virtualisation.diskSize = 8092;
services.rke2 = {
enable = true;
role = "server";
+
package = rke2;
inherit tokenFile;
inherit agentTokenFile;
-
nodeName = "${rke2.name}-server1";
-
package = rke2;
-
nodeIP = "192.168.1.1";
+
# Without nodeIP the apiserver starts with the wrong service IP family
+
nodeIP = config.networking.primaryIPAddress;
disable = [
"rke2-coredns"
"rke2-metrics-server"
"rke2-ingress-nginx"
-
];
-
extraFlags = [
-
"--cluster-reset"
+
"rke2-snapshot-controller"
+
"rke2-snapshot-controller-crd"
+
"rke2-snapshot-validation-webhook"
];
};
};
-
server2 =
-
{ pkgs, ... }:
+
agent =
+
{
+
config,
+
nodes,
+
pkgs,
+
...
+
}:
{
-
networking.firewall.enable = false;
-
networking.useDHCP = false;
-
networking.defaultGateway = "192.168.1.2";
-
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
-
{
-
address = "192.168.1.2";
-
prefixLength = 24;
-
}
-
];
-
-
virtualisation.memorySize = 1536;
-
virtualisation.diskSize = 4096;
-
-
services.rke2 = {
-
enable = true;
-
role = "server";
-
serverAddr = "https://192.168.1.1:6443";
-
inherit tokenFile;
-
inherit agentTokenFile;
-
nodeName = "${rke2.name}-server2";
-
package = rke2;
-
nodeIP = "192.168.1.2";
-
disable = [
-
"rke2-coredns"
-
"rke2-metrics-server"
-
"rke2-ingress-nginx"
-
];
+
# Setup image archives to be imported by rke2
+
systemd.tmpfiles.settings."10-rke2" = {
+
"/var/lib/rancher/rke2/agent/images/rke2-images-core.linux-amd64.tar.zst" = {
+
"L+".argument = "${coreImages}";
+
};
+
"/var/lib/rancher/rke2/agent/images/rke2-images-canal.linux-amd64.tar.zst" = {
+
"L+".argument = "${canalImages}";
+
};
+
"/var/lib/rancher/rke2/agent/images/hello.tar.zst" = {
+
"L+".argument = "${helloImage}";
+
};
+
"/var/lib/rancher/rke2/server/manifests/rke2-canal-config.yaml" = {
+
"C".argument = "${canalConfig}";
+
};
};
-
};
-
agent1 =
-
{ pkgs, ... }:
-
{
-
networking.firewall.enable = false;
-
networking.useDHCP = false;
-
networking.defaultGateway = "192.168.1.3";
-
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
-
{
-
address = "192.168.1.3";
-
prefixLength = 24;
-
}
-
];
+
# Canal CNI health checks
+
networking.firewall.allowedTCPPorts = [ 9099 ];
+
# Canal CNI with VXLAN
+
networking.firewall.allowedUDPPorts = [ 8472 ];
-
virtualisation.memorySize = 1536;
-
virtualisation.diskSize = 4096;
+
# The agent node can work with less resources
+
virtualisation.memorySize = 2048;
+
virtualisation.diskSize = 8092;
services.rke2 = {
enable = true;
role = "agent";
-
tokenFile = agentTokenFile;
-
serverAddr = "https://192.168.1.2:6443";
-
nodeName = "${rke2.name}-agent1";
package = rke2;
-
nodeIP = "192.168.1.3";
+
tokenFile = agentTokenFile;
+
serverAddr = "https://${nodes.server.networking.primaryIPAddress}:9345";
+
nodeIP = config.networking.primaryIPAddress;
};
};
};
···
testScript =
let
kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
-
ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
jq = "${pkgs.jq}/bin/jq";
-
ping = "${pkgs.iputils}/bin/ping";
in
+
# python
''
-
machines = [server1, server2, agent1]
-
-
for machine in machines:
-
machine.start()
-
machine.wait_for_unit("rke2")
+
start_all()
-
# wait for the agent to show up
-
server1.succeed("${kubectl} get node ${rke2.name}-agent1")
+
server.wait_for_unit("rke2-server")
+
agent.wait_for_unit("rke2-agent")
-
for machine in machines:
-
machine.succeed("${pauseImage} | ${ctr} image import -")
+
# Wait for the agent to be ready
+
server.wait_until_succeeds(r"""${kubectl} wait --for='jsonpath={.status.conditions[?(@.type=="Ready")].status}=True' nodes/agent""")
-
server1.succeed("${kubectl} cluster-info")
-
server1.wait_until_succeeds("${kubectl} get serviceaccount default")
+
server.succeed("${kubectl} cluster-info")
+
server.wait_until_succeeds("${kubectl} get serviceaccount default")
# Now create a pod on each node via a daemonset and verify they can talk to each other.
-
server1.succeed("${kubectl} apply -f ${networkTestDaemonset}")
-
server1.wait_until_succeeds(
+
server.succeed("${kubectl} apply -f ${networkTestDaemonset}")
+
server.wait_until_succeeds(
f'[ "$(${kubectl} get ds test -o json | ${jq} .status.numberReady)" -eq {len(machines)} ]'
)
# Get pod IPs
-
pods = server1.succeed("${kubectl} get po -o json | ${jq} '.items[].metadata.name' -r").splitlines()
+
pods = server.succeed("${kubectl} get po -o json | ${jq} '.items[].metadata.name' -r").splitlines()
pod_ips = [
-
server1.succeed(f"${kubectl} get po {n} -o json | ${jq} '.status.podIP' -cr").strip() for n in pods
+
server.succeed(f"${kubectl} get po {n} -o json | ${jq} '.status.podIP' -cr").strip() for n in pods
]
-
# Verify each server can ping each pod ip
+
# Verify each node can ping each pod ip
for pod_ip in pod_ips:
-
server1.succeed(f"${ping} -c 1 {pod_ip}")
-
agent1.succeed(f"${ping} -c 1 {pod_ip}")
-
-
# Verify the pods can talk to each other
-
resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[0]} -- socat TCP:{pod_ips[1]}:8000 -")
-
assert resp.strip() == "server"
-
resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[1]} -- socat TCP:{pod_ips[0]}:8000 -")
-
assert resp.strip() == "server"
-
-
# Cleanup
-
server1.succeed("${kubectl} delete -f ${networkTestDaemonset}")
-
for machine in machines:
-
machine.shutdown()
+
# The CNI sometimes needs a little time
+
server.wait_until_succeeds(f"ping -c 1 {pod_ip}", timeout=5)
+
agent.wait_until_succeeds(f"ping -c 1 {pod_ip}", timeout=5)
+
# Verify the server can exec into the pod
+
# for pod in pods:
+
# resp = server.succeed(f"${kubectl} exec {pod} -- socat TCP:{pod_ip}:8000 -")
+
# assert resp.strip() == "hello", f"Unexpected response from hello daemonset: {resp.strip()}"
'';
}
)
+61 -51
nixos/tests/rke2/single-node.nix
···
...
}:
let
-
pauseImage = pkgs.dockerTools.streamLayeredImage {
-
name = "test.local/pause";
+
throwSystem = throw "RKE2: Unsupported system: ${pkgs.stdenv.hostPlatform.system}";
+
coreImages =
+
{
+
aarch64-linux = rke2.images-core-linux-arm64-tar-zst;
+
x86_64-linux = rke2.images-core-linux-amd64-tar-zst;
+
}
+
.${pkgs.stdenv.hostPlatform.system} or throwSystem;
+
canalImages =
+
{
+
aarch64-linux = rke2.images-canal-linux-arm64-tar-zst;
+
x86_64-linux = rke2.images-canal-linux-amd64-tar-zst;
+
}
+
.${pkgs.stdenv.hostPlatform.system} or throwSystem;
+
helloImage = pkgs.dockerTools.buildImage {
+
name = "test.local/hello";
tag = "local";
-
contents = pkgs.buildEnv {
-
name = "rke2-pause-image-env";
-
paths = with pkgs; [
-
tini
-
(hiPrio coreutils)
-
busybox
-
];
-
};
-
config.Entrypoint = [
-
"/bin/tini"
-
"--"
-
"/bin/sleep"
-
"inf"
-
];
+
compressor = "zstd";
+
copyToRoot = pkgs.hello;
+
config.Entrypoint = [ "${pkgs.hello}/bin/hello" ];
};
-
testPodYaml = pkgs.writeText "test.yaml" ''
-
apiVersion: v1
-
kind: Pod
+
testJobYaml = pkgs.writeText "test.yaml" ''
+
apiVersion: batch/v1
+
kind: Job
metadata:
name: test
spec:
-
containers:
-
- name: test
-
image: test.local/pause:local
-
imagePullPolicy: Never
-
command: ["sh", "-c", "sleep inf"]
+
template:
+
spec:
+
containers:
+
- name: test
+
image: "test.local/hello:local"
+
restartPolicy: Never
'';
in
{
name = "${rke2.name}-single-node";
meta.maintainers = rke2.meta.maintainers;
-
nodes.machine =
-
{ pkgs, ... }:
{
-
networking.firewall.enable = false;
-
networking.useDHCP = false;
-
networking.defaultGateway = "192.168.1.1";
-
networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
-
{
-
address = "192.168.1.1";
-
prefixLength = 24;
-
}
-
];
+
config,
+
nodes,
+
pkgs,
+
...
+
}:
+
{
+
# Setup image archives to be imported by rke2
+
systemd.tmpfiles.settings."10-rke2" = {
+
"/var/lib/rancher/rke2/agent/images/rke2-images-core.tar.zst" = {
+
"L+".argument = "${coreImages}";
+
};
+
"/var/lib/rancher/rke2/agent/images/rke2-images-canal.tar.zst" = {
+
"L+".argument = "${canalImages}";
+
};
+
"/var/lib/rancher/rke2/agent/images/hello.tar.zst" = {
+
"L+".argument = "${helloImage}";
+
};
+
};
-
virtualisation.memorySize = 1536;
-
virtualisation.diskSize = 4096;
+
# RKE2 needs more resources than the default
+
virtualisation.cores = 4;
+
virtualisation.memorySize = 4096;
+
virtualisation.diskSize = 8092;
services.rke2 = {
enable = true;
role = "server";
package = rke2;
-
nodeIP = "192.168.1.1";
+
# Without nodeIP the apiserver starts with the wrong service IP family
+
nodeIP = config.networking.primaryIPAddress;
+
# Slightly reduce resource consumption
disable = [
"rke2-coredns"
"rke2-metrics-server"
"rke2-ingress-nginx"
-
];
-
extraFlags = [
-
"--cluster-reset"
+
"rke2-snapshot-controller"
+
"rke2-snapshot-controller-crd"
+
"rke2-snapshot-validation-webhook"
];
};
};
···
testScript =
let
kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
-
ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
in
+
# python
''
start_all()
-
machine.wait_for_unit("rke2")
+
machine.wait_for_unit("rke2-server")
machine.succeed("${kubectl} cluster-info")
-
machine.wait_until_succeeds(
-
"${pauseImage} | ${ctr} -n k8s.io image import -"
-
)
machine.wait_until_succeeds("${kubectl} get serviceaccount default")
-
machine.succeed("${kubectl} apply -f ${testPodYaml}")
-
machine.succeed("${kubectl} wait --for 'condition=Ready' pod/test")
-
machine.succeed("${kubectl} delete -f ${testPodYaml}")
-
-
machine.shutdown()
+
machine.succeed("${kubectl} apply -f ${testJobYaml}")
+
machine.wait_until_succeeds("${kubectl} wait --for 'condition=complete' job/test")
+
output = machine.succeed("${kubectl} logs -l batch.kubernetes.io/job-name=test")
+
assert output.rstrip() == "Hello, world!", f"unexpected output of test job: {output}"
'';
}
)
+9 -3
pkgs/applications/networking/cluster/rke2/builder.nix
···
passthru = {
inherit updateScript;
tests =
+
let
+
moduleTests =
+
let
+
package_version =
+
"rke2_" + lib.replaceStrings [ "." ] [ "_" ] (lib.versions.majorMinor rke2Version);
+
in
+
lib.mapAttrs (name: value: nixosTests.rke2.${name}.${package_version}) nixosTests.rke2;
+
in
{
version = testers.testVersion {
package = rke2;
version = "v${version}";
};
}
-
// lib.optionalAttrs stdenv.hostPlatform.isLinux {
-
inherit (nixosTests) rke2;
-
};
+
// moduleTests;
} // (lib.mapAttrs (_: value: fetchurl value) imagesVersions);
meta = with lib; {