nixos/kubernetes: docker -> containerd

also, nixos/containerd: module init

Changed files
+129 -185
+9
nixos/doc/manual/release-notes/rl-2105.xml
···
and use Maturin as their build tool.
</para>
</listitem>
</itemizedlist>
</section>
</section>
···
and use Maturin as their build tool.
</para>
</listitem>
+     <listitem>
+       <para>
+         Kubernetes has <link xlink:href="https://kubernetes.io/blog/2020/12/02/dont-panic-kubernetes-and-docker/">deprecated Docker</link> as a container runtime.
+         As a consequence, the Kubernetes module now supports configuring custom remote container runtimes and enables containerd by default.
+         Note that containerd is stricter about the OCI compliance of container images.
+         For example, images whose CMD or ENTRYPOINT is defined as a string (rather than a list) will fail on containerd, while working fine on Docker.
+         Please test your setup and container images with containerd prior to upgrading.
+       </para>
+     </listitem>
</itemizedlist>
</section>
</section>
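The OCI-compliance pitfall called out in this note is easy to reproduce with images built from nixpkgs. A minimal sketch using pkgs.dockerTools.buildImage (the image name and entrypoint are illustrative) of the list form containerd requires:

  pkgs.dockerTools.buildImage {
    name = "redis";   # illustrative
    tag = "latest";
    contents = [ pkgs.redis ];
    # config.Entrypoint = "/bin/redis-server";    # string form: works on Docker, fails on containerd
    config.Entrypoint = [ "/bin/redis-server" ];  # list form: OCI-compliant, works on both
  }

The test-image fixes later in this diff make exactly this change.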
+1
nixos/modules/module-list.nix
···
./testing/service-runner.nix
./virtualisation/anbox.nix
./virtualisation/container-config.nix
./virtualisation/containers.nix
./virtualisation/nixos-containers.nix
./virtualisation/oci-containers.nix
···
./testing/service-runner.nix
./virtualisation/anbox.nix
./virtualisation/container-config.nix
+ ./virtualisation/containerd.nix
./virtualisation/containers.nix
./virtualisation/nixos-containers.nix
./virtualisation/oci-containers.nix
-2
nixos/modules/services/cluster/kubernetes/apiserver.nix
···
account token issuer. The issuer will sign issued ID tokens with this
private key.
'';
-   default = top.serviceAccountSigningKeyFile;
type = path;
};
···
different files. If unspecified, --tls-private-key-file is used.
Must be specified when --service-account-signing-key is provided
'';
-   default = top.serviceAccountKeyFile;
type = path;
};
···
account token issuer. The issuer will sign issued ID tokens with this
private key.
'';
type = path;
};
···
different files. If unspecified, --tls-private-key-file is used.
Must be specified when --service-account-signing-key is provided
'';
type = path;
};
+25 -8
nixos/modules/services/cluster/kubernetes/default.nix
···
let
cfg = config.services.kubernetes;
mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON {
apiVersion = "v1";
kind = "Config";
···
})
(mkIf cfg.kubelet.enable {
-   virtualisation.docker = {
      enable = mkDefault true;
-
-     # kubernetes needs access to logs
-     logDriver = mkDefault "json-file";
-
-     # iptables must be disabled for kubernetes
-     extraOptions = "--iptables=false --ip-masq=false";
};
})
···
users.users.kubernetes = {
uid = config.ids.uids.kubernetes;
description = "Kubernetes user";
-   extraGroups = [ "docker" ];
group = "kubernetes";
home = cfg.dataDir;
createHome = true;
···
let
cfg = config.services.kubernetes;
+ defaultContainerdConfigFile = pkgs.writeText "containerd.toml" ''
+   version = 2
+   root = "/var/lib/containerd/daemon"
+   state = "/var/run/containerd/daemon"
+   oom_score = 0
+
+   [grpc]
+   address = "/var/run/containerd/containerd.sock"
+
+   [plugins."io.containerd.grpc.v1.cri"]
+   sandbox_image = "pause:latest"
+
+   [plugins."io.containerd.grpc.v1.cri".cni]
+   bin_dir = "/opt/cni/bin"
+   max_conf_num = 0
+
+   [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+   runtime_type = "io.containerd.runc.v2"
+
+   [plugins."io.containerd.grpc.v1.cri".containerd.runtimes."io.containerd.runc.v2".options]
+   SystemdCgroup = true
+ '';
+
mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON {
apiVersion = "v1";
kind = "Config";
···
})
(mkIf cfg.kubelet.enable {
+   virtualisation.containerd = {
      enable = mkDefault true;
+     configFile = mkDefault defaultContainerdConfigFile;
};
})
···
users.users.kubernetes = {
uid = config.ids.uids.kubernetes;
description = "Kubernetes user";
group = "kubernetes";
home = cfg.dataDir;
createHome = true;
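Since both enable and configFile are set with mkDefault here, a host configuration can still swap out the generated containerd config wholesale. A minimal sketch (the TOML body is illustrative, not the module default):

  { pkgs, ... }:
  {
    virtualisation.containerd.configFile = pkgs.writeText "containerd.toml" ''
      version = 2
      [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
      runtime_type = "io.containerd.runc.v2"
    '';
  }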
+2 -38
nixos/modules/services/cluster/kubernetes/flannel.nix
···
# we want flannel to use kubernetes itself as configuration backend, not direct etcd
storageBackend = "kubernetes";
-
- # needed for flannel to pass options to docker
- mkDockerOpts = pkgs.runCommand "mk-docker-opts" {
-   buildInputs = [ pkgs.makeWrapper ];
- } ''
-   mkdir -p $out
-
-   # bashInteractive needed for `compgen`
-   makeWrapper ${pkgs.bashInteractive}/bin/bash $out/mk-docker-opts --add-flags "${pkgs.kubernetes}/bin/mk-docker-opts.sh"
- '';
in
{
###### interface
···
cniVersion = "0.3.1";
delegate = {
isDefaultGateway = true;
-   bridge = "docker0";
};
}];
};
- systemd.services.mk-docker-opts = {
-   description = "Pre-Docker Actions";
-   path = with pkgs; [ gawk gnugrep ];
-   script = ''
-     ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker
-     systemctl restart docker
-   '';
-   serviceConfig.Type = "oneshot";
- };
-
- systemd.paths.flannel-subnet-env = {
-   wantedBy = [ "flannel.service" ];
-   pathConfig = {
-     PathModified = "/run/flannel/subnet.env";
-     Unit = "mk-docker-opts.service";
-   };
- };
-
- systemd.services.docker = {
-   environment.DOCKER_OPTS = "-b none";
-   serviceConfig.EnvironmentFile = "-/run/flannel/docker";
- };
-
- # read environment variables generated by mk-docker-opts
- virtualisation.docker.extraOptions = "$DOCKER_OPTS";
-
networking = {
firewall.allowedUDPPorts = [
8285 # flannel udp
8472 # flannel vxlan
];
-   dhcpcd.denyInterfaces = [ "docker*" "flannel*" ];
};
services.kubernetes.pki.certs = {
···
# we want flannel to use kubernetes itself as configuration backend, not direct etcd
storageBackend = "kubernetes";
in
{
###### interface
···
cniVersion = "0.3.1";
delegate = {
isDefaultGateway = true;
+   bridge = "mynet";
};
}];
};
networking = {
firewall.allowedUDPPorts = [
8285 # flannel udp
8472 # flannel vxlan
];
+   dhcpcd.denyInterfaces = [ "mynet*" "flannel*" ];
};
services.kubernetes.pki.certs = {
+20 -7
nixos/modules/services/cluster/kubernetes/kubelet.nix
···
name = "pause";
tag = "latest";
contents = top.package.pause;
-   config.Cmd = "/bin/pause";
};
kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
···
containerRuntimeEndpoint = mkOption {
description = "Endpoint at which to find the container runtime api interface/socket";
type = str;
-   default = "unix:///var/run/docker/containerd/containerd.sock";
};
enable = mkEnableOption "Kubernetes kubelet.";
···
###### implementation
config = mkMerge [
(mkIf cfg.enable {
services.kubernetes.kubelet.seedDockerImages = [infraContainer];
systemd.services.kubelet = {
description = "Kubernetes Kubelet Service";
wantedBy = [ "kubernetes.target" ];
-   after = [ "network.target" "kube-apiserver.service" "sockets.target" ];
path = with pkgs; [
gitMinimal
openssh
-   docker
util-linux
iproute
ethtool
···
] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path;
preStart = ''
${concatMapStrings (img: ''
-     echo "Seeding docker image: ${img}"
-     docker load <${img}
'') cfg.seedDockerImages}
rm /opt/cni/bin/* || true
···
${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
--container-runtime=${cfg.containerRuntime} \
--container-runtime-endpoint=${cfg.containerRuntimeEndpoint} \
${cfg.extraOpts}
'';
WorkingDirectory = top.dataDir;
···
# Always include cni plugins
services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins];
- boot.kernelModules = ["br_netfilter"];
services.kubernetes.kubelet.hostname = with config.networking;
mkDefault (hostName + optionalString (domain != null) ".${domain}");
···
name = "pause";
tag = "latest";
contents = top.package.pause;
+   config.Cmd = ["/bin/pause"];
};
kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig;
···
containerRuntimeEndpoint = mkOption {
description = "Endpoint at which to find the container runtime api interface/socket";
type = str;
+   default = "unix:///var/run/containerd/containerd.sock";
};
enable = mkEnableOption "Kubernetes kubelet.";
···
###### implementation
config = mkMerge [
(mkIf cfg.enable {
+
+   environment.etc."cni/net.d".source = cniConfig;
+
    services.kubernetes.kubelet.seedDockerImages = [infraContainer];
+   boot.kernel.sysctl = {
+     "net.bridge.bridge-nf-call-iptables" = 1;
+     "net.ipv4.ip_forward" = 1;
+     "net.bridge.bridge-nf-call-ip6tables" = 1;
+   };
+
systemd.services.kubelet = {
description = "Kubernetes Kubelet Service";
wantedBy = [ "kubernetes.target" ];
+   after = [ "containerd.service" "network.target" "kube-apiserver.service" ];
path = with pkgs; [
gitMinimal
openssh
util-linux
iproute
ethtool
···
] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path;
preStart = ''
${concatMapStrings (img: ''
+     echo "Seeding container image: ${img}"
+     ${if (lib.hasSuffix "gz" img) then
+       ''${pkgs.gzip}/bin/zcat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -''
+     else
+       ''${pkgs.coreutils}/bin/cat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -''
+     }
'') cfg.seedDockerImages}
rm /opt/cni/bin/* || true
···
${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \
--container-runtime=${cfg.containerRuntime} \
--container-runtime-endpoint=${cfg.containerRuntimeEndpoint} \
+   --cgroup-driver=systemd \
${cfg.extraOpts}
'';
WorkingDirectory = top.dataDir;
···
# Always include cni plugins
services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins];
+ boot.kernelModules = ["br_netfilter" "overlay"];
services.kubernetes.kubelet.hostname = with config.networking;
mkDefault (hostName + optionalString (domain != null) ".${domain}");
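The gz branch in the preStart above exists because dockerTools.buildImage emits gzip-compressed tarballs, which get piped through zcat before ctr imports them into the k8s.io namespace. A sketch of seeding an additional image through the same list (the image itself is illustrative):

  { pkgs, ... }:
  {
    services.kubernetes.kubelet.seedDockerImages = [
      (pkgs.dockerTools.buildImage {
        name = "probe";   # illustrative
        tag = "latest";
        contents = [ pkgs.busybox ];
        config.Entrypoint = [ "/bin/tail" ];  # list form, OCI-compliant
      })
    ];
  }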
+2 -4
nixos/modules/services/networking/flannel.nix
···
NODE_NAME = cfg.nodeName;
};
path = [ pkgs.iptables ];
-   preStart = ''
-     mkdir -p /run/flannel
-     touch /run/flannel/docker
-   '' + optionalString (cfg.storageBackend == "etcd") ''
echo "setting network configuration"
until ${pkgs.etcdctl}/bin/etcdctl set /coreos.com/network/config '${builtins.toJSON networkConfig}'
do
···
ExecStart = "${cfg.package}/bin/flannel";
Restart = "always";
RestartSec = "10s";
};
};
···
NODE_NAME = cfg.nodeName;
};
path = [ pkgs.iptables ];
+   preStart = optionalString (cfg.storageBackend == "etcd") ''
echo "setting network configuration"
until ${pkgs.etcdctl}/bin/etcdctl set /coreos.com/network/config '${builtins.toJSON networkConfig}'
do
···
ExecStart = "${cfg.package}/bin/flannel";
Restart = "always";
RestartSec = "10s";
+   RuntimeDirectory = "flannel";
};
};
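With the Docker plumbing gone, the unit only needs /run/flannel for its subnet.env, and RuntimeDirectory has systemd create that directory instead of the old preStart mkdir. A minimal sketch of the etcd-backed setup that the remaining preStart handles (only storageBackend appears in this diff; enabling flannel this way is an assumption):

  {
    services.flannel = {
      enable = true;
      storageBackend = "etcd";  # triggers the network-configuration preStart above
    };
  }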
+60
nixos/modules/virtualisation/containerd.nix
···
+ { pkgs, lib, config, ... }:
+ let
+   cfg = config.virtualisation.containerd;
+   containerdConfigChecked = pkgs.runCommand "containerd-config-checked.toml" { nativeBuildInputs = [pkgs.containerd]; } ''
+     containerd -c ${cfg.configFile} config dump >/dev/null
+     ln -s ${cfg.configFile} $out
+   '';
+ in
+ {
+
+   options.virtualisation.containerd = with lib.types; {
+     enable = lib.mkEnableOption "containerd container runtime";
+
+     configFile = lib.mkOption {
+       default = null;
+       description = "path to containerd config file";
+       type = nullOr path;
+     };
+
+     args = lib.mkOption {
+       default = {};
+       description = "extra args to append to the containerd cmdline";
+       type = attrsOf str;
+     };
+   };
+
+   config = lib.mkIf cfg.enable {
+     virtualisation.containerd.args.config = lib.mkIf (cfg.configFile != null) (toString containerdConfigChecked);
+
+     environment.systemPackages = [pkgs.containerd];
+
+     systemd.services.containerd = {
+       description = "containerd - container runtime";
+       wantedBy = [ "multi-user.target" ];
+       after = [ "network.target" ];
+       path = with pkgs; [
+         containerd
+         runc
+         iptables
+       ];
+       serviceConfig = {
+         ExecStart = ''${pkgs.containerd}/bin/containerd ${lib.concatStringsSep " " (lib.cli.toGNUCommandLine {} cfg.args)}'';
+         Delegate = "yes";
+         KillMode = "process";
+         Type = "notify";
+         Restart = "always";
+         RestartSec = "5";
+         StartLimitBurst = "8";
+         StartLimitIntervalSec = "120s";
+
+         # "limits" defined below are adopted from upstream: https://github.com/containerd/containerd/blob/master/containerd.service
+         LimitNPROC = "infinity";
+         LimitCORE = "infinity";
+         LimitNOFILE = "infinity";
+         TasksMax = "infinity";
+         OOMScoreAdjust = "-999";
+       };
+     };
+   };
+ }
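A minimal sketch of using the new module on its own, outside of Kubernetes. args is rendered through lib.cli.toGNUCommandLine, so the (illustrative) attribute below becomes `containerd --log-level=debug`:

  { ... }:
  {
    virtualisation.containerd = {
      enable = true;
      args.log-level = "debug";
    };
  }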
+7 -8
nixos/tests/kubernetes/dns.nix
···
name = "redis";
tag = "latest";
contents = [ pkgs.redis pkgs.bind.host ];
-   config.Entrypoint = "/bin/redis-server";
};
probePod = pkgs.writeText "probe-pod.json" (builtins.toJSON {
···
name = "probe";
tag = "latest";
contents = [ pkgs.bind.host pkgs.busybox ];
-   config.Entrypoint = "/bin/tail";
};
-   extraConfiguration = { config, pkgs, ... }: {
      environment.systemPackages = [ pkgs.bind.host ];
-     # virtualisation.docker.extraOptions = "--dns=${config.services.kubernetes.addons.dns.clusterIp}";
services.dnsmasq.enable = true;
services.dnsmasq.servers = [
"/cluster.local/${config.services.kubernetes.addons.dns.clusterIp}#53"
···
# prepare machine1 for test
machine1.wait_until_succeeds("kubectl get node machine1.${domain} | grep -w Ready")
machine1.wait_until_succeeds(
-   "docker load < ${redisImage}"
)
machine1.wait_until_succeeds(
"kubectl create -f ${redisPod}"
···
"kubectl create -f ${redisService}"
)
machine1.wait_until_succeeds(
-   "docker load < ${probeImage}"
)
machine1.wait_until_succeeds(
"kubectl create -f ${probePod}"
···
# prepare machines for test
machine1.wait_until_succeeds("kubectl get node machine2.${domain} | grep -w Ready")
machine2.wait_until_succeeds(
-   "docker load < ${redisImage}"
)
machine1.wait_until_succeeds(
"kubectl create -f ${redisPod}"
···
"kubectl create -f ${redisService}"
)
machine2.wait_until_succeeds(
-   "docker load < ${probeImage}"
)
machine1.wait_until_succeeds(
"kubectl create -f ${probePod}"
···
name = "redis";
tag = "latest";
contents = [ pkgs.redis pkgs.bind.host ];
+   config.Entrypoint = ["/bin/redis-server"];
};
probePod = pkgs.writeText "probe-pod.json" (builtins.toJSON {
···
name = "probe";
tag = "latest";
contents = [ pkgs.bind.host pkgs.busybox ];
+   config.Entrypoint = ["/bin/tail"];
};
+   extraConfiguration = { config, pkgs, lib, ... }: {
environment.systemPackages = [ pkgs.bind.host ];
services.dnsmasq.enable = true;
services.dnsmasq.servers = [
"/cluster.local/${config.services.kubernetes.addons.dns.clusterIp}#53"
···
# prepare machine1 for test
machine1.wait_until_succeeds("kubectl get node machine1.${domain} | grep -w Ready")
machine1.wait_until_succeeds(
+   "${pkgs.gzip}/bin/zcat ${redisImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
)
machine1.wait_until_succeeds(
"kubectl create -f ${redisPod}"
···
"kubectl create -f ${redisService}"
)
machine1.wait_until_succeeds(
+   "${pkgs.gzip}/bin/zcat ${probeImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
)
machine1.wait_until_succeeds(
"kubectl create -f ${probePod}"
···
# prepare machines for test
machine1.wait_until_succeeds("kubectl get node machine2.${domain} | grep -w Ready")
machine2.wait_until_succeeds(
+   "${pkgs.gzip}/bin/zcat ${redisImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
)
machine1.wait_until_succeeds(
"kubectl create -f ${redisPod}"
···
"kubectl create -f ${redisService}"
)
machine2.wait_until_succeeds(
+   "${pkgs.gzip}/bin/zcat ${probeImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
)
machine1.wait_until_succeeds(
"kubectl create -f ${probePod}"
+3 -3
nixos/tests/kubernetes/rbac.nix
···
name = "kubectl";
tag = "latest";
contents = [ kubectl pkgs.busybox kubectlPod2 ];
-   config.Entrypoint = "/bin/sh";
};
base = {
···
machine1.wait_until_succeeds("kubectl get node machine1.my.zyx | grep -w Ready")
machine1.wait_until_succeeds(
-   "docker load < ${kubectlImage}"
)
machine1.wait_until_succeeds(
···
machine1.wait_until_succeeds("kubectl get node machine2.my.zyx | grep -w Ready")
machine2.wait_until_succeeds(
-   "docker load < ${kubectlImage}"
)
machine1.wait_until_succeeds(
···
name = "kubectl";
tag = "latest";
contents = [ kubectl pkgs.busybox kubectlPod2 ];
+   config.Entrypoint = ["/bin/sh"];
};
base = {
···
machine1.wait_until_succeeds("kubectl get node machine1.my.zyx | grep -w Ready")
machine1.wait_until_succeeds(
+   "${pkgs.gzip}/bin/zcat ${kubectlImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
)
machine1.wait_until_succeeds(
···
machine1.wait_until_succeeds("kubectl get node machine2.my.zyx | grep -w Ready")
machine2.wait_until_succeeds(
+   "${pkgs.gzip}/bin/zcat ${kubectlImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -"
)
machine1.wait_until_succeeds(
-2
pkgs/applications/networking/cluster/kubernetes/default.nix
···
cp cluster/addons/addon-manager/kube-addons.sh $out/bin/kube-addons-lib.sh
- cp ${./mk-docker-opts.sh} $out/bin/mk-docker-opts.sh
-
for tool in kubeadm kubectl; do
installShellCompletion --cmd $tool \
--bash <($out/bin/$tool completion bash) \
···
cp cluster/addons/addon-manager/kube-addons.sh $out/bin/kube-addons-lib.sh
for tool in kubeadm kubectl; do
installShellCompletion --cmd $tool \
--bash <($out/bin/$tool completion bash) \
-113
pkgs/applications/networking/cluster/kubernetes/mk-docker-opts.sh
···
- #!/usr/bin/env bash
-
- # Copyright 2014 The Kubernetes Authors.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- #     http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
-
- # Generate Docker daemon options based on flannel env file.
-
- # exit on any error
- set -e
-
- usage() {
-   echo "$0 [-f FLANNEL-ENV-FILE] [-d DOCKER-ENV-FILE] [-i] [-c] [-m] [-k COMBINED-KEY]
-
- Generate Docker daemon options based on flannel env file
- OPTIONS:
-   -f  Path to flannel env file. Defaults to /run/flannel/subnet.env
-   -d  Path to Docker env file to write to. Defaults to /run/docker_opts.env
-   -i  Output each Docker option as individual var. e.g. DOCKER_OPT_MTU=1500
-   -c  Output combined Docker options into DOCKER_OPTS var
-   -k  Set the combined options key to this value (default DOCKER_OPTS=)
-   -m  Do not output --ip-masq (useful for older Docker version)
- " >/dev/stderr
-   exit 1
- }
-
- flannel_env="/run/flannel/subnet.env"
- docker_env="/run/docker_opts.env"
- combined_opts_key="DOCKER_OPTS"
- indiv_opts=false
- combined_opts=false
- ipmasq=true
- val=""
-
- while getopts "f:d:icmk:" opt; do
-   case $opt in
-     f)
-       flannel_env=$OPTARG
-       ;;
-     d)
-       docker_env=$OPTARG
-       ;;
-     i)
-       indiv_opts=true
-       ;;
-     c)
-       combined_opts=true
-       ;;
-     m)
-       ipmasq=false
-       ;;
-     k)
-       combined_opts_key=$OPTARG
-       ;;
-     \?)
-       usage
-       ;;
-   esac
- done
-
- if [[ $indiv_opts = false ]] && [[ $combined_opts = false ]]; then
-   indiv_opts=true
-   combined_opts=true
- fi
-
- if [[ -f "${flannel_env}" ]]; then
-   source "${flannel_env}"
- fi
-
- if [[ -n "$FLANNEL_SUBNET" ]]; then
-   # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below
-   DOCKER_OPT_BIP="--bip=$FLANNEL_SUBNET"
- fi
-
- if [[ -n "$FLANNEL_MTU" ]]; then
-   # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below
-   DOCKER_OPT_MTU="--mtu=$FLANNEL_MTU"
- fi
-
- if [[ "$FLANNEL_IPMASQ" = true ]] && [[ $ipmasq = true ]]; then
-   # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below
-   DOCKER_OPT_IPMASQ="--ip-masq=false"
- fi
-
- eval docker_opts="\$${combined_opts_key}"
- docker_opts+=" "
-
- echo -n "" >"${docker_env}"
-
- # OPT_LOOP
- for opt in $(compgen -v DOCKER_OPT_); do
-   eval val=\$"${opt}"
-
-   if [[ "$indiv_opts" = true ]]; then
-     echo "$opt=\"$val\"" >>"${docker_env}"
-   fi
-
-   docker_opts+="$val "
- done
-
- if [[ "$combined_opts" = true ]]; then
-   echo "${combined_opts_key}=\"${docker_opts}\"" >>"${docker_env}"
- fi
···