test-driver: Implement debugging breakpoint hooks

Co-authored-by: Maximilian Bosch <maximilian@mbosch.me>

Changed files
+167 -7
nixos
+51
nixos/doc/manual/development/writing-nixos-tests.section.md
···
list-id: test-options-list
source: @NIXOS_TEST_OPTIONS_JSON@
```
+
+
## Accessing VMs in the sandbox with SSH {#sec-test-sandbox-breakpoint}
+
+
As explained in [](#sec-nixos-test-ssh-access), it's possible to configure an
+
SSH backdoor based on AF_VSOCK. This can be used to SSH into a VM of a running
+
build in a sandbox.
+
+
This can be done when something in the test fails, e.g.
+
+
```nix
+
{
+
nodes.machine = {};
+
+
sshBackdoor.enable = true;
+
enableDebugHook = true;
+
+
testScript = ''
+
start_all()
+
machine.succeed("false") # this will fail
+
'';
+
}
+
```
+
+
For the AF_VSOCK feature to work, `/dev/vhost-vsock` is needed in the sandbox
+
which can be done with e.g.
+
+
```
+
nix-build -A nixosTests.foo --option sandbox-paths /dev/vhost-vsock
+
```
+
+
Setting `enableDebugHook` halts the test execution on a test failure and prints instructions
+
on how to enter the sandbox shell of the VM test. Inside, one can log into
+
e.g. `machine` with
+
+
```
+
ssh -F ./ssh_config vsock/3
+
```
+
+
As described in [](#sec-nixos-test-ssh-access), the numbers for vsock start at
+
`3` instead of `1`. So the first VM in the network (sorted alphabetically) can
+
be accessed with `vsock/3`.
+
+
Alternatively, it's possible to explicitly set a breakpoint with
+
`debug.breakpoint()`. This also has the benefit that one can step through
+
`testScript` with `pdb` like this:
+
+
```
+
$ sudo /nix/store/eeeee-attach <id>
+
bash# telnet 127.0.0.1 4444
+
pdb$ …
+
```
+6
nixos/doc/manual/redirects.json
···
"test-opt-sshBackdoor.vsockOffset": [
"index.html#test-opt-sshBackdoor.vsockOffset"
],
+
"test-opt-enableDebugHook": [
+
"index.html#test-opt-enableDebugHook"
+
],
"test-opt-defaults": [
"index.html#test-opt-defaults"
],
···
],
"sec-nixos-test-testing-hardware-features": [
"index.html#sec-nixos-test-testing-hardware-features"
+
],
+
"sec-test-sandbox-breakpoint": [
+
"index.html#sec-test-sandbox-breakpoint"
],
"chap-developing-the-test-driver": [
"index.html#chap-developing-the-test-driver"
+2
nixos/lib/test-driver/default.nix
···
extraPythonPackages ? (_: [ ]),
nixosTests,
}:
+
python3Packages.buildPythonApplication {
pname = "nixos-test-driver";
version = "1.1";
···
junit-xml
ptpython
ipython
+
remote-pdb
]
++ extraPythonPackages python3Packages;
+10
nixos/lib/test-driver/src/test_driver/__init__.py
···
import ptpython.ipython
+
from test_driver.debug import Debug, DebugAbstract, DebugNop
from test_driver.driver import Driver
from test_driver.logger import (
CompositeLogger,
···
"--interactive",
help="drop into a python repl and run the tests interactively",
action=argparse.BooleanOptionalAction,
+
)
+
arg_parser.add_argument(
+
"--debug-hook-attach",
+
help="Enable interactive debugging breakpoints for sandboxed runs",
)
arg_parser.add_argument(
"--start-scripts",
···
if not args.keep_vm_state:
logger.info("Machine state will be reset. To keep it, pass --keep-vm-state")
+
debugger: DebugAbstract = DebugNop()
+
if args.debug_hook_attach is not None:
+
debugger = Debug(logger, args.debug_hook_attach)
+
with Driver(
args.start_scripts,
args.vlans,
···
logger,
args.keep_vm_state,
args.global_timeout,
+
debug=debugger,
) as driver:
if args.interactive:
history_dir = os.getcwd()
+53
nixos/lib/test-driver/src/test_driver/debug.py
···
+
import logging
+
import os
+
import random
+
import shutil
+
import subprocess
+
import sys
+
from abc import ABC, abstractmethod
+
+
from remote_pdb import RemotePdb # type:ignore
+
+
from test_driver.logger import AbstractLogger
+
+
+
class DebugAbstract(ABC):
    """Interface of the debugger object that is handed to the test driver."""

    @abstractmethod
    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """
        Request a breakpoint. Concrete subclasses decide what that means.

        `host` and `port` are accepted by every implementation so callers
        can use any of them interchangeably.
        """
        ...
+
+
+
class DebugNop(DebugAbstract):
    """Debugger implementation whose breakpoints have no effect."""

    def __init__(self) -> None:
        """Nothing to set up."""

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """Ignore the request and return immediately."""
        return None
+
+
+
class Debug(DebugAbstract):
    """
    Debugger that halts the test driver at a breakpoint and exposes a
    remote `pdb` session on a TCP port.
    """

    def __init__(self, logger: AbstractLogger, attach_command: str) -> None:
        """
        :param logger: logger used to announce that a breakpoint was reached.
        :param attach_command: command that is printed in the breakpoint
            message; the user is told to run it with the generated id.
        """
        self.breakpoint_on_failure = False
        self.logger = logger
        self.attach = attach_command

    def breakpoint(self, host: str = "127.0.0.1", port: int = 4444) -> None:
        """
        Call this function to stop execution and put the process on sleep while
        at the same time have the test driver provide a debug shell on TCP port
        `port`. This is meant to be used for sandboxed tests that have the test
        driver feature `enableDebugHook` enabled.

        :param host: address the remote `pdb` session listens on.
        :param port: TCP port the remote `pdb` session listens on.
        """
        # The random number is both the id printed to the user and the
        # argument of the `sleep` process started below.
        # NOTE(review): presumably the attach command locates the sandbox via
        # that `sleep <id>` process -- confirm against breakpointHook.
        pattern = str(random.randrange(999999, 9999999))
        self.logger.log_test_error(
            f"Breakpoint reached, run 'sudo {self.attach} {pattern}'"
        )

        # `shutil.which` returns None when bash is not on PATH; assigning None
        # to `os.environ` would raise a TypeError and hide the test failure
        # that triggered this breakpoint.
        bash = shutil.which("bash")
        if bash is None:
            self.logger.log_test_error(
                "bash not found in PATH, not setting 'bashInteractive'"
            )
        else:
            os.environ["bashInteractive"] = bash

        if os.fork() == 0:
            # Child: only exists to run the marker `sleep`. It must never
            # return into the driver code it was forked from, otherwise it
            # would run the failure/cleanup path a second time once `sleep`
            # finishes or fails to start. `os._exit` also avoids flushing
            # buffers inherited from the parent.
            try:
                subprocess.run(["sleep", pattern])
            finally:
                os._exit(0)
        else:
            # RemotePdb writes log messages to both stderr AND the logger,
            # which is the same here. Hence, disabling the remote_pdb logger
            # to avoid duplicate messages in the build log.
            logging.root.manager.loggerDict["remote_pdb"].disabled = True  # type:ignore
            # Trace the caller's frame, not this method.
            RemotePdb(host=host, port=port).set_trace(sys._getframe().f_back)
+11
nixos/lib/test-driver/src/test_driver/driver.py
···
from colorama import Style
+
from test_driver.debug import DebugAbstract, DebugNop
from test_driver.errors import MachineError, RequestedAssertionFailed
from test_driver.logger import AbstractLogger
from test_driver.machine import Machine, NixStartScript, retry
···
global_timeout: int
race_timer: threading.Timer
logger: AbstractLogger
+
debug: DebugAbstract
def __init__(
self,
···
logger: AbstractLogger,
keep_vm_state: bool = False,
global_timeout: int = 24 * 60 * 60 * 7,
+
debug: DebugAbstract = DebugNop(),
):
self.tests = tests
self.out_dir = out_dir
self.global_timeout = global_timeout
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
self.logger = logger
+
self.debug = debug
tmp_dir = get_tmp_dir()
···
polling_condition=self.polling_condition,
Machine=Machine, # for typing
t=AssertionTester(),
+
debug=self.debug,
)
machine_symbols = {pythonize_name(m.name): m for m in self.machines}
# If there's exactly one machine, make it available under the name
···
for line in f"{exc_prefix}: {exc}".splitlines():
self.logger.log_test_error(line)
+
self.debug.breakpoint()
+
sys.exit(1)
+
+
except Exception:
+
self.debug.breakpoint()
+
raise
def run_tests(self) -> None:
"""Run the test script (for non-interactive test runs)"""
+2
nixos/lib/test-script-prepend.py
···
# This file contains type hints that can be prepended to Nix test scripts so they can be type
# checked.
+
from test_driver.debug import DebugAbstract
from test_driver.driver import Driver
from test_driver.vlan import VLan
from test_driver.machine import Machine
···
serial_stdout_off: Callable[[], None]
serial_stdout_on: Callable[[], None]
polling_condition: PollingConditionProtocol
+
debug: DebugAbstract
t: TestCase
+32 -7
nixos/lib/testing/run.nix
···
}:
let
inherit (lib) types mkOption;
+
inherit (hostPkgs.stdenv.hostPlatform) isDarwin isLinux;
# TODO (lib): Also use lib equivalent in nodes.nix
/**
···
*/
f:
lib.mkOverride (opt.highestPrio - 1) (f opt.value);
-
in
{
options = {
···
'';
};
+
enableDebugHook = lib.mkEnableOption "" // {
+
description = ''
+
Halt test execution after any test fail and provide the possibility to
+
hook into the sandbox to connect with either the test driver via
+
`telnet localhost 4444` or with the VMs via SSH and vsocks (see also
+
`sshBackdoor.enable`).
+
'';
+
};
+
rawTestDerivation = mkOption {
type = types.package;
description = ''
···
rawTestDerivation = hostPkgs.stdenv.mkDerivation config.rawTestDerivationArg;
rawTestDerivationArg =
finalAttrs:
-
assert lib.assertMsg (!config.sshBackdoor.enable)
-
"The SSH backdoor is currently not supported for non-interactive testing! Please make sure to only set `interactive.sshBackdoor.enable = true;`!";
+
assert lib.assertMsg (
+
config.sshBackdoor.enable -> isLinux
+
) "The SSH backdoor is not supported for macOS host systems!";
+
+
assert lib.assertMsg (
+
config.enableDebugHook -> isLinux
+
) "The debugging hook is not supported for macOS host systems!";
{
name = "vm-test-run-${config.name}";
requiredSystemFeatures =
-
[ "nixos-test" ]
-
++ lib.optionals hostPkgs.stdenv.hostPlatform.isLinux [ "kvm" ]
-
++ lib.optionals hostPkgs.stdenv.hostPlatform.isDarwin [ "apple-virt" ];
+
[ "nixos-test" ] ++ lib.optional isLinux "kvm" ++ lib.optional isDarwin "apple-virt";
+
+
nativeBuildInputs = lib.optionals config.enableDebugHook [
+
hostPkgs.openssh
+
hostPkgs.inetutils
+
];
buildCommand = ''
mkdir -p $out
···
# effectively mute the XMLLogger
export LOGFILE=/dev/null
-
${config.driver}/bin/nixos-test-driver -o $out
+
${lib.optionalString config.enableDebugHook ''
+
ln -sf \
+
${hostPkgs.systemd}/lib/systemd/ssh_config.d/20-systemd-ssh-proxy.conf \
+
ssh_config
+
''}
+
+
# Use the full option name declared by the driver's argument parser
# (`--debug-hook-attach`) instead of relying on argparse prefix matching.
${config.driver}/bin/nixos-test-driver \
  -o $out \
  ${lib.optionalString config.enableDebugHook "--debug-hook-attach=${hostPkgs.breakpointHook.attach}"}
'';
passthru = config.passthru;