Merge pull request #262839 from RaitoBezarius/qemu-vm/timeout

Changed files
+99 -13
nixos
lib
tests
nixos-test-driver
+5
nixos/lib/test-driver/default.nix
···
, tesseract4
, vde2
, extraPythonPackages ? (_ : [])
}:
python3Packages.buildPythonApplication {
···
]
++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ])
++ extraPythonPackages python3Packages;
doCheck = true;
nativeCheckInputs = with python3Packages; [ mypy ruff black ];
···
, tesseract4
, vde2
, extraPythonPackages ? (_ : [])
+
, nixosTests
}:
python3Packages.buildPythonApplication {
···
]
++ (lib.optionals enableOCR [ imagemagick_light tesseract4 ])
++ extraPythonPackages python3Packages;
+
+
passthru.tests = {
+
inherit (nixosTests.nixos-test-driver) driver-timeout;
+
};
doCheck = true;
nativeCheckInputs = with python3Packages; [ mypy ruff black ];
+9
nixos/lib/test-driver/test_driver/__init__.py
···
help="vlans to span by the driver",
)
arg_parser.add_argument(
"-o",
"--output_directory",
help="""The path to the directory where outputs copied from the VM will be placed.
···
args.testscript.read_text(),
args.output_directory.resolve(),
args.keep_vm_state,
) as driver:
if args.interactive:
history_dir = os.getcwd()
···
help="vlans to span by the driver",
)
arg_parser.add_argument(
+
"--global-timeout",
+
type=int,
+
metavar="GLOBAL_TIMEOUT",
+
action=EnvDefault,
+
envvar="globalTimeout",
+
help="Timeout in seconds for the whole test",
+
)
+
arg_parser.add_argument(
"-o",
"--output_directory",
help="""The path to the directory where outputs copied from the VM will be placed.
···
args.testscript.read_text(),
args.output_directory.resolve(),
args.keep_vm_state,
+
args.global_timeout,
) as driver:
if args.interactive:
history_dir = os.getcwd()
+25
nixos/lib/test-driver/test_driver/driver.py
···
import os
import re
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union
···
vlans: List[VLan]
machines: List[Machine]
polling_conditions: List[PollingCondition]
def __init__(
self,
···
tests: str,
out_dir: Path,
keep_vm_state: bool = False,
):
self.tests = tests
self.out_dir = out_dir
tmp_dir = get_tmp_dir()
···
def __exit__(self, *_: Any) -> None:
with rootlog.nested("cleanup"):
for machine in self.machines:
machine.release()
···
def run_tests(self) -> None:
"""Run the test script (for non-interactive test runs)"""
self.test_script()
# TODO: Collect coverage data
for machine in self.machines:
···
with rootlog.nested("wait for all VMs to finish"):
for machine in self.machines:
machine.wait_for_shutdown()
def create_machine(self, args: Dict[str, Any]) -> Machine:
tmp_dir = get_tmp_dir()
···
import os
import re
+
import signal
import tempfile
+
import threading
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, ContextManager, Dict, Iterator, List, Optional, Union
···
vlans: List[VLan]
machines: List[Machine]
polling_conditions: List[PollingCondition]
+
global_timeout: int
+
race_timer: threading.Timer
def __init__(
self,
···
tests: str,
out_dir: Path,
keep_vm_state: bool = False,
+
global_timeout: int = 24 * 60 * 60 * 7,
):
self.tests = tests
self.out_dir = out_dir
+
self.global_timeout = global_timeout
+
self.race_timer = threading.Timer(global_timeout, self.terminate_test)
tmp_dir = get_tmp_dir()
···
def __exit__(self, *_: Any) -> None:
with rootlog.nested("cleanup"):
+
self.race_timer.cancel()
for machine in self.machines:
machine.release()
···
def run_tests(self) -> None:
"""Run the test script (for non-interactive test runs)"""
+
rootlog.info(
+
f"Test will time out and terminate in {self.global_timeout} seconds"
+
)
+
self.race_timer.start()
self.test_script()
# TODO: Collect coverage data
for machine in self.machines:
···
with rootlog.nested("wait for all VMs to finish"):
for machine in self.machines:
machine.wait_for_shutdown()
+
self.race_timer.cancel()
+
+
def terminate_test(self) -> None:
    """Abort the test run once the global timeout has elapsed.

    This is the callback of ``race_timer``, so it usually runs in a
    different thread than the one executing the test script. It releases
    every machine and then sends SIGTERM to the driver process itself.
    """
    with rootlog.nested("timeout reached; test terminating..."):
        try:
            for machine in self.machines:
                machine.release()
        finally:
            # `sys.exit` only raises SystemExit in the calling thread, so
            # it cannot stop the main thread from here. Signalling our own
            # process does, and a test script that catches every exception
            # cannot swallow it.
            # Doing this in `finally` guarantees the driver still
            # terminates when releasing a machine raises; otherwise the
            # timer thread would die and the test could keep running past
            # its deadline.
            os.kill(os.getpid(), signal.SIGTERM)
def create_machine(self, args: Dict[str, Any]) -> Machine:
tmp_dir = get_tmp_dir()
+1
nixos/lib/testing-python.nix
···
, nodes ? {}
, testScript
, enableOCR ? false
, name ? "unnamed"
, skipTypeCheck ? false
# Skip linting (mainly intended for faster dev cycles)
···
, nodes ? {}
, testScript
, enableOCR ? false
+
, globalTimeout ? (60 * 60)
, name ? "unnamed"
, skipTypeCheck ? false
# Skip linting (mainly intended for faster dev cycles)
+13
nixos/lib/testing/driver.nix
···
wrapProgram $out/bin/nixos-test-driver \
--set startScripts "''${vmStartScripts[*]}" \
--set testScript "$out/test-script" \
--set vlans '${toString vlans}' \
${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)}
'';
···
type = types.package;
default = hostPkgs.qemu_test;
defaultText = "hostPkgs.qemu_test";
};
enableOCR = mkOption {
···
wrapProgram $out/bin/nixos-test-driver \
--set startScripts "''${vmStartScripts[*]}" \
--set testScript "$out/test-script" \
+
--set globalTimeout "${toString config.globalTimeout}" \
--set vlans '${toString vlans}' \
${lib.escapeShellArgs (lib.concatMap (arg: ["--add-flags" arg]) config.extraDriverArgs)}
'';
···
type = types.package;
default = hostPkgs.qemu_test;
defaultText = "hostPkgs.qemu_test";
+
};
+
+
globalTimeout = mkOption {
+
description = mdDoc ''
+
A global timeout for the complete test, expressed in seconds.
+
Beyond that timeout, every resource will be killed and released and the test will fail.
+
+
By default, we use a 1 hour timeout.
+
'';
+
type = types.int;
+
default = 60 * 60;
+
example = 10 * 60;
};
enableOCR = mkOption {
+23 -13
nixos/lib/testing/run.nix
···
'';
};
test = mkOption {
type = types.package;
# TODO: can the interactive driver be configured to access the network?
···
};
config = {
-
test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used.
-
derivation = hostPkgs.stdenv.mkDerivation {
-
name = "vm-test-run-${config.name}";
-
requiredSystemFeatures = [ "kvm" "nixos-test" ];
-
buildCommand = ''
-
mkdir -p $out
-
# effectively mute the XMLLogger
-
export LOGFILE=/dev/null
-
${config.driver}/bin/nixos-test-driver -o $out
-
'';
-
passthru = config.passthru;
-
meta = config.meta;
-
};
inherit (config) passthru meta;
};
···
'';
};
+
rawTestDerivation = mkOption {
+
type = types.package;
+
description = mdDoc ''
+
Unfiltered version of `test`, for troubleshooting the test framework and `testBuildFailure` in the test framework's test suite.
+
This is not intended for general use. Use `test` instead.
+
'';
+
internal = true;
+
};
+
test = mkOption {
type = types.package;
# TODO: can the interactive driver be configured to access the network?
···
};
config = {
+
rawTestDerivation = hostPkgs.stdenv.mkDerivation {
+
name = "vm-test-run-${config.name}";
+
requiredSystemFeatures = [ "kvm" "nixos-test" ];
+
buildCommand = ''
+
mkdir -p $out
+
# effectively mute the XMLLogger
+
export LOGFILE=/dev/null
+
${config.driver}/bin/nixos-test-driver -o $out
+
'';
+
passthru = config.passthru;
+
meta = config.meta;
+
};
+
test = lib.lazyDerivation { # lazyDerivation improves performance when only passthru items and/or meta are used.
+
derivation = config.rawTestDerivation;
inherit (config) passthru meta;
};
+8
nixos/tests/all-tests.nix
···
lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {};
node-name = runTest ./nixos-test-driver/node-name.nix;
busybox = runTest ./nixos-test-driver/busybox.nix;
};
# NixOS vm tests and non-vm unit tests
···
lib-extend = handleTestOn [ "x86_64-linux" "aarch64-linux" ] ./nixos-test-driver/lib-extend.nix {};
node-name = runTest ./nixos-test-driver/node-name.nix;
busybox = runTest ./nixos-test-driver/busybox.nix;
+
driver-timeout = pkgs.runCommand "ensure-timeout-induced-failure" {
+
failed = pkgs.testers.testBuildFailure ((runTest ./nixos-test-driver/timeout.nix).config.rawTestDerivation);
+
} ''
+
grep -F "timeout reached; test terminating" $failed/testBuildFailure.log
+
# The program will always be terminated by SIGTERM (143) if it waits for the deadline thread.
+
[[ 143 = $(cat $failed/testBuildFailure.exit) ]]
+
touch $out
+
'';
};
# NixOS vm tests and non-vm unit tests
+15
nixos/tests/nixos-test-driver/timeout.nix
···
···
+
{
+
name = "Test that sleep of 6 seconds fails a timeout of 5 seconds";
+
globalTimeout = 5;
+
+
nodes = {
+
machine = ({ pkgs, ... }: {
+
});
+
};
+
+
testScript = ''
+
start_all()
+
machine.wait_for_unit("multi-user.target")
+
machine.succeed("sleep 6")
+
'';
+
}