diff options
Diffstat (limited to 'tools/testing')
429 files changed, 25252 insertions, 7073 deletions
diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c index 34e8b855266f..8141d45df51a 100644 --- a/tools/testing/crypto/chacha20-s390/test-cipher.c +++ b/tools/testing/crypto/chacha20-s390/test-cipher.c @@ -252,29 +252,26 @@ static int __init chacha_s390_test_init(void) memset(plain, 'a', data_size); get_random_bytes(plain, (data_size > 256 ? 256 : data_size)); - cipher_generic = vmalloc(data_size); + cipher_generic = vzalloc(data_size); if (!cipher_generic) { pr_info("could not allocate cipher_generic buffer\n"); ret = -2; goto out; } - memset(cipher_generic, 0, data_size); - cipher_s390 = vmalloc(data_size); + cipher_s390 = vzalloc(data_size); if (!cipher_s390) { pr_info("could not allocate cipher_s390 buffer\n"); ret = -2; goto out; } - memset(cipher_s390, 0, data_size); - revert = vmalloc(data_size); + revert = vzalloc(data_size); if (!revert) { pr_info("could not allocate revert buffer\n"); ret = -2; goto out; } - memset(revert, 0, data_size); if (debug) print_hex_dump(KERN_INFO, "src: ", DUMP_PREFIX_OFFSET, diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config new file mode 100644 index 000000000000..e824ce43b05a --- /dev/null +++ b/tools/testing/kunit/configs/arch_uml.config @@ -0,0 +1,5 @@ +# Config options which are added to UML builds by default + +# Enable virtio/pci, as a lot of tests require it. +CONFIG_VIRTIO_UML=y +CONFIG_UML_PCI_OVER_VIRTIO=y diff --git a/tools/testing/kunit/configs/coverage_uml.config b/tools/testing/kunit/configs/coverage_uml.config new file mode 100644 index 000000000000..bacb77664fa8 --- /dev/null +++ b/tools/testing/kunit/configs/coverage_uml.config @@ -0,0 +1,11 @@ +# This config fragment enables coverage on UML, which is different from the +# normal gcov used in other arches (no debugfs). +# Example usage: +# ./tools/testing/kunit/kunit.py run \ +# --kunitconfig=tools/testing/kunit/configs/all_tests_uml.config \ +# --kunitconfig=tools/testing/kunit/configs/coverage_uml.config + +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_GCOV=y diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 13bd72e47da8..e132b0654029 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -10,6 +10,7 @@ import argparse import os import re +import shlex import sys import time @@ -22,6 +23,7 @@ from typing import Iterable, List, Optional, Sequence, Tuple import kunit_json import kunit_kernel import kunit_parser +from kunit_printer import stdout class KunitStatus(Enum): SUCCESS = auto() @@ -72,7 +74,7 @@ def get_kernel_root_path() -> str: def config_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitConfigRequest) -> KunitResult: - kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...') + stdout.print_with_timestamp('Configuring KUnit Kernel ...') config_start = time.time() success = linux.build_reconfig(request.build_dir, request.make_options) @@ -85,7 +87,7 @@ def config_tests(linux: kunit_kernel.LinuxSourceTree, def build_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitBuildRequest) -> KunitResult: - kunit_parser.print_with_timestamp('Building KUnit Kernel ...') + stdout.print_with_timestamp('Building KUnit Kernel ...') build_start = time.time() success = linux.build_kernel(request.alltests, @@ -158,7 +160,7 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) - test_counts = kunit_parser.TestCounts() exec_time = 0.0 for i, filter_glob in enumerate(filter_globs): - kunit_parser.print_with_timestamp('Starting KUnit Kernel ({}/{})...'.format(i+1, len(filter_globs))) + stdout.print_with_timestamp('Starting KUnit Kernel ({}/{})...'.format(i+1, len(filter_globs))) test_start = time.time() run_result = linux.run_kernel( @@ -221,7 +223,7 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input else: with open(request.json, 'w') as f: f.write(json_str) - kunit_parser.print_with_timestamp("Test results stored in %s" % + stdout.print_with_timestamp("Test results stored in %s" % os.path.abspath(request.json)) if test_result.status != kunit_parser.TestStatus.SUCCESS: @@ -245,7 +247,7 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, run_end = time.time() - kunit_parser.print_with_timestamp(( + stdout.print_with_timestamp(( 'Elapsed time: %.3fs total, %.3fs configuring, %.3fs ' + 'building, %.3fs running\n') % ( run_end - run_start, @@ -291,8 +293,9 @@ def add_common_opts(parser) -> None: parser.add_argument('--kunitconfig', help='Path to Kconfig fragment that enables KUnit tests.' ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" ' - 'will get automatically appended.', - metavar='PATH') + 'will get automatically appended. If repeated, the files ' + 'blindly concatenated, which might not work in all cases.', + action='append', metavar='PATHS') parser.add_argument('--kconfig_add', help='Additional Kconfig options to append to the ' '.kunitconfig, e.g. CONFIG_KASAN=y. Can be repeated.', @@ -323,6 +326,10 @@ def add_common_opts(parser) -> None: 'a QemuArchParams object.'), type=str, metavar='FILE') + parser.add_argument('--qemu_args', + help='Additional QEMU arguments, e.g. "-smp 8"', + action='append', metavar='') + def add_build_opts(parser) -> None: parser.add_argument('--jobs', help='As in the make command, "Specifies the number of ' @@ -365,7 +372,25 @@ def add_parse_opts(parser) -> None: 'filename is specified', type=str, const='stdout', default=None, metavar='FILE') -def main(argv, linux=None): + +def tree_from_args(cli_args: argparse.Namespace) -> kunit_kernel.LinuxSourceTree: + """Returns a LinuxSourceTree based on the user's arguments.""" + # Allow users to specify multiple arguments in one string, e.g. '-smp 8' + qemu_args: List[str] = [] + if cli_args.qemu_args: + for arg in cli_args.qemu_args: + qemu_args.extend(shlex.split(arg)) + + return kunit_kernel.LinuxSourceTree(cli_args.build_dir, + kunitconfig_paths=cli_args.kunitconfig, + kconfig_add=cli_args.kconfig_add, + arch=cli_args.arch, + cross_compile=cli_args.cross_compile, + qemu_config_path=cli_args.qemu_config, + extra_qemu_args=qemu_args) + + +def main(argv): parser = argparse.ArgumentParser( description='Helps writing and running KUnit tests.') subparser = parser.add_subparsers(dest='subcommand') @@ -412,14 +437,7 @@ def main(argv, linux=None): if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) - if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, - kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, - arch=cli_args.arch, - cross_compile=cli_args.cross_compile, - qemu_config_path=cli_args.qemu_config) - + linux = tree_from_args(cli_args) request = KunitRequest(build_dir=cli_args.build_dir, make_options=cli_args.make_options, jobs=cli_args.jobs, @@ -438,50 +456,29 @@ def main(argv, linux=None): not os.path.exists(cli_args.build_dir)): os.mkdir(cli_args.build_dir) - if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, - kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, - arch=cli_args.arch, - cross_compile=cli_args.cross_compile, - qemu_config_path=cli_args.qemu_config) - + linux = tree_from_args(cli_args) request = KunitConfigRequest(build_dir=cli_args.build_dir, make_options=cli_args.make_options) result = config_tests(linux, request) - kunit_parser.print_with_timestamp(( + stdout.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( result.elapsed_time)) if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'build': - if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, - kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, - arch=cli_args.arch, - cross_compile=cli_args.cross_compile, - qemu_config_path=cli_args.qemu_config) - + linux = tree_from_args(cli_args) request = KunitBuildRequest(build_dir=cli_args.build_dir, make_options=cli_args.make_options, jobs=cli_args.jobs, alltests=cli_args.alltests) result = config_and_build_tests(linux, request) - kunit_parser.print_with_timestamp(( + stdout.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( result.elapsed_time)) if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'exec': - if not linux: - linux = kunit_kernel.LinuxSourceTree(cli_args.build_dir, - kunitconfig_path=cli_args.kunitconfig, - kconfig_add=cli_args.kconfig_add, - arch=cli_args.arch, - cross_compile=cli_args.cross_compile, - qemu_config_path=cli_args.qemu_config) - + linux = tree_from_args(cli_args) exec_request = KunitExecRequest(raw_output=cli_args.raw_output, build_dir=cli_args.build_dir, json=cli_args.json, @@ -491,7 +488,7 @@ def main(argv, linux=None): kernel_args=cli_args.kernel_args, run_isolated=cli_args.run_isolated) result = exec_tests(linux, exec_request) - kunit_parser.print_with_timestamp(( + stdout.print_with_timestamp(( 'Elapsed time: %.3fs\n') % (result.elapsed_time)) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py index 75a8dc1683d4..48b5f34b2e5d 100644 --- a/tools/testing/kunit/kunit_config.py +++ b/tools/testing/kunit/kunit_config.py @@ -8,7 +8,7 @@ from dataclasses import dataclass import re -from typing import List, Set +from typing import Dict, Iterable, List, Set, Tuple CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$' CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$' @@ -32,35 +32,51 @@ class Kconfig: """Represents defconfig or .config specified using the Kconfig language.""" def __init__(self) -> None: - self._entries = [] # type: List[KconfigEntry] + self._entries = {} # type: Dict[str, str] - def entries(self) -> Set[KconfigEntry]: - return set(self._entries) + def __eq__(self, other) -> bool: + if not isinstance(other, self.__class__): + return False + return self._entries == other._entries - def add_entry(self, entry: KconfigEntry) -> None: - self._entries.append(entry) + def __repr__(self) -> str: + return ','.join(str(e) for e in self.as_entries()) + + def as_entries(self) -> Iterable[KconfigEntry]: + for name, value in self._entries.items(): + yield KconfigEntry(name, value) + + def add_entry(self, name: str, value: str) -> None: + self._entries[name] = value def is_subset_of(self, other: 'Kconfig') -> bool: - other_dict = {e.name: e.value for e in other.entries()} - for a in self.entries(): - b = other_dict.get(a.name) + for name, value in self._entries.items(): + b = other._entries.get(name) if b is None: - if a.value == 'n': + if value == 'n': continue return False - if a.value != b: + if value != b: return False return True + def conflicting_options(self, other: 'Kconfig') -> List[Tuple[KconfigEntry, KconfigEntry]]: + diff = [] # type: List[Tuple[KconfigEntry, KconfigEntry]] + for name, value in self._entries.items(): + b = other._entries.get(name) + if b and value != b: + pair = (KconfigEntry(name, value), KconfigEntry(name, b)) + diff.append(pair) + return diff + def merge_in_entries(self, other: 'Kconfig') -> None: - if other.is_subset_of(self): - return - self._entries = list(self.entries().union(other.entries())) + for name, value in other._entries.items(): + self._entries[name] = value def write_to_file(self, path: str) -> None: with open(path, 'a+') as f: - for entry in self.entries(): - f.write(str(entry) + '\n') + for e in self.as_entries(): + f.write(str(e) + '\n') def parse_file(path: str) -> Kconfig: with open(path, 'r') as f: @@ -78,14 +94,12 @@ def parse_from_string(blob: str) -> Kconfig: match = config_matcher.match(line) if match: - entry = KconfigEntry(match.group(1), match.group(2)) - kconfig.add_entry(entry) + kconfig.add_entry(match.group(1), match.group(2)) continue empty_match = is_not_set_matcher.match(line) if empty_match: - entry = KconfigEntry(empty_match.group(1), 'n') - kconfig.add_entry(entry) + kconfig.add_entry(empty_match.group(1), 'n') continue if line[0] == '#': diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 3539efaf99ba..f5c26ea89714 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -18,7 +18,7 @@ import threading from typing import Iterator, List, Optional, Tuple import kunit_config -import kunit_parser +from kunit_printer import stdout import qemu_config KCONFIG_PATH = '.config' @@ -26,6 +26,7 @@ KUNITCONFIG_PATH = '.kunitconfig' OLD_KUNITCONFIG_PATH = 'last_used_kunitconfig' DEFAULT_KUNITCONFIG_PATH = 'tools/testing/kunit/configs/default.config' BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config' +UML_KCONFIG_PATH = 'tools/testing/kunit/configs/arch_uml.config' OUTFILE_PATH = 'test.log' ABS_TOOL_PATH = os.path.abspath(os.path.dirname(__file__)) QEMU_CONFIGS_DIR = os.path.join(ABS_TOOL_PATH, 'qemu_configs') @@ -53,8 +54,8 @@ class LinuxSourceTreeOperations: except subprocess.CalledProcessError as e: raise ConfigError(e.output.decode()) - def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None: - pass + def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig: + return base_kunitconfig def make_allyesconfig(self, build_dir: str, make_options) -> None: raise ConfigError('Only the "um" arch is supported for alltests') @@ -109,9 +110,10 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): self._kernel_command_line = qemu_arch_params.kernel_command_line + ' kunit_shutdown=reboot' self._extra_qemu_params = qemu_arch_params.extra_qemu_params - def make_arch_qemuconfig(self, base_kunitconfig: kunit_config.Kconfig) -> None: + def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig: kconfig = kunit_config.parse_from_string(self._kconfig) - base_kunitconfig.merge_in_entries(kconfig) + kconfig.merge_in_entries(base_kunitconfig) + return kconfig def start(self, params: List[str], build_dir: str) -> subprocess.Popen: kernel_path = os.path.join(build_dir, self._kernel_path) @@ -137,8 +139,13 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): def __init__(self, cross_compile=None): super().__init__(linux_arch='um', cross_compile=cross_compile) + def make_arch_config(self, base_kunitconfig: kunit_config.Kconfig) -> kunit_config.Kconfig: + kconfig = kunit_config.parse_file(UML_KCONFIG_PATH) + kconfig.merge_in_entries(base_kunitconfig) + return kconfig + def make_allyesconfig(self, build_dir: str, make_options) -> None: - kunit_parser.print_with_timestamp( + stdout.print_with_timestamp( 'Enabling all CONFIGs for UML...') command = ['make', 'ARCH=um', 'O=' + build_dir, 'allyesconfig'] if make_options: @@ -148,18 +155,19 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) process.wait() - kunit_parser.print_with_timestamp( + stdout.print_with_timestamp( 'Disabling broken configs to run KUnit tests...') with open(get_kconfig_path(build_dir), 'a') as config: with open(BROKEN_ALLCONFIG_PATH, 'r') as disable: config.write(disable.read()) - kunit_parser.print_with_timestamp( + stdout.print_with_timestamp( 'Starting Kernel with all configs takes a few minutes...') def start(self, params: List[str], build_dir: str) -> subprocess.Popen: """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = os.path.join(build_dir, 'linux') + params.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt']) return subprocess.Popen([linux_bin] + params, stdin=subprocess.PIPE, stdout=subprocess.PIPE, @@ -175,22 +183,44 @@ def get_kunitconfig_path(build_dir: str) -> str: def get_old_kunitconfig_path(build_dir: str) -> str: return os.path.join(build_dir, OLD_KUNITCONFIG_PATH) +def get_parsed_kunitconfig(build_dir: str, + kunitconfig_paths: Optional[List[str]]=None) -> kunit_config.Kconfig: + if not kunitconfig_paths: + path = get_kunitconfig_path(build_dir) + if not os.path.exists(path): + shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, path) + return kunit_config.parse_file(path) + + merged = kunit_config.Kconfig() + + for path in kunitconfig_paths: + if os.path.isdir(path): + path = os.path.join(path, KUNITCONFIG_PATH) + if not os.path.exists(path): + raise ConfigError(f'Specified kunitconfig ({path}) does not exist') + + partial = kunit_config.parse_file(path) + diff = merged.conflicting_options(partial) + if diff: + diff_str = '\n\n'.join(f'{a}\n vs from {path}\n{b}' for a, b in diff) + raise ConfigError(f'Multiple values specified for {len(diff)} options in kunitconfig:\n{diff_str}') + merged.merge_in_entries(partial) + return merged + def get_outfile_path(build_dir: str) -> str: return os.path.join(build_dir, OUTFILE_PATH) -def get_source_tree_ops(arch: str, cross_compile: Optional[str]) -> LinuxSourceTreeOperations: +def _default_qemu_config_path(arch: str) -> str: config_path = os.path.join(QEMU_CONFIGS_DIR, arch + '.py') - if arch == 'um': - return LinuxSourceTreeOperationsUml(cross_compile=cross_compile) if os.path.isfile(config_path): - return get_source_tree_ops_from_qemu_config(config_path, cross_compile)[1] + return config_path options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')] raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options))) -def get_source_tree_ops_from_qemu_config(config_path: str, - cross_compile: Optional[str]) -> Tuple[ - str, LinuxSourceTreeOperations]: +def _get_qemu_ops(config_path: str, + extra_qemu_args: Optional[List[str]], + cross_compile: Optional[str]) -> Tuple[str, LinuxSourceTreeOperations]: # The module name/path has very little to do with where the actual file # exists (I learned this through experimentation and could not find it # anywhere in the Python documentation). @@ -210,6 +240,8 @@ def get_source_tree_ops_from_qemu_config(config_path: str, if not hasattr(config, 'QEMU_ARCH'): raise ValueError('qemu_config module missing "QEMU_ARCH": ' + config_path) params: qemu_config.QemuArchParams = config.QEMU_ARCH # type: ignore + if extra_qemu_args: + params.extra_qemu_params.extend(extra_qemu_args) return params.linux_arch, LinuxSourceTreeOperationsQemu( params, cross_compile=cross_compile) @@ -219,34 +251,24 @@ class LinuxSourceTree: def __init__( self, build_dir: str, - load_config=True, - kunitconfig_path='', + kunitconfig_paths: Optional[List[str]]=None, kconfig_add: Optional[List[str]]=None, arch=None, cross_compile=None, - qemu_config_path=None) -> None: + qemu_config_path=None, + extra_qemu_args=None) -> None: signal.signal(signal.SIGINT, self.signal_handler) if qemu_config_path: - self._arch, self._ops = get_source_tree_ops_from_qemu_config( - qemu_config_path, cross_compile) + self._arch, self._ops = _get_qemu_ops(qemu_config_path, extra_qemu_args, cross_compile) else: self._arch = 'um' if arch is None else arch - self._ops = get_source_tree_ops(self._arch, cross_compile) - - if not load_config: - return + if self._arch == 'um': + self._ops = LinuxSourceTreeOperationsUml(cross_compile=cross_compile) + else: + qemu_config_path = _default_qemu_config_path(self._arch) + _, self._ops = _get_qemu_ops(qemu_config_path, extra_qemu_args, cross_compile) - if kunitconfig_path: - if os.path.isdir(kunitconfig_path): - kunitconfig_path = os.path.join(kunitconfig_path, KUNITCONFIG_PATH) - if not os.path.exists(kunitconfig_path): - raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist') - else: - kunitconfig_path = get_kunitconfig_path(build_dir) - if not os.path.exists(kunitconfig_path): - shutil.copyfile(DEFAULT_KUNITCONFIG_PATH, kunitconfig_path) - - self._kconfig = kunit_config.parse_file(kunitconfig_path) + self._kconfig = get_parsed_kunitconfig(build_dir, kunitconfig_paths) if kconfig_add: kconfig = kunit_config.parse_from_string('\n'.join(kconfig_add)) self._kconfig.merge_in_entries(kconfig) @@ -267,10 +289,10 @@ class LinuxSourceTree: validated_kconfig = kunit_config.parse_file(kconfig_path) if self._kconfig.is_subset_of(validated_kconfig): return True - invalid = self._kconfig.entries() - validated_kconfig.entries() + missing = set(self._kconfig.as_entries()) - set(validated_kconfig.as_entries()) message = 'Not all Kconfig options selected in kunitconfig were in the generated .config.\n' \ 'This is probably due to unsatisfied dependencies.\n' \ - 'Missing: ' + ', '.join([str(e) for e in invalid]) + 'Missing: ' + ', '.join(str(e) for e in missing) if self._arch == 'um': message += '\nNote: many Kconfig options aren\'t available on UML. You can try running ' \ 'on a different architecture with something like "--arch=x86_64".' @@ -282,7 +304,7 @@ class LinuxSourceTree: if build_dir and not os.path.exists(build_dir): os.mkdir(build_dir) try: - self._ops.make_arch_qemuconfig(self._kconfig) + self._kconfig = self._ops.make_arch_config(self._kconfig) self._kconfig.write_to_file(kconfig_path) self._ops.make_olddefconfig(build_dir, make_options) except ConfigError as e: @@ -303,7 +325,7 @@ class LinuxSourceTree: return True old_kconfig = kunit_config.parse_file(old_path) - return old_kconfig.entries() != self._kconfig.entries() + return old_kconfig != self._kconfig def build_reconfig(self, build_dir: str, make_options) -> bool: """Creates a new .config if it is not a subset of the .kunitconfig.""" @@ -313,7 +335,8 @@ class LinuxSourceTree: return self.build_config(build_dir, make_options) existing_kconfig = kunit_config.parse_file(kconfig_path) - self._ops.make_arch_qemuconfig(self._kconfig) + self._kconfig = self._ops.make_arch_config(self._kconfig) + if self._kconfig.is_subset_of(existing_kconfig) and not self._kunitconfig_changed(build_dir): return True print('Regenerating .config ...') @@ -334,7 +357,6 @@ class LinuxSourceTree: def run_kernel(self, args=None, build_dir='', filter_glob='', timeout=None) -> Iterator[str]: if not args: args = [] - args.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt']) if filter_glob: args.append('kunit.filter_glob='+filter_glob) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index c5569b367c69..12d3ec77f427 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -13,10 +13,11 @@ from __future__ import annotations import re import sys -import datetime from enum import Enum, auto from typing import Iterable, Iterator, List, Optional, Tuple +from kunit_printer import stdout + class Test: """ A class to represent a test parsed from KTAP results. All KTAP @@ -55,7 +56,7 @@ class Test: def add_error(self, error_message: str) -> None: """Records an error that occurred while parsing this test.""" self.counts.errors += 1 - print_with_timestamp(red('[ERROR]') + f' Test: {self.name}: {error_message}') + stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') class TestStatus(Enum): """An enumeration class to represent the status of a test.""" @@ -461,32 +462,6 @@ def parse_diagnostic(lines: LineStream) -> List[str]: DIVIDER = '=' * 60 -RESET = '\033[0;0m' - -def red(text: str) -> str: - """Returns inputted string with red color code.""" - if not sys.stdout.isatty(): - return text - return '\033[1;31m' + text + RESET - -def yellow(text: str) -> str: - """Returns inputted string with yellow color code.""" - if not sys.stdout.isatty(): - return text - return '\033[1;33m' + text + RESET - -def green(text: str) -> str: - """Returns inputted string with green color code.""" - if not sys.stdout.isatty(): - return text - return '\033[1;32m' + text + RESET - -ANSI_LEN = len(red('')) - -def print_with_timestamp(message: str) -> None: - """Prints message with timestamp at beginning.""" - print('[%s] %s' % (datetime.datetime.now().strftime('%H:%M:%S'), message)) - def format_test_divider(message: str, len_message: int) -> str: """ Returns string with message centered in fixed width divider. @@ -529,12 +504,12 @@ def print_test_header(test: Test) -> None: message += ' (1 subtest)' else: message += f' ({test.expected_count} subtests)' - print_with_timestamp(format_test_divider(message, len(message))) + stdout.print_with_timestamp(format_test_divider(message, len(message))) def print_log(log: Iterable[str]) -> None: """Prints all strings in saved log for test in yellow.""" for m in log: - print_with_timestamp(yellow(m)) + stdout.print_with_timestamp(stdout.yellow(m)) def format_test_result(test: Test) -> str: """ @@ -551,16 +526,16 @@ def format_test_result(test: Test) -> str: String containing formatted test result """ if test.status == TestStatus.SUCCESS: - return green('[PASSED] ') + test.name + return stdout.green('[PASSED] ') + test.name if test.status == TestStatus.SKIPPED: - return yellow('[SKIPPED] ') + test.name + return stdout.yellow('[SKIPPED] ') + test.name if test.status == TestStatus.NO_TESTS: - return yellow('[NO TESTS RUN] ') + test.name + return stdout.yellow('[NO TESTS RUN] ') + test.name if test.status == TestStatus.TEST_CRASHED: print_log(test.log) - return red('[CRASHED] ') + test.name + return stdout.red('[CRASHED] ') + test.name print_log(test.log) - return red('[FAILED] ') + test.name + return stdout.red('[FAILED] ') + test.name def print_test_result(test: Test) -> None: """ @@ -572,7 +547,7 @@ def print_test_result(test: Test) -> None: Parameters: test - Test object representing current test being printed """ - print_with_timestamp(format_test_result(test)) + stdout.print_with_timestamp(format_test_result(test)) def print_test_footer(test: Test) -> None: """ @@ -585,8 +560,8 @@ def print_test_footer(test: Test) -> None: test - Test object representing current test being printed """ message = format_test_result(test) - print_with_timestamp(format_test_divider(message, - len(message) - ANSI_LEN)) + stdout.print_with_timestamp(format_test_divider(message, + len(message) - stdout.color_len())) def print_summary_line(test: Test) -> None: """ @@ -603,12 +578,12 @@ def print_summary_line(test: Test) -> None: test - Test object representing current test being printed """ if test.status == TestStatus.SUCCESS: - color = green + color = stdout.green elif test.status in (TestStatus.SKIPPED, TestStatus.NO_TESTS): - color = yellow + color = stdout.yellow else: - color = red - print_with_timestamp(color(f'Testing complete. {test.counts}')) + color = stdout.red + stdout.print_with_timestamp(color(f'Testing complete. {test.counts}')) # Other methods: @@ -762,7 +737,7 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test: Return: Test - the main test object with all subtests. """ - print_with_timestamp(DIVIDER) + stdout.print_with_timestamp(DIVIDER) lines = extract_tap_lines(kernel_output) test = Test() if not lines: @@ -773,6 +748,6 @@ def parse_run_tests(kernel_output: Iterable[str]) -> Test: test = parse_test(lines, 0, []) if test.status != TestStatus.NO_TESTS: test.status = test.counts.get_status() - print_with_timestamp(DIVIDER) + stdout.print_with_timestamp(DIVIDER) print_summary_line(test) return test diff --git a/tools/testing/kunit/kunit_printer.py b/tools/testing/kunit/kunit_printer.py new file mode 100644 index 000000000000..5f1cc55ecdf5 --- /dev/null +++ b/tools/testing/kunit/kunit_printer.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Utilities for printing and coloring output. +# +# Copyright (C) 2022, Google LLC. +# Author: Daniel Latypov <dlatypov@google.com> + +import datetime +import sys +import typing + +_RESET = '\033[0;0m' + +class Printer: + """Wraps a file object, providing utilities for coloring output, etc.""" + + def __init__(self, output: typing.IO): + self._output = output + self._use_color = output.isatty() + + def print(self, message: str) -> None: + print(message, file=self._output) + + def print_with_timestamp(self, message: str) -> None: + ts = datetime.datetime.now().strftime('%H:%M:%S') + self.print(f'[{ts}] {message}') + + def _color(self, code: str, text: str) -> str: + if not self._use_color: + return text + return code + text + _RESET + + def red(self, text: str) -> str: + return self._color('\033[1;31m', text) + + def yellow(self, text: str) -> str: + return self._color('\033[1;33m', text) + + def green(self, text: str) -> str: + return self._color('\033[1;32m', text) + + def color_len(self) -> int: + """Returns the length of the color escape codes.""" + return len(self.red('')) + +# Provides a default instance that prints to stdout +stdout = Printer(sys.stdout) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 25a2eb3bf114..446ac432d9a4 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -45,7 +45,7 @@ class KconfigTest(unittest.TestCase): self.assertTrue(kconfig0.is_subset_of(kconfig0)) kconfig1 = kunit_config.Kconfig() - kconfig1.add_entry(kunit_config.KconfigEntry('TEST', 'y')) + kconfig1.add_entry('TEST', 'y') self.assertTrue(kconfig1.is_subset_of(kconfig1)) self.assertTrue(kconfig0.is_subset_of(kconfig1)) self.assertFalse(kconfig1.is_subset_of(kconfig0)) @@ -56,40 +56,28 @@ class KconfigTest(unittest.TestCase): kconfig = kunit_config.parse_file(kconfig_path) expected_kconfig = kunit_config.Kconfig() - expected_kconfig.add_entry( - kunit_config.KconfigEntry('UML', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('MMU', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('TEST', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('EXAMPLE_TEST', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('MK8', 'n')) - - self.assertEqual(kconfig.entries(), expected_kconfig.entries()) + expected_kconfig.add_entry('UML', 'y') + expected_kconfig.add_entry('MMU', 'y') + expected_kconfig.add_entry('TEST', 'y') + expected_kconfig.add_entry('EXAMPLE_TEST', 'y') + expected_kconfig.add_entry('MK8', 'n') + + self.assertEqual(kconfig, expected_kconfig) def test_write_to_file(self): kconfig_path = os.path.join(test_tmpdir, '.config') expected_kconfig = kunit_config.Kconfig() - expected_kconfig.add_entry( - kunit_config.KconfigEntry('UML', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('MMU', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('TEST', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('EXAMPLE_TEST', 'y')) - expected_kconfig.add_entry( - kunit_config.KconfigEntry('MK8', 'n')) + expected_kconfig.add_entry('UML', 'y') + expected_kconfig.add_entry('MMU', 'y') + expected_kconfig.add_entry('TEST', 'y') + expected_kconfig.add_entry('EXAMPLE_TEST', 'y') + expected_kconfig.add_entry('MK8', 'n') expected_kconfig.write_to_file(kconfig_path) actual_kconfig = kunit_config.parse_file(kconfig_path) - - self.assertEqual(actual_kconfig.entries(), - expected_kconfig.entries()) + self.assertEqual(actual_kconfig, expected_kconfig) class KUnitParserTest(unittest.TestCase): @@ -222,7 +210,7 @@ class KUnitParserTest(unittest.TestCase): def test_no_kunit_output(self): crash_log = test_data_path('test_insufficient_memory.log') - print_mock = mock.patch('builtins.print').start() + print_mock = mock.patch('kunit_printer.Printer.print').start() with open(crash_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines(file.readlines())) @@ -368,21 +356,53 @@ class LinuxSourceTreeTest(unittest.TestCase): def test_invalid_kunitconfig(self): with self.assertRaisesRegex(kunit_kernel.ConfigError, 'nonexistent.* does not exist'): - kunit_kernel.LinuxSourceTree('', kunitconfig_path='/nonexistent_file') + kunit_kernel.LinuxSourceTree('', kunitconfig_paths=['/nonexistent_file']) def test_valid_kunitconfig(self): with tempfile.NamedTemporaryFile('wt') as kunitconfig: - kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name) + kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[kunitconfig.name]) def test_dir_kunitconfig(self): with tempfile.TemporaryDirectory('') as dir: with open(os.path.join(dir, '.kunitconfig'), 'w'): pass - kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir) + kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir]) + + def test_multiple_kunitconfig(self): + want_kconfig = kunit_config.Kconfig() + want_kconfig.add_entry('KUNIT', 'y') + want_kconfig.add_entry('KUNIT_TEST', 'm') + + with tempfile.TemporaryDirectory('') as dir: + other = os.path.join(dir, 'otherkunitconfig') + with open(os.path.join(dir, '.kunitconfig'), 'w') as f: + f.write('CONFIG_KUNIT=y') + with open(other, 'w') as f: + f.write('CONFIG_KUNIT_TEST=m') + pass + + tree = kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir, other]) + self.assertTrue(want_kconfig.is_subset_of(tree._kconfig), msg=tree._kconfig) + + + def test_multiple_kunitconfig_invalid(self): + with tempfile.TemporaryDirectory('') as dir: + other = os.path.join(dir, 'otherkunitconfig') + with open(os.path.join(dir, '.kunitconfig'), 'w') as f: + f.write('CONFIG_KUNIT=y') + with open(other, 'w') as f: + f.write('CONFIG_KUNIT=m') + + with self.assertRaisesRegex(kunit_kernel.ConfigError, '(?s)Multiple values.*CONFIG_KUNIT'): + kunit_kernel.LinuxSourceTree('', kunitconfig_paths=[dir, other]) + def test_kconfig_add(self): + want_kconfig = kunit_config.Kconfig() + want_kconfig.add_entry('NOT_REAL', 'y') + tree = kunit_kernel.LinuxSourceTree('', kconfig_add=['CONFIG_NOT_REAL=y']) - self.assertIn(kunit_config.KconfigEntry('NOT_REAL', 'y'), tree._kconfig.entries()) + self.assertTrue(want_kconfig.is_subset_of(tree._kconfig), msg=tree._kconfig) def test_invalid_arch(self): with self.assertRaisesRegex(kunit_kernel.ConfigError, 'not a valid arch, options are.*x86_64'): @@ -393,7 +413,7 @@ class LinuxSourceTreeTest(unittest.TestCase): return subprocess.Popen(['echo "hi\nbye"'], shell=True, text=True, stdout=subprocess.PIPE) with tempfile.TemporaryDirectory('') as build_dir: - tree = kunit_kernel.LinuxSourceTree(build_dir, load_config=False) + tree = kunit_kernel.LinuxSourceTree(build_dir) mock.patch.object(tree._ops, 'start', side_effect=fake_start).start() with self.assertRaises(ValueError): @@ -410,6 +430,10 @@ class LinuxSourceTreeTest(unittest.TestCase): f.write('CONFIG_KUNIT=y') tree = kunit_kernel.LinuxSourceTree(build_dir) + # Stub out the source tree operations, so we don't have + # the defaults for any given architecture get in the + # way. + tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None) mock_build_config = mock.patch.object(tree, 'build_config').start() # Should generate the .config @@ -427,6 +451,10 @@ class LinuxSourceTreeTest(unittest.TestCase): f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') tree = kunit_kernel.LinuxSourceTree(build_dir) + # Stub out the source tree operations, so we don't have + # the defaults for any given architecture get in the + # way. + tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None) mock_build_config = mock.patch.object(tree, 'build_config').start() self.assertTrue(tree.build_reconfig(build_dir, make_options=[])) @@ -443,6 +471,10 @@ class LinuxSourceTreeTest(unittest.TestCase): f.write('CONFIG_KUNIT=y\nCONFIG_KUNIT_TEST=y') tree = kunit_kernel.LinuxSourceTree(build_dir) + # Stub out the source tree operations, so we don't have + # the defaults for any given architecture get in the + # way. + tree._ops = kunit_kernel.LinuxSourceTreeOperations('none', None) mock_build_config = mock.patch.object(tree, 'build_config').start() # ... so we should trigger a call to build_config() @@ -500,27 +532,28 @@ class KUnitMainTest(unittest.TestCase): with open(path) as file: all_passed_log = file.readlines() - self.print_mock = mock.patch('builtins.print').start() + self.print_mock = mock.patch('kunit_printer.Printer.print').start() self.addCleanup(mock.patch.stopall) - self.linux_source_mock = mock.Mock() - self.linux_source_mock.build_reconfig = mock.Mock(return_value=True) - self.linux_source_mock.build_kernel = mock.Mock(return_value=True) - self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log) + self.mock_linux_init = mock.patch.object(kunit_kernel, 'LinuxSourceTree').start() + self.linux_source_mock = self.mock_linux_init.return_value + self.linux_source_mock.build_reconfig.return_value = True + self.linux_source_mock.build_kernel.return_value = True + self.linux_source_mock.run_kernel.return_value = all_passed_log def test_config_passes_args_pass(self): - kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock) + kunit.main(['config', '--build_dir=.kunit']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_build_passes_args_pass(self): - kunit.main(['build'], self.linux_source_mock) + kunit.main(['build']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.build_kernel.assert_called_once_with(False, kunit.get_default_jobs(), '.kunit', None) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 0) def test_exec_passes_args_pass(self): - kunit.main(['exec'], self.linux_source_mock) + kunit.main(['exec']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( @@ -528,7 +561,7 @@ class KUnitMainTest(unittest.TestCase): self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_passes_args_pass(self): - kunit.main(['run'], self.linux_source_mock) + kunit.main(['run']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( @@ -538,13 +571,13 @@ class KUnitMainTest(unittest.TestCase): def test_exec_passes_args_fail(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: - kunit.main(['exec'], self.linux_source_mock) + kunit.main(['exec']) self.assertEqual(e.exception.code, 1) def test_run_passes_args_fail(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: - kunit.main(['run'], self.linux_source_mock) + kunit.main(['run']) self.assertEqual(e.exception.code, 1) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) @@ -553,7 +586,7 @@ class KUnitMainTest(unittest.TestCase): def test_exec_no_tests(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=['TAP version 14', '1..0']) with self.assertRaises(SystemExit) as e: - kunit.main(['run'], self.linux_source_mock) + kunit.main(['run']) self.assertEqual(e.exception.code, 1) self.linux_source_mock.run_kernel.assert_called_once_with( args=None, build_dir='.kunit', filter_glob='', timeout=300) @@ -561,7 +594,7 @@ class KUnitMainTest(unittest.TestCase): def test_exec_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) - kunit.main(['exec', '--raw_output'], self.linux_source_mock) + kunit.main(['exec', '--raw_output']) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) for call in self.print_mock.call_args_list: self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) @@ -569,7 +602,7 @@ class KUnitMainTest(unittest.TestCase): def test_run_raw_output(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) - kunit.main(['run', '--raw_output'], self.linux_source_mock) + kunit.main(['run', '--raw_output']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) for call in self.print_mock.call_args_list: @@ -578,7 +611,7 @@ class KUnitMainTest(unittest.TestCase): def test_run_raw_output_kunit(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) - kunit.main(['run', '--raw_output=kunit'], self.linux_source_mock) + kunit.main(['run', '--raw_output=kunit']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) for call in self.print_mock.call_args_list: @@ -588,27 +621,27 @@ class KUnitMainTest(unittest.TestCase): def test_run_raw_output_invalid(self): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) with self.assertRaises(SystemExit) as e: - kunit.main(['run', '--raw_output=invalid'], self.linux_source_mock) + kunit.main(['run', '--raw_output=invalid']) self.assertNotEqual(e.exception.code, 0) def test_run_raw_output_does_not_take_positional_args(self): # --raw_output is a string flag, but we don't want it to consume # any positional arguments, only ones after an '=' self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) - kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock) + kunit.main(['run', '--raw_output', 'filter_glob']) self.linux_source_mock.run_kernel.assert_called_once_with( args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300) def test_exec_timeout(self): timeout = 3453 - kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock) + kunit.main(['exec', '--timeout', str(timeout)]) self.linux_source_mock.run_kernel.assert_called_once_with( args=None, build_dir='.kunit', filter_glob='', timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_timeout(self): timeout = 3453 - kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock) + kunit.main(['run', '--timeout', str(timeout)]) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( args=None, build_dir='.kunit', filter_glob='', timeout=timeout) @@ -616,7 +649,7 @@ class KUnitMainTest(unittest.TestCase): def test_run_builddir(self): build_dir = '.kunit' - kunit.main(['run', '--build_dir=.kunit'], self.linux_source_mock) + kunit.main(['run', '--build_dir=.kunit']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( args=None, build_dir=build_dir, filter_glob='', timeout=300) @@ -624,60 +657,81 @@ class KUnitMainTest(unittest.TestCase): def test_config_builddir(self): build_dir = '.kunit' - kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock) + kunit.main(['config', '--build_dir', build_dir]) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) def test_build_builddir(self): build_dir = '.kunit' jobs = kunit.get_default_jobs() - kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock) + kunit.main(['build', '--build_dir', build_dir]) self.linux_source_mock.build_kernel.assert_called_once_with(False, jobs, build_dir, None) def test_exec_builddir(self): build_dir = '.kunit' - kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock) + kunit.main(['exec', '--build_dir', build_dir]) self.linux_source_mock.run_kernel.assert_called_once_with( args=None, build_dir=build_dir, filter_glob='', timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) - @mock.patch.object(kunit_kernel, 'LinuxSourceTree') - def test_run_kunitconfig(self, mock_linux_init): - mock_linux_init.return_value = self.linux_source_mock + def test_run_kunitconfig(self): kunit.main(['run', '--kunitconfig=mykunitconfig']) # Just verify that we parsed and initialized it correctly here. - mock_linux_init.assert_called_once_with('.kunit', - kunitconfig_path='mykunitconfig', - kconfig_add=None, - arch='um', - cross_compile=None, - qemu_config_path=None) + self.mock_linux_init.assert_called_once_with('.kunit', + kunitconfig_paths=['mykunitconfig'], + kconfig_add=None, + arch='um', + cross_compile=None, + qemu_config_path=None, + extra_qemu_args=[]) + + def test_config_kunitconfig(self): + kunit.main(['config', '--kunitconfig=mykunitconfig']) + # Just verify that we parsed and initialized it correctly here. + self.mock_linux_init.assert_called_once_with('.kunit', + kunitconfig_paths=['mykunitconfig'], + kconfig_add=None, + arch='um', + cross_compile=None, + qemu_config_path=None, + extra_qemu_args=[]) @mock.patch.object(kunit_kernel, 'LinuxSourceTree') - def test_config_kunitconfig(self, mock_linux_init): + def test_run_multiple_kunitconfig(self, mock_linux_init): mock_linux_init.return_value = self.linux_source_mock - kunit.main(['config', '--kunitconfig=mykunitconfig']) + kunit.main(['run', '--kunitconfig=mykunitconfig', '--kunitconfig=other']) # Just verify that we parsed and initialized it correctly here. mock_linux_init.assert_called_once_with('.kunit', - kunitconfig_path='mykunitconfig', + kunitconfig_paths=['mykunitconfig', 'other'], kconfig_add=None, arch='um', cross_compile=None, - qemu_config_path=None) + qemu_config_path=None, + extra_qemu_args=[]) - @mock.patch.object(kunit_kernel, 'LinuxSourceTree') - def test_run_kconfig_add(self, mock_linux_init): - mock_linux_init.return_value = self.linux_source_mock + def test_run_kconfig_add(self): kunit.main(['run', '--kconfig_add=CONFIG_KASAN=y', '--kconfig_add=CONFIG_KCSAN=y']) # Just verify that we parsed and initialized it correctly here. - mock_linux_init.assert_called_once_with('.kunit', - kunitconfig_path=None, - kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'], - arch='um', - cross_compile=None, - qemu_config_path=None) + self.mock_linux_init.assert_called_once_with('.kunit', + kunitconfig_paths=None, + kconfig_add=['CONFIG_KASAN=y', 'CONFIG_KCSAN=y'], + arch='um', + cross_compile=None, + qemu_config_path=None, + extra_qemu_args=[]) + + def test_run_qemu_args(self): + kunit.main(['run', '--arch=x86_64', '--qemu_args', '-m 2048']) + # Just verify that we parsed and initialized it correctly here. + self.mock_linux_init.assert_called_once_with('.kunit', + kunitconfig_paths=None, + kconfig_add=None, + arch='x86_64', + cross_compile=None, + qemu_config_path=None, + extra_qemu_args=['-m', '2048']) def test_run_kernel_args(self): - kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2'], self.linux_source_mock) + kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300) @@ -699,7 +753,7 @@ class KUnitMainTest(unittest.TestCase): @mock.patch.object(kunit, '_list_tests') def test_run_isolated_by_suite(self, mock_tests): mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1'] - kunit.main(['exec', '--run_isolated=suite', 'suite*.test*'], self.linux_source_mock) + kunit.main(['exec', '--run_isolated=suite', 'suite*.test*']) # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, @@ -712,7 +766,7 @@ class KUnitMainTest(unittest.TestCase): @mock.patch.object(kunit, '_list_tests') def test_run_isolated_by_test(self, mock_tests): mock_tests.return_value = ['suite.test1', 'suite.test2', 'suite2.test1'] - kunit.main(['exec', '--run_isolated=test', 'suite*'], self.linux_source_mock) + kunit.main(['exec', '--run_isolated=test', 'suite*']) # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index a698e24b35e7..246f7ac8489b 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -45,9 +45,8 @@ help: @echo ' clean - Remove generated files and symlinks in the directory' @echo '' @echo 'Configuration:' + @echo ' make MEMBLOCK_DEBUG=1 - enable memblock_dbg() messages' @echo ' make NUMA=1 - simulate enabled NUMA' - @echo ' make MOVABLE_NODE=1 - override `movable_node_is_enabled`' - @echo ' definition to simulate movable NUMA nodes' @echo ' make 32BIT_PHYS_ADDR_T=1 - Use 32 bit physical addresses' vpath %.c ../../lib diff --git a/tools/testing/memblock/README b/tools/testing/memblock/README index ca6afcff013a..7ca437d81806 100644 --- a/tools/testing/memblock/README +++ b/tools/testing/memblock/README @@ -33,12 +33,23 @@ To run the tests, build the main target and run it: $ make && ./main -A successful run produces no output. It is also possible to override different -configuration parameters. For example, to simulate enabled NUMA, use: +A successful run produces no output. It is possible to control the behavior +by passing options from command line. For example, to include verbose output, +append the `-v` options when you run the tests: + +$ ./main -v + +This will print information about which functions are being tested and the +number of test cases that passed. + +For the full list of options from command line, see `./main --help`. + +It is also possible to override different configuration parameters to change +the test functions. For example, to simulate enabled NUMA, use: $ make NUMA=1 -For the full list of options, see `make help`. +For the full list of build options, see `make help`. Project structure ================= diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO index cd1a30d5acc9..33044c634ea7 100644 --- a/tools/testing/memblock/TODO +++ b/tools/testing/memblock/TODO @@ -1,25 +1,17 @@ TODO ===== -1. Add verbose output (e.g., what is being tested and how many tests cases are - passing) - -2. Add flags to Makefile: - + verbosity level - + enable memblock_dbg() messages (i.e. pass "-D CONFIG_DEBUG_MEMORY_INIT" - flag) - -3. Add tests trying to memblock_add() or memblock_reserve() 129th region. +1. Add tests trying to memblock_add() or memblock_reserve() 129th region. This will trigger memblock_double_array(), make sure it succeeds. *Important:* These tests require valid memory ranges, use dummy physical memory block from common.c to implement them. It is also very likely that the current MEM_SIZE won't be enough for these test cases. Use realloc to adjust the size accordingly. -4. Add test cases using this functions (implement them for both directions): +2. Add test cases using this functions (implement them for both directions): + memblock_alloc_raw() + memblock_alloc_exact_nid_raw() + memblock_alloc_try_nid_raw() -5. Add tests for memblock_alloc_node() to check if the correct NUMA node is set +3. Add tests for memblock_alloc_node() to check if the correct NUMA node is set for the new region diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index 94b52a8718b5..fdb7f5db7308 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -2,6 +2,17 @@ #ifndef _MM_INTERNAL_H #define _MM_INTERNAL_H +/* + * Enable memblock_dbg() messages + */ +#ifdef MEMBLOCK_DEBUG +static int memblock_debug = 1; +#endif + +#define pr_warn_ratelimited(fmt, ...) printf(fmt, ##__VA_ARGS__) + +bool mirrored_kernelcore = false; + struct page {}; void memblock_free_pages(struct page *page, unsigned long pfn, diff --git a/tools/testing/memblock/linux/kmemleak.h b/tools/testing/memblock/linux/kmemleak.h index 462f8c5e8aa0..5fed13bb9ec4 100644 --- a/tools/testing/memblock/linux/kmemleak.h +++ b/tools/testing/memblock/linux/kmemleak.h @@ -7,7 +7,7 @@ static inline void kmemleak_free_part_phys(phys_addr_t phys, size_t size) } static inline void kmemleak_alloc_phys(phys_addr_t phys, size_t size, - int min_count, gfp_t gfp) + gfp_t gfp) { } diff --git a/tools/testing/memblock/linux/memory_hotplug.h b/tools/testing/memblock/linux/memory_hotplug.h index 47988765a219..dabe2c556858 100644 --- a/tools/testing/memblock/linux/memory_hotplug.h +++ b/tools/testing/memblock/linux/memory_hotplug.h @@ -7,13 +7,11 @@ #include <linux/cache.h> #include <linux/types.h> +extern bool movable_node_enabled; + static inline bool movable_node_is_enabled(void) { -#ifdef MOVABLE_NODE - return true; -#else - return false; -#endif + return movable_node_enabled; } #endif diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c index fb183c9e76d1..4ca1024342b1 100644 --- a/tools/testing/memblock/main.c +++ b/tools/testing/memblock/main.c @@ -3,9 +3,11 @@ #include "tests/alloc_api.h" #include "tests/alloc_helpers_api.h" #include "tests/alloc_nid_api.h" +#include "tests/common.h" int main(int argc, char **argv) { + parse_args(argc, argv); memblock_basic_checks(); memblock_alloc_checks(); memblock_alloc_helpers_checks(); diff --git a/tools/testing/memblock/scripts/Makefile.include b/tools/testing/memblock/scripts/Makefile.include index 641569ccb7b0..aa6d82d56a23 100644 --- a/tools/testing/memblock/scripts/Makefile.include +++ b/tools/testing/memblock/scripts/Makefile.include @@ -6,14 +6,14 @@ ifeq ($(NUMA), 1) CFLAGS += -D CONFIG_NUMA endif -# Simulate movable NUMA memory regions -ifeq ($(MOVABLE_NODE), 1) - CFLAGS += -D MOVABLE_NODE -endif - # Use 32 bit physical addresses. # Remember to install 32-bit version of dependencies. ifeq ($(32BIT_PHYS_ADDR_T), 1) CFLAGS += -m32 -U CONFIG_PHYS_ADDR_T_64BIT LDFLAGS += -m32 endif + +# Enable memblock_dbg() messages +ifeq ($(MEMBLOCK_DEBUG), 1) + CFLAGS += -D MEMBLOCK_DEBUG +endif diff --git a/tools/testing/memblock/tests/alloc_api.c b/tools/testing/memblock/tests/alloc_api.c index d1aa7e15c18d..a14f38eb8a89 100644 --- a/tools/testing/memblock/tests/alloc_api.c +++ b/tools/testing/memblock/tests/alloc_api.c @@ -10,6 +10,8 @@ static int alloc_top_down_simple_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t size = SZ_2; phys_addr_t expected_start; @@ -19,12 +21,14 @@ static int alloc_top_down_simple_check(void) allocated_ptr = memblock_alloc(size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == size); - assert(rgn->base == expected_start); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, expected_start); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -55,6 +59,8 @@ static int alloc_top_down_disjoint_check(void) struct region r1; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r2_size = SZ_16; /* Use custom alignment */ phys_addr_t alignment = SMP_CACHE_BYTES * 2; @@ -73,15 +79,17 @@ static int alloc_top_down_disjoint_check(void) allocated_ptr = memblock_alloc(r2_size, alignment); - assert(allocated_ptr); - assert(rgn1->size == r1.size); - assert(rgn1->base == r1.base); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn1->size, r1.size); + ASSERT_EQ(rgn1->base, r1.base); - assert(rgn2->size == r2_size); - assert(rgn2->base == expected_start); + ASSERT_EQ(rgn2->size, r2_size); + ASSERT_EQ(rgn2->base, expected_start); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -101,6 +109,8 @@ static int alloc_top_down_before_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + /* * The first region ends at the aligned address to test region merging */ @@ -114,12 +124,14 @@ static int alloc_top_down_before_check(void) allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == total_size); - assert(rgn->base == memblock_end_of_DRAM() - total_size); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -141,6 +153,8 @@ static int alloc_top_down_after_check(void) struct region r1; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r2_size = SZ_512; phys_addr_t total_size; @@ -158,12 +172,14 @@ static int alloc_top_down_after_check(void) allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == total_size); - assert(rgn->base == r1.base - r2_size); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, r1.base - r2_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -186,6 +202,8 @@ static int alloc_top_down_second_fit_check(void) struct region r1, r2; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_1K; phys_addr_t total_size; @@ -204,12 +222,14 @@ static int alloc_top_down_second_fit_check(void) allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == r2.size + r3_size); - assert(rgn->base == r2.base - r3_size); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, r2.size + r3_size); + ASSERT_EQ(rgn->base, r2.base - r3_size); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -231,6 +251,8 @@ static int alloc_in_between_generic_check(void) struct region r1, r2; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t r3_size = SZ_64; /* @@ -254,12 +276,14 @@ static int alloc_in_between_generic_check(void) allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == total_size); - assert(rgn->base == r1.base - r2.size - r3_size); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, r1.base - r2.size - r3_size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -281,6 +305,8 @@ static int alloc_small_gaps_generic_check(void) { void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t region_size = SZ_1K; phys_addr_t gap_size = SZ_256; phys_addr_t region_end; @@ -296,7 +322,9 @@ static int alloc_small_gaps_generic_check(void) allocated_ptr = memblock_alloc(region_size, SMP_CACHE_BYTES); - assert(!allocated_ptr); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); return 0; } @@ -309,6 +337,8 @@ static int alloc_all_reserved_generic_check(void) { void *allocated_ptr = NULL; + PREFIX_PUSH(); + setup_memblock(); /* Simulate full memory */ @@ -316,7 +346,9 @@ static int alloc_all_reserved_generic_check(void) allocated_ptr = memblock_alloc(SZ_256, SMP_CACHE_BYTES); - assert(!allocated_ptr); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); return 0; } @@ -338,6 +370,8 @@ static int alloc_no_space_generic_check(void) { void *allocated_ptr = NULL; + PREFIX_PUSH(); + setup_memblock(); phys_addr_t available_size = SZ_256; @@ -348,7 +382,9 @@ static int alloc_no_space_generic_check(void) allocated_ptr = memblock_alloc(SZ_1K, SMP_CACHE_BYTES); - assert(!allocated_ptr); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); return 0; } @@ -369,6 +405,8 @@ static int alloc_limited_space_generic_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t available_size = SZ_256; phys_addr_t reserved_size = MEM_SIZE - available_size; @@ -379,12 +417,14 @@ static int alloc_limited_space_generic_check(void) allocated_ptr = memblock_alloc(available_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == MEM_SIZE); - assert(rgn->base == memblock_start_of_DRAM()); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, MEM_SIZE); + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, MEM_SIZE); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == MEM_SIZE); + test_pass_pop(); return 0; } @@ -399,14 +439,18 @@ static int alloc_no_memory_generic_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + reset_memblock_regions(); allocated_ptr = memblock_alloc(SZ_1K, SMP_CACHE_BYTES); - assert(!allocated_ptr); - assert(rgn->size == 0); - assert(rgn->base == 0); - assert(memblock.reserved.total_size == 0); + ASSERT_EQ(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, 0); + ASSERT_EQ(rgn->base, 0); + ASSERT_EQ(memblock.reserved.total_size, 0); + + test_pass_pop(); return 0; } @@ -421,16 +465,20 @@ static int alloc_bottom_up_simple_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + setup_memblock(); allocated_ptr = memblock_alloc(SZ_2, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == SZ_2); - assert(rgn->base == memblock_start_of_DRAM()); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, SZ_2); + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == SZ_2); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, SZ_2); + + test_pass_pop(); return 0; } @@ -459,6 +507,8 @@ static int alloc_bottom_up_disjoint_check(void) struct region r1; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r2_size = SZ_16; /* Use custom alignment */ phys_addr_t alignment = SMP_CACHE_BYTES * 2; @@ -477,16 +527,18 @@ static int alloc_bottom_up_disjoint_check(void) allocated_ptr = memblock_alloc(r2_size, alignment); - assert(allocated_ptr); + ASSERT_NE(allocated_ptr, NULL); - assert(rgn1->size == r1.size); - assert(rgn1->base == r1.base); + ASSERT_EQ(rgn1->size, r1.size); + ASSERT_EQ(rgn1->base, r1.base); - assert(rgn2->size == r2_size); - assert(rgn2->base == expected_start); + ASSERT_EQ(rgn2->size, r2_size); + ASSERT_EQ(rgn2->base, expected_start); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -506,6 +558,8 @@ static int alloc_bottom_up_before_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r1_size = SZ_512; phys_addr_t r2_size = SZ_128; phys_addr_t total_size = r1_size + r2_size; @@ -516,12 +570,14 @@ static int alloc_bottom_up_before_check(void) allocated_ptr = memblock_alloc(r1_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == total_size); - assert(rgn->base == memblock_start_of_DRAM()); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -542,6 +598,8 @@ static int alloc_bottom_up_after_check(void) struct region r1; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r2_size = SZ_512; phys_addr_t total_size; @@ -559,12 +617,14 @@ static int alloc_bottom_up_after_check(void) allocated_ptr = memblock_alloc(r2_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == total_size); - assert(rgn->base == r1.base); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, r1.base); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -588,6 +648,8 @@ static int alloc_bottom_up_second_fit_check(void) struct region r1, r2; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_1K; phys_addr_t total_size; @@ -606,12 +668,14 @@ static int alloc_bottom_up_second_fit_check(void) allocated_ptr = memblock_alloc(r3_size, SMP_CACHE_BYTES); - assert(allocated_ptr); - assert(rgn->size == r2.size + r3_size); - assert(rgn->base == r2.base); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, r2.size + r3_size); + ASSERT_EQ(rgn->base, r2.base); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -619,6 +683,7 @@ static int alloc_bottom_up_second_fit_check(void) /* Test case wrappers */ static int alloc_simple_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_top_down_simple_check(); memblock_set_bottom_up(true); @@ -629,6 +694,7 @@ static int alloc_simple_check(void) static int alloc_disjoint_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_top_down_disjoint_check(); memblock_set_bottom_up(true); @@ -639,6 +705,7 @@ static int alloc_disjoint_check(void) static int alloc_before_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_top_down_before_check(); memblock_set_bottom_up(true); @@ -649,6 +716,7 @@ static int alloc_before_check(void) static int alloc_after_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_top_down_after_check(); memblock_set_bottom_up(true); @@ -659,6 +727,7 @@ static int alloc_after_check(void) static int alloc_in_between_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_in_between_generic_check(); memblock_set_bottom_up(true); @@ -669,6 +738,7 @@ static int alloc_in_between_check(void) static int alloc_second_fit_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_top_down_second_fit_check(); memblock_set_bottom_up(true); @@ -679,6 +749,7 @@ static int alloc_second_fit_check(void) static int alloc_small_gaps_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_small_gaps_generic_check(); memblock_set_bottom_up(true); @@ -689,6 +760,7 @@ static int alloc_small_gaps_check(void) static int alloc_all_reserved_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_all_reserved_generic_check(); memblock_set_bottom_up(true); @@ -699,6 +771,7 @@ static int alloc_all_reserved_check(void) static int alloc_no_space_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_no_space_generic_check(); memblock_set_bottom_up(true); @@ -709,6 +782,7 @@ static int alloc_no_space_check(void) static int alloc_limited_space_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_limited_space_generic_check(); memblock_set_bottom_up(true); @@ -719,6 +793,7 @@ static int alloc_limited_space_check(void) static int alloc_no_memory_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_no_memory_generic_check(); memblock_set_bottom_up(true); @@ -729,6 +804,12 @@ static int alloc_no_memory_check(void) int memblock_alloc_checks(void) { + const char *func_testing = "memblock_alloc"; + + prefix_reset(); + prefix_push(func_testing); + test_print("Running %s tests...\n", func_testing); + reset_memblock_attributes(); dummy_physical_memory_init(); @@ -746,5 +827,7 @@ int memblock_alloc_checks(void) dummy_physical_memory_cleanup(); + prefix_pop(); + return 0; } diff --git a/tools/testing/memblock/tests/alloc_helpers_api.c b/tools/testing/memblock/tests/alloc_helpers_api.c index 963a966db461..1069b4bdd5fd 100644 --- a/tools/testing/memblock/tests/alloc_helpers_api.c +++ b/tools/testing/memblock/tests/alloc_helpers_api.c @@ -21,6 +21,8 @@ static int alloc_from_simple_generic_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_16; phys_addr_t min_addr; @@ -31,14 +33,16 @@ static int alloc_from_simple_generic_check(void) allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, min_addr); - assert(rgn->size == size); - assert(rgn->base == min_addr); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -64,6 +68,8 @@ static int alloc_from_misaligned_generic_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_32; phys_addr_t min_addr; @@ -75,14 +81,16 @@ static int alloc_from_misaligned_generic_check(void) allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); - assert(rgn->size == size); - assert(rgn->base == memblock_end_of_DRAM() - SMP_CACHE_BYTES); + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - SMP_CACHE_BYTES); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); return 0; } @@ -110,6 +118,8 @@ static int alloc_from_top_down_high_addr_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t size = SZ_32; phys_addr_t min_addr; @@ -120,12 +130,14 @@ static int alloc_from_top_down_high_addr_check(void) allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr); - assert(allocated_ptr); - assert(rgn->size == size); - assert(rgn->base == memblock_end_of_DRAM() - SMP_CACHE_BYTES); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - SMP_CACHE_BYTES); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); return 0; } @@ -151,6 +163,8 @@ static int alloc_from_top_down_no_space_above_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r1_size = SZ_64; phys_addr_t r2_size = SZ_2; phys_addr_t total_size = r1_size + r2_size; @@ -165,12 +179,14 @@ static int alloc_from_top_down_no_space_above_check(void) allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr); - assert(allocated_ptr); - assert(rgn->base == min_addr - r1_size); - assert(rgn->size == total_size); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->base, min_addr - r1_size); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -186,6 +202,8 @@ static int alloc_from_top_down_min_addr_cap_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r1_size = SZ_64; phys_addr_t min_addr; phys_addr_t start_addr; @@ -199,12 +217,14 @@ static int alloc_from_top_down_min_addr_cap_check(void) allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr); - assert(allocated_ptr); - assert(rgn->base == start_addr); - assert(rgn->size == MEM_SIZE); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->base, start_addr); + ASSERT_EQ(rgn->size, MEM_SIZE); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, MEM_SIZE); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == MEM_SIZE); + test_pass_pop(); return 0; } @@ -230,6 +250,8 @@ static int alloc_from_bottom_up_high_addr_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t size = SZ_32; phys_addr_t min_addr; @@ -240,12 +262,14 @@ static int alloc_from_bottom_up_high_addr_check(void) allocated_ptr = memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr); - assert(allocated_ptr); - assert(rgn->size == size); - assert(rgn->base == memblock_start_of_DRAM()); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -270,6 +294,8 @@ static int alloc_from_bottom_up_no_space_above_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r1_size = SZ_64; phys_addr_t min_addr; phys_addr_t r2_size; @@ -284,12 +310,14 @@ static int alloc_from_bottom_up_no_space_above_check(void) allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr); - assert(allocated_ptr); - assert(rgn->base == memblock_start_of_DRAM()); - assert(rgn->size == r1_size); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, r1_size); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == r1_size + r2_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, r1_size + r2_size); + + test_pass_pop(); return 0; } @@ -304,6 +332,8 @@ static int alloc_from_bottom_up_min_addr_cap_check(void) struct memblock_region *rgn = &memblock.reserved.regions[0]; void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t r1_size = SZ_64; phys_addr_t min_addr; phys_addr_t start_addr; @@ -315,12 +345,14 @@ static int alloc_from_bottom_up_min_addr_cap_check(void) allocated_ptr = memblock_alloc_from(r1_size, SMP_CACHE_BYTES, min_addr); - assert(allocated_ptr); - assert(rgn->base == start_addr); - assert(rgn->size == r1_size); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(rgn->base, start_addr); + ASSERT_EQ(rgn->size, r1_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == r1_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, r1_size); + + test_pass_pop(); return 0; } @@ -328,6 +360,7 @@ static int alloc_from_bottom_up_min_addr_cap_check(void) /* Test case wrappers */ static int alloc_from_simple_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_from_simple_generic_check(); memblock_set_bottom_up(true); @@ -338,6 +371,7 @@ static int alloc_from_simple_check(void) static int alloc_from_misaligned_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_from_misaligned_generic_check(); memblock_set_bottom_up(true); @@ -348,6 +382,7 @@ static int alloc_from_misaligned_check(void) static int alloc_from_high_addr_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_from_top_down_high_addr_check(); memblock_set_bottom_up(true); @@ -358,6 +393,7 @@ static int alloc_from_high_addr_check(void) static int alloc_from_no_space_above_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_from_top_down_no_space_above_check(); memblock_set_bottom_up(true); @@ -368,6 +404,7 @@ static int alloc_from_no_space_above_check(void) static int alloc_from_min_addr_cap_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_from_top_down_min_addr_cap_check(); memblock_set_bottom_up(true); @@ -378,6 +415,12 @@ static int alloc_from_min_addr_cap_check(void) int memblock_alloc_helpers_checks(void) { + const char *func_testing = "memblock_alloc_from"; + + prefix_reset(); + prefix_push(func_testing); + test_print("Running %s tests...\n", func_testing); + reset_memblock_attributes(); dummy_physical_memory_init(); @@ -389,5 +432,7 @@ int memblock_alloc_helpers_checks(void) dummy_physical_memory_cleanup(); + prefix_pop(); + return 0; } diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c index 6390206e50e1..255fd514e9f5 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.c +++ b/tools/testing/memblock/tests/alloc_nid_api.c @@ -21,6 +21,8 @@ static int alloc_try_nid_top_down_simple_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_128; phys_addr_t min_addr; phys_addr_t max_addr; @@ -36,15 +38,17 @@ static int alloc_try_nid_top_down_simple_check(void) b = (char *)allocated_ptr; rgn_end = rgn->base + rgn->size; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, max_addr - size); + ASSERT_EQ(rgn_end, max_addr); - assert(rgn->size == size); - assert(rgn->base == max_addr - size); - assert(rgn_end == max_addr); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -72,6 +76,8 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_128; phys_addr_t misalign = SZ_2; phys_addr_t min_addr; @@ -88,15 +94,17 @@ static int alloc_try_nid_top_down_end_misaligned_check(void) b = (char *)allocated_ptr; rgn_end = rgn->base + rgn->size; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); - assert(rgn->size == size); - assert(rgn->base == max_addr - size - misalign); - assert(rgn_end < max_addr); + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, max_addr - size - misalign); + ASSERT_LT(rgn_end, max_addr); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); return 0; } @@ -122,6 +130,8 @@ static int alloc_try_nid_exact_address_generic_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; @@ -137,15 +147,17 @@ static int alloc_try_nid_exact_address_generic_check(void) b = (char *)allocated_ptr; rgn_end = rgn->base + rgn->size; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, min_addr); + ASSERT_EQ(rgn_end, max_addr); - assert(rgn->size == size); - assert(rgn->base == min_addr); - assert(rgn_end == max_addr); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -173,6 +185,8 @@ static int alloc_try_nid_top_down_narrow_range_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; @@ -186,14 +200,16 @@ static int alloc_try_nid_top_down_narrow_range_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, max_addr - size); - assert(rgn->size == size); - assert(rgn->base == max_addr - size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -222,6 +238,8 @@ static int alloc_try_nid_low_max_generic_check(void) { void *allocated_ptr = NULL; + PREFIX_PUSH(); + phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; @@ -234,7 +252,9 @@ static int alloc_try_nid_low_max_generic_check(void) allocated_ptr = memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, NUMA_NO_NODE); - assert(!allocated_ptr); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); return 0; } @@ -259,6 +279,8 @@ static int alloc_try_nid_min_reserved_generic_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t r1_size = SZ_128; phys_addr_t r2_size = SZ_64; phys_addr_t total_size = r1_size + r2_size; @@ -278,14 +300,16 @@ static int alloc_try_nid_min_reserved_generic_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); - assert(rgn->size == total_size); - assert(rgn->base == reserved_base); + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, reserved_base); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -310,6 +334,8 @@ static int alloc_try_nid_max_reserved_generic_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t r1_size = SZ_64; phys_addr_t r2_size = SZ_128; phys_addr_t total_size = r1_size + r2_size; @@ -327,14 +353,16 @@ static int alloc_try_nid_max_reserved_generic_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, min_addr); - assert(rgn->size == total_size); - assert(rgn->base == min_addr); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -364,6 +392,8 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) char *b; struct region r1, r2; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_64; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; @@ -389,17 +419,19 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn1->size, r1.size + r3_size); + ASSERT_EQ(rgn1->base, max_addr - r3_size); - assert(rgn1->size == r1.size + r3_size); - assert(rgn1->base == max_addr - r3_size); + ASSERT_EQ(rgn2->size, r2.size); + ASSERT_EQ(rgn2->base, r2.base); - assert(rgn2->size == r2.size); - assert(rgn2->base == r2.base); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -427,6 +459,8 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) char *b; struct region r1, r2; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_64; phys_addr_t total_size; phys_addr_t max_addr; @@ -451,14 +485,16 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); - assert(rgn->size == total_size); - assert(rgn->base == r2.base); + ASSERT_EQ(rgn->size, total_size); + ASSERT_EQ(rgn->base, r2.base); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -489,6 +525,8 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) char *b; struct region r1, r2; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_256; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; @@ -514,17 +552,19 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn1->size, r1.size); + ASSERT_EQ(rgn1->base, r1.base); - assert(rgn1->size == r1.size); - assert(rgn1->base == r1.base); + ASSERT_EQ(rgn2->size, r2.size + r3_size); + ASSERT_EQ(rgn2->base, r2.base - r3_size); - assert(rgn2->size == r2.size + r3_size); - assert(rgn2->base == r2.base - r3_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -554,6 +594,8 @@ static int alloc_try_nid_reserved_all_generic_check(void) void *allocated_ptr = NULL; struct region r1, r2; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_256; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t max_addr; @@ -576,7 +618,9 @@ static int alloc_try_nid_reserved_all_generic_check(void) allocated_ptr = memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES, min_addr, max_addr, NUMA_NO_NODE); - assert(!allocated_ptr); + ASSERT_EQ(allocated_ptr, NULL); + + test_pass_pop(); return 0; } @@ -592,6 +636,8 @@ static int alloc_try_nid_top_down_cap_max_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; @@ -605,14 +651,16 @@ static int alloc_try_nid_top_down_cap_max_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - size); - assert(rgn->size == size); - assert(rgn->base == memblock_end_of_DRAM() - size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -628,6 +676,8 @@ static int alloc_try_nid_top_down_cap_min_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; @@ -641,14 +691,16 @@ static int alloc_try_nid_top_down_cap_min_check(void) min_addr, max_addr, NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); - assert(rgn->size == size); - assert(rgn->base == memblock_end_of_DRAM() - size); + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, memblock_end_of_DRAM() - size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); return 0; } @@ -673,6 +725,8 @@ static int alloc_try_nid_bottom_up_simple_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_128; phys_addr_t min_addr; phys_addr_t max_addr; @@ -689,15 +743,17 @@ static int alloc_try_nid_bottom_up_simple_check(void) b = (char *)allocated_ptr; rgn_end = rgn->base + rgn->size; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, min_addr); + ASSERT_LT(rgn_end, max_addr); - assert(rgn->size == size); - assert(rgn->base == min_addr); - assert(rgn_end < max_addr); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -725,6 +781,8 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_128; phys_addr_t misalign = SZ_2; phys_addr_t min_addr; @@ -742,15 +800,17 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void) b = (char *)allocated_ptr; rgn_end = rgn->base + rgn->size; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, min_addr + (SMP_CACHE_BYTES - misalign)); + ASSERT_LT(rgn_end, max_addr); - assert(rgn->size == size); - assert(rgn->base == min_addr + (SMP_CACHE_BYTES - misalign)); - assert(rgn_end < max_addr); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -778,6 +838,8 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; @@ -792,14 +854,16 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void) NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); - assert(rgn->size == size); - assert(rgn->base == memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); + + test_pass_pop(); return 0; } @@ -829,6 +893,8 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) char *b; struct region r1, r2; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_64; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; @@ -855,17 +921,19 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void) NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); - assert(rgn1->size == r1.size); - assert(rgn1->base == max_addr); + ASSERT_EQ(rgn1->size, r1.size); + ASSERT_EQ(rgn1->base, max_addr); - assert(rgn2->size == r2.size + r3_size); - assert(rgn2->base == r2.base); + ASSERT_EQ(rgn2->size, r2.size + r3_size); + ASSERT_EQ(rgn2->base, r2.base); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -899,6 +967,8 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) char *b; struct region r1, r2; + PREFIX_PUSH(); + phys_addr_t r3_size = SZ_256; phys_addr_t gap_size = SMP_CACHE_BYTES; phys_addr_t total_size; @@ -925,20 +995,22 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void) NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn3->size, r3_size); + ASSERT_EQ(rgn3->base, memblock_start_of_DRAM()); - assert(rgn3->size == r3_size); - assert(rgn3->base == memblock_start_of_DRAM()); + ASSERT_EQ(rgn2->size, r2.size); + ASSERT_EQ(rgn2->base, r2.base); - assert(rgn2->size == r2.size); - assert(rgn2->base == r2.base); + ASSERT_EQ(rgn1->size, r1.size); + ASSERT_EQ(rgn1->base, r1.base); - assert(rgn1->size == r1.size); - assert(rgn1->base == r1.base); + ASSERT_EQ(memblock.reserved.cnt, 3); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 3); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -954,6 +1026,8 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_256; phys_addr_t min_addr; phys_addr_t max_addr; @@ -968,14 +1042,16 @@ static int alloc_try_nid_bottom_up_cap_max_check(void) NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, min_addr); - assert(rgn->size == size); - assert(rgn->base == min_addr); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -991,6 +1067,8 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) void *allocated_ptr = NULL; char *b; + PREFIX_PUSH(); + phys_addr_t size = SZ_1K; phys_addr_t min_addr; phys_addr_t max_addr; @@ -1005,14 +1083,16 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) NUMA_NO_NODE); b = (char *)allocated_ptr; - assert(allocated_ptr); - assert(*b == 0); + ASSERT_NE(allocated_ptr, NULL); + ASSERT_EQ(*b, 0); + + ASSERT_EQ(rgn->size, size); + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); - assert(rgn->size == size); - assert(rgn->base == memblock_start_of_DRAM()); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == size); + test_pass_pop(); return 0; } @@ -1020,6 +1100,7 @@ static int alloc_try_nid_bottom_up_cap_min_check(void) /* Test case wrappers */ static int alloc_try_nid_simple_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_top_down_simple_check(); memblock_set_bottom_up(true); @@ -1030,6 +1111,7 @@ static int alloc_try_nid_simple_check(void) static int alloc_try_nid_misaligned_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_top_down_end_misaligned_check(); memblock_set_bottom_up(true); @@ -1040,6 +1122,7 @@ static int alloc_try_nid_misaligned_check(void) static int alloc_try_nid_narrow_range_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_top_down_narrow_range_check(); memblock_set_bottom_up(true); @@ -1050,6 +1133,7 @@ static int alloc_try_nid_narrow_range_check(void) static int alloc_try_nid_reserved_with_space_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_top_down_reserved_with_space_check(); memblock_set_bottom_up(true); @@ -1060,6 +1144,7 @@ static int alloc_try_nid_reserved_with_space_check(void) static int alloc_try_nid_reserved_no_space_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_top_down_reserved_no_space_check(); memblock_set_bottom_up(true); @@ -1070,6 +1155,7 @@ static int alloc_try_nid_reserved_no_space_check(void) static int alloc_try_nid_cap_max_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_top_down_cap_max_check(); memblock_set_bottom_up(true); @@ -1080,6 +1166,7 @@ static int alloc_try_nid_cap_max_check(void) static int alloc_try_nid_cap_min_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_top_down_cap_min_check(); memblock_set_bottom_up(true); @@ -1090,6 +1177,7 @@ static int alloc_try_nid_cap_min_check(void) static int alloc_try_nid_min_reserved_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_min_reserved_generic_check(); memblock_set_bottom_up(true); @@ -1100,6 +1188,7 @@ static int alloc_try_nid_min_reserved_check(void) static int alloc_try_nid_max_reserved_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_max_reserved_generic_check(); memblock_set_bottom_up(true); @@ -1110,6 +1199,7 @@ static int alloc_try_nid_max_reserved_check(void) static int alloc_try_nid_exact_address_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_exact_address_generic_check(); memblock_set_bottom_up(true); @@ -1120,6 +1210,7 @@ static int alloc_try_nid_exact_address_check(void) static int alloc_try_nid_reserved_full_merge_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_reserved_full_merge_generic_check(); memblock_set_bottom_up(true); @@ -1130,6 +1221,7 @@ static int alloc_try_nid_reserved_full_merge_check(void) static int alloc_try_nid_reserved_all_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_reserved_all_generic_check(); memblock_set_bottom_up(true); @@ -1140,6 +1232,7 @@ static int alloc_try_nid_reserved_all_check(void) static int alloc_try_nid_low_max_check(void) { + test_print("\tRunning %s...\n", __func__); memblock_set_bottom_up(false); alloc_try_nid_low_max_generic_check(); memblock_set_bottom_up(true); @@ -1150,6 +1243,12 @@ static int alloc_try_nid_low_max_check(void) int memblock_alloc_nid_checks(void) { + const char *func_testing = "memblock_alloc_try_nid"; + + prefix_reset(); + prefix_push(func_testing); + test_print("Running %s tests...\n", func_testing); + reset_memblock_attributes(); dummy_physical_memory_init(); @@ -1170,5 +1269,7 @@ int memblock_alloc_nid_checks(void) dummy_physical_memory_cleanup(); + prefix_pop(); + return 0; } diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index a7bc180316d6..66f46f261e66 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -4,21 +4,29 @@ #include "basic_api.h" #define EXPECTED_MEMBLOCK_REGIONS 128 +#define FUNC_ADD "memblock_add" +#define FUNC_RESERVE "memblock_reserve" +#define FUNC_REMOVE "memblock_remove" +#define FUNC_FREE "memblock_free" static int memblock_initialization_check(void) { - assert(memblock.memory.regions); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.max == EXPECTED_MEMBLOCK_REGIONS); - assert(strcmp(memblock.memory.name, "memory") == 0); + PREFIX_PUSH(); - assert(memblock.reserved.regions); - assert(memblock.reserved.cnt == 1); - assert(memblock.memory.max == EXPECTED_MEMBLOCK_REGIONS); - assert(strcmp(memblock.reserved.name, "reserved") == 0); + ASSERT_NE(memblock.memory.regions, NULL); + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS); + ASSERT_EQ(strcmp(memblock.memory.name, "memory"), 0); - assert(!memblock.bottom_up); - assert(memblock.current_limit == MEMBLOCK_ALLOC_ANYWHERE); + ASSERT_NE(memblock.reserved.regions, NULL); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS); + ASSERT_EQ(strcmp(memblock.reserved.name, "reserved"), 0); + + ASSERT_EQ(memblock.bottom_up, false); + ASSERT_EQ(memblock.current_limit, MEMBLOCK_ALLOC_ANYWHERE); + + test_pass_pop(); return 0; } @@ -40,14 +48,18 @@ static int memblock_add_simple_check(void) .size = SZ_4M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_add(r.base, r.size); - assert(rgn->base == r.base); - assert(rgn->size == r.size); + ASSERT_EQ(rgn->base, r.base); + ASSERT_EQ(rgn->size, r.size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, r.size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == r.size); + test_pass_pop(); return 0; } @@ -69,18 +81,22 @@ static int memblock_add_node_simple_check(void) .size = SZ_16M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_add_node(r.base, r.size, 1, MEMBLOCK_HOTPLUG); - assert(rgn->base == r.base); - assert(rgn->size == r.size); + ASSERT_EQ(rgn->base, r.base); + ASSERT_EQ(rgn->size, r.size); #ifdef CONFIG_NUMA - assert(rgn->nid == 1); + ASSERT_EQ(rgn->nid, 1); #endif - assert(rgn->flags == MEMBLOCK_HOTPLUG); + ASSERT_EQ(rgn->flags, MEMBLOCK_HOTPLUG); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, r.size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == r.size); + test_pass_pop(); return 0; } @@ -113,18 +129,22 @@ static int memblock_add_disjoint_check(void) .size = SZ_8K }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_add(r1.base, r1.size); memblock_add(r2.base, r2.size); - assert(rgn1->base == r1.base); - assert(rgn1->size == r1.size); + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, r1.size); + + ASSERT_EQ(rgn2->base, r2.base); + ASSERT_EQ(rgn2->size, r2.size); - assert(rgn2->base == r2.base); - assert(rgn2->size == r2.size); + ASSERT_EQ(memblock.memory.cnt, 2); + ASSERT_EQ(memblock.memory.total_size, r1.size + r2.size); - assert(memblock.memory.cnt == 2); - assert(memblock.memory.total_size == r1.size + r2.size); + test_pass_pop(); return 0; } @@ -162,17 +182,21 @@ static int memblock_add_overlap_top_check(void) .size = SZ_512M }; + PREFIX_PUSH(); + total_size = (r1.base - r2.base) + r1.size; reset_memblock_regions(); memblock_add(r1.base, r1.size); memblock_add(r2.base, r2.size); - assert(rgn->base == r2.base); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r2.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, total_size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == total_size); + test_pass_pop(); return 0; } @@ -210,17 +234,21 @@ static int memblock_add_overlap_bottom_check(void) .size = SZ_1G }; + PREFIX_PUSH(); + total_size = (r2.base - r1.base) + r2.size; reset_memblock_regions(); memblock_add(r1.base, r1.size); memblock_add(r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, total_size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == total_size); + test_pass_pop(); return 0; } @@ -255,15 +283,19 @@ static int memblock_add_within_check(void) .size = SZ_1M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_add(r1.base, r1.size); memblock_add(r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == r1.size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, r1.size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, r1.size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == r1.size); + test_pass_pop(); return 0; } @@ -279,19 +311,27 @@ static int memblock_add_twice_check(void) .size = SZ_2M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_add(r.base, r.size); memblock_add(r.base, r.size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == r.size); + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, r.size); + + test_pass_pop(); return 0; } static int memblock_add_checks(void) { + prefix_reset(); + prefix_push(FUNC_ADD); + test_print("Running %s tests...\n", FUNC_ADD); + memblock_add_simple_check(); memblock_add_node_simple_check(); memblock_add_disjoint_check(); @@ -300,6 +340,8 @@ static int memblock_add_checks(void) memblock_add_within_check(); memblock_add_twice_check(); + prefix_pop(); + return 0; } @@ -320,11 +362,15 @@ static int memblock_reserve_simple_check(void) .size = SZ_128M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_reserve(r.base, r.size); - assert(rgn->base == r.base); - assert(rgn->size == r.size); + ASSERT_EQ(rgn->base, r.base); + ASSERT_EQ(rgn->size, r.size); + + test_pass_pop(); return 0; } @@ -356,18 +402,22 @@ static int memblock_reserve_disjoint_check(void) .size = SZ_512M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - assert(rgn1->base == r1.base); - assert(rgn1->size == r1.size); + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, r1.size); + + ASSERT_EQ(rgn2->base, r2.base); + ASSERT_EQ(rgn2->size, r2.size); - assert(rgn2->base == r2.base); - assert(rgn2->size == r2.size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, r1.size + r2.size); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == r1.size + r2.size); + test_pass_pop(); return 0; } @@ -406,17 +456,21 @@ static int memblock_reserve_overlap_top_check(void) .size = SZ_1G }; + PREFIX_PUSH(); + total_size = (r1.base - r2.base) + r1.size; reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - assert(rgn->base == r2.base); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r2.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -455,17 +509,21 @@ static int memblock_reserve_overlap_bottom_check(void) .size = SZ_128K }; + PREFIX_PUSH(); + total_size = (r2.base - r1.base) + r2.size; reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + test_pass_pop(); return 0; } @@ -502,15 +560,19 @@ static int memblock_reserve_within_check(void) .size = SZ_64K }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == r1.size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, r1.size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, r1.size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == r1.size); + test_pass_pop(); return 0; } @@ -527,19 +589,27 @@ static int memblock_reserve_twice_check(void) .size = SZ_2M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_reserve(r.base, r.size); memblock_reserve(r.base, r.size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == r.size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, r.size); + + test_pass_pop(); return 0; } static int memblock_reserve_checks(void) { + prefix_reset(); + prefix_push(FUNC_RESERVE); + test_print("Running %s tests...\n", FUNC_RESERVE); + memblock_reserve_simple_check(); memblock_reserve_disjoint_check(); memblock_reserve_overlap_top_check(); @@ -547,6 +617,8 @@ static int memblock_reserve_checks(void) memblock_reserve_within_check(); memblock_reserve_twice_check(); + prefix_pop(); + return 0; } @@ -581,16 +653,20 @@ static int memblock_remove_simple_check(void) .size = SZ_4M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_add(r1.base, r1.size); memblock_add(r2.base, r2.size); memblock_remove(r1.base, r1.size); - assert(rgn->base == r2.base); - assert(rgn->size == r2.size); + ASSERT_EQ(rgn->base, r2.base); + ASSERT_EQ(rgn->size, r2.size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == r2.size); + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, r2.size); + + test_pass_pop(); return 0; } @@ -626,15 +702,19 @@ static int memblock_remove_absent_check(void) .size = SZ_1G }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_add(r1.base, r1.size); memblock_remove(r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == r1.size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, r1.size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, r1.size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == r1.size); + test_pass_pop(); return 0; } @@ -674,6 +754,8 @@ static int memblock_remove_overlap_top_check(void) .size = SZ_32M }; + PREFIX_PUSH(); + r1_end = r1.base + r1.size; r2_end = r2.base + r2.size; total_size = r1_end - r2_end; @@ -682,11 +764,13 @@ static int memblock_remove_overlap_top_check(void) memblock_add(r1.base, r1.size); memblock_remove(r2.base, r2.size); - assert(rgn->base == r1.base + r2.base); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r1.base + r2.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, total_size); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == total_size); + test_pass_pop(); return 0; } @@ -724,17 +808,22 @@ static int memblock_remove_overlap_bottom_check(void) .size = SZ_256M }; + PREFIX_PUSH(); + total_size = r2.base - r1.base; reset_memblock_regions(); memblock_add(r1.base, r1.size); memblock_remove(r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); + + ASSERT_EQ(memblock.memory.cnt, 1); + ASSERT_EQ(memblock.memory.total_size, total_size); + + test_pass_pop(); - assert(memblock.memory.cnt == 1); - assert(memblock.memory.total_size == total_size); return 0; } @@ -774,6 +863,8 @@ static int memblock_remove_within_check(void) .size = SZ_1M }; + PREFIX_PUSH(); + r1_size = r2.base - r1.base; r2_size = (r1.base + r1.size) - (r2.base + r2.size); total_size = r1_size + r2_size; @@ -782,26 +873,34 @@ static int memblock_remove_within_check(void) memblock_add(r1.base, r1.size); memblock_remove(r2.base, r2.size); - assert(rgn1->base == r1.base); - assert(rgn1->size == r1_size); + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, r1_size); + + ASSERT_EQ(rgn2->base, r2.base + r2.size); + ASSERT_EQ(rgn2->size, r2_size); - assert(rgn2->base == r2.base + r2.size); - assert(rgn2->size == r2_size); + ASSERT_EQ(memblock.memory.cnt, 2); + ASSERT_EQ(memblock.memory.total_size, total_size); - assert(memblock.memory.cnt == 2); - assert(memblock.memory.total_size == total_size); + test_pass_pop(); return 0; } static int memblock_remove_checks(void) { + prefix_reset(); + prefix_push(FUNC_REMOVE); + test_print("Running %s tests...\n", FUNC_REMOVE); + memblock_remove_simple_check(); memblock_remove_absent_check(); memblock_remove_overlap_top_check(); memblock_remove_overlap_bottom_check(); memblock_remove_within_check(); + prefix_pop(); + return 0; } @@ -835,16 +934,20 @@ static int memblock_free_simple_check(void) .size = SZ_1M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_reserve(r2.base, r2.size); memblock_free((void *)r1.base, r1.size); - assert(rgn->base == r2.base); - assert(rgn->size == r2.size); + ASSERT_EQ(rgn->base, r2.base); + ASSERT_EQ(rgn->size, r2.size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, r2.size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == r2.size); + test_pass_pop(); return 0; } @@ -880,15 +983,19 @@ static int memblock_free_absent_check(void) .size = SZ_128M }; + PREFIX_PUSH(); + reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_free((void *)r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == r1.size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, r1.size); + + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, r1.size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == r1.size); + test_pass_pop(); return 0; } @@ -928,17 +1035,21 @@ static int memblock_free_overlap_top_check(void) .size = SZ_8M }; + PREFIX_PUSH(); + total_size = (r1.size + r1.base) - (r2.base + r2.size); reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_free((void *)r2.base, r2.size); - assert(rgn->base == r2.base + r2.size); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r2.base + r2.size); + ASSERT_EQ(rgn->size, total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -973,17 +1084,21 @@ static int memblock_free_overlap_bottom_check(void) .size = SZ_32M }; + PREFIX_PUSH(); + total_size = r2.base - r1.base; reset_memblock_regions(); memblock_reserve(r1.base, r1.size); memblock_free((void *)r2.base, r2.size); - assert(rgn->base == r1.base); - assert(rgn->size == total_size); + ASSERT_EQ(rgn->base, r1.base); + ASSERT_EQ(rgn->size, total_size); - assert(memblock.reserved.cnt == 1); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 1); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } @@ -1024,6 +1139,8 @@ static int memblock_free_within_check(void) .size = SZ_1M }; + PREFIX_PUSH(); + r1_size = r2.base - r1.base; r2_size = (r1.base + r1.size) - (r2.base + r2.size); total_size = r1_size + r2_size; @@ -1032,26 +1149,34 @@ static int memblock_free_within_check(void) memblock_reserve(r1.base, r1.size); memblock_free((void *)r2.base, r2.size); - assert(rgn1->base == r1.base); - assert(rgn1->size == r1_size); + ASSERT_EQ(rgn1->base, r1.base); + ASSERT_EQ(rgn1->size, r1_size); - assert(rgn2->base == r2.base + r2.size); - assert(rgn2->size == r2_size); + ASSERT_EQ(rgn2->base, r2.base + r2.size); + ASSERT_EQ(rgn2->size, r2_size); - assert(memblock.reserved.cnt == 2); - assert(memblock.reserved.total_size == total_size); + ASSERT_EQ(memblock.reserved.cnt, 2); + ASSERT_EQ(memblock.reserved.total_size, total_size); + + test_pass_pop(); return 0; } static int memblock_free_checks(void) { + prefix_reset(); + prefix_push(FUNC_FREE); + test_print("Running %s tests...\n", FUNC_FREE); + memblock_free_simple_check(); memblock_free_absent_check(); memblock_free_overlap_top_check(); memblock_free_overlap_bottom_check(); memblock_free_within_check(); + prefix_pop(); + return 0; } diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c index 62d3191f7c9a..e43b2676af81 100644 --- a/tools/testing/memblock/tests/common.c +++ b/tools/testing/memblock/tests/common.c @@ -1,11 +1,39 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "tests/common.h" #include <string.h> +#include <getopt.h> +#include <linux/memory_hotplug.h> +#include <linux/build_bug.h> #define INIT_MEMBLOCK_REGIONS 128 #define INIT_MEMBLOCK_RESERVED_REGIONS INIT_MEMBLOCK_REGIONS +#define PREFIXES_MAX 15 +#define DELIM ": " static struct test_memory memory_block; +static const char __maybe_unused *prefixes[PREFIXES_MAX]; +static int __maybe_unused nr_prefixes; + +static const char *short_opts = "mv"; +static const struct option long_opts[] = { + {"movable-node", 0, NULL, 'm'}, + {"verbose", 0, NULL, 'v'}, + {NULL, 0, NULL, 0} +}; + +static const char * const help_opts[] = { + "disallow allocations from regions marked as hotplugged\n\t\t\t" + "by simulating enabling the \"movable_node\" kernel\n\t\t\t" + "parameter", + "enable verbose output, which includes the name of the\n\t\t\t" + "memblock function being tested, the name of the test,\n\t\t\t" + "and whether the test passed or failed." +}; + +static int verbose; + +/* sets global variable returned by movable_node_is_enabled() stub */ +bool movable_node_enabled; void reset_memblock_regions(void) { @@ -46,3 +74,93 @@ void dummy_physical_memory_cleanup(void) { free(memory_block.base); } + +static void usage(const char *prog) +{ + BUILD_BUG_ON(ARRAY_SIZE(help_opts) != ARRAY_SIZE(long_opts) - 1); + + printf("Usage: %s [-%s]\n", prog, short_opts); + + for (int i = 0; long_opts[i].name; i++) { + printf(" -%c, --%-12s\t%s\n", long_opts[i].val, + long_opts[i].name, help_opts[i]); + } + + exit(1); +} + +void parse_args(int argc, char **argv) +{ + int c; + + while ((c = getopt_long_only(argc, argv, short_opts, long_opts, + NULL)) != -1) { + switch (c) { + case 'm': + movable_node_enabled = true; + break; + case 'v': + verbose = 1; + break; + default: + usage(argv[0]); + } + } +} + +void print_prefixes(const char *postfix) +{ + for (int i = 0; i < nr_prefixes; i++) + test_print("%s%s", prefixes[i], DELIM); + test_print(postfix); +} + +void test_fail(void) +{ + if (verbose) { + ksft_test_result_fail(": "); + print_prefixes("failed\n"); + } +} + +void test_pass(void) +{ + if (verbose) { + ksft_test_result_pass(": "); + print_prefixes("passed\n"); + } +} + +void test_print(const char *fmt, ...) +{ + if (verbose) { + int saved_errno = errno; + va_list args; + + va_start(args, fmt); + errno = saved_errno; + vprintf(fmt, args); + va_end(args); + } +} + +void prefix_reset(void) +{ + memset(prefixes, 0, PREFIXES_MAX * sizeof(char *)); + nr_prefixes = 0; +} + +void prefix_push(const char *prefix) +{ + assert(nr_prefixes < PREFIXES_MAX); + prefixes[nr_prefixes] = prefix; + nr_prefixes++; +} + +void prefix_pop(void) +{ + if (nr_prefixes > 0) { + prefixes[nr_prefixes - 1] = 0; + nr_prefixes--; + } +} diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index 619054d03219..3e7f23d341d7 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -7,9 +7,49 @@ #include <linux/types.h> #include <linux/memblock.h> #include <linux/sizes.h> +#include <linux/printk.h> +#include <../selftests/kselftest.h> #define MEM_SIZE SZ_16K +/** + * ASSERT_EQ(): + * Check the condition + * @_expected == @_seen + * If false, print failed test message (if in VERBOSE mode) and then assert + */ +#define ASSERT_EQ(_expected, _seen) do { \ + if ((_expected) != (_seen)) \ + test_fail(); \ + assert((_expected) == (_seen)); \ +} while (0) + +/** + * ASSERT_NE(): + * Check the condition + * @_expected != @_seen + * If false, print failed test message (if in VERBOSE mode) and then assert + */ +#define ASSERT_NE(_expected, _seen) do { \ + if ((_expected) == (_seen)) \ + test_fail(); \ + assert((_expected) != (_seen)); \ +} while (0) + +/** + * ASSERT_LT(): + * Check the condition + * @_expected < @_seen + * If false, print failed test message (if in VERBOSE mode) and then assert + */ +#define ASSERT_LT(_expected, _seen) do { \ + if ((_expected) >= (_seen)) \ + test_fail(); \ + assert((_expected) < (_seen)); \ +} while (0) + +#define PREFIX_PUSH() prefix_push(__func__) + /* * Available memory registered with memblock needs to be valid for allocs * test to run. This is a convenience wrapper for memory allocated in @@ -30,5 +70,19 @@ void reset_memblock_attributes(void); void setup_memblock(void); void dummy_physical_memory_init(void); void dummy_physical_memory_cleanup(void); +void parse_args(int argc, char **argv); + +void test_fail(void); +void test_pass(void); +void test_print(const char *fmt, ...); +void prefix_reset(void); +void prefix_push(const char *prefix); +void prefix_pop(void); + +static inline void test_pass_pop(void) +{ + test_pass(); + prefix_pop(); +} #endif diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index de11992dc577..10b34bb03bc1 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -17,6 +17,7 @@ TARGETS += exec TARGETS += filesystems TARGETS += filesystems/binderfs TARGETS += filesystems/epoll +TARGETS += filesystems/fat TARGETS += firmware TARGETS += fpu TARGETS += ftrace @@ -143,7 +144,6 @@ endif # Prepare for headers install include $(top_srcdir)/scripts/subarch.include ARCH ?= $(SUBARCH) -export KSFT_KHDR_INSTALL_DONE := 1 export BUILD export KHDR_INCLUDES @@ -151,30 +151,7 @@ export KHDR_INCLUDES # all isn't the first target in the file. .DEFAULT_GOAL := all -# Install headers here once for all tests. KSFT_KHDR_INSTALL_DONE -# is used to avoid running headers_install from lib.mk. -# Invoke headers install with --no-builtin-rules to avoid circular -# dependency in "make kselftest" case. In this case, second level -# make inherits builtin-rules which will use the rule generate -# Makefile.o and runs into -# "Circular Makefile.o <- prepare dependency dropped." -# and headers_install fails and test compile fails. -# -# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile -# invokes them as sub-makes and --no-builtin-rules is not necessary, -# but doesn't cause any failures. Keep it simple and use the same -# flags in both cases. -# Local build cases: "make kselftest", "make -C" - headers are installed -# in the default INSTALL_HDR_PATH usr/include. -khdr: -ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install -else - $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$(abs_objtree)/usr \ - ARCH=$(ARCH) -C $(top_srcdir) headers_install -endif - -all: khdr +all: @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -253,7 +230,7 @@ ifdef INSTALL_PATH for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ - echo -n "Emit Tests for $$TARGET\n"; \ + echo -ne "Emit Tests for $$TARGET\n"; \ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \ -C $$TARGET emit_tests >> $(TEST_LIST); \ done; @@ -274,4 +251,4 @@ clean: $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\ done; -.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar +.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile index 409e3e53d00a..a5a0744423d8 100644 --- a/tools/testing/selftests/arm64/mte/Makefile +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -22,7 +22,6 @@ ifeq ($(mte_cc_support),1) TEST_GEN_PROGS := $(PROGS) # Get Kernel headers installed and use them. -KSFT_KHDR_INSTALL := 1 else $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.) $(warning test program "mte" will not be created.) diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index ac4ad0005715..be7520a863b0 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -11,7 +11,6 @@ PROGS := $(patsubst %.c,%,$(SRCS)) TEST_GEN_PROGS := $(notdir $(PROGS)) # Get Kernel headers installed and use them. -KSFT_KHDR_INSTALL := 1 # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list # to account for any OUTPUT target-dirs optionally provided by diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index c70fdec7d7c4..0c645834ddc3 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -9,9 +9,7 @@ #include <ucontext.h> /* - * Using ARCH specific and sanitized Kernel headers installed by KSFT - * framework since we asked for it by setting flag KSFT_KHDR_INSTALL - * in our Makefile. + * Using ARCH specific and sanitized Kernel headers from the tree. */ #include <asm/ptrace.h> #include <asm/hwcap.h> diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 595565eb68c0..3a8cb2404ea6 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -41,5 +41,6 @@ test_cpp /bench *.ko *.tmp -xdpxceiver +xskxceiver xdp_redirect_multi +xdp_synproxy diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST new file mode 100644 index 000000000000..939de574fc7f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST @@ -0,0 +1,6 @@ +# TEMPORARY +get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge +stacktrace_build_id_nmi +stacktrace_build_id +task_fd_query_rawtp +varlen diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x new file mode 100644 index 000000000000..e33cab34d22f --- /dev/null +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -0,0 +1,67 @@ +# TEMPORARY +atomics # attach(add): actual -524 <= expected 0 (trampoline) +bpf_iter_setsockopt # JIT does not support calling kernel function (kfunc) +bloom_filter_map # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?) +bpf_tcp_ca # JIT does not support calling kernel function (kfunc) +bpf_loop # attaches to __x64_sys_nanosleep +bpf_mod_race # BPF trampoline +bpf_nf # JIT does not support calling kernel function +core_read_macros # unknown func bpf_probe_read#4 (overlapping) +d_path # failed to auto-attach program 'prog_stat': -524 (trampoline) +dummy_st_ops # test_run unexpected error: -524 (errno 524) (trampoline) +fentry_fexit # fentry attach failed: -524 (trampoline) +fentry_test # fentry_first_attach unexpected error: -524 (trampoline) +fexit_bpf2bpf # freplace_attach_trace unexpected error: -524 (trampoline) +fexit_sleep # fexit_skel_load fexit skeleton failed (trampoline) +fexit_stress # fexit attach failed prog 0 failed: -524 (trampoline) +fexit_test # fexit_first_attach unexpected error: -524 (trampoline) +get_func_args_test # trampoline +get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (trampoline) +get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace) +kfree_skb # attach fentry unexpected error: -524 (trampoline) +kfunc_call # 'bpf_prog_active': not found in kernel BTF (?) +ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?) +ksyms_module_libbpf # JIT does not support calling kernel function (kfunc) +ksyms_module_lskel # test_ksyms_module_lskel__open_and_load unexpected error: -9 (?) +modify_return # modify_return attach failed: -524 (trampoline) +module_attach # skel_attach skeleton attach failed: -524 (trampoline) +mptcp +kprobe_multi_test # relies on fentry +netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?) +probe_user # check_kprobe_res wrong kprobe res from probe read (?) +recursion # skel_attach unexpected error: -524 (trampoline) +ringbuf # skel_load skeleton load failed (?) +sk_assign # Can't read on server: Invalid argument (?) +sk_lookup # endianness problem +sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (trampoline) +skc_to_unix_sock # could not attach BPF object unexpected error: -524 (trampoline) +socket_cookie # prog_attach unexpected error: -524 (trampoline) +stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?) +tailcalls # tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls (?) +task_local_storage # failed to auto-attach program 'trace_exit_creds': -524 (trampoline) +test_bpffs # bpffs test failed 255 (iterator) +test_bprm_opts # failed to auto-attach program 'secure_exec': -524 (trampoline) +test_ima # failed to auto-attach program 'ima': -524 (trampoline) +test_local_storage # failed to auto-attach program 'unlink_hook': -524 (trampoline) +test_lsm # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3 (?) +test_overhead # attach_fentry unexpected error: -524 (trampoline) +test_profiler # unknown func bpf_probe_read_str#45 (overlapping) +timer # failed to auto-attach program 'test1': -524 (trampoline) +timer_crash # trampoline +timer_mim # failed to auto-attach program 'test1': -524 (trampoline) +trace_ext # failed to auto-attach program 'test_pkt_md_access_new': -524 (trampoline) +trace_printk # trace_printk__load unexpected error: -2 (errno 2) (?) +trace_vprintk # trace_vprintk__open_and_load unexpected error: -9 (?) +trampoline_count # prog 'prog1': failed to attach: ERROR: strerror_r(-524)=22 (trampoline) +verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) +vmlinux # failed to auto-attach program 'handle__fentry': -524 (trampoline) +xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size 128 expect-size 3520 (?) +xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline) +map_kptr # failed to open_and_load program: -524 (trampoline) +bpf_cookie # failed to open_and_load program: -524 (trampoline) +xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22) +send_signal # intermittently fails to receive signal +select_reuseport # intermittently fails on new s390x setup +xdp_synproxy # JIT does not support calling kernel function (kfunc) +unpriv_bpf_disabled # fentry diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2d3c8c8f558a..8d59ec7f4c2d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xdpxceiver xdp_redirect_multi + xskxceiver xdp_redirect_multi xdp_synproxy TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read @@ -168,17 +168,26 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ +# LLVM's ld.lld doesn't support all the architectures, so use it only on x86 +ifeq ($(SRCARCH),x86) +LLD := lld +else +LLD := ld +endif + # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - liburandom_read.so $(LDLIBS) \ - -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + liburandom_read.so $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code \ + -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -221,6 +230,8 @@ $(OUTPUT)/xdping: $(TESTING_HELPERS) $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) +$(OUTPUT)/xsk.o: $(BPFOBJ) +$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ @@ -502,6 +513,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ cap_helpers.c TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ + $(OUTPUT)/xdp_synproxy \ ima_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE @@ -560,6 +572,9 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \ $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h +$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h +$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h +$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ @@ -571,13 +586,18 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_ringbufs.o \ $(OUTPUT)/bench_bloom_filter_map.o \ $(OUTPUT)/bench_bpf_loop.o \ - $(OUTPUT)/bench_strncmp.o + $(OUTPUT)/bench_strncmp.o \ + $(OUTPUT)/bench_bpf_hashmap_full_update.o \ + $(OUTPUT)/bench_local_storage.o \ + $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ - $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko) + $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ + no_alu32 bpf_gcc bpf_testmod.ko \ + liburandom_read.so) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index f061cc20e776..c1f20a147462 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -79,6 +79,43 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns) hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec); } +void +grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat) +{ + int i; + + memset(gp_stat, 0, sizeof(struct basic_stats)); + + for (i = 0; i < res_cnt; i++) + gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt); + +#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean) + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0); + } + gp_stat->stddev = sqrt(gp_stat->stddev); +#undef IT_MEAN_DIFF +} + +void +grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat) +{ + int i; + + memset(gp_stat, 0, sizeof(struct basic_stats)); + for (i = 0; i < res_cnt; i++) + gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt); + +#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean) + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) + gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0); + } + gp_stat->stddev = sqrt(gp_stat->stddev); +#undef IT_MEAN_DIFF +} + void hits_drops_report_final(struct bench_res res[], int res_cnt) { int i; @@ -150,6 +187,53 @@ void ops_report_final(struct bench_res res[], int res_cnt) printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt); } +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns) +{ + double important_hits_per_sec, hits_per_sec; + double delta_sec = delta_ns / 1000000000.0; + + hits_per_sec = res->hits / 1000000.0 / delta_sec; + important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec; + + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + + printf("hits %8.3lfM/s ", hits_per_sec); + printf("important_hits %8.3lfM/s\n", important_hits_per_sec); +} + +void local_storage_report_final(struct bench_res res[], int res_cnt) +{ + double important_hits_mean = 0.0, important_hits_stddev = 0.0; + double hits_mean = 0.0, hits_stddev = 0.0; + int i; + + for (i = 0; i < res_cnt; i++) { + hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt); + important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt); + } + + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + hits_stddev += (hits_mean - res[i].hits / 1000000.0) * + (hits_mean - res[i].hits / 1000000.0) / + (res_cnt - 1.0); + important_hits_stddev += + (important_hits_mean - res[i].important_hits / 1000000.0) * + (important_hits_mean - res[i].important_hits / 1000000.0) / + (res_cnt - 1.0); + } + + hits_stddev = sqrt(hits_stddev); + important_hits_stddev = sqrt(important_hits_stddev); + } + printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ", + hits_mean, hits_stddev); + printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean); + printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n", + important_hits_mean, important_hits_stddev); +} + const char *argp_program_version = "benchmark"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; const char argp_program_doc[] = @@ -188,13 +272,18 @@ static const struct argp_option opts[] = { extern struct argp bench_ringbufs_argp; extern struct argp bench_bloom_map_argp; extern struct argp bench_bpf_loop_argp; +extern struct argp bench_local_storage_argp; +extern struct argp bench_local_storage_rcu_tasks_trace_argp; extern struct argp bench_strncmp_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 }, { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 }, + { &bench_local_storage_argp, 0, "local_storage benchmark", 0 }, { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 }, + { &bench_local_storage_rcu_tasks_trace_argp, 0, + "local_storage RCU Tasks Trace slowdown benchmark", 0 }, {}, }; @@ -396,6 +485,11 @@ extern const struct bench bench_hashmap_with_bloom; extern const struct bench bench_bpf_loop; extern const struct bench bench_strncmp_no_helper; extern const struct bench bench_strncmp_helper; +extern const struct bench bench_bpf_hashmap_full_update; +extern const struct bench bench_local_storage_cache_seq_get; +extern const struct bench bench_local_storage_cache_interleaved_get; +extern const struct bench bench_local_storage_cache_hashmap_control; +extern const struct bench bench_local_storage_tasks_trace; static const struct bench *benchs[] = { &bench_count_global, @@ -430,6 +524,11 @@ static const struct bench *benchs[] = { &bench_bpf_loop, &bench_strncmp_no_helper, &bench_strncmp_helper, + &bench_bpf_hashmap_full_update, + &bench_local_storage_cache_seq_get, + &bench_local_storage_cache_interleaved_get, + &bench_local_storage_cache_hashmap_control, + &bench_local_storage_tasks_trace, }; static void setup_benchmark() diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index fb3e213df3dc..d748255877e2 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -30,10 +30,19 @@ struct env { struct cpu_set cons_cpus; }; +struct basic_stats { + double mean; + double stddev; +}; + struct bench_res { long hits; long drops; long false_hits; + long important_hits; + unsigned long gp_ns; + unsigned long gp_ct; + unsigned int stime; }; struct bench { @@ -61,6 +70,13 @@ void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns); void false_hits_report_final(struct bench_res res[], int res_cnt); void ops_report_progress(int iter, struct bench_res *res, long delta_ns); void ops_report_final(struct bench_res res[], int res_cnt); +void local_storage_report_progress(int iter, struct bench_res *res, + long delta_ns); +void local_storage_report_final(struct bench_res res[], int res_cnt); +void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, + struct basic_stats *gp_stat); +void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, + struct basic_stats *gp_stat); static inline __u64 get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c new file mode 100644 index 000000000000..cec51e0ff4b8 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include <argp.h> +#include "bench.h" +#include "bpf_hashmap_full_update_bench.skel.h" +#include "bpf_util.h" + +/* BPF triggering benchmarks */ +static struct ctx { + struct bpf_hashmap_full_update_bench *skel; +} ctx; + +#define MAX_LOOP_NUM 10000 + +static void validate(void) +{ + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } +} + +static void *producer(void *input) +{ + while (true) { + /* trigger the bpf program */ + syscall(__NR_getpgid); + } + + return NULL; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void measure(struct bench_res *res) +{ +} + +static void setup(void) +{ + struct bpf_link *link; + int map_fd, i, max_entries; + + setup_libbpf(); + + ctx.skel = bpf_hashmap_full_update_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + exit(1); + } + + ctx.skel->bss->nr_loops = MAX_LOOP_NUM; + + link = bpf_program__attach(ctx.skel->progs.benchmark); + if (!link) { + fprintf(stderr, "failed to attach program!\n"); + exit(1); + } + + /* fill hash_map */ + map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench); + max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench); + for (i = 0; i < max_entries; i++) + bpf_map_update_elem(map_fd, &i, &i, BPF_ANY); +} + +void hashmap_report_final(struct bench_res res[], int res_cnt) +{ + unsigned int nr_cpus = bpf_num_possible_cpus(); + int i; + + for (i = 0; i < nr_cpus; i++) { + u64 time = ctx.skel->bss->percpu_time[i]; + + if (!time) + continue; + + printf("%d:hash_map_full_perf %lld events per sec\n", + i, ctx.skel->bss->nr_loops * 1000000000ll / time); + } +} + +const struct bench bench_bpf_hashmap_full_update = { + .name = "bpf-hashmap-ful-update", + .validate = validate, + .setup = setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = NULL, + .report_final = hashmap_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c new file mode 100644 index 000000000000..5a378c84e81f --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <argp.h> +#include <linux/btf.h> + +#include "local_storage_bench.skel.h" +#include "bench.h" + +#include <test_btf.h> + +static struct { + __u32 nr_maps; + __u32 hashmap_nr_keys_used; +} args = { + .nr_maps = 1000, + .hashmap_nr_keys_used = 1000, +}; + +enum { + ARG_NR_MAPS = 6000, + ARG_HASHMAP_NR_KEYS_USED = 6001, +}; + +static const struct argp_option opts[] = { + { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0, + "Set number of local_storage maps"}, + { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS", + 0, "When doing hashmap test, set number of hashmap keys test uses"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_MAPS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_maps"); + argp_usage(state); + } + args.nr_maps = ret; + break; + case ARG_HASHMAP_NR_KEYS_USED: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid hashmap_nr_keys_used"); + argp_usage(state); + } + args.hashmap_nr_keys_used = ret; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_argp = { + .options = opts, + .parser = parse_arg, +}; + +/* Keep in sync w/ array of maps in bpf */ +#define MAX_NR_MAPS 1000 +/* keep in sync w/ same define in bpf */ +#define HASHMAP_SZ 4194304 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_maps > MAX_NR_MAPS) { + fprintf(stderr, "nr_maps must be <= 1000\n"); + exit(1); + } + + if (args.hashmap_nr_keys_used > HASHMAP_SZ) { + fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ); + exit(1); + } +} + +static struct { + struct local_storage_bench *skel; + void *bpf_obj; + struct bpf_map *array_of_maps; +} ctx; + +static void prepopulate_hashmap(int fd) +{ + int i, key, val; + + /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so + * populate the hashmap for a similar comparison + */ + for (i = 0; i < HASHMAP_SZ; i++) { + key = val = i; + if (bpf_map_update_elem(fd, &key, &val, 0)) { + fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key); + exit(1); + } + } +} + +static void __setup(struct bpf_program *prog, bool hashmap) +{ + struct bpf_map *inner_map; + int i, fd, mim_fd, err; + + LIBBPF_OPTS(bpf_map_create_opts, create_opts); + + if (!hashmap) + create_opts.map_flags = BPF_F_NO_PREALLOC; + + ctx.skel->rodata->num_maps = args.nr_maps; + ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used; + inner_map = bpf_map__inner_map(ctx.array_of_maps); + create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map); + create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map); + + err = local_storage_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "Error loading skeleton\n"); + goto err_out; + } + + create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj); + + mim_fd = bpf_map__fd(ctx.array_of_maps); + if (mim_fd < 0) { + fprintf(stderr, "Error getting map_in_map fd\n"); + goto err_out; + } + + for (i = 0; i < args.nr_maps; i++) { + if (hashmap) + fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), + sizeof(int), HASHMAP_SZ, &create_opts); + else + fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int), + sizeof(int), 0, &create_opts); + if (fd < 0) { + fprintf(stderr, "Error creating map %d: %d\n", i, fd); + goto err_out; + } + + if (hashmap) + prepopulate_hashmap(fd); + + err = bpf_map_update_elem(mim_fd, &i, &fd, 0); + if (err) { + fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i); + goto err_out; + } + } + + if (!bpf_program__attach(prog)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void hashmap_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_hash_maps; + skel->rodata->use_hashmap = 1; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, true); +} + +static void local_storage_cache_get_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 0; + + __setup(skel->progs.get_local, false); +} + +static void local_storage_cache_get_interleaved_setup(void) +{ + struct local_storage_bench *skel; + + setup_libbpf(); + + skel = local_storage_bench__open(); + ctx.skel = skel; + ctx.array_of_maps = skel->maps.array_of_local_storage_maps; + skel->rodata->use_hashmap = 0; + skel->rodata->interleave = 1; + + __setup(skel->progs.get_local, false); +} + +static void measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->hits, 0); + res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0); +} + +static inline void trigger_bpf_program(void) +{ + syscall(__NR_getpgid); +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + while (true) + trigger_bpf_program(); + + return NULL; +} + +/* cache sequential and interleaved get benchs test local_storage get + * performance, specifically they demonstrate performance cliff of + * current list-plus-cache local_storage model. + * + * cache sequential get: call bpf_task_storage_get on n maps in order + * cache interleaved get: like "sequential get", but interleave 4 calls to the + * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal + * is to mimic environment where many progs are accessing their local_storage + * maps, with 'our' prog needing to access its map more often than others + */ +const struct bench bench_local_storage_cache_seq_get = { + .name = "local-storage-cache-seq-get", + .validate = validate, + .setup = local_storage_cache_get_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_interleaved_get = { + .name = "local-storage-cache-int-get", + .validate = validate, + .setup = local_storage_cache_get_interleaved_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; + +const struct bench bench_local_storage_cache_hashmap_control = { + .name = "local-storage-cache-hashmap-control", + .validate = validate, + .setup = hashmap_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = local_storage_report_progress, + .report_final = local_storage_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c new file mode 100644 index 000000000000..43f109d93130 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <argp.h> + +#include <sys/prctl.h> +#include "local_storage_rcu_tasks_trace_bench.skel.h" +#include "bench.h" + +#include <signal.h> + +static struct { + __u32 nr_procs; + __u32 kthread_pid; + bool quiet; +} args = { + .nr_procs = 1000, + .kthread_pid = 0, + .quiet = false, +}; + +enum { + ARG_NR_PROCS = 7000, + ARG_KTHREAD_PID = 7001, + ARG_QUIET = 7002, +}; + +static const struct argp_option opts[] = { + { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0, + "Set number of user processes to spin up"}, + { "kthread_pid", ARG_KTHREAD_PID, "PID", 0, + "Pid of rcu_tasks_trace kthread for ticks tracking"}, + { "quiet", ARG_QUIET, "{0,1}", 0, + "If true, don't report progress"}, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + long ret; + + switch (key) { + case ARG_NR_PROCS: + ret = strtol(arg, NULL, 10); + if (ret < 1 || ret > UINT_MAX) { + fprintf(stderr, "invalid nr_procs\n"); + argp_usage(state); + } + args.nr_procs = ret; + break; + case ARG_KTHREAD_PID: + ret = strtol(arg, NULL, 10); + if (ret < 1) { + fprintf(stderr, "invalid kthread_pid\n"); + argp_usage(state); + } + args.kthread_pid = ret; + break; + case ARG_QUIET: + ret = strtol(arg, NULL, 10); + if (ret < 0 || ret > 1) { + fprintf(stderr, "invalid quiet %ld\n", ret); + argp_usage(state); + } + args.quiet = ret; + break; +break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_local_storage_rcu_tasks_trace_argp = { + .options = opts, + .parser = parse_arg, +}; + +#define MAX_SLEEP_PROCS 150000 + +static void validate(void) +{ + if (env.producer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-producer!\n"); + exit(1); + } + if (env.consumer_cnt != 1) { + fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + exit(1); + } + + if (args.nr_procs > MAX_SLEEP_PROCS) { + fprintf(stderr, "benchmark supports up to %u sleeper procs!\n", + MAX_SLEEP_PROCS); + exit(1); + } +} + +static long kthread_pid_ticks(void) +{ + char procfs_path[100]; + long stime; + FILE *f; + + if (!args.kthread_pid) + return -1; + + sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid); + f = fopen(procfs_path, "r"); + if (!f) { + fprintf(stderr, "couldn't open %s, exiting\n", procfs_path); + goto err_out; + } + if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) { + fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path); + goto err_out; + } + fclose(f); + return stime; + +err_out: + if (f) + fclose(f); + exit(1); + return 0; +} + +static struct { + struct local_storage_rcu_tasks_trace_bench *skel; + long prev_kthread_stime; +} ctx; + +static void sleep_and_loop(void) +{ + while (true) { + sleep(rand() % 4); + syscall(__NR_getpgid); + } +} + +static void local_storage_tasks_trace_setup(void) +{ + int i, err, forkret, runner_pid; + + runner_pid = getpid(); + + for (i = 0; i < args.nr_procs; i++) { + forkret = fork(); + if (forkret < 0) { + fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i, + args.nr_procs); + goto err_out; + } + + if (!forkret) { + err = prctl(PR_SET_PDEATHSIG, SIGKILL); + if (err < 0) { + fprintf(stderr, "prctl failed with err %d, exiting\n", errno); + goto err_out; + } + + if (getppid() != runner_pid) { + fprintf(stderr, "Runner died while spinning up procs, exiting\n"); + goto err_out; + } + sleep_and_loop(); + } + } + printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid); + + setup_libbpf(); + + ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load(); + if (!ctx.skel) { + fprintf(stderr, "Error doing open_and_load, exiting\n"); + goto err_out; + } + + ctx.prev_kthread_stime = kthread_pid_ticks(); + + if (!bpf_program__attach(ctx.skel->progs.get_local)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.pregp_step)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + if (!bpf_program__attach(ctx.skel->progs.postgp)) { + fprintf(stderr, "Error attaching bpf program\n"); + goto err_out; + } + + return; +err_out: + exit(1); +} + +static void measure(struct bench_res *res) +{ + long ticks; + + res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0); + res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0); + ticks = kthread_pid_ticks(); + res->stime = ticks - ctx.prev_kthread_stime; + ctx.prev_kthread_stime = ticks; +} + +static void *consumer(void *input) +{ + return NULL; +} + +static void *producer(void *input) +{ + while (true) + syscall(__NR_getpgid); + return NULL; +} + +static void report_progress(int iter, struct bench_res *res, long delta_ns) +{ + if (ctx.skel->bss->unexpected) { + fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp)."); + fprintf(stderr, "Data can't be trusted, exiting\n"); + exit(1); + } + + if (args.quiet) + return; + + printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n", + iter, res->gp_ns / (double)res->gp_ct); + printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n", + iter, res->stime / (double)res->gp_ct); +} + +static void report_final(struct bench_res res[], int res_cnt) +{ + struct basic_stats gp_stat; + + grace_period_latency_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY tasks_trace grace period latency"); + printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev); + grace_period_ticks_basic_stats(res, res_cnt, &gp_stat); + printf("SUMMARY ticks per tasks_trace grace period"); + printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev); +} + +/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use + * of RCU Tasks-Trace. + * + * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside + * from sleep() loop, and creating/destroying BPF task-local storage on wakeup. + * The number of forked tasks is configurable. + * + * exercising code paths which call call_rcu_tasks_trace while there are many + * thousands of tasks on the system should result in RCU Tasks-Trace having to + * do a noticeable amount of work. + * + * This should be observable by measuring rcu_tasks_trace_kthread CPU usage + * after the grace period has ended, or by measuring grace period latency. + * + * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step + * and rcu_tasks_trace_postgp functions to measure grace period latency and + * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks + */ +const struct bench bench_local_storage_tasks_trace = { + .name = "local-storage-tasks-trace", + .validate = validate, + .setup = local_storage_tasks_trace_setup, + .producer_thread = producer, + .consumer_thread = consumer, + .measure = measure, + .report_progress = report_progress, + .report_final = report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh new file mode 100755 index 000000000000..1e2de838f9fa --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1` +summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-ful-update) +printf "$summary" +printf "\n" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh new file mode 100755 index 000000000000..2eb2b513a173 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +header "Hashmap Control" +for i in 10 1000 10000 100000 4194304; do +subtitle "num keys: $i" + summarize_local_storage "hashmap (control) sequential get: "\ + "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)" + printf "\n" +done + +header "Local Storage" +for i in 1 10 16 17 24 32 100 1000; do +subtitle "num_maps: $i" + summarize_local_storage "local_storage cache sequential get: "\ + "$(./bench --nr_maps $i local-storage-cache-seq-get)" + summarize_local_storage "local_storage cache interleaved get: "\ + "$(./bench --nr_maps $i local-storage-cache-int-get)" + printf "\n" +done diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh new file mode 100755 index 000000000000..5dac1f02892c --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +kthread_pid=`pgrep rcu_tasks_trace_kthread` + +if [ -z $kthread_pid ]; then + echo "error: Couldn't find rcu_tasks_trace_kthread" + exit 1 +fi + +./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet 1 local-storage-tasks-trace diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh index 6c5e6023a69f..d9f40af82006 100644 --- a/tools/testing/selftests/bpf/benchs/run_common.sh +++ b/tools/testing/selftests/bpf/benchs/run_common.sh @@ -41,6 +41,16 @@ function ops() echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" } +function local_storage() +{ + echo -n "hits throughput: " + echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" + echo -n -e ", hits latency: " + echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/" + echo -n ", important_hits throughput: " + echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/" +} + function total() { echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/" @@ -67,6 +77,13 @@ function summarize_ops() printf "%-20s %s\n" "$bench" "$(ops $summary)" } +function summarize_local_storage() +{ + bench="$1" + summary=$(echo $2 | tail -n1) + printf "%-20s %s\n" "$bench" "$(local_storage $summary)" +} + function summarize_total() { bench="$1" diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h index 719ab56cdb5d..845209581440 100644 --- a/tools/testing/selftests/bpf/bpf_legacy.h +++ b/tools/testing/selftests/bpf/bpf_legacy.h @@ -2,15 +2,6 @@ #ifndef __BPF_LEGACY__ #define __BPF_LEGACY__ -#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \ - struct ____btf_map_##name { \ - type_key key; \ - type_val value; \ - }; \ - struct ____btf_map_##name \ - __attribute__ ((section(".maps." #name), used)) \ - ____btf_map_##name = { } - /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions */ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index e585e1cefc77..792cb15bac40 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -148,13 +148,13 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .write = bpf_testmod_test_write, }; -BTF_SET_START(bpf_testmod_check_kfunc_ids) -BTF_ID(func, bpf_testmod_test_mod_kfunc) -BTF_SET_END(bpf_testmod_check_kfunc_ids) +BTF_SET8_START(bpf_testmod_check_kfunc_ids) +BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { - .owner = THIS_MODULE, - .check_set = &bpf_testmod_check_kfunc_ids, + .owner = THIS_MODULE, + .set = &bpf_testmod_check_kfunc_ids, }; extern int bpf_fentry_test1(int a); diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c index b5941d514e17..1c1c2c26690a 100644 --- a/tools/testing/selftests/bpf/btf_helpers.c +++ b/tools/testing/selftests/bpf/btf_helpers.c @@ -26,11 +26,12 @@ static const char * const btf_kind_str_mapping[] = { [BTF_KIND_FLOAT] = "FLOAT", [BTF_KIND_DECL_TAG] = "DECL_TAG", [BTF_KIND_TYPE_TAG] = "TYPE_TAG", + [BTF_KIND_ENUM64] = "ENUM64", }; static const char *btf_kind_str(__u16 kind) { - if (kind > BTF_KIND_TYPE_TAG) + if (kind > BTF_KIND_ENUM64) return "UNKNOWN"; return btf_kind_str_mapping[kind]; } @@ -139,14 +140,32 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id) } case BTF_KIND_ENUM: { const struct btf_enum *v = btf_enum(t); + const char *fmt_str; - fprintf(out, " size=%u vlen=%u", t->size, vlen); + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u"; + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); for (i = 0; i < vlen; i++, v++) { - fprintf(out, "\n\t'%s' val=%u", + fprintf(out, fmt_str, btf_str(btf, v->name_off), v->val); } break; } + case BTF_KIND_ENUM64: { + const struct btf_enum64 *v = btf_enum64(t); + const char *fmt_str; + + fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu"; + + fprintf(out, " encoding=%s size=%u vlen=%u", + btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen); + for (i = 0; i < vlen; i++, v++) { + fprintf(out, fmt_str, + btf_str(btf, v->name_off), + ((__u64)v->val_hi32 << 32) | v->val_lo32); + } + break; + } case BTF_KIND_FWD: fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct"); break; diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3b3edc0fc8a6..fabf0c014349 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -1,59 +1,64 @@ +CONFIG_BLK_DEV_LOOP=y CONFIG_BPF=y -CONFIG_BPF_SYSCALL=y -CONFIG_NET_CLS_BPF=m CONFIG_BPF_EVENTS=y -CONFIG_TEST_BPF=m +CONFIG_BPF_JIT=y +CONFIG_BPF_LIRC_MODE2=y +CONFIG_BPF_LSM=y +CONFIG_BPF_STREAM_PARSER=y +CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y -CONFIG_NETDEVSIM=m -CONFIG_NET_CLS_ACT=y -CONFIG_NET_SCHED=y -CONFIG_NET_SCH_INGRESS=y -CONFIG_NET_IPIP=y -CONFIG_IPV6=y -CONFIG_NET_IPGRE_DEMUX=y -CONFIG_NET_IPGRE=y -CONFIG_IPV6_GRE=y -CONFIG_CRYPTO_USER_API_HASH=m CONFIG_CRYPTO_HMAC=m CONFIG_CRYPTO_SHA256=m -CONFIG_VXLAN=y -CONFIG_GENEVE=y -CONFIG_NET_CLS_FLOWER=m -CONFIG_LWTUNNEL=y -CONFIG_BPF_STREAM_PARSER=y -CONFIG_XDP_SOCKETS=y +CONFIG_CRYPTO_USER_API_HASH=m +CONFIG_DYNAMIC_FTRACE=y +CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y -CONFIG_IPV6_TUNNEL=y +CONFIG_FUNCTION_TRACER=y +CONFIG_GENEVE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_IMA=y +CONFIG_IMA_READ_POLICY=y +CONFIG_IMA_WRITE_POLICY=y +CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_RAW=y +CONFIG_IP_NF_TARGET_SYNPROXY=y +CONFIG_IPV6=y +CONFIG_IPV6_FOU=m +CONFIG_IPV6_FOU_TUNNEL=m CONFIG_IPV6_GRE=y CONFIG_IPV6_SEG6_BPF=y +CONFIG_IPV6_SIT=m +CONFIG_IPV6_TUNNEL=y +CONFIG_LIRC=y +CONFIG_LWTUNNEL=y +CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPTCP=y +CONFIG_NET_CLS_ACT=y +CONFIG_NET_CLS_BPF=y +CONFIG_NET_CLS_FLOWER=m CONFIG_NET_FOU=m CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_IPV6_FOU=m -CONFIG_IPV6_FOU_TUNNEL=m -CONFIG_MPLS=y +CONFIG_NET_IPGRE=y +CONFIG_NET_IPGRE_DEMUX=y +CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_IPV6_SIT=m -CONFIG_BPF_JIT=y -CONFIG_BPF_LSM=y -CONFIG_SECURITY=y -CONFIG_RC_CORE=y -CONFIG_LIRC=y -CONFIG_BPF_LIRC_MODE2=y -CONFIG_IMA=y -CONFIG_SECURITYFS=y -CONFIG_IMA_WRITE_POLICY=y -CONFIG_IMA_READ_POLICY=y -CONFIG_BLK_DEV_LOOP=y -CONFIG_FUNCTION_TRACER=y -CONFIG_DYNAMIC_FTRACE=y +CONFIG_NET_SCH_INGRESS=y +CONFIG_NET_SCHED=y +CONFIG_NETDEVSIM=m CONFIG_NETFILTER=y +CONFIG_NETFILTER_SYNPROXY=y +CONFIG_NETFILTER_XT_MATCH_STATE=y +CONFIG_NETFILTER_XT_TARGET_CT=y +CONFIG_NF_CONNTRACK=y CONFIG_NF_DEFRAG_IPV4=y CONFIG_NF_DEFRAG_IPV6=y -CONFIG_NF_CONNTRACK=y +CONFIG_RC_CORE=y +CONFIG_SECURITY=y +CONFIG_SECURITYFS=y +CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y -CONFIG_FPROBE=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_MPTCP=y +CONFIG_VXLAN=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x new file mode 100644 index 000000000000..f8a7a258a718 --- /dev/null +++ b/tools/testing/selftests/bpf/config.s390x @@ -0,0 +1,147 @@ +CONFIG_9P_FS=y +CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_JIT_DEFAULT_ON=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y +CONFIG_CPUSETS=y +CONFIG_CRASH_DUMP=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_USER_API_SKCIPHER=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_LIST=y +CONFIG_DEBUG_LOCKDEP=y +CONFIG_DEBUG_NOTIFIERS=y +CONFIG_DEBUG_PAGEALLOC=y +CONFIG_DEBUG_SECTION_MISMATCH=y +CONFIG_DEBUG_SG=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FANOTIFY=y +CONFIG_FUNCTION_PROFILER=y +CONFIG_GDB_SCRIPTS=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KPROBES_ON_FTRACE=y +CONFIG_HAVE_KRETPROBES=y +CONFIG_HAVE_MARCH_Z10_FEATURES=y +CONFIG_HAVE_MARCH_Z196_FEATURES=y +CONFIG_HEADERS_INSTALL=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HW_RANDOM=y +CONFIG_HZ_100=y +CONFIG_IDLE_PAGE_TRACKING=y +CONFIG_IKHEADERS=y +CONFIG_INET6_ESP=y +CONFIG_INET=y +CONFIG_INET_ESP=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPVLAN=y +CONFIG_JUMP_LABEL=y +CONFIG_KERNEL_UNCOMPRESSED=y +CONFIG_KPROBES=y +CONFIG_KPROBES_ON_FTRACE=y +CONFIG_KRETPROBES=y +CONFIG_KSM=y +CONFIG_LATENCYTOP=y +CONFIG_LIVEPATCH=y +CONFIG_LOCK_STAT=y +CONFIG_MACVLAN=y +CONFIG_MACVTAP=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MARCH_Z196=y +CONFIG_MARCH_Z196_TUNE=y +CONFIG_MEMCG=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_ACT_GACT=y +CONFIG_NET_KEY=y +CONFIG_NET_SCH_FQ=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_TARGET_MARK=y +CONFIG_NF_TABLES=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NR_CPUS=256 +CONFIG_NUMA=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTDUMP_DEBUGFS=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RT_GROUP_SCHED=y +CONFIG_SAMPLE_SECCOMP=y +CONFIG_SAMPLES=y +CONFIG_SCHED_TRACER=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_SECURITY_NETWORK=y +CONFIG_STACK_TRACER=y +CONFIG_STATIC_KEYS_SELFTEST=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_CONG_DCTCP=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USELIB=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 new file mode 100644 index 000000000000..f0859a1d37ab --- /dev/null +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -0,0 +1,251 @@ +CONFIG_9P_FS=y +CONFIG_9P_FS_POSIX_ACL=y +CONFIG_9P_FS_SECURITY=y +CONFIG_AGP=y +CONFIG_AGP_AMD64=y +CONFIG_AGP_INTEL=y +CONFIG_AGP_SIS=y +CONFIG_AGP_VIA=y +CONFIG_AMIGA_PARTITION=y +CONFIG_AUDIT=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +CONFIG_BINFMT_MISC=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_CGROUP_IOLATENCY=y +CONFIG_BLK_DEV_BSGLIB=y +CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BLK_DEV_RAM_SIZE=16384 +CONFIG_BLK_DEV_THROTTLING=y +CONFIG_BONDING=y +CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y +CONFIG_BOOTTIME_TRACING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_KPROBE_OVERRIDE=y +CONFIG_BPF_PRELOAD=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPFILTER=y +CONFIG_BSD_DISKLABEL=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_CFS_BANDWIDTH=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMA=y +CONFIG_CMA_AREAS=7 +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y +CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y +CONFIG_CPU_FREQ_GOV_ONDEMAND=y +CONFIG_CPU_FREQ_GOV_USERSPACE=y +CONFIG_CPU_FREQ_STAT=y +CONFIG_CPU_IDLE_GOV_LADDER=y +CONFIG_CPUSETS=y +CONFIG_CRC_T10DIF=y +CONFIG_CRYPTO_BLAKE2B=y +CONFIG_CRYPTO_DEV_VIRTIO=m +CONFIG_CRYPTO_SEQIV=y +CONFIG_CRYPTO_XXHASH=y +CONFIG_DCB=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_CREDENTIALS=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_DEBUG_MEMORY_INIT=y +CONFIG_DEFAULT_FQ_CODEL=y +CONFIG_DEFAULT_RENO=y +CONFIG_DEFAULT_SECURITY_DAC=y +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y +CONFIG_DNS_RESOLVER=y +CONFIG_EFI=y +CONFIG_EFI_STUB=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_FAIL_FUNCTION=y +CONFIG_FAULT_INJECTION=y +CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FB=y +CONFIG_FB_MODE_HELPERS=y +CONFIG_FB_TILEBLITTING=y +CONFIG_FB_VESA=y +CONFIG_FONT_8x16=y +CONFIG_FONT_MINI_4x6=y +CONFIG_FONTS=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y +CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y +CONFIG_FW_LOADER_USER_HELPER=y +CONFIG_GART_IOMMU=y +CONFIG_GENERIC_PHY=y +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HID_A4TECH=y +CONFIG_HID_BELKIN=y +CONFIG_HID_CHERRY=y +CONFIG_HID_CYPRESS=y +CONFIG_HID_DRAGONRISE=y +CONFIG_HID_EZKEY=y +CONFIG_HID_GREENASIA=y +CONFIG_HID_GYRATION=y +CONFIG_HID_KENSINGTON=y +CONFIG_HID_KYE=y +CONFIG_HID_MICROSOFT=y +CONFIG_HID_MONTEREY=y +CONFIG_HID_PANTHERLORD=y +CONFIG_HID_PETALYNX=y +CONFIG_HID_SMARTJOYPLUS=y +CONFIG_HID_SUNPLUS=y +CONFIG_HID_TOPSEED=y +CONFIG_HID_TWINHAN=y +CONFIG_HID_ZEROPLUS=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HPET=y +CONFIG_HUGETLBFS=y +CONFIG_HWPOISON_INJECT=y +CONFIG_HZ_1000=y +CONFIG_INET=y +CONFIG_INPUT_EVDEV=y +CONFIG_INTEL_POWERCLAMP=y +CONFIG_IP6_NF_IPTABLES=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IP_NF_IPTABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_IP_ROUTE_MULTIPATH=y +CONFIG_IP_ROUTE_VERBOSE=y +CONFIG_IPV6_MIP6=y +CONFIG_IPV6_ROUTE_INFO=y +CONFIG_IPV6_ROUTER_PREF=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SUBTREES=y +CONFIG_IRQ_POLL=y +CONFIG_JUMP_LABEL=y +CONFIG_KARMA_PARTITION=y +CONFIG_KEXEC=y +CONFIG_KPROBES=y +CONFIG_KSM=y +CONFIG_LEGACY_VSYSCALL_NONE=y +CONFIG_LOG_BUF_SHIFT=21 +CONFIG_LOG_CPU_MAX_BUF_SHIFT=0 +CONFIG_LOGO=y +CONFIG_LSM="selinux,bpf,integrity" +CONFIG_MAC_PARTITION=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_MCORE2=y +CONFIG_MEMCG=y +CONFIG_MEMORY_FAILURE=y +CONFIG_MINIX_SUBPARTITION=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SRCVERSION_ALL=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODULES=y +CONFIG_MODVERSIONS=y +CONFIG_NAMESPACES=y +CONFIG_NET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_ACT_BPF=y +CONFIG_NET_CLS_CGROUP=y +CONFIG_NET_EMATCH=y +CONFIG_NET_IPGRE_BROADCAST=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_SCH_DEFAULT=y +CONFIG_NET_SCH_FQ_CODEL=y +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_VRF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_NETLINK_LOG=y +CONFIG_NETFILTER_NETLINK_QUEUE=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NETFILTER_XT_MATCH_STATISTIC=y +CONFIG_NETLABEL=y +CONFIG_NLS_ASCII=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_DEFAULT="utf8" +CONFIG_NO_HZ=y +CONFIG_NR_CPUS=128 +CONFIG_NUMA=y +CONFIG_NUMA_BALANCING=y +CONFIG_NVMEM=y +CONFIG_OSF_PARTITION=y +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI=y +CONFIG_PCI_IOV=y +CONFIG_PCI_MSI=y +CONFIG_PCIEPORTBUS=y +CONFIG_PHYSICAL_ALIGN=0x1000000 +CONFIG_POSIX_MQUEUE=y +CONFIG_POWER_SUPPLY=y +CONFIG_PREEMPT=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_PROVE_LOCKING=y +CONFIG_PTP_1588_CLOCK=y +CONFIG_RC_DEVICES=y +CONFIG_RC_LOOPBACK=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_SCHED_STACK_END_CHECK=y +CONFIG_SCHEDSTATS=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SECURITY_SELINUX=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DETECT_IRQ=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_MANY_PORTS=y +CONFIG_SERIAL_8250_NR_UARTS=32 +CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIO_LIBPS2=y +CONFIG_SGI_PARTITION=y +CONFIG_SMP=y +CONFIG_SOLARIS_X86_PARTITION=y +CONFIG_SUN_PARTITION=y +CONFIG_SYNC_FILE=y +CONFIG_SYSVIPC=y +CONFIG_TASK_DELAY_ACCT=y +CONFIG_TASK_IO_ACCOUNTING=y +CONFIG_TASK_XACCT=y +CONFIG_TASKSTATS=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_TCP_MD5SIG=y +CONFIG_TLS=y +CONFIG_TMPFS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TRANSPARENT_HUGEPAGE=y +CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UNIXWARE_DISKLABEL=y +CONFIG_USER_NS=y +CONFIG_VALIDATE_FS_PARSER=y +CONFIG_VETH=y +CONFIG_VIRT_DRIVERS=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_BLK=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_NET=y +CONFIG_VIRTIO_PCI=y +CONFIG_VLAN_8021Q=y +CONFIG_X86_ACPI_CPUFREQ=y +CONFIG_X86_CPUID=y +CONFIG_X86_MSR=y +CONFIG_X86_POWERNOW_K8=y +CONFIG_XDP_SOCKETS_DIAG=y +CONFIG_XFRM_SUB_POLICY=y +CONFIG_XFRM_USER=y +CONFIG_ZEROPLUS_FF=y diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 59cf81ec55af..bec15558fd93 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -436,7 +436,7 @@ struct nstoken *open_netns(const char *name) int err; struct nstoken *token; - token = malloc(sizeof(struct nstoken)); + token = calloc(1, sizeof(struct nstoken)); if (!ASSERT_OK_PTR(token, "malloc token")) return NULL; diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 08c0601b3e84..0b899d2d8ea7 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -17,6 +17,14 @@ static void trigger_func2(void) asm volatile (""); } +/* attach point for byname sleepable uprobe */ +static void trigger_func3(void) +{ + asm volatile (""); +} + +static char test_data[] = "test_data"; + void test_attach_probe(void) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts); @@ -49,9 +57,17 @@ void test_attach_probe(void) if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset")) return; - skel = test_attach_probe__open_and_load(); + skel = test_attach_probe__open(); if (!ASSERT_OK_PTR(skel, "skel_open")) return; + + /* sleepable kprobe test case needs flags set before loading */ + if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable, + BPF_F_SLEEPABLE), "kprobe_sleepable_flags")) + goto cleanup; + + if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load")) + goto cleanup; if (!ASSERT_OK_PTR(skel->bss, "check_bss")) goto cleanup; @@ -151,6 +167,30 @@ void test_attach_probe(void) if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2")) goto cleanup; + /* sleepable kprobes should not attach successfully */ + skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable); + if (!ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable")) + goto cleanup; + + /* test sleepable uprobe and uretprobe variants */ + skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3")) + goto cleanup; + + skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable")) + goto cleanup; + + skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3); + if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3")) + goto cleanup; + + skel->bss->user_ptr = test_data; + /* trigger & validate kprobe && kretprobe */ usleep(1); @@ -164,6 +204,9 @@ void test_attach_probe(void) /* trigger & validate uprobe attached by name */ trigger_func2(); + /* trigger & validate sleepable uprobe attached by name */ + trigger_func3(); + ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res"); ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res"); ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res"); @@ -174,6 +217,10 @@ void test_attach_probe(void) ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res"); ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res"); ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res"); + ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res"); + ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res"); + ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res"); cleanup: test_attach_probe__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 83ef55e3caa4..2974b44f80fa 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -121,24 +121,24 @@ static void kprobe_multi_link_api_subtest(void) }) GET_ADDR("bpf_fentry_test1", addrs[0]); - GET_ADDR("bpf_fentry_test2", addrs[1]); - GET_ADDR("bpf_fentry_test3", addrs[2]); - GET_ADDR("bpf_fentry_test4", addrs[3]); - GET_ADDR("bpf_fentry_test5", addrs[4]); - GET_ADDR("bpf_fentry_test6", addrs[5]); - GET_ADDR("bpf_fentry_test7", addrs[6]); + GET_ADDR("bpf_fentry_test3", addrs[1]); + GET_ADDR("bpf_fentry_test4", addrs[2]); + GET_ADDR("bpf_fentry_test5", addrs[3]); + GET_ADDR("bpf_fentry_test6", addrs[4]); + GET_ADDR("bpf_fentry_test7", addrs[5]); + GET_ADDR("bpf_fentry_test2", addrs[6]); GET_ADDR("bpf_fentry_test8", addrs[7]); #undef GET_ADDR - cookies[0] = 1; - cookies[1] = 2; - cookies[2] = 3; - cookies[3] = 4; - cookies[4] = 5; - cookies[5] = 6; - cookies[6] = 7; - cookies[7] = 8; + cookies[0] = 1; /* bpf_fentry_test1 */ + cookies[1] = 2; /* bpf_fentry_test3 */ + cookies[2] = 3; /* bpf_fentry_test4 */ + cookies[3] = 4; /* bpf_fentry_test5 */ + cookies[4] = 5; /* bpf_fentry_test6 */ + cookies[5] = 6; /* bpf_fentry_test7 */ + cookies[6] = 7; /* bpf_fentry_test2 */ + cookies[7] = 8; /* bpf_fentry_test8 */ opts.kprobe_multi.addrs = (const unsigned long *) &addrs; opts.kprobe_multi.cnt = ARRAY_SIZE(addrs); @@ -149,14 +149,14 @@ static void kprobe_multi_link_api_subtest(void) if (!ASSERT_GE(link1_fd, 0, "link1_fd")) goto cleanup; - cookies[0] = 8; - cookies[1] = 7; - cookies[2] = 6; - cookies[3] = 5; - cookies[4] = 4; - cookies[5] = 3; - cookies[6] = 2; - cookies[7] = 1; + cookies[0] = 8; /* bpf_fentry_test1 */ + cookies[1] = 7; /* bpf_fentry_test3 */ + cookies[2] = 6; /* bpf_fentry_test4 */ + cookies[3] = 5; /* bpf_fentry_test5 */ + cookies[4] = 4; /* bpf_fentry_test6 */ + cookies[5] = 3; /* bpf_fentry_test7 */ + cookies[6] = 2; /* bpf_fentry_test2 */ + cookies[7] = 1; /* bpf_fentry_test8 */ opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN; prog_fd = bpf_program__fd(skel->progs.test_kretprobe); @@ -181,12 +181,12 @@ static void kprobe_multi_attach_api_subtest(void) struct kprobe_multi *skel = NULL; const char *syms[8] = { "bpf_fentry_test1", - "bpf_fentry_test2", "bpf_fentry_test3", "bpf_fentry_test4", "bpf_fentry_test5", "bpf_fentry_test6", "bpf_fentry_test7", + "bpf_fentry_test2", "bpf_fentry_test8", }; __u64 cookies[8]; @@ -198,14 +198,14 @@ static void kprobe_multi_attach_api_subtest(void) skel->bss->pid = getpid(); skel->bss->test_cookie = true; - cookies[0] = 1; - cookies[1] = 2; - cookies[2] = 3; - cookies[3] = 4; - cookies[4] = 5; - cookies[5] = 6; - cookies[6] = 7; - cookies[7] = 8; + cookies[0] = 1; /* bpf_fentry_test1 */ + cookies[1] = 2; /* bpf_fentry_test3 */ + cookies[2] = 3; /* bpf_fentry_test4 */ + cookies[3] = 4; /* bpf_fentry_test5 */ + cookies[4] = 5; /* bpf_fentry_test6 */ + cookies[5] = 6; /* bpf_fentry_test7 */ + cookies[6] = 7; /* bpf_fentry_test2 */ + cookies[7] = 8; /* bpf_fentry_test8 */ opts.syms = syms; opts.cnt = ARRAY_SIZE(syms); @@ -216,14 +216,14 @@ static void kprobe_multi_attach_api_subtest(void) if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts")) goto cleanup; - cookies[0] = 8; - cookies[1] = 7; - cookies[2] = 6; - cookies[3] = 5; - cookies[4] = 4; - cookies[5] = 3; - cookies[6] = 2; - cookies[7] = 1; + cookies[0] = 8; /* bpf_fentry_test1 */ + cookies[1] = 7; /* bpf_fentry_test3 */ + cookies[2] = 6; /* bpf_fentry_test4 */ + cookies[3] = 5; /* bpf_fentry_test5 */ + cookies[4] = 4; /* bpf_fentry_test6 */ + cookies[5] = 3; /* bpf_fentry_test7 */ + cookies[6] = 2; /* bpf_fentry_test2 */ + cookies[7] = 1; /* bpf_fentry_test8 */ opts.retprobe = true; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 7ff5fa93d056..a33874b081b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -27,6 +27,7 @@ #include "bpf_iter_test_kern5.skel.h" #include "bpf_iter_test_kern6.skel.h" #include "bpf_iter_bpf_link.skel.h" +#include "bpf_iter_ksym.skel.h" static int duration; @@ -1120,6 +1121,19 @@ static void test_link_iter(void) bpf_iter_bpf_link__destroy(skel); } +static void test_ksym_iter(void) +{ + struct bpf_iter_ksym *skel; + + skel = bpf_iter_ksym__open_and_load(); + if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load")) + return; + + do_dummy_read(skel->progs.dump_ksym); + + bpf_iter_ksym__destroy(skel); +} + #define CMP_BUFFER_SIZE 1024 static char task_vma_output[CMP_BUFFER_SIZE]; static char proc_maps_output[CMP_BUFFER_SIZE]; @@ -1267,4 +1281,6 @@ void test_bpf_iter(void) test_buf_neg_offset(); if (test__start_subtest("link-iter")) test_link_iter(); + if (test__start_subtest("ksym")) + test_ksym_iter(); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c index 380d7a2072e3..4cd8a25afe68 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c @@ -120,6 +120,64 @@ static void check_nested_calls(struct bpf_loop *skel) bpf_link__destroy(link); } +static void check_non_constant_callback(struct bpf_loop *skel) +{ + struct bpf_link *link = + bpf_program__attach(skel->progs.prog_non_constant_callback); + + if (!ASSERT_OK_PTR(link, "link")) + return; + + skel->bss->callback_selector = 0x0F; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1"); + + skel->bss->callback_selector = 0xF0; + usleep(1); + ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2"); + + bpf_link__destroy(link); +} + +static void check_stack(struct bpf_loop *skel) +{ + struct bpf_link *link = bpf_program__attach(skel->progs.stack_check); + const int max_key = 12; + int key; + int map_fd; + + if (!ASSERT_OK_PTR(link, "link")) + return; + + map_fd = bpf_map__fd(skel->maps.map1); + + if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) + goto out; + + for (key = 1; key <= max_key; ++key) { + int val = key; + int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST); + + if (!ASSERT_OK(err, "bpf_map_update_elem")) + goto out; + } + + usleep(1); + + for (key = 1; key <= max_key; ++key) { + int val; + int err = bpf_map_lookup_elem(map_fd, &key, &val); + + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto out; + if (!ASSERT_EQ(val, key + 1, "bad value in the map")) + goto out; + } + +out: + bpf_link__destroy(link); +} + void test_bpf_loop(void) { struct bpf_loop *skel; @@ -140,6 +198,10 @@ void test_bpf_loop(void) check_invalid_flags(skel); if (test__start_subtest("check_nested_calls")) check_nested_calls(skel); + if (test__start_subtest("check_non_constant_callback")) + check_non_constant_callback(skel); + if (test__start_subtest("check_stack")) + check_stack(skel); bpf_loop__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index dd30b1e3a67c..7a74a1579076 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -2,13 +2,29 @@ #include <test_progs.h> #include <network_helpers.h> #include "test_bpf_nf.skel.h" +#include "test_bpf_nf_fail.skel.h" + +static char log_buf[1024 * 1024]; + +struct { + const char *prog_name; + const char *err_msg; +} test_bpf_nf_fail_tests[] = { + { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" }, + { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" }, + { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" }, + { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" }, + { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" }, +}; enum { TEST_XDP, TEST_TC_BPF, }; -void test_bpf_nf_ct(int mode) +static void test_bpf_nf_ct(int mode) { struct test_bpf_nf *skel; int prog_fd, err; @@ -39,14 +55,60 @@ void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id"); ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup"); ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple"); + ASSERT_EQ(skel->data->test_alloc_entry, 0, "Test for alloc new entry"); + ASSERT_EQ(skel->data->test_insert_entry, 0, "Test for insert new entry"); + ASSERT_EQ(skel->data->test_succ_lookup, 0, "Test for successful lookup"); + /* allow some tolerance for test_delta_timeout value to avoid races. */ + ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update"); + ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update"); + /* expected status is IPS_SEEN_REPLY */ + ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update "); end: test_bpf_nf__destroy(skel); } +static void test_bpf_nf_ct_fail(const char *prog_name, const char *err_msg) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, + .kernel_log_size = sizeof(log_buf), + .kernel_log_level = 1); + struct test_bpf_nf_fail *skel; + struct bpf_program *prog; + int ret; + + skel = test_bpf_nf_fail__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "test_bpf_nf_fail__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto end; + + bpf_program__set_autoload(prog, true); + + ret = test_bpf_nf_fail__load(skel); + if (!ASSERT_ERR(ret, "test_bpf_nf_fail__load must fail")) + goto end; + + if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + fprintf(stderr, "Expected: %s\n", err_msg); + fprintf(stderr, "Verifier: %s\n", log_buf); + } + +end: + test_bpf_nf_fail__destroy(skel); +} + void test_bpf_nf(void) { + int i; if (test__start_subtest("xdp-ct")) test_bpf_nf_ct(TEST_XDP); if (test__start_subtest("tc-bpf-ct")) test_bpf_nf_ct(TEST_TC_BPF); + for (i = 0; i < ARRAY_SIZE(test_bpf_nf_fail_tests); i++) { + if (test__start_subtest(test_bpf_nf_fail_tests[i].prog_name)) + test_bpf_nf_ct_fail(test_bpf_nf_fail_tests[i].prog_name, + test_bpf_nf_fail_tests[i].err_msg); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index e9a9a31b2ffe..2959a52ced06 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -9,6 +9,9 @@ #include "bpf_cubic.skel.h" #include "bpf_tcp_nogpl.skel.h" #include "bpf_dctcp_release.skel.h" +#include "tcp_ca_write_sk_pacing.skel.h" +#include "tcp_ca_incompl_cong_ops.skel.h" +#include "tcp_ca_unsupp_cong_op.skel.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -322,6 +325,58 @@ static void test_rel_setsockopt(void) bpf_dctcp_release__destroy(rel_skel); } +static void test_write_sk_pacing(void) +{ + struct tcp_ca_write_sk_pacing *skel; + struct bpf_link *link; + + skel = tcp_ca_write_sk_pacing__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing); + ASSERT_OK_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_write_sk_pacing__destroy(skel); +} + +static void test_incompl_cong_ops(void) +{ + struct tcp_ca_incompl_cong_ops *skel; + struct bpf_link *link; + + skel = tcp_ca_incompl_cong_ops__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + /* That cong_avoid() and cong_control() are missing is only reported at + * this point: + */ + link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops); + ASSERT_ERR_PTR(link, "attach_struct_ops"); + + bpf_link__destroy(link); + tcp_ca_incompl_cong_ops__destroy(skel); +} + +static void test_unsupp_cong_op(void) +{ + libbpf_print_fn_t old_print_fn; + struct tcp_ca_unsupp_cong_op *skel; + + err_str = "attach to unsupported member get_info"; + found = false; + old_print_fn = libbpf_set_print(libbpf_debug_print); + + skel = tcp_ca_unsupp_cong_op__open_and_load(); + ASSERT_NULL(skel, "open_and_load"); + ASSERT_EQ(found, true, "expected_err_msg"); + + tcp_ca_unsupp_cong_op__destroy(skel); + libbpf_set_print(old_print_fn); +} + void test_bpf_tcp_ca(void) { if (test__start_subtest("dctcp")) @@ -334,4 +389,10 @@ void test_bpf_tcp_ca(void) test_dctcp_fallback(); if (test__start_subtest("rel_setsockopt")) test_rel_setsockopt(); + if (test__start_subtest("write_sk_pacing")) + test_write_sk_pacing(); + if (test__start_subtest("incompl_cong_ops")) + test_incompl_cong_ops(); + if (test__start_subtest("unsupp_cong_op")) + test_unsupp_cong_op(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ba5bde53d418..ef6528b8084c 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -34,7 +34,6 @@ static bool always_log; #undef CHECK #define CHECK(condition, format...) _CHECK(condition, "check", duration, format) -#define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f #define NAME_NTH(N) (0xfffe0000 | N) @@ -2897,26 +2896,6 @@ static struct btf_raw_test raw_tests[] = { }, { - .descr = "invalid enum kind_flag", - .raw_types = { - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */ - BTF_ENUM_ENC(NAME_TBD, 0), - BTF_END_RAW, - }, - BTF_STR_SEC("\0A"), - .map_type = BPF_MAP_TYPE_ARRAY, - .map_name = "enum_type_check_btf", - .key_size = sizeof(int), - .value_size = sizeof(int), - .key_type_id = 1, - .value_type_id = 1, - .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid btf_info kind_flag", -}, - -{ .descr = "valid fwd kind_flag", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ @@ -4072,6 +4051,42 @@ static struct btf_raw_test raw_tests[] = { .btf_load_err = true, .err_str = "Type tags don't precede modifiers", }, +{ + .descr = "enum64 test #1, unsigned, size 8", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 8, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, +{ + .descr = "enum64 test #2, signed, size 4", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */ + BTF_ENUM64_ENC(NAME_TBD, -1, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "tag_type_check_btf", + .key_size = sizeof(int), + .value_size = 4, + .key_type_id = 1, + .value_type_id = 2, + .max_entries = 1, +}, }; /* struct btf_raw_test raw_tests[] */ @@ -4636,7 +4651,6 @@ struct btf_file_test { }; static struct btf_file_test file_tests[] = { - { .file = "test_btf_haskv.o", }, { .file = "test_btf_newkv.o", }, { .file = "test_btf_nokv.o", .btf_kv_notfound = true, }, }; @@ -5324,7 +5338,7 @@ static void do_test_pprint(int test_num) ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", "/sys/fs/bpf", test->map_name); - if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", + if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long", "/sys/fs/bpf", test->map_name)) { err = -1; goto done; @@ -7000,9 +7014,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, .expect = { .raw_types = { @@ -7030,9 +7047,12 @@ static struct btf_dedup_test dedup_tests[] = { BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */ + BTF_ENUM64_ENC(NAME_TBD, 0, 0), + BTF_ENUM64_ENC(NAME_TBD, 1, 1), BTF_END_RAW, }, - BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R"), + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"), }, }, { @@ -7493,6 +7513,91 @@ static struct btf_dedup_test dedup_tests[] = { BTF_STR_SEC("\0tag1\0t\0m"), }, }, +{ + .descr = "dedup: enum64, standalone", + .input = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, +{ + .descr = "dedup: enum64, fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum64 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [2] full enum64 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [3] full enum64 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [4] fwd enum64 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + /* [5] incompatible full enum64 with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 123), + /* [2] full enum64 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(4), 0, 456), + /* [3] incompatible full enum64 with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8), + BTF_ENUM64_ENC(NAME_NTH(2), 0, 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, +}, +{ + .descr = "dedup: enum and enum64, no dedup", + .input = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, + .expect = { + .raw_types = { + /* [1] enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 1), + /* [2] enum64 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4), + BTF_ENUM64_ENC(NAME_NTH(2), 1, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val"), + }, +}, }; @@ -7517,6 +7622,8 @@ static int btf_type_size(const struct btf_type *t) return base_size + sizeof(__u32); case BTF_KIND_ENUM: return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ENUM64: + return base_size + vlen * sizeof(struct btf_enum64); case BTF_KIND_ARRAY: return base_size + sizeof(struct btf_array); case BTF_KIND_STRUCT: diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c index addf99c05896..6e36de1302fc 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_write.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -9,6 +9,7 @@ static void gen_btf(struct btf *btf) const struct btf_var_secinfo *vi; const struct btf_type *t; const struct btf_member *m; + const struct btf_enum64 *v64; const struct btf_enum *v; const struct btf_param *p; int id, err, str_off; @@ -171,7 +172,7 @@ static void gen_btf(struct btf *btf) ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); ASSERT_EQ(v->val, 2, "v2_val"); ASSERT_STREQ(btf_type_raw_dump(btf, 9), - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "raw_dump"); @@ -202,7 +203,7 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); ASSERT_EQ(t->size, 4, "enum_fwd_sz"); ASSERT_STREQ(btf_type_raw_dump(btf, 12), - "[12] ENUM 'enum_fwd' size=4 vlen=0", "raw_dump"); + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump"); /* TYPEDEF */ id = btf__add_typedef(btf, "typedef1", 1); @@ -307,6 +308,48 @@ static void gen_btf(struct btf *btf) ASSERT_EQ(t->type, 1, "tag_type"); ASSERT_STREQ(btf_type_raw_dump(btf, 20), "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump"); + + /* ENUM64 */ + id = btf__add_enum64(btf, "e1", 8, true); + ASSERT_EQ(id, 21, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", -1); + ASSERT_OK(err, "v1_res"); + err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */ + ASSERT_OK(err, "v2_res"); + t = btf__type_by_id(btf, 21); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen"); + ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + v64 = btf_enum64(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name"); + ASSERT_EQ(v64->val_hi32, 0x1, "v2_val"); + ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 21), + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", "raw_dump"); + + id = btf__add_enum64(btf, "e1", 8, false); + ASSERT_EQ(id, 22, "enum64_id"); + err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */ + ASSERT_OK(err, "v1_res"); + t = btf__type_by_id(btf, 22); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind"); + ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen"); + ASSERT_EQ(t->size, 8, "enum64_sz"); + v64 = btf_enum64(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name"); + ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val"); + ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val"); + ASSERT_STREQ(btf_type_raw_dump(btf, 22), + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", "raw_dump"); } static void test_btf_add() @@ -332,12 +375,12 @@ static void test_btf_add() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -348,7 +391,12 @@ static void test_btf_add() "\ttype_id=1 offset=4 size=8", "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", - "[20] TYPE_TAG 'tag1' type_id=1"); + "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); btf__free(btf); } @@ -370,7 +418,7 @@ static void test_btf_add_btf() gen_btf(btf2); id = btf__add_btf(btf1, btf2); - if (!ASSERT_EQ(id, 21, "id")) + if (!ASSERT_EQ(id, 23, "id")) goto cleanup; VALIDATE_RAW_BTF( @@ -386,12 +434,12 @@ static void test_btf_add_btf() "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "[8] UNION 'u1' size=8 vlen=1\n" "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", - "[9] ENUM 'e1' size=4 vlen=2\n" + "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", "[10] FWD 'struct_fwd' fwd_kind=struct", "[11] FWD 'union_fwd' fwd_kind=union", - "[12] ENUM 'enum_fwd' size=4 vlen=0", + "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "[13] TYPEDEF 'typedef1' type_id=1", "[14] FUNC 'func1' type_id=15 linkage=global", "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n" @@ -403,36 +451,46 @@ static void test_btf_add_btf() "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "[20] TYPE_TAG 'tag1' type_id=1", + "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615", /* types appended from the second BTF */ - "[21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", - "[22] PTR '(anon)' type_id=21", - "[23] CONST '(anon)' type_id=25", - "[24] VOLATILE '(anon)' type_id=23", - "[25] RESTRICT '(anon)' type_id=24", - "[26] ARRAY '(anon)' type_id=22 index_type_id=21 nr_elems=10", - "[27] STRUCT 's1' size=8 vlen=2\n" - "\t'f1' type_id=21 bits_offset=0\n" - "\t'f2' type_id=21 bits_offset=32 bitfield_size=16", - "[28] UNION 'u1' size=8 vlen=1\n" - "\t'f1' type_id=21 bits_offset=0 bitfield_size=16", - "[29] ENUM 'e1' size=4 vlen=2\n" + "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", + "[24] PTR '(anon)' type_id=23", + "[25] CONST '(anon)' type_id=27", + "[26] VOLATILE '(anon)' type_id=25", + "[27] RESTRICT '(anon)' type_id=26", + "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10", + "[29] STRUCT 's1' size=8 vlen=2\n" + "\t'f1' type_id=23 bits_offset=0\n" + "\t'f2' type_id=23 bits_offset=32 bitfield_size=16", + "[30] UNION 'u1' size=8 vlen=1\n" + "\t'f1' type_id=23 bits_offset=0 bitfield_size=16", + "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n" "\t'v1' val=1\n" "\t'v2' val=2", - "[30] FWD 'struct_fwd' fwd_kind=struct", - "[31] FWD 'union_fwd' fwd_kind=union", - "[32] ENUM 'enum_fwd' size=4 vlen=0", - "[33] TYPEDEF 'typedef1' type_id=21", - "[34] FUNC 'func1' type_id=35 linkage=global", - "[35] FUNC_PROTO '(anon)' ret_type_id=21 vlen=2\n" - "\t'p1' type_id=21\n" - "\t'p2' type_id=22", - "[36] VAR 'var1' type_id=21, linkage=global-alloc", - "[37] DATASEC 'datasec1' size=12 vlen=1\n" - "\ttype_id=21 offset=4 size=8", - "[38] DECL_TAG 'tag1' type_id=36 component_idx=-1", - "[39] DECL_TAG 'tag2' type_id=34 component_idx=1", - "[40] TYPE_TAG 'tag1' type_id=21"); + "[32] FWD 'struct_fwd' fwd_kind=struct", + "[33] FWD 'union_fwd' fwd_kind=union", + "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", + "[35] TYPEDEF 'typedef1' type_id=23", + "[36] FUNC 'func1' type_id=37 linkage=global", + "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n" + "\t'p1' type_id=23\n" + "\t'p2' type_id=24", + "[38] VAR 'var1' type_id=23, linkage=global-alloc", + "[39] DATASEC 'datasec1' size=12 vlen=1\n" + "\ttype_id=23 offset=4 size=8", + "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1", + "[41] DECL_TAG 'tag2' type_id=36 component_idx=1", + "[42] TYPE_TAG 'tag1' type_id=23", + "[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n" + "\t'v1' val=-1\n" + "\t'v2' val=4886718345", + "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n" + "\t'v1' val=18446744073709551615"); cleanup: btf__free(btf1); diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c index 1931a158510e..63a51e9f3630 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_extern.c +++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c @@ -39,6 +39,7 @@ static struct test_case { "CONFIG_STR=\"abracad\"\n" "CONFIG_MISSING=0", .data = { + .unkn_virt_val = 0, .bpf_syscall = false, .tristate_val = TRI_MODULE, .bool_val = true, @@ -121,7 +122,7 @@ static struct test_case { void test_core_extern(void) { const uint32_t kern_ver = get_kernel_version(); - int err, duration = 0, i, j; + int err, i, j; struct test_core_extern *skel = NULL; uint64_t *got, *exp; int n = sizeof(*skel->data) / sizeof(uint64_t); @@ -136,19 +137,17 @@ void test_core_extern(void) continue; skel = test_core_extern__open_opts(&opts); - if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + if (!ASSERT_OK_PTR(skel, "skel_open")) goto cleanup; err = test_core_extern__load(skel); if (t->fails) { - CHECK(!err, "skel_load", - "shouldn't succeed open/load of skeleton\n"); + ASSERT_ERR(err, "skel_load_should_fail"); goto cleanup; - } else if (CHECK(err, "skel_load", - "failed to open/load skeleton\n")) { + } else if (!ASSERT_OK(err, "skel_load")) { goto cleanup; } err = test_core_extern__attach(skel); - if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err)) + if (!ASSERT_OK(err, "attach_raw_tp")) goto cleanup; usleep(1); @@ -158,9 +157,7 @@ void test_core_extern(void) got = (uint64_t *)skel->data; exp = (uint64_t *)&t->data; for (j = 0; j < n; j++) { - CHECK(got[j] != exp[j], "check_res", - "result #%d: expected %llx, but got %llx\n", - j, (__u64)exp[j], (__u64)got[j]); + ASSERT_EQ(got[j], exp[j], "result"); } cleanup: test_core_extern__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 3712dfe1be59..c8655ba9a88f 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -84,6 +84,7 @@ static int duration = 0; #define NESTING_ERR_CASE(name) { \ NESTING_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ @@ -258,12 +259,14 @@ static int duration = 0; BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ "probed:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .raw_tp_name = "sys_enter", \ .prog_name = "test_core_bitfields", \ }, { \ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ "direct:", name), \ .fails = true, \ + .run_btfgen_fails = true, \ .prog_name = "test_core_bitfields_direct", \ } @@ -304,6 +307,7 @@ static int duration = 0; #define SIZE_ERR_CASE(name) { \ SIZE_CASE_COMMON(name), \ .fails = true, \ + .run_btfgen_fails = true, \ } #define TYPE_BASED_CASE_COMMON(name) \ @@ -363,6 +367,25 @@ static int duration = 0; .fails = true, \ } +#define ENUM64VAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enum64val.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .raw_tp_name = "sys_enter", \ + .prog_name = "test_core_enum64val" + +#define ENUM64VAL_CASE(name, ...) { \ + ENUM64VAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enum64val_output), \ +} + +#define ENUM64VAL_ERR_CASE(name) { \ + ENUM64VAL_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case; typedef int (*setup_test_fn)(struct core_reloc_test_case *test); @@ -377,6 +400,7 @@ struct core_reloc_test_case { const char *output; int output_len; bool fails; + bool run_btfgen_fails; bool needs_testmod; bool relaxed_core_relocs; const char *prog_name; @@ -519,7 +543,6 @@ static int __trigger_module_test_read(const struct core_reloc_test_case *test) return 0; } - static const struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -532,6 +555,7 @@ static const struct core_reloc_test_case test_cases[] = { .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, .comm = "test_progs", .comm_len = sizeof("test_progs"), + .local_task_struct_matches = true, }, .output_len = sizeof(struct core_reloc_kernel_output), .raw_tp_name = "sys_enter", @@ -728,9 +752,10 @@ static const struct core_reloc_test_case test_cases[] = { SIZE_CASE(size___diff_offs), SIZE_ERR_CASE(size___err_ambiguous), - /* validate type existence and size relocations */ + /* validate type existence, match, and size relocations */ TYPE_BASED_CASE(type_based, { .struct_exists = 1, + .complex_struct_exists = 1, .union_exists = 1, .enum_exists = 1, .typedef_named_struct_exists = 1, @@ -739,8 +764,24 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_int_exists = 1, .typedef_enum_exists = 1, .typedef_void_ptr_exists = 1, + .typedef_restrict_ptr_exists = 1, .typedef_func_proto_exists = 1, .typedef_arr_exists = 1, + + .struct_matches = 1, + .complex_struct_matches = 1, + .union_matches = 1, + .enum_matches = 1, + .typedef_named_struct_matches = 1, + .typedef_anon_struct_matches = 1, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 1, + .typedef_enum_matches = 1, + .typedef_void_ptr_matches = 1, + .typedef_restrict_ptr_matches = 1, + .typedef_func_proto_matches = 1, + .typedef_arr_matches = 1, + .struct_sz = sizeof(struct a_struct), .union_sz = sizeof(union a_union), .enum_sz = sizeof(enum an_enum), @@ -756,6 +797,45 @@ static const struct core_reloc_test_case test_cases[] = { TYPE_BASED_CASE(type_based___all_missing, { /* all zeros */ }), + TYPE_BASED_CASE(type_based___diff, { + .struct_exists = 1, + .complex_struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + + .struct_matches = 1, + .complex_struct_matches = 1, + .union_matches = 1, + .enum_matches = 1, + .typedef_named_struct_matches = 1, + .typedef_anon_struct_matches = 1, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 0, + .typedef_enum_matches = 1, + .typedef_void_ptr_matches = 1, + .typedef_func_proto_matches = 0, + .typedef_arr_matches = 0, + + .struct_sz = sizeof(struct a_struct___diff), + .union_sz = sizeof(union a_union___diff), + .enum_sz = sizeof(enum an_enum___diff), + .typedef_named_struct_sz = sizeof(named_struct_typedef___diff), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff), + .typedef_int_sz = sizeof(int_typedef___diff), + .typedef_enum_sz = sizeof(enum_typedef___diff), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff), + .typedef_func_proto_sz = sizeof(func_proto_typedef___diff), + .typedef_arr_sz = sizeof(arr_typedef___diff), + }), TYPE_BASED_CASE(type_based___diff_sz, { .struct_exists = 1, .union_exists = 1, @@ -768,6 +848,19 @@ static const struct core_reloc_test_case test_cases[] = { .typedef_void_ptr_exists = 1, .typedef_func_proto_exists = 1, .typedef_arr_exists = 1, + + .struct_matches = 0, + .union_matches = 0, + .enum_matches = 0, + .typedef_named_struct_matches = 0, + .typedef_anon_struct_matches = 0, + .typedef_struct_ptr_matches = 1, + .typedef_int_matches = 0, + .typedef_enum_matches = 0, + .typedef_void_ptr_matches = 1, + .typedef_func_proto_matches = 0, + .typedef_arr_matches = 0, + .struct_sz = sizeof(struct a_struct___diff_sz), .union_sz = sizeof(union a_union___diff_sz), .enum_sz = sizeof(enum an_enum___diff_sz), @@ -782,10 +875,12 @@ static const struct core_reloc_test_case test_cases[] = { }), TYPE_BASED_CASE(type_based___incompat, { .enum_exists = 1, + .enum_matches = 1, .enum_sz = sizeof(enum an_enum), }), TYPE_BASED_CASE(type_based___fn_wrong_args, { .struct_exists = 1, + .struct_matches = 1, .struct_sz = sizeof(struct a_struct), }), @@ -831,6 +926,45 @@ static const struct core_reloc_test_case test_cases[] = { .anon_val2 = 0x222, }), ENUMVAL_ERR_CASE(enumval___err_missing), + + /* 64bit enumerator value existence and value relocations */ + ENUM64VAL_CASE(enum64val, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x1ffffffffULL, + .unsigned_val2 = 0x2, + .signed_val1 = 0x1ffffffffLL, + .signed_val2 = -2, + }), + ENUM64VAL_CASE(enum64val___diff, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = true, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = true, + .unsigned_val1 = 0x101ffffffffULL, + .unsigned_val2 = 0x202ffffffffULL, + .signed_val1 = -101, + .signed_val2 = -202, + }), + ENUM64VAL_CASE(enum64val___val3_missing, { + .unsigned_val1_exists = true, + .unsigned_val2_exists = true, + .unsigned_val3_exists = false, + .signed_val1_exists = true, + .signed_val2_exists = true, + .signed_val3_exists = false, + .unsigned_val1 = 0x111ffffffffULL, + .unsigned_val2 = 0x222, + .signed_val1 = 0x111ffffffffLL, + .signed_val2 = -222, + }), + ENUM64VAL_ERR_CASE(enum64val___err_missing), }; struct data { @@ -894,7 +1028,7 @@ static void run_core_reloc_tests(bool use_btfgen) /* generate a "minimal" BTF file and use it as source */ if (use_btfgen) { - if (!test_case->btf_src_file || test_case->fails) { + if (!test_case->btf_src_file || test_case->run_btfgen_fails) { test__skip(); continue; } diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index a7e74297f15f..5a7e6011f6bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -7,11 +7,9 @@ void serial_test_fexit_stress(void) { - char test_skb[128] = {}; int fexit_fd[CNT] = {}; int link_fd[CNT] = {}; - char error[4096]; - int err, i, filter_fd; + int err, i; const struct bpf_insn trace_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -20,25 +18,9 @@ void serial_test_fexit_stress(void) LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, .expected_attach_type = BPF_TRACE_FEXIT, - .log_buf = error, - .log_size = sizeof(error), ); - const struct bpf_insn skb_program[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - - LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, - .log_buf = error, - .log_size = sizeof(error), - ); - - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = test_skb, - .data_size_in = sizeof(test_skb), - .repeat = 1, - ); + LIBBPF_OPTS(bpf_test_run_opts, topts); err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", trace_opts.expected_attach_type); @@ -58,15 +40,9 @@ void serial_test_fexit_stress(void) goto out; } - filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), - &skb_opts); - if (!ASSERT_GE(filter_fd, 0, "test_program_loaded")) - goto out; + err = bpf_prog_test_run_opts(fexit_fd[0], &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); - err = bpf_prog_test_run_opts(filter_fd, &topts); - close(filter_fd); - CHECK_FAIL(err); out: for (i = 0; i < CNT; i++) { if (link_fd[i]) diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 586dc52d6fb9..d457a55ff408 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -329,7 +329,7 @@ static int get_syms(char ***symsp, size_t *cntp) struct hashmap *map; char buf[256]; FILE *f; - int err; + int err = 0; /* * The available_filter_functions contains many duplicates, @@ -364,6 +364,11 @@ static int get_syms(char ***symsp, size_t *cntp) continue; if (!strncmp(name, "rcu_", 4)) continue; + if (!strcmp(name, "bpf_dispatcher_xdp_func")) + continue; + if (!strncmp(name, "__ftrace_invalid_address__", + sizeof("__ftrace_invalid_address__") - 1)) + continue; err = hashmap__add(map, name, NULL); if (err) { free(name); @@ -404,7 +409,7 @@ static void test_bench_attach(void) double attach_delta, detach_delta; struct bpf_link *link = NULL; char **syms = NULL; - size_t cnt, i; + size_t cnt = 0, i; if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c new file mode 100644 index 000000000000..93e9cddaadcf --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <ctype.h> +#include <test_progs.h> +#include <bpf/btf.h> + +/* + * Utility function uppercasing an entire string. + */ +static void uppercase(char *s) +{ + for (; *s != '\0'; s++) + *s = toupper(*s); +} + +/* + * Test case to check that all bpf_attach_type variants are covered by + * libbpf_bpf_attach_type_str. + */ +static void test_libbpf_bpf_attach_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_attach_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_attach_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val; + const char *attach_type_name; + const char *attach_type_str; + char buf[256]; + + if (attach_type == __MAX_BPF_ATTACH_TYPE) + continue; + + attach_type_name = btf__str_by_offset(btf, e->name_off); + attach_type_str = libbpf_bpf_attach_type_str(attach_type); + ASSERT_OK_PTR(attach_type_str, attach_type_name); + + snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, attach_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_link_type variants are covered by + * libbpf_bpf_link_type_str. + */ +static void test_libbpf_bpf_link_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_link_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_link_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_link_type link_type = (enum bpf_link_type)e->val; + const char *link_type_name; + const char *link_type_str; + char buf[256]; + + if (link_type == MAX_BPF_LINK_TYPE) + continue; + + link_type_name = btf__str_by_offset(btf, e->name_off); + link_type_str = libbpf_bpf_link_type_str(link_type); + ASSERT_OK_PTR(link_type_str, link_type_name); + + snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, link_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_map_type variants are covered by + * libbpf_bpf_map_type_str. + */ +static void test_libbpf_bpf_map_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_map_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_map_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_map_type map_type = (enum bpf_map_type)e->val; + const char *map_type_name; + const char *map_type_str; + char buf[256]; + + map_type_name = btf__str_by_offset(btf, e->name_off); + map_type_str = libbpf_bpf_map_type_str(map_type); + ASSERT_OK_PTR(map_type_str, map_type_name); + + snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, map_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Test case to check that all bpf_prog_type variants are covered by + * libbpf_bpf_prog_type_str. + */ +static void test_libbpf_bpf_prog_type_str(void) +{ + struct btf *btf; + const struct btf_type *t; + const struct btf_enum *e; + int i, n, id; + + btf = btf__parse("/sys/kernel/btf/vmlinux", NULL); + if (!ASSERT_OK_PTR(btf, "btf_parse")) + return; + + /* find enum bpf_prog_type and enumerate each value */ + id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM); + if (!ASSERT_GT(id, 0, "bpf_prog_type_id")) + goto cleanup; + t = btf__type_by_id(btf, id); + e = btf_enum(t); + n = btf_vlen(t); + for (i = 0; i < n; e++, i++) { + enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val; + const char *prog_type_name; + const char *prog_type_str; + char buf[256]; + + prog_type_name = btf__str_by_offset(btf, e->name_off); + prog_type_str = libbpf_bpf_prog_type_str(prog_type); + ASSERT_OK_PTR(prog_type_str, prog_type_name); + + snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str); + uppercase(buf); + + ASSERT_STREQ(buf, prog_type_name, "exp_str_value"); + } + +cleanup: + btf__free(btf); +} + +/* + * Run all libbpf str conversion tests. + */ +void test_libbpf_str(void) +{ + if (test__start_subtest("bpf_attach_type_str")) + test_libbpf_bpf_attach_type_str(); + + if (test__start_subtest("bpf_link_type_str")) + test_libbpf_bpf_link_type_str(); + + if (test__start_subtest("bpf_map_type_str")) + test_libbpf_bpf_map_type_str(); + + if (test__start_subtest("bpf_prog_type_str")) + test_libbpf_bpf_prog_type_str(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c new file mode 100644 index 000000000000..1102e4f42d2d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -0,0 +1,313 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <sys/types.h> +#include <sys/socket.h> +#include <test_progs.h> +#include <bpf/btf.h> + +#include "lsm_cgroup.skel.h" +#include "lsm_cgroup_nonvoid.skel.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" + +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + +static struct btf *btf; + +static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func) +{ + LIBBPF_OPTS(bpf_prog_query_opts, p); + int cnt = 0; + int i; + + ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query"); + + if (!attach_func) + return p.prog_cnt; + + /* When attach_func is provided, count the number of progs that + * attach to the given symbol. + */ + + if (!btf) + btf = btf__load_vmlinux_btf(); + if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux")) + return -1; + + p.prog_ids = malloc(sizeof(u32) * p.prog_cnt); + p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt); + ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query"); + + for (i = 0; i < p.prog_cnt; i++) { + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + int fd; + + fd = bpf_prog_get_fd_by_id(p.prog_ids[i]); + ASSERT_GE(fd, 0, "prog_get_fd_by_id"); + ASSERT_OK(bpf_obj_get_info_by_fd(fd, &info, &info_len), "prog_info_by_fd"); + close(fd); + + if (info.attach_btf_id == + btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC)) + cnt++; + } + + free(p.prog_ids); + free(p.prog_attach_flags); + + return cnt; +} + +static void test_lsm_cgroup_functional(void) +{ + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts); + DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts); + int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1; + int listen_fd, client_fd, accepted_fd; + struct lsm_cgroup *skel = NULL; + int post_create_prog_fd2 = -1; + int post_create_prog_fd = -1; + int bind_link_fd2 = -1; + int bind_prog_fd2 = -1; + int alloc_prog_fd = -1; + int bind_prog_fd = -1; + int bind_link_fd = -1; + int clone_prog_fd = -1; + int err, fd, prio; + socklen_t socklen; + + cgroup_fd3 = test__join_cgroup("/sock_policy_empty"); + if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup")) + goto close_cgroup; + + cgroup_fd2 = test__join_cgroup("/sock_policy_reuse"); + if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse")) + goto close_cgroup; + + cgroup_fd = test__join_cgroup("/sock_policy"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto close_cgroup; + + skel = lsm_cgroup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + goto close_cgroup; + + post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create); + post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2); + bind_prog_fd = bpf_program__fd(skel->progs.socket_bind); + bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2); + alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc); + clone_prog_fd = bpf_program__fd(skel->progs.socket_clone); + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count"); + err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (err == -ENOTSUPP) { + test__skip(); + goto close_cgroup; + } + if (!ASSERT_OK(err, "attach alloc_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count"); + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count"); + err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach clone_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count"); + + /* Make sure replacing works. */ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count"); + err = bpf_prog_attach(post_create_prog_fd, cgroup_fd, + BPF_LSM_CGROUP, 0); + if (!ASSERT_OK(err, "attach post_create_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count"); + + attach_opts.replace_prog_fd = post_create_prog_fd; + err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd, + BPF_LSM_CGROUP, &attach_opts); + if (!ASSERT_OK(err, "prog replace post_create_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count"); + + /* Try the same attach/replace via link API. */ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count"); + bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd, + BPF_LSM_CGROUP, NULL); + if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + + update_opts.old_prog_fd = bind_prog_fd; + update_opts.flags = BPF_F_REPLACE; + + err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts); + if (!ASSERT_OK(err, "link update bind_prog_fd")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + + /* Attach another instance of bind program to another cgroup. + * This should trigger the reuse of the trampoline shim (two + * programs attaching to the same btf_id). + */ + + ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count"); + bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2, + BPF_LSM_CGROUP, NULL); + if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2")) + goto detach_cgroup; + ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count"); + ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count"); + + /* AF_UNIX is prohibited. */ + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_LT(fd, 0, "socket(AF_UNIX)"); + close(fd); + + /* AF_INET6 gets default policy (sk_priority). */ + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)")) + goto detach_cgroup; + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 123, "sk_priority"); + + close(fd); + + /* TX-only AF_PACKET is allowed. */ + + ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0, + "socket(AF_PACKET, ..., ETH_P_ALL)"); + + fd = socket(AF_PACKET, SOCK_RAW, 0); + ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)"); + + /* TX-only AF_PACKET can not be rebound. */ + + struct sockaddr_ll sa = { + .sll_family = AF_PACKET, + .sll_protocol = htons(ETH_P_ALL), + }; + ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0, + "bind(ETH_P_ALL)"); + + close(fd); + + /* Trigger passive open. */ + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + ASSERT_GE(listen_fd, 0, "start_server"); + client_fd = connect_to_fd(listen_fd, 0); + ASSERT_GE(client_fd, 0, "connect_to_fd"); + accepted_fd = accept(listen_fd, NULL, NULL); + ASSERT_GE(accepted_fd, 0, "accept"); + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 234, "sk_priority"); + + /* These are replaced and never called. */ + ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create"); + ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind"); + + /* AF_INET6+SOCK_STREAM + * AF_PACKET+SOCK_RAW + * listen_fd + * client_fd + * accepted_fd + */ + ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2"); + + /* start_server + * bind(ETH_P_ALL) + */ + ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2"); + /* Single accept(). */ + ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone"); + + /* AF_UNIX+SOCK_STREAM (failed) + * AF_INET6+SOCK_STREAM + * AF_PACKET+SOCK_RAW (failed) + * AF_PACKET+SOCK_RAW + * listen_fd + * client_fd + * accepted_fd + */ + ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc"); + + close(listen_fd); + close(client_fd); + close(accepted_fd); + + /* Make sure other cgroup doesn't trigger the programs. */ + + if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join root cgroup")) + goto detach_cgroup; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)")) + goto detach_cgroup; + + prio = 0; + socklen = sizeof(prio); + ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0, + "getsockopt"); + ASSERT_EQ(prio, 0, "sk_priority"); + + close(fd); + +detach_cgroup: + ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_create"); + close(bind_link_fd); + /* Don't close bind_link_fd2, exercise cgroup release cleanup. */ + ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_alloc"); + ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd, + BPF_LSM_CGROUP), 0, "detach_clone"); + +close_cgroup: + close(cgroup_fd); + close(cgroup_fd2); + close(cgroup_fd3); + lsm_cgroup__destroy(skel); +} + +static void test_lsm_cgroup_nonvoid(void) +{ + struct lsm_cgroup_nonvoid *skel = NULL; + + skel = lsm_cgroup_nonvoid__open_and_load(); + ASSERT_NULL(skel, "open succeeds"); + lsm_cgroup_nonvoid__destroy(skel); +} + +void test_lsm_cgroup(void) +{ + if (test__start_subtest("functional")) + test_lsm_cgroup_functional(); + if (test__start_subtest("nonvoid")) + test_lsm_cgroup_nonvoid(); + btf__free(btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index abf890d066eb..34dbd2adc157 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -4,25 +4,35 @@ /* TODO: corrupts other tests uses connect() */ void serial_test_probe_user(void) { - const char *prog_name = "handle_sys_connect"; + static const char *const prog_names[] = { + "handle_sys_connect", +#if defined(__s390x__) + "handle_sys_socketcall", +#endif + }; + enum { prog_count = ARRAY_SIZE(prog_names) }; const char *obj_file = "./test_probe_user.o"; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); int err, results_map_fd, sock_fd, duration = 0; struct sockaddr curr, orig, tmp; struct sockaddr_in *in = (struct sockaddr_in *)&curr; - struct bpf_link *kprobe_link = NULL; - struct bpf_program *kprobe_prog; + struct bpf_link *kprobe_links[prog_count] = {}; + struct bpf_program *kprobe_progs[prog_count]; struct bpf_object *obj; static const int zero = 0; + size_t i; obj = bpf_object__open_file(obj_file, &opts); if (!ASSERT_OK_PTR(obj, "obj_open_file")) return; - kprobe_prog = bpf_object__find_program_by_name(obj, prog_name); - if (CHECK(!kprobe_prog, "find_probe", - "prog '%s' not found\n", prog_name)) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_progs[i] = + bpf_object__find_program_by_name(obj, prog_names[i]); + if (CHECK(!kprobe_progs[i], "find_probe", + "prog '%s' not found\n", prog_names[i])) + goto cleanup; + } err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) @@ -33,9 +43,11 @@ void serial_test_probe_user(void) "err %d\n", results_map_fd)) goto cleanup; - kprobe_link = bpf_program__attach(kprobe_prog); - if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe")) - goto cleanup; + for (i = 0; i < prog_count; i++) { + kprobe_links[i] = bpf_program__attach(kprobe_progs[i]); + if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe")) + goto cleanup; + } memset(&curr, 0, sizeof(curr)); in->sin_family = AF_INET; @@ -69,6 +81,7 @@ void serial_test_probe_user(void) inet_ntoa(in->sin_addr), ntohs(in->sin_port))) goto cleanup; cleanup: - bpf_link__destroy(kprobe_link); + for (i = 0; i < prog_count; i++) + bpf_link__destroy(kprobe_links[i]); bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index f4a13d9dd5c8..c197261d02e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -44,7 +44,7 @@ BTF_ID(union, U) BTF_ID(func, func) extern __u32 test_list_global[]; -BTF_ID_LIST_GLOBAL(test_list_global) +BTF_ID_LIST_GLOBAL(test_list_global, 1) BTF_ID_UNUSED BTF_ID(typedef, S) BTF_ID(typedef, T) diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index eb5f7f5aa81a..1455911d9fcb 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -50,6 +50,13 @@ void test_ringbuf_multi(void) if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; + /* validate ringbuf size adjustment logic */ + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_before"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size + 1), "rb1_resize"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), 2 * page_size, "rb1_size_after"); + ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size), "rb1_reset"); + ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_final"); + proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL); if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n")) goto cleanup; @@ -65,6 +72,10 @@ void test_ringbuf_multi(void) close(proto_fd); proto_fd = -1; + /* make sure we can't resize ringbuf after object load */ + if (!ASSERT_ERR(bpf_map__set_max_entries(skel->maps.ringbuf1, 3 * page_size), "rb1_resize_after_load")) + goto cleanup; + /* only trigger BPF program for current process */ skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index d71226e34c34..d63a20fbed33 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -64,7 +64,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* wait a little for signal handler */ - for (int i = 0; i < 100000000 && !sigusr1_received; i++) + for (int i = 0; i < 1000000000 && !sigusr1_received; i++) j /= i + j + 1; buf[0] = sigusr1_received ? '2' : '0'; diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c index 180afd632f4c..99dac5292b41 100644 --- a/tools/testing/selftests/bpf/prog_tests/skeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -122,6 +122,8 @@ void test_skeleton(void) ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var"); + ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr"); + elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz); ASSERT_OK_PTR(elf_bytes, "elf_bytes"); ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz"); diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 9d211b5c22c4..7d23166c77af 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -394,7 +394,6 @@ void serial_test_sock_fields(void) test(); done: - test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index af293ea1542c..e172d89e92e1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -4,6 +4,7 @@ * Tests for sockmap/sockhash holding kTLS sockets. */ +#include <netinet/tcp.h> #include "test_progs.h" #define MAX_TEST_NAME 80 @@ -92,9 +93,78 @@ close_srv: close(srv); } +static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map) +{ + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + struct sockaddr_in6 *v6; + struct sockaddr_in *v4; + int err, s, zero = 0; + + switch (family) { + case AF_INET: + v4 = (struct sockaddr_in *)&addr; + v4->sin_family = AF_INET; + break; + case AF_INET6: + v6 = (struct sockaddr_in6 *)&addr; + v6->sin6_family = AF_INET6; + break; + default: + PRINT_FAIL("unsupported socket family %d", family); + return; + } + + s = socket(family, SOCK_STREAM, 0); + if (!ASSERT_GE(s, 0, "socket")) + return; + + err = bind(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "bind")) + goto close; + + err = getsockname(s, (struct sockaddr *)&addr, &len); + if (!ASSERT_OK(err, "getsockname")) + goto close; + + err = connect(s, (struct sockaddr *)&addr, len); + if (!ASSERT_OK(err, "connect")) + goto close; + + /* save sk->sk_prot and set it to tls_prots */ + err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls")); + if (!ASSERT_OK(err, "setsockopt(TCP_ULP)")) + goto close; + + /* sockmap update should not affect saved sk_prot */ + err = bpf_map_update_elem(map, &zero, &s, BPF_ANY); + if (!ASSERT_ERR(err, "sockmap update elem")) + goto close; + + /* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */ + err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero)); + ASSERT_OK(err, "setsockopt(TCP_NODELAY)"); + +close: + close(s); +} + +static const char *fmt_test_name(const char *subtest_name, int family, + enum bpf_map_type map_type) +{ + const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"; + const char *family_str = AF_INET ? "IPv4" : "IPv6"; + static char test_name[MAX_TEST_NAME]; + + snprintf(test_name, MAX_TEST_NAME, + "sockmap_ktls %s %s %s", + subtest_name, family_str, map_type_str); + + return test_name; +} + static void run_tests(int family, enum bpf_map_type map_type) { - char test_name[MAX_TEST_NAME]; int map; map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL); @@ -103,14 +173,10 @@ static void run_tests(int family, enum bpf_map_type map_type) return; } - snprintf(test_name, MAX_TEST_NAME, - "sockmap_ktls disconnect_after_delete %s %s", - family == AF_INET ? "IPv4" : "IPv6", - map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH"); - if (!test__start_subtest(test_name)) - return; - - test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type))) + test_sockmap_ktls_disconnect_after_delete(family, map); + if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type))) + test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map); close(map); } diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index c4da87ec3ba4..19c70880cfb3 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -831,6 +831,59 @@ out: bpf_object__close(obj); } +#include "tailcall_bpf2bpf6.skel.h" + +/* Tail call counting works even when there is data on stack which is + * not aligned to 8 bytes. + */ +static void test_tailcall_bpf2bpf_6(void) +{ + struct tailcall_bpf2bpf6 *obj; + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + + obj = tailcall_bpf2bpf6__open_and_load(); + if (!ASSERT_OK_PTR(obj, "open and load")) + return; + + main_fd = bpf_program__fd(obj->progs.entry); + if (!ASSERT_GE(main_fd, 0, "entry prog fd")) + goto out; + + map_fd = bpf_map__fd(obj->maps.jmp_table); + if (!ASSERT_GE(map_fd, 0, "jmp_table map fd")) + goto out; + + prog_fd = bpf_program__fd(obj->progs.classifier_0); + if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd")) + goto out; + + i = 0; + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "jmp_table map update")) + goto out; + + err = bpf_prog_test_run_opts(main_fd, &topts); + ASSERT_OK(err, "entry prog test run"); + ASSERT_EQ(topts.retval, 0, "tailcall retval"); + + data_fd = bpf_map__fd(obj->maps.bss); + if (!ASSERT_GE(map_fd, 0, "bss map fd")) + goto out; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + ASSERT_OK(err, "bss map lookup"); + ASSERT_EQ(val, 1, "done flag is set"); + +out: + tailcall_bpf2bpf6__destroy(obj); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -855,4 +908,6 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_4(false); if (test__start_subtest("tailcall_bpf2bpf_5")) test_tailcall_bpf2bpf_4(true); + if (test__start_subtest("tailcall_bpf2bpf_6")) + test_tailcall_bpf2bpf_6(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 958dae769c52..cb6a53b3e023 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -646,7 +646,7 @@ static void test_tcp_clear_dtime(struct test_tc_dtime *skel) __u32 *errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0); + test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); @@ -683,7 +683,7 @@ static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_STREAM, addr, 0); + test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); /* fwdns_prio100 prog does not read delivery_time_type, so * kernel puts the (rcv) timetamp in __sk_buff->tstamp @@ -715,13 +715,13 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_DGRAM, addr, 0); + test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); /* non mono delivery time is not forwarded */ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P100)); + dtime_cnt_str(t, INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 3bba4a2a0530..eea274110267 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -82,6 +82,7 @@ #define MAC_TUNL_DEV0 "52:54:00:d9:01:00" #define MAC_TUNL_DEV1 "52:54:00:d9:02:00" +#define MAC_VETH1 "52:54:00:d9:03:00" #define VXLAN_TUNL_DEV0 "vxlan00" #define VXLAN_TUNL_DEV1 "vxlan11" @@ -108,10 +109,9 @@ static int config_device(void) { SYS("ip netns add at_ns0"); - SYS("ip link add veth0 type veth peer name veth1"); + SYS("ip link add veth0 address " MAC_VETH1 " type veth peer name veth1"); SYS("ip link set veth0 netns at_ns0"); SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1"); - SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1"); SYS("ip link set dev veth1 up mtu 1500"); SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0"); SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500"); @@ -140,6 +140,8 @@ static int add_vxlan_tunnel(void) VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0); SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s", IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0); + SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0", + IP4_ADDR2_VETH1, MAC_VETH1); /* root namespace */ SYS("ip link add dev %s type vxlan external gbp dstport 4789", @@ -277,6 +279,17 @@ static void test_vxlan_tunnel(void) if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd)) goto done; + /* load and attach bpf prog to veth dev tc hook point */ + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + tc_hook.ifindex = ifindex; + set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst); + if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1)) + goto done; + /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */ nstoken = open_netns("at_ns0"); if (!ASSERT_OK_PTR(nstoken, "setns src")) diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 5f733d50b0d7..9ad9da0f215e 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -12,7 +12,7 @@ int lets_test_this(int); static volatile int idx = 2; static volatile __u64 bla = 0xFEDCBA9876543210ULL; -static volatile short nums[] = {-1, -2, -3, }; +static volatile short nums[] = {-1, -2, -3, -4}; static volatile struct { int x; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c new file mode 100644 index 000000000000..874a846e298c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#define _GNU_SOURCE +#include <test_progs.h> +#include <network_helpers.h> +#include <ctype.h> + +#define CMD_OUT_BUF_SIZE 1023 + +#define SYS(cmd) ({ \ + if (!ASSERT_OK(system(cmd), (cmd))) \ + goto out; \ +}) + +#define SYS_OUT(cmd, ...) ({ \ + char buf[1024]; \ + snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \ + FILE *f = popen(buf, "r"); \ + if (!ASSERT_OK_PTR(f, buf)) \ + goto out; \ + f; \ +}) + +/* out must be at least `size * 4 + 1` bytes long */ +static void escape_str(char *out, const char *in, size_t size) +{ + static const char *hex = "0123456789ABCDEF"; + size_t i; + + for (i = 0; i < size; i++) { + if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') { + *out++ = in[i]; + } else { + *out++ = '\\'; + *out++ = 'x'; + *out++ = hex[(in[i] >> 4) & 0xf]; + *out++ = hex[in[i] & 0xf]; + } + } + *out++ = '\0'; +} + +static bool expect_str(char *buf, size_t size, const char *str, const char *name) +{ + static char escbuf_expected[CMD_OUT_BUF_SIZE * 4]; + static char escbuf_actual[CMD_OUT_BUF_SIZE * 4]; + static int duration = 0; + bool ok; + + ok = size == strlen(str) && !memcmp(buf, str, size); + + if (!ok) { + escape_str(escbuf_expected, str, strlen(str)); + escape_str(escbuf_actual, buf, size); + } + CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n", + name, escbuf_actual, escbuf_expected); + + return ok; +} + +static void test_synproxy(bool xdp) +{ + int server_fd = -1, client_fd = -1, accept_fd = -1; + char *prog_id = NULL, *prog_id_end; + struct nstoken *ns = NULL; + FILE *ctrl_file = NULL; + char buf[CMD_OUT_BUF_SIZE]; + size_t size; + + SYS("ip netns add synproxy"); + + SYS("ip link add tmp0 type veth peer name tmp1"); + SYS("ip link set tmp1 netns synproxy"); + SYS("ip link set tmp0 up"); + SYS("ip addr replace 198.18.0.1/24 dev tmp0"); + + /* When checksum offload is enabled, the XDP program sees wrong + * checksums and drops packets. + */ + SYS("ethtool -K tmp0 tx off"); + if (xdp) + /* Workaround required for veth. */ + SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null"); + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + SYS("ip link set lo up"); + SYS("ip link set tmp1 up"); + SYS("ip addr replace 198.18.0.2/24 dev tmp1"); + SYS("sysctl -w net.ipv4.tcp_syncookies=2"); + SYS("sysctl -w net.ipv4.tcp_timestamps=1"); + SYS("sysctl -w net.netfilter.nf_conntrack_tcp_loose=0"); + SYS("iptables -t raw -I PREROUTING \ + -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \ + -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460"); + SYS("iptables -t filter -A INPUT \ + -i tmp1 -m state --state INVALID -j DROP"); + + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \ + --single --mss4 1460 --mss6 1440 \ + --wscale 7 --ttl 64%s", xdp ? "" : " --tc"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total SYNACKs generated: 0\n", + "initial SYNACKs")) + goto out; + + if (!xdp) { + ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress"); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + prog_id = memmem(buf, size, " id ", 4); + if (!ASSERT_OK_PTR(prog_id, "find prog id")) + goto out; + prog_id += 4; + if (!ASSERT_LT(prog_id, buf + size, "find prog id begin")) + goto out; + prog_id_end = prog_id; + while (prog_id_end < buf + size && *prog_id_end >= '0' && + *prog_id_end <= '9') + prog_id_end++; + if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end")) + goto out; + *prog_id_end = '\0'; + } + + server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) + goto out; + + close_netns(ns); + ns = NULL; + + client_fd = connect_to_fd(server_fd, 10000); + if (!ASSERT_GE(client_fd, 0, "connect_to_fd")) + goto out; + + accept_fd = accept(server_fd, NULL, NULL); + if (!ASSERT_GE(accept_fd, 0, "accept")) + goto out; + + ns = open_netns("synproxy"); + if (!ASSERT_OK_PTR(ns, "setns")) + goto out; + + if (xdp) + ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single"); + else + ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single", + prog_id); + size = fread(buf, 1, sizeof(buf), ctrl_file); + pclose(ctrl_file); + if (!expect_str(buf, size, "Total SYNACKs generated: 1\n", + "SYNACKs after connection")) + goto out; + +out: + if (accept_fd >= 0) + close(accept_fd); + if (client_fd >= 0) + close(client_fd); + if (server_fd >= 0) + close(server_fd); + if (ns) + close_netns(ns); + + system("ip link del tmp0"); + system("ip netns del synproxy"); +} + +void test_xdp_synproxy(void) +{ + if (test__start_subtest("xdp")) + test_synproxy(true); + if (test__start_subtest("tc")) + test_synproxy(false); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c new file mode 100644 index 000000000000..56957557e3e1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Bytedance */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +#define MAX_ENTRIES 1000 + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, u64); + __uint(max_entries, MAX_ENTRIES); +} hash_map_bench SEC(".maps"); + +u64 __attribute__((__aligned__(256))) percpu_time[256]; +u64 nr_loops; + +static int loop_update_callback(__u32 index, u32 *key) +{ + u64 init_val = 1; + + bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int benchmark(void *ctx) +{ + u32 cpu = bpf_get_smp_processor_id(); + u32 key = cpu + MAX_ENTRIES; + u64 start_time = bpf_ktime_get_ns(); + + bpf_loop(nr_loops, loop_update_callback, &key, 0); + percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index 97ec8bc76ae6..e9846606690d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -22,6 +22,7 @@ #define BTF_F_NONAME BTF_F_NONAME___not_used #define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used #define BTF_F_ZERO BTF_F_ZERO___not_used +#define bpf_iter__ksym bpf_iter__ksym___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -44,6 +45,7 @@ #undef BTF_F_NONAME #undef BTF_F_PTR_RAW #undef BTF_F_ZERO +#undef bpf_iter__ksym struct bpf_iter_meta { struct seq_file *seq; @@ -151,3 +153,8 @@ enum { BTF_F_PTR_RAW = (1ULL << 2), BTF_F_ZERO = (1ULL << 3), }; + +struct bpf_iter__ksym { + struct bpf_iter_meta *meta; + struct kallsym_iter *ksym; +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c new file mode 100644 index 000000000000..285c008cbf9c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Oracle and/or its affiliates. */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +unsigned long last_sym_value = 0; + +static inline char tolower(char c) +{ + if (c >= 'A' && c <= 'Z') + c += ('a' - 'A'); + return c; +} + +static inline char toupper(char c) +{ + if (c >= 'a' && c <= 'z') + c -= ('a' - 'A'); + return c; +} + +/* Dump symbols with max size; the latter is calculated by caching symbol N value + * and when iterating on symbol N+1, we can print max size of symbol N via + * address of N+1 - address of N. + */ +SEC("iter/ksym") +int dump_ksym(struct bpf_iter__ksym *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct kallsym_iter *iter = ctx->ksym; + __u32 seq_num = ctx->meta->seq_num; + unsigned long value; + char type; + int ret; + + if (!iter) + return 0; + + if (seq_num == 0) { + BPF_SEQ_PRINTF(seq, "ADDR TYPE NAME MODULE_NAME KIND MAX_SIZE\n"); + return 0; + } + if (last_sym_value) + BPF_SEQ_PRINTF(seq, "0x%x\n", iter->value - last_sym_value); + else + BPF_SEQ_PRINTF(seq, "\n"); + + value = iter->show_value ? iter->value : 0; + + last_sym_value = value; + + type = iter->type; + + if (iter->module_name[0]) { + type = iter->exported ? toupper(type) : tolower(type); + BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ", + value, type, iter->name, iter->module_name); + } else { + BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name); + } + if (!iter->pos_arch_end || iter->pos_arch_end > iter->pos) + BPF_SEQ_PRINTF(seq, "CORE "); + else if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "MOD "); + else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos) + BPF_SEQ_PRINTF(seq, "FTRACE_MOD "); + else if (!iter->pos_bpf_end || iter->pos_bpf_end > iter->pos) + BPF_SEQ_PRINTF(seq, "BPF "); + else + BPF_SEQ_PRINTF(seq, "KPROBE "); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c index e08565282759..de1fc82d2710 100644 --- a/tools/testing/selftests/bpf/progs/bpf_loop.c +++ b/tools/testing/selftests/bpf/progs/bpf_loop.c @@ -11,11 +11,19 @@ struct callback_ctx { int output; }; +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 32); + __type(key, int); + __type(value, int); +} map1 SEC(".maps"); + /* These should be set by the user program */ u32 nested_callback_nr_loops; u32 stop_index = -1; u32 nr_loops; int pid; +int callback_selector; /* Making these global variables so that the userspace program * can verify the output through the skeleton @@ -111,3 +119,109 @@ int prog_nested_calls(void *ctx) return 0; } + +static int callback_set_f0(int i, void *ctx) +{ + g_output = 0xF0; + return 0; +} + +static int callback_set_0f(int i, void *ctx) +{ + g_output = 0x0F; + return 0; +} + +/* + * non-constant callback is a corner case for bpf_loop inline logic + */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int prog_non_constant_callback(void *ctx) +{ + struct callback_ctx data = {}; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int (*callback)(int i, void *ctx); + + g_output = 0; + + if (callback_selector == 0x0F) + callback = callback_set_0f; + else + callback = callback_set_f0; + + bpf_loop(1, callback, NULL, 0); + + return 0; +} + +static int stack_check_inner_callback(void *ctx) +{ + return 0; +} + +static int map1_lookup_elem(int key) +{ + int *val = bpf_map_lookup_elem(&map1, &key); + + return val ? *val : -1; +} + +static void map1_update_elem(int key, int val) +{ + bpf_map_update_elem(&map1, &key, &val, BPF_ANY); +} + +static int stack_check_outer_callback(void *ctx) +{ + int a = map1_lookup_elem(1); + int b = map1_lookup_elem(2); + int c = map1_lookup_elem(3); + int d = map1_lookup_elem(4); + int e = map1_lookup_elem(5); + int f = map1_lookup_elem(6); + + bpf_loop(1, stack_check_inner_callback, NULL, 0); + + map1_update_elem(1, a + 1); + map1_update_elem(2, b + 1); + map1_update_elem(3, c + 1); + map1_update_elem(4, d + 1); + map1_update_elem(5, e + 1); + map1_update_elem(6, f + 1); + + return 0; +} + +/* Some of the local variables in stack_check and + * stack_check_outer_callback would be allocated on stack by + * compiler. This test should verify that stack content for these + * variables is preserved between calls to bpf_loop (might be an issue + * if loop inlining allocates stack slots incorrectly). + */ +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int stack_check(void *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + int a = map1_lookup_elem(7); + int b = map1_lookup_elem(8); + int c = map1_lookup_elem(9); + int d = map1_lookup_elem(10); + int e = map1_lookup_elem(11); + int f = map1_lookup_elem(12); + + bpf_loop(1, stack_check_outer_callback, NULL, 0); + + map1_update_elem(7, a + 1); + map1_update_elem(8, b + 1); + map1_update_elem(9, c + 1); + map1_update_elem(10, d + 1); + map1_update_elem(11, e + 1); + map1_update_elem(12, f + 1); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c index 05838ed9b89c..e1e11897e99b 100644 --- a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c +++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c @@ -64,9 +64,9 @@ int BPF_KPROBE(handle_sys_prctl) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_prctl") -int BPF_KPROBE_SYSCALL(prctl_enter, int option, unsigned long arg2, - unsigned long arg3, unsigned long arg4, unsigned long arg5) +SEC("ksyscall/prctl") +int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2, + unsigned long arg3, unsigned long arg4, unsigned long arg5) { pid_t pid = bpf_get_current_pid_tgid() >> 32; diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 1c1289ba5fc5..98dd2c4815f0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -8,6 +8,7 @@ #define SOL_SOCKET 1 #define SO_SNDBUF 7 #define __SO_ACCEPTCON (1 << 16) +#define SO_PRIORITY 12 #define SOL_TCP 6 #define TCP_CONGESTION 13 diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c new file mode 100644 index 000000000000..888e79db6a77 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c new file mode 100644 index 000000000000..194749130d87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c new file mode 100644 index 000000000000..3d732d4193e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c new file mode 100644 index 000000000000..17cf5d6a848d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enum64val___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c new file mode 100644 index 000000000000..57ae2c258928 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index f9dc9766546e..fd8e1b4c6762 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -13,6 +13,7 @@ struct core_reloc_kernel_output { int valid[10]; char comm[sizeof("test_progs")]; int comm_len; + bool local_task_struct_matches; }; /* @@ -860,10 +861,11 @@ struct core_reloc_size___err_ambiguous2 { }; /* - * TYPE EXISTENCE & SIZE + * TYPE EXISTENCE, MATCH & SIZE */ struct core_reloc_type_based_output { bool struct_exists; + bool complex_struct_exists; bool union_exists; bool enum_exists; bool typedef_named_struct_exists; @@ -872,9 +874,24 @@ struct core_reloc_type_based_output { bool typedef_int_exists; bool typedef_enum_exists; bool typedef_void_ptr_exists; + bool typedef_restrict_ptr_exists; bool typedef_func_proto_exists; bool typedef_arr_exists; + bool struct_matches; + bool complex_struct_matches; + bool union_matches; + bool enum_matches; + bool typedef_named_struct_matches; + bool typedef_anon_struct_matches; + bool typedef_struct_ptr_matches; + bool typedef_int_matches; + bool typedef_enum_matches; + bool typedef_void_ptr_matches; + bool typedef_restrict_ptr_matches; + bool typedef_func_proto_matches; + bool typedef_arr_matches; + int struct_sz; int union_sz; int enum_sz; @@ -892,6 +909,14 @@ struct a_struct { int x; }; +struct a_complex_struct { + union { + struct a_struct * restrict a; + void *b; + } x; + volatile long y; +}; + union a_union { int y; int z; @@ -916,6 +941,7 @@ typedef int int_typedef; typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; typedef void *void_ptr_typedef; +typedef int *restrict restrict_ptr_typedef; typedef int (*func_proto_typedef)(long); @@ -923,22 +949,86 @@ typedef char arr_typedef[20]; struct core_reloc_type_based { struct a_struct f1; - union a_union f2; - enum an_enum f3; - named_struct_typedef f4; - anon_struct_typedef f5; - struct_ptr_typedef f6; - int_typedef f7; - enum_typedef f8; - void_ptr_typedef f9; - func_proto_typedef f10; - arr_typedef f11; + struct a_complex_struct f2; + union a_union f3; + enum an_enum f4; + named_struct_typedef f5; + anon_struct_typedef f6; + struct_ptr_typedef f7; + int_typedef f8; + enum_typedef f9; + void_ptr_typedef f10; + restrict_ptr_typedef f11; + func_proto_typedef f12; + arr_typedef f13; }; /* no types in target */ struct core_reloc_type_based___all_missing { }; +/* different member orders, enum variant values, signedness, etc */ +struct a_struct___diff { + int x; + int a; +}; + +struct a_struct___forward; + +struct a_complex_struct___diff { + union { + struct a_struct___forward *a; + void *b; + } x; + volatile long y; +}; + +union a_union___diff { + int z; + int y; +}; + +typedef struct a_struct___diff named_struct_typedef___diff; + +typedef struct { int z, x, y; } anon_struct_typedef___diff; + +typedef struct { + int c; + int b; + int a; +} *struct_ptr_typedef___diff; + +enum an_enum___diff { + AN_ENUM_VAL2___diff = 0, + AN_ENUM_VAL1___diff = 42, + AN_ENUM_VAL3___diff = 1, +}; + +typedef unsigned int int_typedef___diff; + +typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff; + +typedef const void *void_ptr_typedef___diff; + +typedef int_typedef___diff (*func_proto_typedef___diff)(long); + +typedef char arr_typedef___diff[3]; + +struct core_reloc_type_based___diff { + struct a_struct___diff f1; + struct a_complex_struct___diff f2; + union a_union___diff f3; + enum an_enum___diff f4; + named_struct_typedef___diff f5; + anon_struct_typedef___diff f6; + struct_ptr_typedef___diff f7; + int_typedef___diff f8; + enum_typedef___diff f9; + void_ptr_typedef___diff f10; + func_proto_typedef___diff f11; + arr_typedef___diff f12; +}; + /* different type sizes, extra modifiers, anon vs named enums, etc */ struct a_struct___diff_sz { long x; @@ -1117,6 +1207,20 @@ struct core_reloc_enumval_output { int anon_val2; }; +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + enum named_enum { NAMED_ENUM_VAL1 = 1, NAMED_ENUM_VAL2 = 2, @@ -1134,6 +1238,23 @@ struct core_reloc_enumval { anon_enum f2; }; +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val { + enum named_unsigned_enum64 f1; + enum named_signed_enum64 f2; +}; + /* differing enumerator values */ enum named_enum___diff { NAMED_ENUM_VAL1___diff = 101, @@ -1152,6 +1273,23 @@ struct core_reloc_enumval___diff { anon_enum___diff f2; }; +enum named_unsigned_enum64___diff { + UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL, + UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL, + UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL, +}; + +enum named_signed_enum64___diff { + SIGNED_ENUM64_VAL1___diff = -101, + SIGNED_ENUM64_VAL2___diff = -202, + SIGNED_ENUM64_VAL3___diff = -303, +}; + +struct core_reloc_enum64val___diff { + enum named_unsigned_enum64___diff f1; + enum named_signed_enum64___diff f2; +}; + /* missing (optional) third enum value */ enum named_enum___val3_missing { NAMED_ENUM_VAL1___val3_missing = 111, @@ -1168,6 +1306,21 @@ struct core_reloc_enumval___val3_missing { anon_enum___val3_missing f2; }; +enum named_unsigned_enum64___val3_missing { + UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL, + UNSIGNED_ENUM64_VAL2___val3_missing = 0x222, +}; + +enum named_signed_enum64___val3_missing { + SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL, + SIGNED_ENUM64_VAL2___val3_missing = -222, +}; + +struct core_reloc_enum64val___val3_missing { + enum named_unsigned_enum64___val3_missing f1; + enum named_signed_enum64___val3_missing f2; +}; + /* missing (mandatory) second enum value, should fail */ enum named_enum___err_missing { NAMED_ENUM_VAL1___err_missing = 1, @@ -1183,3 +1336,18 @@ struct core_reloc_enumval___err_missing { enum named_enum___err_missing f1; anon_enum___err_missing f2; }; + +enum named_unsigned_enum64___err_missing { + UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL, +}; + +enum named_signed_enum64___err_missing { + SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL, + SIGNED_ENUM64_VAL3___err_missing = -3, +}; + +struct core_reloc_enum64val___err_missing { + enum named_unsigned_enum64___err_missing f1; + enum named_signed_enum64___err_missing f2; +}; diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index d811cff73597..0a26c243e6e9 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -140,12 +140,12 @@ int use_after_invalid(void *ctx) bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr); - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); bpf_ringbuf_submit_dynptr(&ptr, 0); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); return 0; } @@ -338,7 +338,7 @@ int invalid_helper2(void *ctx) get_map_val_dynptr(&ptr); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0); + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0); return 0; } @@ -377,7 +377,7 @@ int invalid_write2(void *ctx) memcpy((void *)&ptr + 8, &x, sizeof(x)); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); bpf_ringbuf_submit_dynptr(&ptr, 0); @@ -473,7 +473,7 @@ int invalid_read2(void *ctx) get_map_val_dynptr(&ptr); /* this should fail */ - bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0); + bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index d67be48df4b2..a3a6103c8569 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -43,10 +43,10 @@ int test_read_write(void *ctx) bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr); /* Write data into the dynptr */ - err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data)); + err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); /* Read the data that was written into the dynptr */ - err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0); + err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); /* Ensure the data we read matches the data we wrote */ for (i = 0; i < sizeof(read_data); i++) { diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c index 93510f4f0f3a..08f95a8155d1 100644 --- a/tools/testing/selftests/bpf/progs/kprobe_multi.c +++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c @@ -54,21 +54,21 @@ static void kprobe_multi_check(void *ctx, bool is_return) if (is_return) { SET(kretprobe_test1_result, &bpf_fentry_test1, 8); - SET(kretprobe_test2_result, &bpf_fentry_test2, 7); - SET(kretprobe_test3_result, &bpf_fentry_test3, 6); - SET(kretprobe_test4_result, &bpf_fentry_test4, 5); - SET(kretprobe_test5_result, &bpf_fentry_test5, 4); - SET(kretprobe_test6_result, &bpf_fentry_test6, 3); - SET(kretprobe_test7_result, &bpf_fentry_test7, 2); + SET(kretprobe_test2_result, &bpf_fentry_test2, 2); + SET(kretprobe_test3_result, &bpf_fentry_test3, 7); + SET(kretprobe_test4_result, &bpf_fentry_test4, 6); + SET(kretprobe_test5_result, &bpf_fentry_test5, 5); + SET(kretprobe_test6_result, &bpf_fentry_test6, 4); + SET(kretprobe_test7_result, &bpf_fentry_test7, 3); SET(kretprobe_test8_result, &bpf_fentry_test8, 1); } else { SET(kprobe_test1_result, &bpf_fentry_test1, 1); - SET(kprobe_test2_result, &bpf_fentry_test2, 2); - SET(kprobe_test3_result, &bpf_fentry_test3, 3); - SET(kprobe_test4_result, &bpf_fentry_test4, 4); - SET(kprobe_test5_result, &bpf_fentry_test5, 5); - SET(kprobe_test6_result, &bpf_fentry_test6, 6); - SET(kprobe_test7_result, &bpf_fentry_test7, 7); + SET(kprobe_test2_result, &bpf_fentry_test2, 7); + SET(kprobe_test3_result, &bpf_fentry_test3, 2); + SET(kprobe_test4_result, &bpf_fentry_test4, 3); + SET(kprobe_test5_result, &bpf_fentry_test5, 4); + SET(kprobe_test6_result, &bpf_fentry_test6, 5); + SET(kprobe_test7_result, &bpf_fentry_test7, 6); SET(kprobe_test8_result, &bpf_fentry_test8, 8); } diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c new file mode 100644 index 000000000000..2c3234c5b73a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define HASHMAP_SZ 4194304 + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); + }); +} array_of_local_storage_maps SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1000); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, HASHMAP_SZ); + __type(key, int); + __type(value, int); + }); +} array_of_hash_maps SEC(".maps"); + +long important_hits; +long hits; + +/* set from user-space */ +const volatile unsigned int use_hashmap; +const volatile unsigned int hashmap_num_keys; +const volatile unsigned int num_maps; +const volatile unsigned int interleave; + +struct loop_ctx { + struct task_struct *task; + long loop_hits; + long loop_important_hits; +}; + +static int do_lookup(unsigned int elem, struct loop_ctx *lctx) +{ + void *map, *inner_map; + int idx = 0; + + if (use_hashmap) + map = &array_of_hash_maps; + else + map = &array_of_local_storage_maps; + + inner_map = bpf_map_lookup_elem(map, &elem); + if (!inner_map) + return -1; + + if (use_hashmap) { + idx = bpf_get_prandom_u32() % hashmap_num_keys; + bpf_map_lookup_elem(inner_map, &idx); + } else { + bpf_task_storage_get(inner_map, lctx->task, &idx, + BPF_LOCAL_STORAGE_GET_F_CREATE); + } + + lctx->loop_hits++; + if (!elem) + lctx->loop_important_hits++; + return 0; +} + +static long loop(u32 index, void *ctx) +{ + struct loop_ctx *lctx = (struct loop_ctx *)ctx; + unsigned int map_idx = index % num_maps; + + do_lookup(map_idx, lctx); + if (interleave && map_idx % 3 == 0) + do_lookup(0, lctx); + return 0; +} + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int get_local(void *ctx) +{ + struct loop_ctx lctx; + + lctx.task = bpf_get_current_task_btf(); + lctx.loop_hits = 0; + lctx.loop_important_hits = 0; + bpf_loop(10000, &loop, &lctx, 0); + __sync_add_and_fetch(&hits, lctx.loop_hits); + __sync_add_and_fetch(&important_hits, lctx.loop_important_hits); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c new file mode 100644 index 000000000000..03bf69f49075 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} task_storage SEC(".maps"); + +long hits; +long gp_hits; +long gp_times; +long current_gp_start; +long unexpected; +bool postgp_seen; + +SEC("fentry/" SYS_PREFIX "sys_getpgid") +int get_local(void *ctx) +{ + struct task_struct *task; + int idx; + int *s; + + idx = 0; + task = bpf_get_current_task_btf(); + s = bpf_task_storage_get(&task_storage, task, &idx, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!s) + return 0; + + *s = 3; + bpf_task_storage_delete(&task_storage, task); + __sync_add_and_fetch(&hits, 1); + return 0; +} + +SEC("fentry/rcu_tasks_trace_pregp_step") +int pregp_step(struct pt_regs *ctx) +{ + current_gp_start = bpf_ktime_get_ns(); + return 0; +} + +SEC("fentry/rcu_tasks_trace_postgp") +int postgp(struct pt_regs *ctx) +{ + if (!current_gp_start && postgp_seen) { + /* Will only happen if prog tracing rcu_tasks_trace_pregp_step doesn't + * execute before this prog + */ + __sync_add_and_fetch(&unexpected, 1); + return 0; + } + + __sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start); + __sync_add_and_fetch(&gp_hits, 1); + current_gp_start = 0; + postgp_seen = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c new file mode 100644 index 000000000000..4f2d60b87b75 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#ifndef AF_PACKET +#define AF_PACKET 17 +#endif + +#ifndef AF_UNIX +#define AF_UNIX 1 +#endif + +#ifndef EPERM +#define EPERM 1 +#endif + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, __u64); + __type(value, __u64); +} cgroup_storage SEC(".maps"); + +int called_socket_post_create; +int called_socket_post_create2; +int called_socket_bind; +int called_socket_bind2; +int called_socket_alloc; +int called_socket_clone; + +static __always_inline int test_local_storage(void) +{ + __u64 *val; + + val = bpf_get_local_storage(&cgroup_storage, 0); + if (!val) + return 0; + *val += 1; + + return 1; +} + +static __always_inline int real_create(struct socket *sock, int family, + int protocol) +{ + struct sock *sk; + int prio = 123; + + /* Reject non-tx-only AF_PACKET. */ + if (family == AF_PACKET && protocol != 0) + return 0; /* EPERM */ + + sk = sock->sk; + if (!sk) + return 1; + + /* The rest of the sockets get default policy. */ + if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + + /* Make sure bpf_getsockopt is allowed and works. */ + prio = 0; + if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 0; /* EPERM */ + if (prio != 123) + return 0; /* EPERM */ + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, + int type, int protocol, int kern) +{ + called_socket_post_create++; + return real_create(sock, family, protocol); +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_post_create") +int BPF_PROG(socket_post_create2, struct socket *sock, int family, + int type, int protocol, int kern) +{ + called_socket_post_create2++; + return real_create(sock, family, protocol); +} + +static __always_inline int real_bind(struct socket *sock, + struct sockaddr *address, + int addrlen) +{ + struct sockaddr_ll sa = {}; + + if (sock->sk->__sk_common.skc_family != AF_PACKET) + return 1; + + if (sock->sk->sk_kern_sock) + return 1; + + bpf_probe_read_kernel(&sa, sizeof(sa), address); + if (sa.sll_protocol) + return 0; /* EPERM */ + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_bind") +int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + called_socket_bind++; + return real_bind(sock, address, addrlen); +} + +/* __cgroup_bpf_run_lsm_socket */ +SEC("lsm_cgroup/socket_bind") +int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + called_socket_bind2++; + return real_bind(sock, address, addrlen); +} + +/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */ +SEC("lsm_cgroup/sk_alloc_security") +int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority) +{ + called_socket_alloc++; + if (family == AF_UNIX) + return 0; /* EPERM */ + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 0; /* EPERM */ + + return 1; +} + +/* __cgroup_bpf_run_lsm_sock */ +SEC("lsm_cgroup/inet_csk_clone") +int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req) +{ + int prio = 234; + + if (!newsk) + return 1; + + /* Accepted request sockets get a different priority. */ + if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 1; + + /* Make sure bpf_getsockopt is allowed and works. */ + prio = 0; + if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio))) + return 1; + if (prio != 234) + return 1; + + /* Can access cgroup local storage. */ + if (!test_local_storage()) + return 1; + + called_socket_clone++; + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c new file mode 100644 index 000000000000..6cb0f161f417 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("lsm_cgroup/inet_csk_clone") +int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req) +{ + /* Can not return any errors from void LSM hooks. */ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c new file mode 100644 index 000000000000..41ce83da78e8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define __unused __attribute__((unused)) + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +int done = 0; + +SEC("tc") +int classifier_0(struct __sk_buff *skb __unused) +{ + done = 1; + return 0; +} + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + /* Don't propagate the constant to the caller */ + volatile int ret = 1; + + bpf_tail_call_static(skb, &jmp_table, 0); + return ret; +} + +SEC("tc") +int entry(struct __sk_buff *skb) +{ + /* Have data on stack which size is not a multiple of 8 */ + volatile char arr[1] = {}; + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c new file mode 100644 index 000000000000..7bb872fb22dd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/incompl_cong_ops_ssthresh") +__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/incompl_cong_ops_undo_cwnd") +__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops incompl_cong_ops = { + /* Intentionally leaving out any of the required cong_avoid() and + * cong_control() here. + */ + .ssthresh = (void *)incompl_cong_ops_ssthresh, + .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd, + .name = "bpf_incompl_ops", +}; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c new file mode 100644 index 000000000000..c06f4a41c21a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("struct_ops/unsupp_cong_op_get_info") +size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + return 0; +} + +SEC(".struct_ops") +struct tcp_congestion_ops unsupp_cong_op = { + .get_info = (void *)unsupp_cong_op_get_info, + .name = "bpf_unsupp_op", +}; diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c new file mode 100644 index 000000000000..43447704cf0e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define USEC_PER_SEC 1000000UL + +#define min(a, b) ((a) < (b) ? (a) : (b)) + +static inline struct tcp_sock *tcp_sk(const struct sock *sk) +{ + return (struct tcp_sock *)sk; +} + +SEC("struct_ops/write_sk_pacing_init") +void BPF_PROG(write_sk_pacing_init, struct sock *sk) +{ +#ifdef ENABLE_ATOMICS_TESTS + __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE, + SK_PACING_NEEDED); +#else + sk->sk_pacing_status = SK_PACING_NEEDED; +#endif +} + +SEC("struct_ops/write_sk_pacing_cong_control") +void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk, + const struct rate_sample *rs) +{ + const struct tcp_sock *tp = tcp_sk(sk); + unsigned long rate = + ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) / + (tp->srtt_us ?: 1U << 3); + sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate); +} + +SEC("struct_ops/write_sk_pacing_ssthresh") +__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk) +{ + return tcp_sk(sk)->snd_ssthresh; +} + +SEC("struct_ops/write_sk_pacing_undo_cwnd") +__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +SEC(".struct_ops") +struct tcp_congestion_ops write_sk_pacing = { + .init = (void *)write_sk_pacing_init, + .cong_control = (void *)write_sk_pacing_cong_control, + .ssthresh = (void *)write_sk_pacing_ssthresh, + .undo_cwnd = (void *)write_sk_pacing_undo_cwnd, + .name = "bpf_w_sk_pacing", +}; diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index ce9acf4db8d2..a1e45fec8938 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2017 Facebook -#include <linux/ptrace.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> #include "bpf_misc.h" int kprobe_res = 0; @@ -17,6 +17,11 @@ int uprobe_byname_res = 0; int uretprobe_byname_res = 0; int uprobe_byname2_res = 0; int uretprobe_byname2_res = 0; +int uprobe_byname3_sleepable_res = 0; +int uprobe_byname3_res = 0; +int uretprobe_byname3_sleepable_res = 0; +int uretprobe_byname3_res = 0; +void *user_ptr = 0; SEC("kprobe") int handle_kprobe(struct pt_regs *ctx) @@ -25,13 +30,24 @@ int handle_kprobe(struct pt_regs *ctx) return 0; } -SEC("kprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KPROBE(handle_kprobe_auto) +SEC("ksyscall/nanosleep") +int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem) { kprobe2_res = 11; return 0; } +/** + * This program will be manually made sleepable on the userspace side + * and should thus be unattachable. + */ +SEC("kprobe/" SYS_PREFIX "sys_nanosleep") +int handle_kprobe_sleepable(struct pt_regs *ctx) +{ + kprobe_res = 2; + return 0; +} + SEC("kretprobe") int handle_kretprobe(struct pt_regs *ctx) { @@ -39,11 +55,11 @@ int handle_kretprobe(struct pt_regs *ctx) return 0; } -SEC("kretprobe/" SYS_PREFIX "sys_nanosleep") -int BPF_KRETPROBE(handle_kretprobe_auto) +SEC("kretsyscall/nanosleep") +int BPF_KRETPROBE(handle_kretprobe_auto, int ret) { kretprobe2_res = 22; - return 0; + return ret; } SEC("uprobe") @@ -93,4 +109,47 @@ int handle_uretprobe_byname2(struct pt_regs *ctx) return 0; } +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +SEC("uprobe.s//proc/self/exe:trigger_func3") +int handle_uprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uprobe_byname3_sleepable_res = 9; + return 0; +} + +/** + * same target as the uprobe.s above to force sleepable and non-sleepable + * programs in the same bpf_prog_array + */ +SEC("uprobe//proc/self/exe:trigger_func3") +int handle_uprobe_byname3(struct pt_regs *ctx) +{ + uprobe_byname3_res = 10; + return 0; +} + +SEC("uretprobe.s//proc/self/exe:trigger_func3") +int handle_uretprobe_byname3_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uretprobe_byname3_sleepable_res = 11; + return 0; +} + +SEC("uretprobe//proc/self/exe:trigger_func3") +int handle_uretprobe_byname3(struct pt_regs *ctx) +{ + uretprobe_byname3_res = 12; + return 0; +} + + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c index f00a9731930e..196cd8dfe42a 100644 --- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c @@ -8,6 +8,8 @@ #define EINVAL 22 #define ENOENT 2 +extern unsigned long CONFIG_HZ __kconfig; + int test_einval_bpf_tuple = 0; int test_einval_reserved = 0; int test_einval_netns_id = 0; @@ -16,6 +18,11 @@ int test_eproto_l4proto = 0; int test_enonet_netns_id = 0; int test_enoent_lookup = 0; int test_eafnosupport = 0; +int test_alloc_entry = -EINVAL; +int test_insert_entry = -EAFNOSUPPORT; +int test_succ_lookup = -ENOENT; +u32 test_delta_timeout = 0; +u32 test_status = 0; struct nf_conn; @@ -26,31 +33,44 @@ struct bpf_ct_opts___local { u8 reserved[3]; } __attribute__((preserve_access_index)); +struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int bpf_ct_change_status(struct nf_conn *, u32) __ksym; static __always_inline void -nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, - struct bpf_ct_opts___local *, u32), +nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), + struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32), void *ctx) { struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 }; struct bpf_sock_tuple bpf_tuple; struct nf_conn *ct; + int err; __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4)); - ct = func(ctx, NULL, 0, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_einval_bpf_tuple = opts_def.error; opts_def.reserved[0] = 1; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.reserved[0] = 0; opts_def.l4proto = IPPROTO_TCP; if (ct) @@ -59,21 +79,24 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_einval_reserved = opts_def.error; opts_def.netns_id = -2; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_einval_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def) - 1); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def) - 1); if (ct) bpf_ct_release(ct); else test_einval_len_opts = opts_def.error; opts_def.l4proto = IPPROTO_ICMP; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.l4proto = IPPROTO_TCP; if (ct) bpf_ct_release(ct); @@ -81,37 +104,75 @@ nf_ct_test(struct nf_conn *(*func)(void *, struct bpf_sock_tuple *, u32, test_eproto_l4proto = opts_def.error; opts_def.netns_id = 0xf00f; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); opts_def.netns_id = -1; if (ct) bpf_ct_release(ct); else test_enonet_netns_id = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_enoent_lookup = opts_def.error; - ct = func(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, sizeof(opts_def)); + ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def, + sizeof(opts_def)); if (ct) bpf_ct_release(ct); else test_eafnosupport = opts_def.error; + + bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */ + bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */ + bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */ + bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */ + + ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def, + sizeof(opts_def)); + if (ct) { + struct nf_conn *ct_ins; + + bpf_ct_set_timeout(ct, 10000); + bpf_ct_set_status(ct, IPS_CONFIRMED); + + ct_ins = bpf_ct_insert_entry(ct); + if (ct_ins) { + struct nf_conn *ct_lk; + + ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), + &opts_def, sizeof(opts_def)); + if (ct_lk) { + /* update ct entry timeout */ + bpf_ct_change_timeout(ct_lk, 10000); + test_delta_timeout = ct_lk->timeout - bpf_jiffies64(); + test_delta_timeout /= CONFIG_HZ; + test_status = IPS_SEEN_REPLY; + bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY); + bpf_ct_release(ct_lk); + test_succ_lookup = 0; + } + bpf_ct_release(ct_ins); + test_insert_entry = 0; + } + test_alloc_entry = 0; + } } SEC("xdp") int nf_xdp_ct_test(struct xdp_md *ctx) { - nf_ct_test((void *)bpf_xdp_ct_lookup, ctx); + nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx); return 0; } SEC("tc") int nf_skb_ct_test(struct __sk_buff *ctx) { - nf_ct_test((void *)bpf_skb_ct_lookup, ctx); + nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx); return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c new file mode 100644 index 000000000000..bf79af15c808 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +struct nf_conn; + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 reserved[3]; +} __attribute__((preserve_access_index)); + +struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32, + struct bpf_ct_opts___local *, u32) __ksym; +struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym; +void bpf_ct_release(struct nf_conn *) __ksym; +void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym; +int bpf_ct_set_status(struct nf_conn *, u32) __ksym; +int bpf_ct_change_status(struct nf_conn *, u32) __ksym; + +SEC("?tc") +int alloc_release(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_release(ct); + return 0; +} + +SEC("?tc") +int insert_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int lookup_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_insert_entry(ct); + return 0; +} + +SEC("?tc") +int set_timeout_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + bpf_ct_set_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int set_status_after_insert(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + ct = bpf_ct_insert_entry(ct); + if (!ct) + return 0; + bpf_ct_set_status(ct, 0); + return 0; +} + +SEC("?tc") +int change_timeout_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_timeout(ct, 0); + return 0; +} + +SEC("?tc") +int change_status_after_alloc(struct __sk_buff *ctx) +{ + struct bpf_ct_opts___local opts = {}; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + + ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts)); + if (!ct) + return 0; + bpf_ct_change_status(ct, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c deleted file mode 100644 index 07c94df13660..000000000000 --- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c +++ /dev/null @@ -1,51 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2018 Facebook */ -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include "bpf_legacy.h" - -struct ipv_counts { - unsigned int v4; - unsigned int v6; -}; - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -struct bpf_map_def SEC("maps") btf_map = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(struct ipv_counts), - .max_entries = 4, -}; -#pragma GCC diagnostic pop - -BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts); - -__attribute__((noinline)) -int test_long_fname_2(void) -{ - struct ipv_counts *counts; - int key = 0; - - counts = bpf_map_lookup_elem(&btf_map, &key); - if (!counts) - return 0; - - counts->v6++; - - return 0; -} - -__attribute__((noinline)) -int test_long_fname_1(void) -{ - return test_long_fname_2(); -} - -SEC("dummy_tracepoint") -int _dummy_tracepoint(void *arg) -{ - return test_long_fname_1(); -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c index 762671a2e90c..251854a041b5 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c +++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c @@ -9,19 +9,6 @@ struct ipv_counts { unsigned int v6; }; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" -/* just to validate we can handle maps in multiple sections */ -struct bpf_map_def SEC("maps") btf_map_legacy = { - .type = BPF_MAP_TYPE_ARRAY, - .key_size = sizeof(int), - .value_size = sizeof(long long), - .max_entries = 4, -}; -#pragma GCC diagnostic pop - -BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts); - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, 4); @@ -41,11 +28,6 @@ int test_long_fname_2(void) counts->v6++; - /* just verify we can reference both maps */ - counts = bpf_map_lookup_elem(&btf_map_legacy, &key); - if (!counts) - return 0; - return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c index 3ac3603ad53d..a3c7c1042f35 100644 --- a/tools/testing/selftests/bpf/progs/test_core_extern.c +++ b/tools/testing/selftests/bpf/progs/test_core_extern.c @@ -11,6 +11,7 @@ static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; extern int LINUX_KERNEL_VERSION __kconfig; +extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak; extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */ extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak; extern bool CONFIG_BOOL __kconfig __weak; @@ -22,6 +23,7 @@ extern const char CONFIG_STR[8] __kconfig __weak; extern uint64_t CONFIG_MISSING __kconfig __weak; uint64_t kern_ver = -1; +uint64_t unkn_virt_val = -1; uint64_t bpf_syscall = -1; uint64_t tristate_val = -1; uint64_t bool_val = -1; @@ -38,6 +40,7 @@ int handle_sys_enter(struct pt_regs *ctx) int i; kern_ver = LINUX_KERNEL_VERSION; + unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN; bpf_syscall = CONFIG_BPF_SYSCALL; tristate_val = CONFIG_TRISTATE; bool_val = CONFIG_BOOL; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c new file mode 100644 index 000000000000..63147fbfae6e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_unsigned_enum64 { + UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL, + UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL, + UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL, +}; + +enum named_signed_enum64 { + SIGNED_ENUM64_VAL1 = 0x1ffffffffLL, + SIGNED_ENUM64_VAL2 = -2, + SIGNED_ENUM64_VAL3 = 0x3ffffffffLL, +}; + +struct core_reloc_enum64val_output { + bool unsigned_val1_exists; + bool unsigned_val2_exists; + bool unsigned_val3_exists; + bool signed_val1_exists; + bool signed_val2_exists; + bool signed_val3_exists; + + long unsigned_val1; + long unsigned_val2; + long signed_val1; + long signed_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enum64val(void *ctx) +{ +#if __clang_major__ >= 15 + struct core_reloc_enum64val_output *out = (void *)&data.out; + enum named_unsigned_enum64 named_unsigned = 0; + enum named_signed_enum64 named_signed = 0; + + out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1); + out->unsigned_val2_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL2); + out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3); + out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2); + out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3); + + out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1); + out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2); + out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1); + out->signed_val2 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2); + /* NAMED_ENUM64_VAL3 value is optional */ + +#else + data.skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index 145028b52ad8..a17dd83eae67 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -21,6 +21,7 @@ struct core_reloc_kernel_output { /* we have test_progs[-flavor], so cut flavor part */ char comm[sizeof("test_progs")]; int comm_len; + bool local_task_struct_matches; }; struct task_struct { @@ -30,11 +31,25 @@ struct task_struct { struct task_struct *group_leader; }; +struct mm_struct___wrong { + int abc_whatever_should_not_exist; +}; + +struct task_struct___local { + int pid; + struct mm_struct___wrong *mm; +}; + #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) SEC("raw_tracepoint/sys_enter") int test_core_kernel(void *ctx) { + /* Support for the BPF_TYPE_MATCHES argument to the + * __builtin_preserve_type_info builtin was added at some point during + * development of clang 15 and it's what we require for this test. + */ +#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15 struct task_struct *task = (void *)bpf_get_current_task(); struct core_reloc_kernel_output *out = (void *)&data.out; uint64_t pid_tgid = bpf_get_current_pid_tgid(); @@ -93,6 +108,10 @@ int test_core_kernel(void *ctx) group_leader, group_leader, group_leader, group_leader, comm); + out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local); +#else + data.skip = true; +#endif return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c index fb60f8195c53..2edb4df35e6e 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -19,6 +19,14 @@ struct a_struct { int x; }; +struct a_complex_struct { + union { + struct a_struct *a; + void *b; + } x; + volatile long y; +}; + union a_union { int y; int z; @@ -43,6 +51,7 @@ typedef int int_typedef; typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; typedef void *void_ptr_typedef; +typedef int *restrict restrict_ptr_typedef; typedef int (*func_proto_typedef)(long); @@ -50,6 +59,7 @@ typedef char arr_typedef[20]; struct core_reloc_type_based_output { bool struct_exists; + bool complex_struct_exists; bool union_exists; bool enum_exists; bool typedef_named_struct_exists; @@ -58,9 +68,24 @@ struct core_reloc_type_based_output { bool typedef_int_exists; bool typedef_enum_exists; bool typedef_void_ptr_exists; + bool typedef_restrict_ptr_exists; bool typedef_func_proto_exists; bool typedef_arr_exists; + bool struct_matches; + bool complex_struct_matches; + bool union_matches; + bool enum_matches; + bool typedef_named_struct_matches; + bool typedef_anon_struct_matches; + bool typedef_struct_ptr_matches; + bool typedef_int_matches; + bool typedef_enum_matches; + bool typedef_void_ptr_matches; + bool typedef_restrict_ptr_matches; + bool typedef_func_proto_matches; + bool typedef_arr_matches; + int struct_sz; int union_sz; int enum_sz; @@ -77,10 +102,17 @@ struct core_reloc_type_based_output { SEC("raw_tracepoint/sys_enter") int test_core_type_based(void *ctx) { -#if __has_builtin(__builtin_preserve_type_info) + /* Support for the BPF_TYPE_MATCHES argument to the + * __builtin_preserve_type_info builtin was added at some point during + * development of clang 15 and it's what we require for this test. Part of it + * could run with merely __builtin_preserve_type_info (which could be checked + * separately), but we have to find an upper bound. + */ +#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15 struct core_reloc_type_based_output *out = (void *)&data.out; out->struct_exists = bpf_core_type_exists(struct a_struct); + out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct); out->union_exists = bpf_core_type_exists(union a_union); out->enum_exists = bpf_core_type_exists(enum an_enum); out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef); @@ -89,9 +121,24 @@ int test_core_type_based(void *ctx) out->typedef_int_exists = bpf_core_type_exists(int_typedef); out->typedef_enum_exists = bpf_core_type_exists(enum_typedef); out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef); + out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef); out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); + out->struct_matches = bpf_core_type_matches(struct a_struct); + out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct); + out->union_matches = bpf_core_type_matches(union a_union); + out->enum_matches = bpf_core_type_matches(enum an_enum); + out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef); + out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef); + out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef); + out->typedef_int_matches = bpf_core_type_matches(int_typedef); + out->typedef_enum_matches = bpf_core_type_matches(enum_typedef); + out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef); + out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef); + out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef); + out->typedef_arr_matches = bpf_core_type_matches(arr_typedef); + out->struct_sz = bpf_core_type_size(struct a_struct); out->union_sz = bpf_core_type_size(union a_union); out->enum_sz = bpf_core_type_size(enum an_enum); diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c index 702578a5e496..a8e501af9604 100644 --- a/tools/testing/selftests/bpf/progs/test_probe_user.c +++ b/tools/testing/selftests/bpf/progs/test_probe_user.c @@ -1,37 +1,47 @@ // SPDX-License-Identifier: GPL-2.0 - -#include <linux/ptrace.h> -#include <linux/bpf.h> - -#include <netinet/in.h> - +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> #include "bpf_misc.h" static struct sockaddr_in old; -SEC("kprobe/" SYS_PREFIX "sys_connect") -int BPF_KPROBE(handle_sys_connect) +static int handle_sys_connect_common(struct sockaddr_in *uservaddr) { -#if SYSCALL_WRAPPER == 1 - struct pt_regs *real_regs; -#endif struct sockaddr_in new; - void *ptr; -#if SYSCALL_WRAPPER == 0 - ptr = (void *)PT_REGS_PARM2(ctx); -#else - real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx); - bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs)); + bpf_probe_read_user(&old, sizeof(old), uservaddr); + __builtin_memset(&new, 0xab, sizeof(new)); + bpf_probe_write_user(uservaddr, &new, sizeof(new)); + + return 0; +} + +SEC("ksyscall/connect") +int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr, + int addrlen) +{ + return handle_sys_connect_common(uservaddr); +} + +#if defined(bpf_target_s390) +#ifndef SYS_CONNECT +#define SYS_CONNECT 3 #endif - bpf_probe_read_user(&old, sizeof(old), ptr); - __builtin_memset(&new, 0xab, sizeof(new)); - bpf_probe_write_user(ptr, &new, sizeof(new)); +SEC("ksyscall/socketcall") +int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args) +{ + if (call == SYS_CONNECT) { + struct sockaddr_in *uservaddr; + + bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]); + return handle_sys_connect_common(uservaddr); + } return 0; } +#endif char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c index 1b1187d2967b..1a4e93f6d9df 100644 --- a/tools/testing/selftests/bpf/progs/test_skeleton.c +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -51,6 +51,8 @@ int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 }; int read_mostly_var __read_mostly; int out_mostly_var; +char huge_arr[16 * 1024 * 1024]; + SEC("raw_tp/sys_enter") int handler(const void *ctx) { @@ -71,6 +73,8 @@ int handler(const void *ctx) out_mostly_var = read_mostly_var; + huge_arr[sizeof(huge_arr) - 1] = 123; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 06f300d06dbd..b596479a9ebe 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -11,6 +11,8 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include <sys/socket.h> @@ -115,6 +117,19 @@ static bool bpf_fwd(void) return test < TCP_IP4_RT_FWD; } +static __u8 get_proto(void) +{ + switch (test) { + case UDP_IP4: + case UDP_IP6: + case UDP_IP4_RT_FWD: + case UDP_IP6_RT_FWD: + return IPPROTO_UDP; + default: + return IPPROTO_TCP; + } +} + /* -1: parse error: TC_ACT_SHOT * 0: not testing traffic: TC_ACT_OK * >0: first byte is the inet_proto, second byte has the netns @@ -122,11 +137,16 @@ static bool bpf_fwd(void) */ static int skb_get_type(struct __sk_buff *skb) { + __u16 dst_ns_port = __bpf_htons(50000 + test); void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); __u8 inet_proto = 0, ns = 0; struct ipv6hdr *ip6h; + __u16 sport, dport; struct iphdr *iph; + struct tcphdr *th; + struct udphdr *uh; + void *trans; switch (skb->protocol) { case __bpf_htons(ETH_P_IP): @@ -138,6 +158,7 @@ static int skb_get_type(struct __sk_buff *skb) else if (iph->saddr == ip4_dst) ns = DST_NS; inet_proto = iph->protocol; + trans = iph + 1; break; case __bpf_htons(ETH_P_IPV6): ip6h = data + sizeof(struct ethhdr); @@ -148,15 +169,43 @@ static int skb_get_type(struct __sk_buff *skb) else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) ns = DST_NS; inet_proto = ip6h->nexthdr; + trans = ip6h + 1; break; default: return 0; } - if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns) + /* skb is not from src_ns or dst_ns. + * skb is not the testing IPPROTO. + */ + if (!ns || inet_proto != get_proto()) return 0; - return (ns << 8 | inet_proto); + switch (inet_proto) { + case IPPROTO_TCP: + th = trans; + if (th + 1 > data_end) + return -1; + sport = th->source; + dport = th->dest; + break; + case IPPROTO_UDP: + uh = trans; + if (uh + 1 > data_end) + return -1; + sport = uh->source; + dport = uh->dest; + break; + default: + return 0; + } + + /* The skb is the testing traffic */ + if ((ns == SRC_NS && dport == dst_ns_port) || + (ns == DST_NS && sport == dst_ns_port)) + return (ns << 8 | inet_proto); + + return 0; } /* format: direction@iface@netns diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 17f2f325b3f3..df0673c4ecbe 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -14,15 +14,24 @@ #include <linux/if_packet.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/icmp.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/pkt_cls.h> #include <linux/erspan.h> +#include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) +#define VXLAN_UDP_PORT 4789 + +/* Only IPv4 address assigned to veth1. + * 172.16.1.200 + */ +#define ASSIGNED_ADDR_VETH1 0xac1001c8 + struct geneve_opt { __be16 opt_class; __u8 type; @@ -33,6 +42,11 @@ struct geneve_opt { __u8 opt_data[8]; /* hard-coded to 8 byte */ }; +struct vxlanhdr { + __be32 vx_flags; + __be32 vx_vni; +} __attribute__((packed)); + struct vxlan_metadata { __u32 gbp; }; @@ -369,14 +383,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) int ret; struct bpf_tunnel_key key; struct vxlan_metadata md; + __u32 orig_daddr; __u32 index = 0; - __u32 *local_ip = NULL; - - local_ip = bpf_map_lookup_elem(&local_ip_map, &index); - if (!local_ip) { - log_err(ret); - return TC_ACT_SHOT; - } ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); if (ret < 0) { @@ -390,11 +398,10 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) return TC_ACT_SHOT; } - if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) { + if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) { bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n", key.tunnel_id, key.local_ipv4, key.remote_ipv4, md.gbp); - bpf_printk("local_ip 0x%x\n", *local_ip); log_err(ret); return TC_ACT_SHOT; } @@ -403,6 +410,61 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb) } SEC("tc") +int veth_set_outer_dst(struct __sk_buff *skb) +{ + struct ethhdr *eth = (struct ethhdr *)(long)skb->data; + __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1); + void *data_end = (void *)(long)skb->data_end; + struct udphdr *udph; + struct iphdr *iph; + __u32 index = 0; + int ret = 0; + int shrink; + __s64 csum; + + if ((void *)eth + sizeof(*eth) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return TC_ACT_OK; + + iph = (struct iphdr *)(eth + 1); + if ((void *)iph + sizeof(*iph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (iph->protocol != IPPROTO_UDP) + return TC_ACT_OK; + + udph = (struct udphdr *)(iph + 1); + if ((void *)udph + sizeof(*udph) > data_end) { + log_err(ret); + return TC_ACT_SHOT; + } + if (udph->dest != bpf_htons(VXLAN_UDP_PORT)) + return TC_ACT_OK; + + if (iph->daddr != assigned_ip) { + csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip, + sizeof(__u32), 0); + if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr), + &assigned_ip, sizeof(__u32), 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check), + 0, csum, 0) < 0) { + log_err(ret); + return TC_ACT_SHOT; + } + bpf_skb_change_type(skb, PACKET_HOST); + } + return TC_ACT_OK; +} + +SEC("tc") int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb) { struct bpf_tunnel_key key; diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c index 913acdffd90f..3987ff174f1f 100644 --- a/tools/testing/selftests/bpf/progs/test_varlen.c +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -41,20 +41,20 @@ int handler64_unsigned(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload1; - u64 len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) return 0; len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len1 = len; } len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); - if (len <= MAX_LEN) { + if (len >= 0) { payload += len; payload1_len2 = len; } @@ -123,7 +123,7 @@ int handler32_signed(void *regs) { int pid = bpf_get_current_pid_tgid() >> 32; void *payload = payload4; - int len; + long len; /* ignore irrelevant invocations */ if (test_pid != pid || !capture) diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 125d872d7981..ba48fcb98ab2 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -239,7 +239,7 @@ bool parse_udp(void *data, void *data_end, udp = data + off; if (udp + 1 > data_end) - return 0; + return false; if (!is_icmp) { pckt->flow.port16[0] = udp->source; pckt->flow.port16[1] = udp->dest; @@ -247,7 +247,7 @@ bool parse_udp(void *data, void *data_end, pckt->flow.port16[0] = udp->dest; pckt->flow.port16[1] = udp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -261,7 +261,7 @@ bool parse_tcp(void *data, void *data_end, tcp = data + off; if (tcp + 1 > data_end) - return 0; + return false; if (tcp->syn) pckt->flags |= (1 << 1); if (!is_icmp) { @@ -271,7 +271,7 @@ bool parse_tcp(void *data, void *data_end, pckt->flow.port16[0] = tcp->dest; pckt->flow.port16[1] = tcp->source; } - return 1; + return true; } static __attribute__ ((noinline)) @@ -287,7 +287,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, void *data; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -295,7 +295,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, old_eth = data + sizeof(struct ipv6hdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || ip6h + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 56710; @@ -314,7 +314,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval, ip6h->saddr.in6_u.u6_addr32[2] = 3; ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix; memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16); - return 1; + return true; } static __attribute__ ((noinline)) @@ -335,7 +335,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, ip_suffix <<= 15; ip_suffix ^= pckt->flow.src; if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr))) - return 0; + return false; data = (void *)(long)xdp->data; data_end = (void *)(long)xdp->data_end; new_eth = data; @@ -343,7 +343,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, old_eth = data + sizeof(struct iphdr); if (new_eth + 1 > data_end || old_eth + 1 > data_end || iph + 1 > data_end) - return 0; + return false; memcpy(new_eth->eth_dest, cval->mac, 6); memcpy(new_eth->eth_source, old_eth->eth_dest, 6); new_eth->eth_proto = 8; @@ -367,8 +367,8 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, csum += *next_iph_u16++; iph->check = ~((csum & 0xffff) + (csum >> 16)); if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; - return 1; + return false; + return true; } static __attribute__ ((noinline)) @@ -386,10 +386,10 @@ bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4) else new_eth->eth_proto = 56710; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) @@ -404,10 +404,10 @@ bool decap_v4(struct xdp_md *xdp, void **data, void **data_end) memcpy(new_eth->eth_dest, old_eth->eth_dest, 6); new_eth->eth_proto = 8; if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr))) - return 0; + return false; *data = (void *)(long)xdp->data; *data_end = (void *)(long)xdp->data_end; - return 1; + return true; } static __attribute__ ((noinline)) diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c new file mode 100644 index 000000000000..736686e903f6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -0,0 +1,843 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <asm/errno.h> + +#define TC_ACT_OK 0 +#define TC_ACT_SHOT 2 + +#define NSEC_PER_SEC 1000000000L + +#define ETH_ALEN 6 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD + +#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) + +#define IP_DF 0x4000 +#define IP_MF 0x2000 +#define IP_OFFSET 0x1fff + +#define NEXTHDR_TCP 6 + +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_SACK_PERM 4 +#define TCPOPT_TIMESTAMP 8 + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_SACK_PERM 2 +#define TCPOLEN_TIMESTAMP 10 + +#define TCP_TS_HZ 1000 +#define TS_OPT_WSCALE_MASK 0xf +#define TS_OPT_SACK (1 << 4) +#define TS_OPT_ECN (1 << 5) +#define TSBITS 6 +#define TSMASK (((__u32)1 << TSBITS) - 1) +#define TCP_MAX_WSCALE 14U + +#define IPV4_MAXLEN 60 +#define TCP_MAXLEN 60 + +#define DEFAULT_MSS4 1460 +#define DEFAULT_MSS6 1440 +#define DEFAULT_WSCALE 7 +#define DEFAULT_TTL 64 +#define MAX_ALLOWED_PORTS 8 + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u64); + __uint(max_entries, 2); +} values SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u16); + __uint(max_entries, MAX_ALLOWED_PORTS); +} allowed_ports SEC(".maps"); + +/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in + * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally. + */ + +struct bpf_ct_opts___local { + s32 netns_id; + s32 error; + u8 l4proto; + u8 dir; + u8 reserved[2]; +} __attribute__((preserve_access_index)); + +#define BPF_F_CURRENT_NETNS (-1) + +extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, + struct bpf_sock_tuple *bpf_tuple, + __u32 len_tuple, + struct bpf_ct_opts___local *opts, + __u32 len_opts) __ksym; + +extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, + struct bpf_sock_tuple *bpf_tuple, + u32 len_tuple, + struct bpf_ct_opts___local *opts, + u32 len_opts) __ksym; + +extern void bpf_ct_release(struct nf_conn *ct) __ksym; + +static __always_inline void swap_eth_addr(__u8 *a, __u8 *b) +{ + __u8 tmp[ETH_ALEN]; + + __builtin_memcpy(tmp, a, ETH_ALEN); + __builtin_memcpy(a, b, ETH_ALEN); + __builtin_memcpy(b, tmp, ETH_ALEN); +} + +static __always_inline __u16 csum_fold(__u32 csum) +{ + csum = (csum & 0xffff) + (csum >> 16); + csum = (csum & 0xffff) + (csum >> 16); + return (__u16)~csum; +} + +static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, + __u32 csum) +{ + __u64 s = csum; + + s += (__u32)saddr; + s += (__u32)daddr; +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + s += proto + len; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + s += (proto + len) << 8; +#else +#error Unknown endian +#endif + s = (s & 0xffffffff) + (s >> 32); + s = (s & 0xffffffff) + (s >> 32); + + return csum_fold((__u32)s); +} + +static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __u32 csum) +{ + __u64 sum = csum; + int i; + +#pragma unroll + for (i = 0; i < 4; i++) + sum += (__u32)saddr->in6_u.u6_addr32[i]; + +#pragma unroll + for (i = 0; i < 4; i++) + sum += (__u32)daddr->in6_u.u6_addr32[i]; + + /* Don't combine additions to avoid 32-bit overflow. */ + sum += bpf_htonl(len); + sum += bpf_htonl(proto); + + sum = (sum & 0xffffffff) + (sum >> 32); + sum = (sum & 0xffffffff) + (sum >> 32); + + return csum_fold((__u32)sum); +} + +static __always_inline __u64 tcp_clock_ns(void) +{ + return bpf_ktime_get_ns(); +} + +static __always_inline __u32 tcp_ns_to_ts(__u64 ns) +{ + return ns / (NSEC_PER_SEC / TCP_TS_HZ); +} + +static __always_inline __u32 tcp_time_stamp_raw(void) +{ + return tcp_ns_to_ts(tcp_clock_ns()); +} + +struct tcpopt_context { + __u8 *ptr; + __u8 *end; + void *data_end; + __be32 *tsecr; + __u8 wscale; + bool option_timestamp; + bool option_sack; +}; + +static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) +{ + __u8 opcode, opsize; + + if (ctx->ptr >= ctx->end) + return 1; + if (ctx->ptr >= ctx->data_end) + return 1; + + opcode = ctx->ptr[0]; + + if (opcode == TCPOPT_EOL) + return 1; + if (opcode == TCPOPT_NOP) { + ++ctx->ptr; + return 0; + } + + if (ctx->ptr + 1 >= ctx->end) + return 1; + if (ctx->ptr + 1 >= ctx->data_end) + return 1; + opsize = ctx->ptr[1]; + if (opsize < 2) + return 1; + + if (ctx->ptr + opsize > ctx->end) + return 1; + + switch (opcode) { + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) + ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { + ctx->option_timestamp = true; + /* Client's tsval becomes our tsecr. */ + *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM) + ctx->option_sack = true; + break; + } + + ctx->ptr += opsize; + + return 0; +} + +static int tscookie_tcpopt_parse_batch(__u32 index, void *context) +{ + int i; + + for (i = 0; i < 7; i++) + if (tscookie_tcpopt_parse(context)) + return 1; + return 0; +} + +static __always_inline bool tscookie_init(struct tcphdr *tcp_header, + __u16 tcp_len, __be32 *tsval, + __be32 *tsecr, void *data_end) +{ + struct tcpopt_context loop_ctx = { + .ptr = (__u8 *)(tcp_header + 1), + .end = (__u8 *)tcp_header + tcp_len, + .data_end = data_end, + .tsecr = tsecr, + .wscale = TS_OPT_WSCALE_MASK, + .option_timestamp = false, + .option_sack = false, + }; + u32 cookie; + + bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0); + + if (!loop_ctx.option_timestamp) + return false; + + cookie = tcp_time_stamp_raw() & ~TSMASK; + cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK; + if (loop_ctx.option_sack) + cookie |= TS_OPT_SACK; + if (tcp_header->ece && tcp_header->cwr) + cookie |= TS_OPT_ECN; + *tsval = bpf_htonl(cookie); + + return true; +} + +static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale, + __u8 *ttl, bool ipv6) +{ + __u32 key = 0; + __u64 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value && *value != 0) { + if (ipv6) + *mss = (*value >> 32) & 0xffff; + else + *mss = *value & 0xffff; + *wscale = (*value >> 16) & 0xf; + *ttl = (*value >> 24) & 0xff; + return; + } + + *mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4; + *wscale = DEFAULT_WSCALE; + *ttl = DEFAULT_TTL; +} + +static __always_inline void values_inc_synacks(void) +{ + __u32 key = 1; + __u32 *value; + + value = bpf_map_lookup_elem(&values, &key); + if (value) + __sync_fetch_and_add(value, 1); +} + +static __always_inline bool check_port_allowed(__u16 port) +{ + __u32 i; + + for (i = 0; i < MAX_ALLOWED_PORTS; i++) { + __u32 key = i; + __u16 *value; + + value = bpf_map_lookup_elem(&allowed_ports, &key); + + if (!value) + break; + /* 0 is a terminator value. Check it first to avoid matching on + * a forbidden port == 0 and returning true. + */ + if (*value == 0) + break; + + if (*value == port) + return true; + } + + return false; +} + +struct header_pointers { + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + __u16 tcp_len; +}; + +static __always_inline int tcp_dissect(void *data, void *data_end, + struct header_pointers *hdr) +{ + hdr->eth = data; + if (hdr->eth + 1 > data_end) + return XDP_DROP; + + switch (bpf_ntohs(hdr->eth->h_proto)) { + case ETH_P_IP: + hdr->ipv6 = NULL; + + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv4 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4)) + return XDP_DROP; + if (hdr->ipv4->version != 4) + return XDP_DROP; + + if (hdr->ipv4->protocol != IPPROTO_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + break; + case ETH_P_IPV6: + hdr->ipv4 = NULL; + + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + if (hdr->ipv6 + 1 > data_end) + return XDP_DROP; + if (hdr->ipv6->version != 6) + return XDP_DROP; + + /* XXX: Extension headers are not supported and could circumvent + * XDP SYN flood protection. + */ + if (hdr->ipv6->nexthdr != NEXTHDR_TCP) + return XDP_PASS; + + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + break; + default: + /* XXX: VLANs will circumvent XDP SYN flood protection. */ + return XDP_PASS; + } + + if (hdr->tcp + 1 > data_end) + return XDP_DROP; + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_DROP; + + return XDP_TX; +} + +static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp) +{ + struct bpf_ct_opts___local ct_lookup_opts = { + .netns_id = BPF_F_CURRENT_NETNS, + .l4proto = IPPROTO_TCP, + }; + struct bpf_sock_tuple tup = {}; + struct nf_conn *ct; + __u32 tup_size; + + if (hdr->ipv4) { + /* TCP doesn't normally use fragments, and XDP can't reassemble + * them. + */ + if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) + return XDP_DROP; + + tup.ipv4.saddr = hdr->ipv4->saddr; + tup.ipv4.daddr = hdr->ipv4->daddr; + tup.ipv4.sport = hdr->tcp->source; + tup.ipv4.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv4); + } else if (hdr->ipv6) { + __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr)); + __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr)); + tup.ipv6.sport = hdr->tcp->source; + tup.ipv6.dport = hdr->tcp->dest; + tup_size = sizeof(tup.ipv6); + } else { + /* The verifier can't track that either ipv4 or ipv6 is not + * NULL. + */ + return XDP_ABORTED; + } + if (xdp) + ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + else + ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts)); + if (ct) { + unsigned long status = ct->status; + + bpf_ct_release(ct); + if (status & IPS_CONFIRMED_BIT) + return XDP_PASS; + } else if (ct_lookup_opts.error != -ENOENT) { + return XDP_ABORTED; + } + + /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */ + return XDP_TX; +} + +static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss, + __u8 wscale) +{ + __be32 *start = buf; + + *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss); + + if (!tsopt) + return buf - start; + + if (tsopt[0] & bpf_htonl(1 << 4)) + *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) | + (TCPOLEN_SACK_PERM << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + else + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP); + *buf++ = tsopt[0]; + *buf++ = tsopt[1]; + + if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf)) + *buf++ = bpf_htonl((TCPOPT_NOP << 24) | + (TCPOPT_WINDOW << 16) | + (TCPOLEN_WINDOW << 8) | + wscale); + + return buf - start; +} + +static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header, + __u32 cookie, __be32 *tsopt, + __u16 mss, __u8 wscale) +{ + void *tcp_options; + + tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (tsopt && (tsopt[0] & bpf_htonl(1 << 5))) + tcp_flag_word(tcp_header) |= TCP_FLAG_ECE; + tcp_header->doff = 5; /* doff is part of tcp_flag_word. */ + swap(tcp_header->source, tcp_header->dest); + tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1); + tcp_header->seq = bpf_htonl(cookie); + tcp_header->window = 0; + tcp_header->urg_ptr = 0; + tcp_header->check = 0; /* Calculate checksum later. */ + + tcp_options = (void *)(tcp_header + 1); + tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale); +} + +static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, false); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv4->saddr, hdr->ipv4->daddr); + hdr->ipv4->check = 0; /* Calculate checksum later. */ + hdr->ipv4->tos = 0; + hdr->ipv4->id = 0; + hdr->ipv4->ttl = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len); +} + +static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr, + __u32 cookie, __be32 *tsopt) +{ + __u8 wscale; + __u16 mss; + __u8 ttl; + + values_get_tcpipopts(&mss, &wscale, &ttl, true); + + swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest); + + swap(hdr->ipv6->saddr, hdr->ipv6->daddr); + *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000); + hdr->ipv6->hop_limit = ttl; + + tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale); + + hdr->tcp_len = hdr->tcp->doff * 4; + hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len); +} + +static __always_inline int syncookie_handle_syn(struct header_pointers *hdr, + void *ctx, + void *data, void *data_end, + bool xdp) +{ + __u32 old_pkt_size, new_pkt_size; + /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the + * BPF verifier if tsopt is not volatile. Volatile forces it to store + * the pointer value and use it directly, otherwise tcp_mkoptions is + * (mis)compiled like this: + * if (!tsopt) + * return buf - start; + * reg = stored_return_value_of_tscookie_init; + * if (reg) + * tsopt = tsopt_buf; + * else + * tsopt = NULL; + * ... + * *buf++ = tsopt[1]; + * It creates a dead branch where tsopt is assigned NULL, but the + * verifier can't prove it's dead and blocks the program. + */ + __be32 * volatile tsopt = NULL; + __be32 tsopt_buf[2] = {}; + __u16 ip_len; + __u32 cookie; + __s64 value; + + /* Checksum is not yet verified, but both checksum failure and TCP + * header checks return XDP_DROP, so the order doesn't matter. + */ + if (hdr->tcp->fin || hdr->tcp->rst) + return XDP_DROP; + + /* Issue SYN cookies on allowed ports, drop SYN packets on blocked + * ports. + */ + if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest))) + return XDP_DROP; + + if (hdr->ipv4) { + /* Check the IPv4 and TCP checksums before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_fold(value) != 0) + return XDP_DROP; /* Bad IPv4 checksum. */ + + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv4); + + value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp, + hdr->tcp_len); + } else if (hdr->ipv6) { + /* Check the TCP checksum before creating a SYNACK. */ + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr, + hdr->tcp_len, IPPROTO_TCP, value) != 0) + return XDP_DROP; /* Bad TCP checksum. */ + + ip_len = sizeof(*hdr->ipv6); + + value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp, + hdr->tcp_len); + } else { + return XDP_ABORTED; + } + + if (value < 0) + return XDP_ABORTED; + cookie = (__u32)value; + + if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, + &tsopt_buf[0], &tsopt_buf[1], data_end)) + tsopt = tsopt_buf; + + /* Check that there is enough space for a SYNACK. It also covers + * the check that the destination of the __builtin_memmove below + * doesn't overflow. + */ + if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + if (hdr->ipv4) { + if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) { + struct tcphdr *new_tcp_header; + + new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4); + __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp)); + hdr->tcp = new_tcp_header; + + hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4; + } + + tcpv4_gen_synack(hdr, cookie, tsopt); + } else if (hdr->ipv6) { + tcpv6_gen_synack(hdr, cookie, tsopt); + } else { + return XDP_ABORTED; + } + + /* Recalculate checksums. */ + hdr->tcp->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0); + if (value < 0) + return XDP_ABORTED; + if (hdr->ipv4) { + hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr, + hdr->ipv4->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + + hdr->ipv4->check = 0; + value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0); + if (value < 0) + return XDP_ABORTED; + hdr->ipv4->check = csum_fold(value); + } else if (hdr->ipv6) { + hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr, + &hdr->ipv6->daddr, + hdr->tcp_len, + IPPROTO_TCP, + value); + } else { + return XDP_ABORTED; + } + + /* Set the new packet size. */ + old_pkt_size = data_end - data; + new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4; + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size)) + return XDP_ABORTED; + } else { + if (bpf_skb_change_tail(ctx, new_pkt_size, 0)) + return XDP_ABORTED; + } + + values_inc_synacks(); + + return XDP_TX; +} + +static __always_inline int syncookie_handle_ack(struct header_pointers *hdr) +{ + int err; + + if (hdr->tcp->rst) + return XDP_DROP; + + if (hdr->ipv4) + err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp); + else if (hdr->ipv6) + err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp); + else + return XDP_ABORTED; + if (err) + return XDP_DROP; + + return XDP_PASS; +} + +static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) +{ + int ret; + + ret = tcp_dissect(data, data_end, hdr); + if (ret != XDP_TX) + return ret; + + ret = tcp_lookup(ctx, hdr, xdp); + if (ret != XDP_TX) + return ret; + + /* Packet is TCP and doesn't belong to an established connection. */ + + if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1) + return XDP_DROP; + + /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len + * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier. + */ + if (xdp) { + if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len)) + return XDP_ABORTED; + } else { + /* Without volatile the verifier throws this error: + * R9 32-bit pointer arithmetic prohibited + */ + volatile u64 old_len = data_end - data; + + if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0)) + return XDP_ABORTED; + } + + return XDP_TX; +} + +static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end, + struct header_pointers *hdr, bool xdp) +{ + if (hdr->ipv4) { + hdr->eth = data; + hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth); + /* IPV4_MAXLEN is needed when calculating checksum. + * At least sizeof(struct iphdr) is needed here to access ihl. + */ + if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end) + return XDP_ABORTED; + hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4; + } else if (hdr->ipv6) { + hdr->eth = data; + hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth); + hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6); + } else { + return XDP_ABORTED; + } + + if ((void *)hdr->tcp + TCP_MAXLEN > data_end) + return XDP_ABORTED; + + /* We run out of registers, tcp_len gets spilled to the stack, and the + * verifier forgets its min and max values checked above in tcp_dissect. + */ + hdr->tcp_len = hdr->tcp->doff * 4; + if (hdr->tcp_len < sizeof(*hdr->tcp)) + return XDP_ABORTED; + + return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) : + syncookie_handle_ack(hdr); +} + +SEC("xdp") +int syncookie_xdp(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(ctx, data, data_end, &hdr, true); + if (ret != XDP_TX) + return ret; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + return syncookie_part2(ctx, data, data_end, &hdr, true); +} + +SEC("tc") +int syncookie_tc(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct header_pointers hdr; + int ret; + + ret = syncookie_part1(skb, data, data_end, &hdr, false); + if (ret != XDP_TX) + return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT; + + data_end = (void *)(long)skb->data_end; + data = (void *)(long)skb->data; + + ret = syncookie_part2(skb, data, data_end, &hdr, false); + switch (ret) { + case XDP_PASS: + return TC_ACT_OK; + case XDP_TX: + return bpf_redirect(skb->ifindex, 0); + default: + return TC_ACT_SHOT; + } +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py index c0e7acd698ed..a6410bebe603 100755 --- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py +++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py @@ -58,7 +58,7 @@ class BlockParser(object): class ArrayParser(BlockParser): """ - A parser for extracting dicionaries of values from some BPF-related arrays. + A parser for extracting a set of values from some BPF-related arrays. @reader: a pointer to the open file to parse @array_name: name of the array to parse """ @@ -66,7 +66,7 @@ class ArrayParser(BlockParser): def __init__(self, reader, array_name): self.array_name = array_name - self.start_marker = re.compile(f'(static )?const char \* const {self.array_name}\[.*\] = {{\n') + self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n') super().__init__(reader) def search_block(self): @@ -80,15 +80,15 @@ class ArrayParser(BlockParser): Parse a block and return data as a dictionary. Items to extract must be on separate lines in the file. """ - pattern = re.compile('\[(BPF_\w*)\]\s*= "(.*)",?$') - entries = {} + pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$') + entries = set() while True: line = self.reader.readline() if line == '' or re.match(self.end_marker, line): break capture = pattern.search(line) if capture: - entries[capture.group(1)] = capture.group(2) + entries |= {capture.group(1)} return entries class InlineListParser(BlockParser): @@ -115,7 +115,7 @@ class InlineListParser(BlockParser): class FileExtractor(object): """ A generic reader for extracting data from a given file. This class contains - several helper methods that wrap arround parser objects to extract values + several helper methods that wrap around parser objects to extract values from different structures. This class does not offer a way to set a filename, which is expected to be defined in children classes. @@ -139,21 +139,19 @@ class FileExtractor(object): def get_types_from_array(self, array_name): """ - Search for and parse an array associating names to BPF_* enum members, - for example: + Search for and parse a list of allowed BPF_* enum members, for example: - const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", + const bool prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = true, + [BPF_PROG_TYPE_SOCKET_FILTER] = true, + [BPF_PROG_TYPE_KPROBE] = true, }; - Return a dictionary with the enum member names as keys and the - associated names as values, for example: + Return a set of the enum members, for example: - {'BPF_PROG_TYPE_UNSPEC': 'unspec', - 'BPF_PROG_TYPE_SOCKET_FILTER': 'socket_filter', - 'BPF_PROG_TYPE_KPROBE': 'kprobe'} + {'BPF_PROG_TYPE_UNSPEC', + 'BPF_PROG_TYPE_SOCKET_FILTER', + 'BPF_PROG_TYPE_KPROBE'} @array_name: name of the array to parse """ @@ -186,6 +184,27 @@ class FileExtractor(object): parser.search_block(start_marker) return parser.parse(pattern, end_marker) + def make_enum_map(self, names, enum_prefix): + """ + Search for and parse an enum containing BPF_* members, just as get_enum + does. However, instead of just returning a set of the variant names, + also generate a textual representation from them by (assuming and) + removing a provided prefix and lowercasing the remainder. Then return a + dict mapping from name to textual representation. + + @enum_values: a set of enum values; e.g., as retrieved by get_enum + @enum_prefix: the prefix to remove from each of the variants to infer + textual representation + """ + mapping = {} + for name in names: + if not name.startswith(enum_prefix): + raise Exception(f"enum variant {name} does not start with {enum_prefix}") + text = name[len(enum_prefix):].lower() + mapping[name] = text + + return mapping + def __get_description_list(self, start_marker, pattern, end_marker): parser = InlineListParser(self.reader) parser.search_block(start_marker) @@ -333,11 +352,9 @@ class ProgFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'prog.c') - def get_prog_types(self): - return self.get_types_from_array('prog_type_name') - def get_attach_types(self): - return self.get_types_from_array('attach_type_strings') + types = self.get_types_from_array('attach_types') + return self.make_enum_map(types, 'BPF_') def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') @@ -348,9 +365,6 @@ class MapFileExtractor(SourceFileExtractor): """ filename = os.path.join(BPFTOOL_DIR, 'map.c') - def get_map_types(self): - return self.get_types_from_array('map_type_name') - def get_map_help(self): return self.get_help_list('TYPE') @@ -363,30 +377,6 @@ class CgroupFileExtractor(SourceFileExtractor): def get_prog_attach_help(self): return self.get_help_list('ATTACH_TYPE') -class CommonFileExtractor(SourceFileExtractor): - """ - An extractor for bpftool's common.c. - """ - filename = os.path.join(BPFTOOL_DIR, 'common.c') - - def __init__(self): - super().__init__() - self.attach_types = {} - - def get_attach_types(self): - if not self.attach_types: - self.attach_types = self.get_types_from_array('attach_type_name') - return self.attach_types - - def get_cgroup_attach_types(self): - if not self.attach_types: - self.get_attach_types() - cgroup_types = {} - for (key, value) in self.attach_types.items(): - if key.find('BPF_CGROUP') != -1: - cgroup_types[key] = value - return cgroup_types - class GenericSourceExtractor(SourceFileExtractor): """ An extractor for generic source code files. @@ -403,14 +393,28 @@ class BpfHeaderExtractor(FileExtractor): """ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h') + def __init__(self): + super().__init__() + self.attach_types = {} + def get_prog_types(self): return self.get_enum('bpf_prog_type') - def get_map_types(self): - return self.get_enum('bpf_map_type') + def get_map_type_map(self): + names = self.get_enum('bpf_map_type') + return self.make_enum_map(names, 'BPF_MAP_TYPE_') - def get_attach_types(self): - return self.get_enum('bpf_attach_type') + def get_attach_type_map(self): + if not self.attach_types: + names = self.get_enum('bpf_attach_type') + self.attach_types = self.make_enum_map(names, 'BPF_') + return self.attach_types + + def get_cgroup_attach_type_map(self): + if not self.attach_types: + self.get_attach_type_map() + return {name: text for name, text in self.attach_types.items() + if name.startswith('BPF_CGROUP')} class ManPageExtractor(FileExtractor): """ @@ -467,12 +471,6 @@ class BashcompExtractor(FileExtractor): def get_prog_attach_types(self): return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES') - def get_map_types(self): - return self.get_bashcomp_list('BPFTOOL_MAP_CREATE_TYPES') - - def get_cgroup_attach_types(self): - return self.get_bashcomp_list('BPFTOOL_CGROUP_ATTACH_TYPES') - def verify(first_set, second_set, message): """ Print all values that differ between two sets. @@ -495,21 +493,12 @@ def main(): """) args = argParser.parse_args() - # Map types (enum) - bpf_info = BpfHeaderExtractor() - ref = bpf_info.get_map_types() - - map_info = MapFileExtractor() - source_map_items = map_info.get_map_types() - map_types_enum = set(source_map_items.keys()) - - verify(ref, map_types_enum, - f'Comparing BPF header (enum bpf_map_type) and {MapFileExtractor.filename} (map_type_name):') # Map types (names) - source_map_types = set(source_map_items.values()) + map_info = MapFileExtractor() + source_map_types = set(bpf_info.get_map_type_map().values()) source_map_types.discard('unspec') help_map_types = map_info.get_map_help() @@ -521,41 +510,16 @@ def main(): man_map_types = man_map_info.get_map_types() man_map_info.close() - bashcomp_info = BashcompExtractor() - bashcomp_map_types = bashcomp_info.get_map_types() - verify(source_map_types, help_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {MapFileExtractor.filename} (do_help() TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):') verify(source_map_types, man_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {ManMapExtractor.filename} (TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):') verify(help_map_options, man_map_options, f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):') - verify(source_map_types, bashcomp_map_types, - f'Comparing {MapFileExtractor.filename} (map_type_name) and {BashcompExtractor.filename} (BPFTOOL_MAP_CREATE_TYPES):') - - # Program types (enum) - - ref = bpf_info.get_prog_types() - - prog_info = ProgFileExtractor() - prog_types = set(prog_info.get_prog_types().keys()) - - verify(ref, prog_types, - f'Comparing BPF header (enum bpf_prog_type) and {ProgFileExtractor.filename} (prog_type_name):') - - # Attach types (enum) - - ref = bpf_info.get_attach_types() - bpf_info.close() - - common_info = CommonFileExtractor() - attach_types = common_info.get_attach_types() - - verify(ref, attach_types, - f'Comparing BPF header (enum bpf_attach_type) and {CommonFileExtractor.filename} (attach_type_name):') # Attach types (names) + prog_info = ProgFileExtractor() source_prog_attach_types = set(prog_info.get_attach_types().values()) help_prog_attach_types = prog_info.get_prog_attach_help() @@ -567,22 +531,23 @@ def main(): man_prog_attach_types = man_prog_info.get_attach_types() man_prog_info.close() - bashcomp_info.reset_read() # We stopped at map types, rewind + + bashcomp_info = BashcompExtractor() bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types() + bashcomp_info.close() verify(source_prog_attach_types, help_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_prog_attach_types, man_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {ManProgExtractor.filename} (ATTACH_TYPE):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):') verify(help_prog_options, man_prog_options, f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):') verify(source_prog_attach_types, bashcomp_prog_attach_types, - f'Comparing {ProgFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') + f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):') # Cgroup attach types - - source_cgroup_attach_types = set(common_info.get_cgroup_attach_types().values()) - common_info.close() + source_cgroup_attach_types = set(bpf_info.get_cgroup_attach_type_map().values()) + bpf_info.close() cgroup_info = CgroupFileExtractor() help_cgroup_attach_types = cgroup_info.get_prog_attach_help() @@ -594,17 +559,12 @@ def main(): man_cgroup_attach_types = man_cgroup_info.get_attach_types() man_cgroup_info.close() - bashcomp_cgroup_attach_types = bashcomp_info.get_cgroup_attach_types() - bashcomp_info.close() - verify(source_cgroup_attach_types, help_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):') verify(source_cgroup_attach_types, man_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') + f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):') verify(help_cgroup_options, man_cgroup_options, f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):') - verify(source_cgroup_attach_types, bashcomp_cgroup_attach_types, - f'Comparing {CommonFileExtractor.filename} (attach_type_strings) and {BashcompExtractor.filename} (BPFTOOL_CGROUP_ATTACH_TYPES):') # Options for remaining commands diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h index 128989bed8b7..fb4f4714eeb4 100644 --- a/tools/testing/selftests/bpf/test_btf.h +++ b/tools/testing/selftests/bpf/test_btf.h @@ -4,6 +4,8 @@ #ifndef _TEST_BTF_H #define _TEST_BTF_H +#define BTF_END_RAW 0xdeadbeef + #define BTF_INFO_ENC(kind, kind_flag, vlen) \ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN)) @@ -39,6 +41,7 @@ #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) +#define BTF_ENUM64_ENC(name, val_lo32, val_hi32) (name), (val_lo32), (val_hi32) #define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \ ((bitfield_size) << 24 | (bits_offset)) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c639f2e56fc5..3561c97701f2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1604,11 +1604,8 @@ int main(int argc, char **argv) struct prog_test_def *test = &prog_test_defs[i]; test->test_num = i + 1; - if (should_run(&env.test_selector, - test->test_num, test->test_name)) - test->should_run = true; - else - test->should_run = false; + test->should_run = should_run(&env.test_selector, + test->test_num, test->test_name); if ((test->run_test == NULL && test->run_serial_test == NULL) || (test->run_test != NULL && test->run_serial_test != NULL)) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 372579c9f45e..f9d553fbf68a 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -51,12 +51,24 @@ #endif #define MAX_INSNS BPF_MAXINSNS +#define MAX_EXPECTED_INSNS 32 +#define MAX_UNEXPECTED_INSNS 32 #define MAX_TEST_INSNS 1000000 #define MAX_FIXUPS 8 #define MAX_NR_MAPS 23 #define MAX_TEST_RUNS 8 #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 +#define MAX_FUNC_INFOS 8 +#define MAX_BTF_STRINGS 256 +#define MAX_BTF_TYPES 256 + +#define INSN_OFF_MASK ((__s16)0xFFFF) +#define INSN_IMM_MASK ((__s32)0xFFFFFFFF) +#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef) + +#define DEFAULT_LIBBPF_LOG_LEVEL 4 +#define VERBOSE_LIBBPF_LOG_LEVEL 1 #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) @@ -79,6 +91,23 @@ struct bpf_test { const char *descr; struct bpf_insn insns[MAX_INSNS]; struct bpf_insn *fill_insns; + /* If specified, test engine looks for this sequence of + * instructions in the BPF program after loading. Allows to + * test rewrites applied by verifier. Use values + * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm` + * fields if content does not matter. The test case fails if + * specified instructions are not found. + * + * The sequence could be split into sub-sequences by adding + * SKIP_INSNS instruction at the end of each sub-sequence. In + * such case sub-sequences are searched for one after another. + */ + struct bpf_insn expected_insns[MAX_EXPECTED_INSNS]; + /* If specified, test engine applies same pattern matching + * logic as for `expected_insns`. If the specified pattern is + * matched test case is marked as failed. + */ + struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS]; int fixup_map_hash_8b[MAX_FIXUPS]; int fixup_map_hash_48b[MAX_FIXUPS]; int fixup_map_hash_16b[MAX_FIXUPS]; @@ -135,6 +164,14 @@ struct bpf_test { }; enum bpf_attach_type expected_attach_type; const char *kfunc; + struct bpf_func_info func_info[MAX_FUNC_INFOS]; + int func_info_cnt; + char btf_strings[MAX_BTF_STRINGS]; + /* A set of BTF types to load when specified, + * use macro definitions from test_btf.h, + * must end with BTF_END_RAW + */ + __u32 btf_types[MAX_BTF_TYPES]; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -388,6 +425,45 @@ static void bpf_fill_torturous_jumps(struct bpf_test *self) } } +static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self) +{ + struct bpf_insn *insn = self->fill_insns; + /* This test was added to catch a specific use after free + * error, which happened upon BPF program reallocation. + * Reallocation is handled by core.c:bpf_prog_realloc, which + * reuses old memory if page boundary is not crossed. The + * value of `len` is chosen to cross this boundary on bpf_loop + * patching. + */ + const int len = getpagesize() - 25; + int callback_load_idx; + int callback_idx; + int i = 0; + + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1); + callback_load_idx = i; + insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, + BPF_REG_2, BPF_PSEUDO_FUNC, 0, + 777 /* filled below */); + insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0); + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0); + insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop); + + while (i < len - 3) + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + callback_idx = i; + insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0); + insn[i++] = BPF_EXIT_INSN(); + + insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1; + self->func_info[1].insn_off = callback_idx; + self->prog_len = i; + assert(i == len); +} + /* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */ #define BPF_SK_LOOKUP(func) \ /* struct bpf_sock_tuple tuple = {} */ \ @@ -664,34 +740,66 @@ static __u32 btf_raw_types[] = { BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */ }; -static int load_btf(void) +static char bpf_vlog[UINT_MAX >> 8]; + +static int load_btf_spec(__u32 *types, int types_len, + const char *strings, int strings_len) { struct btf_header hdr = { .magic = BTF_MAGIC, .version = BTF_VERSION, .hdr_len = sizeof(struct btf_header), - .type_len = sizeof(btf_raw_types), - .str_off = sizeof(btf_raw_types), - .str_len = sizeof(btf_str_sec), + .type_len = types_len, + .str_off = types_len, + .str_len = strings_len, }; void *ptr, *raw_btf; int btf_fd; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .log_buf = bpf_vlog, + .log_size = sizeof(bpf_vlog), + .log_level = (verbose + ? VERBOSE_LIBBPF_LOG_LEVEL + : DEFAULT_LIBBPF_LOG_LEVEL), + ); - ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) + - sizeof(btf_str_sec)); + raw_btf = malloc(sizeof(hdr) + types_len + strings_len); + ptr = raw_btf; memcpy(ptr, &hdr, sizeof(hdr)); ptr += sizeof(hdr); - memcpy(ptr, btf_raw_types, hdr.type_len); + memcpy(ptr, types, hdr.type_len); ptr += hdr.type_len; - memcpy(ptr, btf_str_sec, hdr.str_len); + memcpy(ptr, strings, hdr.str_len); ptr += hdr.str_len; - btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, NULL); - free(raw_btf); + btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts); if (btf_fd < 0) - return -1; - return btf_fd; + printf("Failed to load BTF spec: '%s'\n", strerror(errno)); + + free(raw_btf); + + return btf_fd < 0 ? -1 : btf_fd; +} + +static int load_btf(void) +{ + return load_btf_spec(btf_raw_types, sizeof(btf_raw_types), + btf_str_sec, sizeof(btf_str_sec)); +} + +static int load_btf_for_test(struct bpf_test *test) +{ + int types_num = 0; + + while (types_num < MAX_BTF_TYPES && + test->btf_types[types_num] != BTF_END_RAW) + ++types_num; + + int types_len = types_num * sizeof(test->btf_types[0]); + + return load_btf_spec(test->btf_types, types_len, + test->btf_strings, sizeof(test->btf_strings)); } static int create_map_spin_lock(void) @@ -770,8 +878,6 @@ static int create_map_kptr(void) return fd; } -static char bpf_vlog[UINT_MAX >> 8]; - static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, struct bpf_insn *prog, int *map_fds) { @@ -1126,10 +1232,218 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } +static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + __u32 buf_element_size = sizeof(struct bpf_insn); + + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_obj_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %d is not multiple of %d\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)*buf; + if (bpf_obj_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_obj_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + return -1; +} + +static bool is_null_insn(struct bpf_insn *insn) +{ + struct bpf_insn null_insn = {}; + + return memcmp(insn, &null_insn, sizeof(null_insn)) == 0; +} + +static bool is_skip_insn(struct bpf_insn *insn) +{ + struct bpf_insn skip_insn = SKIP_INSNS(); + + return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0; +} + +static int null_terminated_insn_len(struct bpf_insn *seq, int max_len) +{ + int i; + + for (i = 0; i < max_len; ++i) { + if (is_null_insn(&seq[i])) + return i; + } + return max_len; +} + +static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked) +{ + struct bpf_insn orig_masked; + + memcpy(&orig_masked, orig, sizeof(orig_masked)); + if (masked->imm == INSN_IMM_MASK) + orig_masked.imm = INSN_IMM_MASK; + if (masked->off == INSN_OFF_MASK) + orig_masked.off = INSN_OFF_MASK; + + return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0; +} + +static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq, + int seq_len, int subseq_len) +{ + int i, j; + + if (subseq_len > seq_len) + return -1; + + for (i = 0; i < seq_len - subseq_len + 1; ++i) { + bool found = true; + + for (j = 0; j < subseq_len; ++j) { + if (!compare_masked_insn(&seq[i + j], &subseq[j])) { + found = false; + break; + } + } + if (found) + return i; + } + + return -1; +} + +static int find_skip_insn_marker(struct bpf_insn *seq, int len) +{ + int i; + + for (i = 0; i < len; ++i) + if (is_skip_insn(&seq[i])) + return i; + + return -1; +} + +/* Return true if all sub-sequences in `subseqs` could be found in + * `seq` one after another. Sub-sequences are separated by a single + * nil instruction. + */ +static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs, + int seq_len, int max_subseqs_len) +{ + int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len); + + while (subseqs_len > 0) { + int skip_idx = find_skip_insn_marker(subseqs, subseqs_len); + int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx; + int subseq_idx = find_insn_subseq(seq, subseqs, + seq_len, cur_subseq_len); + + if (subseq_idx < 0) + return false; + seq += subseq_idx + cur_subseq_len; + seq_len -= subseq_idx + cur_subseq_len; + subseqs += cur_subseq_len + 1; + subseqs_len -= cur_subseq_len + 1; + } + + return true; +} + +static void print_insn(struct bpf_insn *buf, int cnt) +{ + int i; + + printf(" addr op d s off imm\n"); + for (i = 0; i < cnt; ++i) { + struct bpf_insn *insn = &buf[i]; + + if (is_null_insn(insn)) + break; + + if (is_skip_insn(insn)) + printf(" ...\n"); + else + printf(" %04x: %02x %1x %x %04hx %08x\n", + i, insn->code, insn->dst_reg, + insn->src_reg, insn->off, insn->imm); + } +} + +static bool check_xlated_program(struct bpf_test *test, int fd_prog) +{ + struct bpf_insn *buf; + int cnt; + bool result = true; + bool check_expected = !is_null_insn(test->expected_insns); + bool check_unexpected = !is_null_insn(test->unexpected_insns); + + if (!check_expected && !check_unexpected) + goto out; + + if (get_xlated_program(fd_prog, &buf, &cnt)) { + printf("FAIL: can't get xlated program\n"); + result = false; + goto out; + } + + if (check_expected && + !find_all_insn_subseqs(buf, test->expected_insns, + cnt, MAX_EXPECTED_INSNS)) { + printf("FAIL: can't find expected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); + printf("Expected subsequence:\n"); + print_insn(test->expected_insns, MAX_EXPECTED_INSNS); + } + } + + if (check_unexpected && + find_all_insn_subseqs(buf, test->unexpected_insns, + cnt, MAX_UNEXPECTED_INSNS)) { + printf("FAIL: found unexpected subsequence of instructions\n"); + result = false; + if (verbose) { + printf("Program:\n"); + print_insn(buf, cnt); + printf("Un-expected subsequence:\n"); + print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS); + } + } + + free(buf); + out: + return result; +} + static void do_test_single(struct bpf_test *test, bool unpriv, int *passes, int *errors) { - int fd_prog, expected_ret, alignment_prevented_execution; + int fd_prog, btf_fd, expected_ret, alignment_prevented_execution; int prog_len, prog_type = test->prog_type; struct bpf_insn *prog = test->insns; LIBBPF_OPTS(bpf_prog_load_opts, opts); @@ -1141,8 +1455,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u32 pflags; int i, err; + fd_prog = -1; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; + btf_fd = -1; if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; @@ -1175,11 +1491,11 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.expected_attach_type = test->expected_attach_type; if (verbose) - opts.log_level = 1; + opts.log_level = VERBOSE_LIBBPF_LOG_LEVEL; else if (expected_ret == VERBOSE_ACCEPT) opts.log_level = 2; else - opts.log_level = 4; + opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { @@ -1197,6 +1513,19 @@ static void do_test_single(struct bpf_test *test, bool unpriv, opts.attach_btf_id = attach_btf_id; } + if (test->btf_types[0] != 0) { + btf_fd = load_btf_for_test(test); + if (btf_fd < 0) + goto fail_log; + opts.prog_btf_fd = btf_fd; + } + + if (test->func_info_cnt != 0) { + opts.func_info = test->func_info; + opts.func_info_cnt = test->func_info_cnt; + opts.func_info_rec_size = sizeof(test->func_info[0]); + } + opts.log_buf = bpf_vlog; opts.log_size = sizeof(bpf_vlog); fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts); @@ -1262,6 +1591,9 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (verbose) printf(", verifier log:\n%s", bpf_vlog); + if (!check_xlated_program(test, fd_prog)) + goto fail_log; + run_errs = 0; run_successes = 0; if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) { @@ -1305,6 +1637,7 @@ close_fds: if (test->fill_insns) free(test->fill_insns); close(fd_prog); + close(btf_fd); for (i = 0; i < MAX_NR_MAPS; i++) close(map_fds[i]); sched_yield(); diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index 392d28cc4e58..49936c4c8567 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -106,9 +106,9 @@ bpftool prog loadall \ bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0 bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0 -ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0 -ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 -ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 +ip link set dev veth1 xdp pinned $BPF_DIR/progs/xdp_redirect_map_0 +ip link set dev veth2 xdp pinned $BPF_DIR/progs/xdp_redirect_map_1 +ip link set dev veth3 xdp pinned $BPF_DIR/progs/xdp_redirect_map_2 ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh index c2f0ddb45531..c3d82e0a7378 100755 --- a/tools/testing/selftests/bpf/test_xdping.sh +++ b/tools/testing/selftests/bpf/test_xdping.sh @@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do test "$client_args" "$server_args" done +# Test drv mode +test "-I veth1 -N" "-I veth0 -s -N" +test "-I veth1 -N -c 10" "-I veth0 -s -N" + echo "OK. All tests passed" exit 0 diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 567500299231..096a957594cd 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -47,7 +47,7 @@ # conflict with any existing interface # * tests the veth and xsk layers of the topology # -# See the source xdpxceiver.c for information on each test +# See the source xskxceiver.c for information on each test # # Kernel configuration: # --------------------- @@ -160,14 +160,14 @@ statusList=() TEST_NAME="XSK_SELFTESTS_SOFTIRQ" -execxdpxceiver +exec_xskxceiver cleanup_exit ${VETH0} ${VETH1} ${NS1} TEST_NAME="XSK_SELFTESTS_BUSY_POLL" busy_poll=1 setup_vethPairs -execxdpxceiver +exec_xskxceiver ## END TESTS diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c new file mode 100644 index 000000000000..a535d41dc20d --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -0,0 +1,264 @@ +#define BTF_TYPES \ + .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \ + .btf_types = { \ + /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \ + /* 2: int* */ BTF_PTR_ENC(1), \ + /* 3: void* */ BTF_PTR_ENC(0), \ + /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 3), \ + /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \ + BTF_FUNC_PROTO_ARG_ENC(5, 1), \ + BTF_FUNC_PROTO_ARG_ENC(7, 2), \ + /* 6: main */ BTF_FUNC_ENC(20, 4), \ + /* 7: callback */ BTF_FUNC_ENC(11, 5), \ + BTF_END_RAW \ + } + +#define MAIN_TYPE 6 +#define CALLBACK_TYPE 7 + +/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF + * fields for pseudo calls + */ +#define PSEUDO_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \ + INSN_OFF_MASK, INSN_IMM_MASK) + +/* can't use BPF_FUNC_loop constant, + * do_mix_fixups adjusts the IMM field + */ +#define HELPER_CALL_INSN() \ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK) + +{ + "inline simple bpf_loop call", + .insns = { + /* main */ + /* force verifier state branching to verify logic on first and + * subsequent bpf_loop insn processing steps + */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, flags non-zero", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -10), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, +{ + "don't inline bpf_loop call, callback non-constant", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */ + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 3), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { HELPER_CALL_INSN() }, + .unexpected_insns = { PSEUDO_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 14, CALLBACK_TYPE }, + { 16, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline and a dead func", + .insns = { + /* main */ + + /* A reference to callback #1 to make verifier count it as a func. + * This reference is overwritten below and callback #1 is dead. + */ + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + /* callback #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .runs = 0, + .func_info = { + { 0, MAIN_TYPE }, + { 10, CALLBACK_TYPE }, + { 12, CALLBACK_TYPE } + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "bpf_loop_inline stack locations for loop vars", + .insns = { + /* main */ + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + /* bpf_loop call #1 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* bpf_loop call #2 */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + /* call func and exit */ + BPF_CALL_REL(2), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* func */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2), + BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6), + BPF_RAW_INSN(0, 0, 0, 0, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* callback */ + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_insns = { + BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77), + SKIP_INSNS(), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + /* offsets are the same as in the first call */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24), + SKIP_INSNS(), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55), + SKIP_INSNS(), + /* offsets differ from main because of different offset + * in BPF_ST_MEM instruction + */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40), + }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + .func_info = { + { 0, MAIN_TYPE }, + { 16, MAIN_TYPE }, + { 25, CALLBACK_TYPE }, + }, + .func_info_cnt = 3, + BTF_TYPES +}, +{ + "inline bpf_loop call in a big program", + .insns = {}, + .fill_helper = bpf_fill_big_prog_with_loop_1, + .expected_insns = { PSEUDO_CALL_INSN() }, + .unexpected_insns = { HELPER_CALL_INSN() }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, + .func_info_cnt = 2, + BTF_TYPES +}, + +#undef HELPER_CALL_INSN +#undef PSEUDO_CALL_INSN +#undef CALLBACK_TYPE +#undef MAIN_TYPE +#undef BTF_TYPES diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 743ed34c1238..3fb4f69b1962 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -219,6 +219,59 @@ .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed", }, { + "calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_ref", 10 }, + }, + .result_unpriv = REJECT, + .result = REJECT, + .errstr = "R1 must be referenced", +}, +{ + "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_kfunc_btf_id = { + { "bpf_kfunc_call_test_acquire", 3 }, + { "bpf_kfunc_call_test_ref", 8 }, + { "bpf_kfunc_call_test_release", 10 }, + }, + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ "calls: basic sanity", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index 6ddc418fdfaf..1a27a6210554 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -864,3 +864,24 @@ .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "jeq32/jne32: bounds checking", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 563), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0), + BPF_ALU32_REG(BPF_OR, BPF_REG_2, BPF_REG_6), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_2, 8, 5), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_2, 500, 2), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/jump.c b/tools/testing/selftests/bpf/verifier/jump.c index 6f951d1ff0a4..497fe17d2eaf 100644 --- a/tools/testing/selftests/bpf/verifier/jump.c +++ b/tools/testing/selftests/bpf/verifier/jump.c @@ -373,3 +373,25 @@ .result = ACCEPT, .retval = 3, }, +{ + "jump & dead code elimination", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0), + BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 32767), + BPF_JMP_IMM(BPF_JSGE, BPF_REG_3, 0, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0x8000, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32767), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 0, 1), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = 2, +}, diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index e0bb04a97e10..b86ae4a2e5c5 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -30,8 +30,7 @@ DEFAULT_COMMAND="./test_progs" MOUNT_DIR="mnt" ROOTFS_IMAGE="root.img" OUTPUT_DIR="$HOME/.bpf_selftests" -KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/config-latest.${ARCH}" -KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/config-latest.${ARCH}" +KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}") INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX" NUM_COMPILE_JOBS="$(nproc)" LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")" @@ -269,26 +268,42 @@ is_rel_path() [[ ${path:0:1} != "/" ]] } +do_update_kconfig() +{ + local kernel_checkout="$1" + local kconfig_file="$2" + + rm -f "$kconfig_file" 2> /dev/null + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + cat "$kconfig_src" >> "$kconfig_file" + done +} + update_kconfig() { - local kconfig_file="$1" - local update_command="curl -sLf ${KCONFIG_URL} -o ${kconfig_file}" - # Github does not return the "last-modified" header when retrieving the - # raw contents of the file. Use the API call to get the last-modified - # time of the kernel config and only update the config if it has been - # updated after the previously cached config was created. This avoids - # unnecessarily compiling the kernel and selftests. - if [[ -f "${kconfig_file}" ]]; then - local last_modified_date="$(curl -sL -D - "${KCONFIG_API_URL}" -o /dev/null | \ - grep "last-modified" | awk -F ': ' '{print $2}')" - local remote_modified_timestamp="$(date -d "${last_modified_date}" +"%s")" - local local_creation_timestamp="$(stat -c %Y "${kconfig_file}")" + local kernel_checkout="$1" + local kconfig_file="$2" - if [[ "${remote_modified_timestamp}" -gt "${local_creation_timestamp}" ]]; then - ${update_command} - fi + if [[ -f "${kconfig_file}" ]]; then + local local_modified="$(stat -c %Y "${kconfig_file}")" + + for config in "${KCONFIG_REL_PATHS[@]}"; do + local kconfig_src="${kernel_checkout}/${config}" + local src_modified="$(stat -c %Y "${kconfig_src}")" + # Only update the config if it has been updated after the + # previously cached config was created. This avoids + # unnecessarily compiling the kernel and selftests. + if [[ "${src_modified}" -gt "${local_modified}" ]]; then + do_update_kconfig "$kernel_checkout" "$kconfig_file" + # Once we have found one outdated configuration + # there is no need to check other ones. + break + fi + done else - ${update_command} + do_update_kconfig "$kernel_checkout" "$kconfig_file" fi } @@ -372,7 +387,7 @@ main() mkdir -p "${OUTPUT_DIR}" mkdir -p "${mount_dir}" - update_kconfig "${kconfig_file}" + update_kconfig "${kernel_checkout}" "${kconfig_file}" recompile_kernel "${kernel_checkout}" "${make_command}" diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c new file mode 100644 index 000000000000..d874ddfb39c4 --- /dev/null +++ b/tools/testing/selftests/bpf/xdp_synproxy.c @@ -0,0 +1,466 @@ +// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include <stdnoreturn.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <getopt.h> +#include <signal.h> +#include <sys/types.h> +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <net/if.h> +#include <linux/if_link.h> +#include <linux/limits.h> + +static unsigned int ifindex; +static __u32 attached_prog_id; +static bool attached_tc; + +static void noreturn cleanup(int sig) +{ + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + int prog_fd; + int err; + + if (attached_prog_id == 0) + exit(0); + + if (attached_tc) { + LIBBPF_OPTS(bpf_tc_hook, hook, + .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS); + + err = bpf_tc_hook_destroy(&hook); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err)); + fprintf(stderr, "Failed to destroy the TC hook\n"); + exit(1); + } + exit(0); + } + + prog_fd = bpf_prog_get_fd_by_id(attached_prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + err = bpf_xdp_attach(ifindex, -1, 0, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err)); + fprintf(stderr, "Failed to detach XDP program\n"); + exit(1); + } + } else { + opts.old_prog_fd = prog_fd; + err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts); + close(prog_fd); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err)); + /* Not an error if already replaced by someone else. */ + if (err != -EEXIST) { + fprintf(stderr, "Failed to detach XDP program\n"); + exit(1); + } + } + } + exit(0); +} + +static noreturn void usage(const char *progname) +{ + fprintf(stderr, "Usage: %s [--iface <iface>|--prog <prog_id>] [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>] [--ports <port1>,<port2>,...] [--single] [--tc]\n", + progname); + exit(1); +} + +static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit) +{ + unsigned long res; + char *endptr; + + errno = 0; + res = strtoul(arg, &endptr, 10); + if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit) + usage(progname); + + return res; +} + +static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id, + __u64 *tcpipopts, char **ports, bool *single, bool *tc) +{ + static struct option long_options[] = { + { "help", no_argument, NULL, 'h' }, + { "iface", required_argument, NULL, 'i' }, + { "prog", required_argument, NULL, 'x' }, + { "mss4", required_argument, NULL, 4 }, + { "mss6", required_argument, NULL, 6 }, + { "wscale", required_argument, NULL, 'w' }, + { "ttl", required_argument, NULL, 't' }, + { "ports", required_argument, NULL, 'p' }, + { "single", no_argument, NULL, 's' }, + { "tc", no_argument, NULL, 'c' }, + { NULL, 0, NULL, 0 }, + }; + unsigned long mss4, mss6, wscale, ttl; + unsigned int tcpipopts_mask = 0; + + if (argc < 2) + usage(argv[0]); + + *ifindex = 0; + *prog_id = 0; + *tcpipopts = 0; + *ports = NULL; + *single = false; + + while (true) { + int opt; + + opt = getopt_long(argc, argv, "", long_options, NULL); + if (opt == -1) + break; + + switch (opt) { + case 'h': + usage(argv[0]); + break; + case 'i': + *ifindex = if_nametoindex(optarg); + if (*ifindex == 0) + usage(argv[0]); + break; + case 'x': + *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX); + if (*prog_id == 0) + usage(argv[0]); + break; + case 4: + mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 0; + break; + case 6: + mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX); + tcpipopts_mask |= 1 << 1; + break; + case 'w': + wscale = parse_arg_ul(argv[0], optarg, 14); + tcpipopts_mask |= 1 << 2; + break; + case 't': + ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX); + tcpipopts_mask |= 1 << 3; + break; + case 'p': + *ports = optarg; + break; + case 's': + *single = true; + break; + case 'c': + *tc = true; + break; + default: + usage(argv[0]); + } + } + if (optind < argc) + usage(argv[0]); + + if (tcpipopts_mask == 0xf) { + if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0) + usage(argv[0]); + *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4; + } else if (tcpipopts_mask != 0) { + usage(argv[0]); + } + + if (*ifindex != 0 && *prog_id != 0) + usage(argv[0]); + if (*ifindex == 0 && *prog_id == 0) + usage(argv[0]); +} + +static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc) +{ + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + char xdp_filename[PATH_MAX]; + struct bpf_program *prog; + struct bpf_object *obj; + int prog_fd; + int err; + + snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0); + obj = bpf_object__open_file(xdp_filename, NULL); + err = libbpf_get_error(obj); + if (err < 0) { + fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err)); + return err; + } + + err = bpf_object__load(obj); + if (err < 0) { + fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err)); + return err; + } + + prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp"); + if (!prog) { + fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n"); + return -ENOENT; + } + + prog_fd = bpf_program__fd(prog); + + err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); + if (err < 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + goto out; + } + attached_tc = tc; + attached_prog_id = info.id; + signal(SIGINT, cleanup); + signal(SIGTERM, cleanup); + if (tc) { + LIBBPF_OPTS(bpf_tc_hook, hook, + .ifindex = ifindex, + .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, opts, + .handle = 1, + .priority = 1, + .prog_fd = prog_fd); + + err = bpf_tc_hook_create(&hook); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_hook_create: %s\n", + strerror(-err)); + goto fail; + } + err = bpf_tc_attach(&hook, &opts); + if (err < 0) { + fprintf(stderr, "Error: bpf_tc_attach: %s\n", + strerror(-err)); + goto fail; + } + + } else { + err = bpf_xdp_attach(ifindex, prog_fd, + XDP_FLAGS_UPDATE_IF_NOEXIST, NULL); + if (err < 0) { + fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", + strerror(-err)); + goto fail; + } + } + err = 0; +out: + bpf_object__close(obj); + return err; +fail: + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + attached_prog_id = 0; + goto out; +} + +static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd) +{ + struct bpf_prog_info prog_info; + __u32 map_ids[8]; + __u32 info_len; + int prog_fd; + int err; + int i; + + *values_map_fd = -1; + *ports_map_fd = -1; + + prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (prog_fd < 0) { + fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd)); + return prog_fd; + } + + prog_info = (struct bpf_prog_info) { + .nr_map_ids = 8, + .map_ids = (__u64)map_ids, + }; + info_len = sizeof(prog_info); + + err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + goto out; + } + + if (prog_info.nr_map_ids < 2) { + fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n", + prog_info.nr_map_ids); + err = -ENOENT; + goto out; + } + + for (i = 0; i < prog_info.nr_map_ids; i++) { + struct bpf_map_info map_info = {}; + int map_fd; + + err = bpf_map_get_fd_by_id(map_ids[i]); + if (err < 0) { + fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err)); + goto err_close_map_fds; + } + map_fd = err; + + info_len = sizeof(map_info); + err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len); + if (err != 0) { + fprintf(stderr, "Error: bpf_obj_get_info_by_fd: %s\n", strerror(-err)); + close(map_fd); + goto err_close_map_fds; + } + if (strcmp(map_info.name, "values") == 0) { + *values_map_fd = map_fd; + continue; + } + if (strcmp(map_info.name, "allowed_ports") == 0) { + *ports_map_fd = map_fd; + continue; + } + close(map_fd); + } + + if (*values_map_fd != -1 && *ports_map_fd != -1) { + err = 0; + goto out; + } + + err = -ENOENT; + +err_close_map_fds: + if (*values_map_fd != -1) + close(*values_map_fd); + if (*ports_map_fd != -1) + close(*ports_map_fd); + *values_map_fd = -1; + *ports_map_fd = -1; + +out: + close(prog_fd); + return err; +} + +int main(int argc, char *argv[]) +{ + int values_map_fd, ports_map_fd; + __u64 tcpipopts; + bool firstiter; + __u64 prevcnt; + __u32 prog_id; + char *ports; + bool single; + int err = 0; + bool tc; + + parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports, + &single, &tc); + + if (prog_id == 0) { + if (!tc) { + err = bpf_xdp_query_id(ifindex, 0, &prog_id); + if (err < 0) { + fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n", + strerror(-err)); + goto out; + } + } + if (prog_id == 0) { + err = syncookie_attach(argv[0], ifindex, tc); + if (err < 0) + goto out; + prog_id = attached_prog_id; + } + } + + err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd); + if (err < 0) + goto out; + + if (ports) { + __u16 port_last = 0; + __u32 port_idx = 0; + char *p = ports; + + fprintf(stderr, "Replacing allowed ports\n"); + + while (p && *p != '\0') { + char *token = strsep(&p, ","); + __u16 port; + + port = parse_arg_ul(argv[0], token, UINT16_MAX); + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add port %u (index %u)\n", + port, port_idx); + goto out_close_maps; + } + fprintf(stderr, "Added port %u\n", port); + port_idx++; + } + err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n", + port_idx); + goto out_close_maps; + } + } + + if (tcpipopts) { + __u32 key = 0; + + fprintf(stderr, "Replacing TCP/IP options\n"); + + err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + } + + if ((ports || tcpipopts) && attached_prog_id == 0 && !single) + goto out_close_maps; + + prevcnt = 0; + firstiter = true; + while (true) { + __u32 key = 1; + __u64 value; + + err = bpf_map_lookup_elem(values_map_fd, &key, &value); + if (err != 0) { + fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err)); + goto out_close_maps; + } + if (firstiter) { + prevcnt = value; + firstiter = false; + } + if (single) { + printf("Total SYNACKs generated: %llu\n", value); + break; + } + printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value); + prevcnt = value; + sleep(1); + } + +out_close_maps: + close(values_map_fd); + close(ports_map_fd); +out: + return err == 0 ? 0 : 1; +} diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c new file mode 100644 index 000000000000..f2721a4ae7c5 --- /dev/null +++ b/tools/testing/selftests/bpf/xsk.c @@ -0,0 +1,1268 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <asm/barrier.h> +#include <linux/compiler.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_xdp.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/sockios.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <linux/if_link.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include "xsk.h" + +#ifndef SOL_XDP + #define SOL_XDP 283 +#endif + +#ifndef AF_XDP + #define AF_XDP 44 +#endif + +#ifndef PF_XDP + #define PF_XDP AF_XDP +#endif + +#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) + +enum xsk_prog { + XSK_PROG_FALLBACK, + XSK_PROG_REDIRECT_FLAGS, +}; + +struct xsk_umem { + struct xsk_ring_prod *fill_save; + struct xsk_ring_cons *comp_save; + char *umem_area; + struct xsk_umem_config config; + int fd; + int refcount; + struct list_head ctx_list; + bool rx_ring_setup_done; + bool tx_ring_setup_done; +}; + +struct xsk_ctx { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + __u32 queue_id; + struct xsk_umem *umem; + int refcount; + int ifindex; + struct list_head list; + int prog_fd; + int link_fd; + int xsks_map_fd; + char ifname[IFNAMSIZ]; + bool has_bpf_link; +}; + +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + __u64 outstanding_tx; + struct xsk_ctx *ctx; + struct xsk_socket_config config; + int fd; +}; + +struct xsk_nl_info { + bool xdp_prog_attached; + int ifindex; + int fd; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + +int xsk_umem__fd(const struct xsk_umem *umem) +{ + return umem ? umem->fd : -EINVAL; +} + +int xsk_socket__fd(const struct xsk_socket *xsk) +{ + return xsk ? xsk->fd : -EINVAL; +} + +static bool xsk_page_aligned(void *buffer) +{ + unsigned long addr = (unsigned long)buffer; + + return !(addr & (getpagesize() - 1)); +} + +static void xsk_set_umem_config(struct xsk_umem_config *cfg, + const struct xsk_umem_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + cfg->flags = XSK_UMEM__DEFAULT_FLAGS; + return; + } + + cfg->fill_size = usr_cfg->fill_size; + cfg->comp_size = usr_cfg->comp_size; + cfg->frame_size = usr_cfg->frame_size; + cfg->frame_headroom = usr_cfg->frame_headroom; + cfg->flags = usr_cfg->flags; +} + +static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->libbpf_flags = 0; + cfg->xdp_flags = 0; + cfg->bind_flags = 0; + return 0; + } + + if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) + return -EINVAL; + + cfg->rx_size = usr_cfg->rx_size; + cfg->tx_size = usr_cfg->tx_size; + cfg->libbpf_flags = usr_cfg->libbpf_flags; + cfg->xdp_flags = usr_cfg->xdp_flags; + cfg->bind_flags = usr_cfg->bind_flags; + + return 0; +} + +static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) +{ + struct xdp_mmap_offsets_v1 off_v1; + + /* getsockopt on a kernel <= 5.3 has no flags fields. + * Copy over the offsets to the correct places in the >=5.4 format + * and put the flags where they would have been on that kernel. + */ + memcpy(&off_v1, off, sizeof(off_v1)); + + off->rx.producer = off_v1.rx.producer; + off->rx.consumer = off_v1.rx.consumer; + off->rx.desc = off_v1.rx.desc; + off->rx.flags = off_v1.rx.consumer + sizeof(__u32); + + off->tx.producer = off_v1.tx.producer; + off->tx.consumer = off_v1.tx.consumer; + off->tx.desc = off_v1.tx.desc; + off->tx.flags = off_v1.tx.consumer + sizeof(__u32); + + off->fr.producer = off_v1.fr.producer; + off->fr.consumer = off_v1.fr.consumer; + off->fr.desc = off_v1.fr.desc; + off->fr.flags = off_v1.fr.consumer + sizeof(__u32); + + off->cr.producer = off_v1.cr.producer; + off->cr.consumer = off_v1.cr.consumer; + off->cr.desc = off_v1.cr.desc; + off->cr.flags = off_v1.cr.consumer + sizeof(__u32); +} + +static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) +{ + socklen_t optlen; + int err; + + optlen = sizeof(*off); + err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); + if (err) + return err; + + if (optlen == sizeof(*off)) + return 0; + + if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { + xsk_mmap_offsets_v1(off); + return 0; + } + + return -EINVAL; +} + +static int xsk_create_umem_rings(struct xsk_umem *umem, int fd, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xdp_mmap_offsets off; + void *map; + int err; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) + return -errno; + + err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + &umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) + return -errno; + + err = xsk_get_mmap_offsets(fd, &off); + if (err) + return -errno; + + map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) + return -errno; + + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->flags = map + off.fr.flags; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, + XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->flags = map + off.cr.flags; + comp->ring = map + off.cr.desc; + + return 0; + +out_mmap: + munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64)); + return err; +} + +int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, + __u64 size, struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xdp_umem_reg mr; + struct xsk_umem *umem; + int err; + + if (!umem_area || !umem_ptr || !fill || !comp) + return -EFAULT; + if (!size && !xsk_page_aligned(umem_area)) + return -EINVAL; + + umem = calloc(1, sizeof(*umem)); + if (!umem) + return -ENOMEM; + + umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); + if (umem->fd < 0) { + err = -errno; + goto out_umem_alloc; + } + + umem->umem_area = umem_area; + INIT_LIST_HEAD(&umem->ctx_list); + xsk_set_umem_config(&umem->config, usr_config); + + memset(&mr, 0, sizeof(mr)); + mr.addr = (uintptr_t)umem_area; + mr.len = size; + mr.chunk_size = umem->config.frame_size; + mr.headroom = umem->config.frame_headroom; + mr.flags = umem->config.flags; + + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); + if (err) { + err = -errno; + goto out_socket; + } + + err = xsk_create_umem_rings(umem, umem->fd, fill, comp); + if (err) + goto out_socket; + + umem->fill_save = fill; + umem->comp_save = comp; + *umem_ptr = umem; + return 0; + +out_socket: + close(umem->fd); +out_umem_alloc: + free(umem); + return err; +} + +struct xsk_umem_config_v1 { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; +}; + +static enum xsk_prog get_xsk_prog(void) +{ + enum xsk_prog detected = XSK_PROG_FALLBACK; + char data_in = 0, data_out; + struct bpf_insn insns[] = { + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, XDP_PASS), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data_in, + .data_size_in = 1, + .data_out = &data_out, + ); + + int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); + + map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL); + if (map_fd < 0) + return detected; + + insns[0].imm = map_fd; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + if (prog_fd < 0) { + close(map_fd); + return detected; + } + + ret = bpf_prog_test_run_opts(prog_fd, &opts); + if (!ret && opts.retval == XDP_PASS) + detected = XSK_PROG_REDIRECT_FLAGS; + close(prog_fd); + close(map_fd); + return detected; +} + +static int xsk_load_xdp_prog(struct xsk_socket *xsk) +{ + static const int log_buf_size = 16 * 1024; + struct xsk_ctx *ctx = xsk->ctx; + char log_buf[log_buf_size]; + int prog_fd; + + /* This is the fallback C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * int ret, index = ctx->rx_queue_index; + * + * // A set entry here means that the correspnding queue_id + * // has an active AF_XDP socket bound to it. + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); + * if (ret > 0) + * return ret; + * + * // Fallback for pre-5.3 kernels, not supporting default + * // action in the flags parameter. + * if (bpf_map_lookup_elem(&xsks_map, &index)) + * return bpf_redirect_map(&xsks_map, index, 0); + * return XDP_PASS; + * } + */ + struct bpf_insn prog[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r2 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* if w0 != 0 goto pc+13 */ + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), + /* r2 = r10 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* r2 += -4 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* call bpf_map_lookup_elem */ + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + /* r1 = r0 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + /* r0 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_0, 2), + /* if r1 == 0 goto pc+5 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* r2 = *(u32 *)(r10 - 4) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = 0 */ + BPF_MOV64_IMM(BPF_REG_3, 0), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* The jumps are to this instruction */ + BPF_EXIT_INSN(), + }; + + /* This is the post-5.3 kernel C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * return bpf_redirect_map(&xsks_map, ctx->rx_queue_index, XDP_PASS); + * } + */ + struct bpf_insn prog_redirect_flags[] = { + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, ctx->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + BPF_EXIT_INSN(), + }; + size_t insns_cnt[] = {ARRAY_SIZE(prog), + ARRAY_SIZE(prog_redirect_flags), + }; + struct bpf_insn *progs[] = {prog, prog_redirect_flags}; + enum xsk_prog option = get_xsk_prog(); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_buf = log_buf, + .log_size = log_buf_size, + ); + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", + progs[option], insns_cnt[option], &opts); + if (prog_fd < 0) { + pr_warn("BPF log buffer:\n%s", log_buf); + return prog_fd; + } + + ctx->prog_fd = prog_fd; + return 0; +} + +static int xsk_create_bpf_link(struct xsk_socket *xsk) +{ + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts); + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int link_fd; + int err; + + err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); + if (err) { + pr_warn("getting XDP prog id failed\n"); + return err; + } + + /* if there's a netlink-based XDP prog loaded on interface, bail out + * and ask user to do the removal by himself + */ + if (prog_id) { + pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n"); + return -EINVAL; + } + + opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE); + + link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts); + if (link_fd < 0) { + pr_warn("bpf_link_create failed: %s\n", strerror(errno)); + return link_fd; + } + + ctx->link_fd = link_fd; + return 0; +} + +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. + */ +static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) +{ + size_t i; + + if (sz == 0) + return; + + sz--; + for (i = 0; i < sz && src[i]; i++) + dst[i] = src[i]; + dst[i] = '\0'; +} + +static int xsk_get_max_queues(struct xsk_socket *xsk) +{ + struct ethtool_channels channels = { .cmd = ETHTOOL_GCHANNELS }; + struct xsk_ctx *ctx = xsk->ctx; + struct ifreq ifr = {}; + int fd, err, ret; + + fd = socket(AF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0); + if (fd < 0) + return -errno; + + ifr.ifr_data = (void *)&channels; + libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ); + err = ioctl(fd, SIOCETHTOOL, &ifr); + if (err && errno != EOPNOTSUPP) { + ret = -errno; + goto out; + } + + if (err) { + /* If the device says it has no channels, then all traffic + * is sent to a single stream, so max queues = 1. + */ + ret = 1; + } else { + /* Take the max of rx, tx, combined. Drivers return + * the number of channels in different ways. + */ + ret = max(channels.max_rx, channels.max_tx); + ret = max(ret, (int)channels.max_combined); + } + +out: + close(fd); + return ret; +} + +static int xsk_create_bpf_maps(struct xsk_socket *xsk) +{ + struct xsk_ctx *ctx = xsk->ctx; + int max_queues; + int fd; + + max_queues = xsk_get_max_queues(xsk); + if (max_queues < 0) + return max_queues; + + fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, NULL); + if (fd < 0) + return fd; + + ctx->xsks_map_fd = fd; + + return 0; +} + +static void xsk_delete_bpf_maps(struct xsk_socket *xsk) +{ + struct xsk_ctx *ctx = xsk->ctx; + + bpf_map_delete_elem(ctx->xsks_map_fd, &ctx->queue_id); + close(ctx->xsks_map_fd); +} + +static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) +{ + __u32 i, *map_ids, num_maps, prog_len = sizeof(struct bpf_prog_info); + __u32 map_len = sizeof(struct bpf_map_info); + struct bpf_prog_info prog_info = {}; + struct xsk_ctx *ctx = xsk->ctx; + struct bpf_map_info map_info; + int fd, err; + + err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); + if (err) + return err; + + num_maps = prog_info.nr_map_ids; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); + if (!map_ids) + return -ENOMEM; + + memset(&prog_info, 0, prog_len); + prog_info.nr_map_ids = num_maps; + prog_info.map_ids = (__u64)(unsigned long)map_ids; + + err = bpf_obj_get_info_by_fd(ctx->prog_fd, &prog_info, &prog_len); + if (err) + goto out_map_ids; + + ctx->xsks_map_fd = -1; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + fd = bpf_map_get_fd_by_id(map_ids[i]); + if (fd < 0) + continue; + + memset(&map_info, 0, map_len); + err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); + if (err) { + close(fd); + continue; + } + + if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) { + ctx->xsks_map_fd = fd; + break; + } + + close(fd); + } + + if (ctx->xsks_map_fd == -1) + err = -ENOENT; + +out_map_ids: + free(map_ids); + return err; +} + +static int xsk_set_bpf_maps(struct xsk_socket *xsk) +{ + struct xsk_ctx *ctx = xsk->ctx; + + return bpf_map_update_elem(ctx->xsks_map_fd, &ctx->queue_id, + &xsk->fd, 0); +} + +static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd) +{ + struct bpf_link_info link_info; + __u32 link_len; + __u32 id = 0; + int err; + int fd; + + while (true) { + err = bpf_link_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) { + err = 0; + break; + } + pr_warn("can't get next link: %s\n", strerror(errno)); + break; + } + + fd = bpf_link_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + pr_warn("can't get link by id (%u): %s\n", id, strerror(errno)); + err = -errno; + break; + } + + link_len = sizeof(struct bpf_link_info); + memset(&link_info, 0, link_len); + err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len); + if (err) { + pr_warn("can't get link info: %s\n", strerror(errno)); + close(fd); + break; + } + if (link_info.type == BPF_LINK_TYPE_XDP) { + if (link_info.xdp.ifindex == ifindex) { + *link_fd = fd; + if (prog_id) + *prog_id = link_info.prog_id; + break; + } + } + close(fd); + } + + return err; +} + +static bool xsk_probe_bpf_link(void) +{ + LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE); + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, XDP_PASS), + BPF_EXIT_INSN() + }; + int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns); + int ifindex_lo = 1; + bool ret = false; + int err; + + err = xsk_link_lookup(ifindex_lo, NULL, &link_fd); + if (err) + return ret; + + if (link_fd >= 0) + return true; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL); + if (prog_fd < 0) + return ret; + + link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts); + close(prog_fd); + + if (link_fd >= 0) { + ret = true; + close(link_fd); + } + + return ret; +} + +static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk) +{ + char ifname[IFNAMSIZ]; + struct xsk_ctx *ctx; + char *interface; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return -ENOMEM; + + interface = if_indextoname(ifindex, &ifname[0]); + if (!interface) { + free(ctx); + return -errno; + } + + ctx->ifindex = ifindex; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + + xsk->ctx = ctx; + xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); + + return 0; +} + +static int xsk_init_xdp_res(struct xsk_socket *xsk, + int *xsks_map_fd) +{ + struct xsk_ctx *ctx = xsk->ctx; + int err; + + err = xsk_create_bpf_maps(xsk); + if (err) + return err; + + err = xsk_load_xdp_prog(xsk); + if (err) + goto err_load_xdp_prog; + + if (ctx->has_bpf_link) + err = xsk_create_bpf_link(xsk); + else + err = bpf_xdp_attach(xsk->ctx->ifindex, ctx->prog_fd, + xsk->config.xdp_flags, NULL); + + if (err) + goto err_attach_xdp_prog; + + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_bpf_maps; + + return err; + +err_set_bpf_maps: + if (ctx->has_bpf_link) + close(ctx->link_fd); + else + bpf_xdp_detach(ctx->ifindex, 0, NULL); +err_attach_xdp_prog: + close(ctx->prog_fd); +err_load_xdp_prog: + xsk_delete_bpf_maps(xsk); + return err; +} + +static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id) +{ + struct xsk_ctx *ctx = xsk->ctx; + int err; + + ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id); + if (ctx->prog_fd < 0) { + err = -errno; + goto err_prog_fd; + } + err = xsk_lookup_bpf_maps(xsk); + if (err) + goto err_lookup_maps; + + if (!xsk->rx) + return err; + + err = xsk_set_bpf_maps(xsk); + if (err) + goto err_set_maps; + + return err; + +err_set_maps: + close(ctx->xsks_map_fd); +err_lookup_maps: + close(ctx->prog_fd); +err_prog_fd: + if (ctx->has_bpf_link) + close(ctx->link_fd); + return err; +} + +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd) +{ + struct xsk_socket *xsk = _xdp; + struct xsk_ctx *ctx = xsk->ctx; + __u32 prog_id = 0; + int err; + + if (ctx->has_bpf_link) + err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd); + else + err = bpf_xdp_query_id(ctx->ifindex, xsk->config.xdp_flags, &prog_id); + + if (err) + return err; + + err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) : + xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id); + + if (!err && xsks_map_fd) + *xsks_map_fd = ctx->xsks_map_fd; + + return err; +} + +int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd) +{ + return __xsk_setup_xdp_prog(xsk, xsks_map_fd); +} + +static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex, + __u32 queue_id) +{ + struct xsk_ctx *ctx; + + if (list_empty(&umem->ctx_list)) + return NULL; + + list_for_each_entry(ctx, &umem->ctx_list, list) { + if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) { + ctx->refcount++; + return ctx; + } + } + + return NULL; +} + +static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap) +{ + struct xsk_umem *umem = ctx->umem; + struct xdp_mmap_offsets off; + int err; + + if (--ctx->refcount) + return; + + if (!unmap) + goto out_free; + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (err) + goto out_free; + + munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size * + sizeof(__u64)); + munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size * + sizeof(__u64)); + +out_free: + list_del(&ctx->list); + free(ctx); +} + +static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk, + struct xsk_umem *umem, int ifindex, + const char *ifname, __u32 queue_id, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp) +{ + struct xsk_ctx *ctx; + int err; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + if (!umem->fill_save) { + err = xsk_create_umem_rings(umem, xsk->fd, fill, comp); + if (err) { + free(ctx); + return NULL; + } + } else if (umem->fill_save != fill || umem->comp_save != comp) { + /* Copy over rings to new structs. */ + memcpy(fill, umem->fill_save, sizeof(*fill)); + memcpy(comp, umem->comp_save, sizeof(*comp)); + } + + ctx->ifindex = ifindex; + ctx->refcount = 1; + ctx->umem = umem; + ctx->queue_id = queue_id; + libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ); + + ctx->fill = fill; + ctx->comp = comp; + list_add(&ctx->list, &umem->ctx_list); + ctx->has_bpf_link = xsk_probe_bpf_link(); + return ctx; +} + +static void xsk_destroy_xsk_struct(struct xsk_socket *xsk) +{ + free(xsk->ctx); + free(xsk); +} + +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int fd) +{ + xsk->ctx->xsks_map_fd = fd; + return xsk_set_bpf_maps(xsk); +} + +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd) +{ + struct xsk_socket *xsk; + int res; + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + res = xsk_create_xsk_struct(ifindex, xsk); + if (res) { + free(xsk); + return -EINVAL; + } + + res = __xsk_setup_xdp_prog(xsk, xsks_map_fd); + + xsk_destroy_xsk_struct(xsk); + + return res; +} + +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *usr_config) +{ + bool unmap, rx_setup_done = false, tx_setup_done = false; + void *rx_map = NULL, *tx_map = NULL; + struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; + struct xsk_socket *xsk; + struct xsk_ctx *ctx; + int err, ifindex; + + if (!umem || !xsk_ptr || !(rx || tx)) + return -EFAULT; + + unmap = umem->fill_save != fill; + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_xsk_alloc; + + xsk->outstanding_tx = 0; + ifindex = if_nametoindex(ifname); + if (!ifindex) { + err = -errno; + goto out_xsk_alloc; + } + + if (umem->refcount++ > 0) { + xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0); + if (xsk->fd < 0) { + err = -errno; + goto out_xsk_alloc; + } + } else { + xsk->fd = umem->fd; + rx_setup_done = umem->rx_ring_setup_done; + tx_setup_done = umem->tx_ring_setup_done; + } + + ctx = xsk_get_ctx(umem, ifindex, queue_id); + if (!ctx) { + if (!fill || !comp) { + err = -EFAULT; + goto out_socket; + } + + ctx = xsk_create_ctx(xsk, umem, ifindex, ifname, queue_id, + fill, comp); + if (!ctx) { + err = -ENOMEM; + goto out_socket; + } + } + xsk->ctx = ctx; + + if (rx && !rx_setup_done) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, + &xsk->config.rx_size, + sizeof(xsk->config.rx_size)); + if (err) { + err = -errno; + goto out_put_ctx; + } + if (xsk->fd == umem->fd) + umem->rx_ring_setup_done = true; + } + if (tx && !tx_setup_done) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, + &xsk->config.tx_size, + sizeof(xsk->config.tx_size)); + if (err) { + err = -errno; + goto out_put_ctx; + } + if (xsk->fd == umem->fd) + umem->tx_ring_setup_done = true; + } + + err = xsk_get_mmap_offsets(xsk->fd, &off); + if (err) { + err = -errno; + goto out_put_ctx; + } + + if (rx) { + rx_map = mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (rx_map == MAP_FAILED) { + err = -errno; + goto out_put_ctx; + } + + rx->mask = xsk->config.rx_size - 1; + rx->size = xsk->config.rx_size; + rx->producer = rx_map + off.rx.producer; + rx->consumer = rx_map + off.rx.consumer; + rx->flags = rx_map + off.rx.flags; + rx->ring = rx_map + off.rx.desc; + rx->cached_prod = *rx->producer; + rx->cached_cons = *rx->consumer; + } + xsk->rx = rx; + + if (tx) { + tx_map = mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (tx_map == MAP_FAILED) { + err = -errno; + goto out_mmap_rx; + } + + tx->mask = xsk->config.tx_size - 1; + tx->size = xsk->config.tx_size; + tx->producer = tx_map + off.tx.producer; + tx->consumer = tx_map + off.tx.consumer; + tx->flags = tx_map + off.tx.flags; + tx->ring = tx_map + off.tx.desc; + tx->cached_prod = *tx->producer; + /* cached_cons is r->size bigger than the real consumer pointer + * See xsk_prod_nb_free + */ + tx->cached_cons = *tx->consumer + xsk->config.tx_size; + } + xsk->tx = tx; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = ctx->ifindex; + sxdp.sxdp_queue_id = ctx->queue_id; + if (umem->refcount > 1) { + sxdp.sxdp_flags |= XDP_SHARED_UMEM; + sxdp.sxdp_shared_umem_fd = umem->fd; + } else { + sxdp.sxdp_flags = xsk->config.bind_flags; + } + + err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + err = __xsk_setup_xdp_prog(xsk, NULL); + if (err) + goto out_mmap_tx; + } + + *xsk_ptr = xsk; + umem->fill_save = NULL; + umem->comp_save = NULL; + return 0; + +out_mmap_tx: + if (tx) + munmap(tx_map, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); +out_mmap_rx: + if (rx) + munmap(rx_map, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_put_ctx: + xsk_put_ctx(ctx, unmap); +out_socket: + if (--umem->refcount) + close(xsk->fd); +out_xsk_alloc: + free(xsk); + return err; +} + +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) +{ + if (!umem) + return -EFAULT; + + return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem, + rx, tx, umem->fill_save, + umem->comp_save, usr_config); +} + +int xsk_umem__delete(struct xsk_umem *umem) +{ + struct xdp_mmap_offsets off; + int err; + + if (!umem) + return 0; + + if (umem->refcount) + return -EBUSY; + + err = xsk_get_mmap_offsets(umem->fd, &off); + if (!err && umem->fill_save && umem->comp_save) { + munmap(umem->fill_save->ring - off.fr.desc, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp_save->ring - off.cr.desc, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + + close(umem->fd); + free(umem); + + return 0; +} + +void xsk_socket__delete(struct xsk_socket *xsk) +{ + size_t desc_sz = sizeof(struct xdp_desc); + struct xdp_mmap_offsets off; + struct xsk_umem *umem; + struct xsk_ctx *ctx; + int err; + + if (!xsk) + return; + + ctx = xsk->ctx; + umem = ctx->umem; + + xsk_put_ctx(ctx, true); + + if (!ctx->refcount) { + xsk_delete_bpf_maps(xsk); + close(ctx->prog_fd); + if (ctx->has_bpf_link) + close(ctx->link_fd); + } + + err = xsk_get_mmap_offsets(xsk->fd, &off); + if (!err) { + if (xsk->rx) { + munmap(xsk->rx->ring - off.rx.desc, + off.rx.desc + xsk->config.rx_size * desc_sz); + } + if (xsk->tx) { + munmap(xsk->tx->ring - off.tx.desc, + off.tx.desc + xsk->config.tx_size * desc_sz); + } + } + + umem->refcount--; + /* Do not close an fd that also has an associated umem connected + * to it. + */ + if (xsk->fd != umem->fd) + close(xsk->fd); + free(xsk); +} diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h new file mode 100644 index 000000000000..997723b0bfb2 --- /dev/null +++ b/tools/testing/selftests/bpf/xsk.h @@ -0,0 +1,316 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * AF_XDP user-space access library. + * + * Copyright (c) 2018 - 2019 Intel Corporation. + * Copyright (c) 2019 Facebook + * + * Author(s): Magnus Karlsson <magnus.karlsson@intel.com> + */ + +#ifndef __XSK_H +#define __XSK_H + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <linux/if_xdp.h> + +#include <bpf/libbpf.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* This whole API has been deprecated and moved to libxdp that can be found at + * https://github.com/xdp-project/xdp-tools. The APIs are exactly the same so + * it should just be linking with libxdp instead of libbpf for this set of + * functionality. If not, please submit a bug report on the aforementioned page. + */ + +/* Load-Acquire Store-Release barriers used by the XDP socket + * library. The following macros should *NOT* be considered part of + * the xsk.h API, and is subject to change anytime. + * + * LIBRARY INTERNAL + */ + +#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x) +#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v) + +#if defined(__i386__) || defined(__x86_64__) +# define libbpf_smp_store_release(p, v) \ + do { \ + asm volatile("" : : : "memory"); \ + __XSK_WRITE_ONCE(*p, v); \ + } while (0) +# define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ + asm volatile("" : : : "memory"); \ + ___p1; \ + }) +#elif defined(__aarch64__) +# define libbpf_smp_store_release(p, v) \ + asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory") +# define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1; \ + asm volatile ("ldar %w0, %1" \ + : "=r" (___p1) : "Q" (*p) : "memory"); \ + ___p1; \ + }) +#elif defined(__riscv) +# define libbpf_smp_store_release(p, v) \ + do { \ + asm volatile ("fence rw,w" : : : "memory"); \ + __XSK_WRITE_ONCE(*p, v); \ + } while (0) +# define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ + asm volatile ("fence r,rw" : : : "memory"); \ + ___p1; \ + }) +#endif + +#ifndef libbpf_smp_store_release +#define libbpf_smp_store_release(p, v) \ + do { \ + __sync_synchronize(); \ + __XSK_WRITE_ONCE(*p, v); \ + } while (0) +#endif + +#ifndef libbpf_smp_load_acquire +#define libbpf_smp_load_acquire(p) \ + ({ \ + typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \ + __sync_synchronize(); \ + ___p1; \ + }) +#endif + +/* LIBRARY INTERNAL -- END */ + +/* Do not access these members directly. Use the functions below. */ +#define DEFINE_XSK_RING(name) \ +struct name { \ + __u32 cached_prod; \ + __u32 cached_cons; \ + __u32 mask; \ + __u32 size; \ + __u32 *producer; \ + __u32 *consumer; \ + void *ring; \ + __u32 *flags; \ +} + +DEFINE_XSK_RING(xsk_ring_prod); +DEFINE_XSK_RING(xsk_ring_cons); + +/* For a detailed explanation on the memory barriers associated with the + * ring, please take a look at net/xdp/xsk_queue.h. + */ + +struct xsk_umem; +struct xsk_socket; + +static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, + __u32 idx) +{ + __u64 *addrs = (__u64 *)fill->ring; + + return &addrs[idx & fill->mask]; +} + +static inline const __u64 * +xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) +{ + const __u64 *addrs = (const __u64 *)comp->ring; + + return &addrs[idx & comp->mask]; +} + +static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, + __u32 idx) +{ + struct xdp_desc *descs = (struct xdp_desc *)tx->ring; + + return &descs[idx & tx->mask]; +} + +static inline const struct xdp_desc * +xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) +{ + const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; + + return &descs[idx & rx->mask]; +} + +static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r) +{ + return *r->flags & XDP_RING_NEED_WAKEUP; +} + +static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) +{ + __u32 free_entries = r->cached_cons - r->cached_prod; + + if (free_entries >= nb) + return free_entries; + + /* Refresh the local tail pointer. + * cached_cons is r->size bigger than the real consumer pointer so + * that this addition can be avoided in the more frequently + * executed code that computs free_entries in the beginning of + * this function. Without this optimization it whould have been + * free_entries = r->cached_prod - r->cached_cons + r->size. + */ + r->cached_cons = libbpf_smp_load_acquire(r->consumer); + r->cached_cons += r->size; + + return r->cached_cons - r->cached_prod; +} + +static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) +{ + __u32 entries = r->cached_prod - r->cached_cons; + + if (entries == 0) { + r->cached_prod = libbpf_smp_load_acquire(r->producer); + entries = r->cached_prod - r->cached_cons; + } + + return (entries > nb) ? nb : entries; +} + +static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx) +{ + if (xsk_prod_nb_free(prod, nb) < nb) + return 0; + + *idx = prod->cached_prod; + prod->cached_prod += nb; + + return nb; +} + +static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) +{ + /* Make sure everything has been written to the ring before indicating + * this to the kernel by writing the producer pointer. + */ + libbpf_smp_store_release(prod->producer, *prod->producer + nb); +} + +static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) +{ + __u32 entries = xsk_cons_nb_avail(cons, nb); + + if (entries > 0) { + *idx = cons->cached_cons; + cons->cached_cons += entries; + } + + return entries; +} + +static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb) +{ + cons->cached_cons -= nb; +} + +static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb) +{ + /* Make sure data has been read before indicating we are done + * with the entries by updating the consumer pointer. + */ + libbpf_smp_store_release(cons->consumer, *cons->consumer + nb); + +} + +static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) +{ + return &((char *)umem_area)[addr]; +} + +static inline __u64 xsk_umem__extract_addr(__u64 addr) +{ + return addr & XSK_UNALIGNED_BUF_ADDR_MASK; +} + +static inline __u64 xsk_umem__extract_offset(__u64 addr) +{ + return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT; +} + +static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr) +{ + return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr); +} + +int xsk_umem__fd(const struct xsk_umem *umem); +int xsk_socket__fd(const struct xsk_socket *xsk); + +#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 +#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) +#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 +#define XSK_UMEM__DEFAULT_FLAGS 0 + +struct xsk_umem_config { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; + __u32 flags; +}; + +int xsk_setup_xdp_prog_xsk(struct xsk_socket *xsk, int *xsks_map_fd); +int xsk_setup_xdp_prog(int ifindex, int *xsks_map_fd); +int xsk_socket__update_xskmap(struct xsk_socket *xsk, int xsks_map_fd); + +/* Flags for the libbpf_flags field. */ +#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) + +struct xsk_socket_config { + __u32 rx_size; + __u32 tx_size; + __u32 libbpf_flags; + __u32 xdp_flags; + __u16 bind_flags; +}; + +/* Set config to NULL to get the default configuration. */ +int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); +int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, + const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_socket_config *config); + +/* Returns 0 for success and -EBUSY if the umem is still in use. */ +int xsk_umem__delete(struct xsk_umem *umem); +void xsk_socket__delete(struct xsk_socket *xsk); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __XSK_H */ diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index 684e813803ec..a0b71723a818 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -8,7 +8,7 @@ ksft_xfail=2 ksft_xpass=3 ksft_skip=4 -XSKOBJ=xdpxceiver +XSKOBJ=xskxceiver validate_root_exec() { @@ -77,7 +77,7 @@ validate_ip_utility() [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; } } -execxdpxceiver() +exec_xskxceiver() { if [[ $busy_poll -eq 1 ]]; then ARGS+="-b " diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index e5992a6b5e09..74d56d971baf 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -97,12 +97,12 @@ #include <time.h> #include <unistd.h> #include <stdatomic.h> -#include <bpf/xsk.h> -#include "xdpxceiver.h" +#include "xsk.h" +#include "xskxceiver.h" #include "../kselftest.h" /* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf. - * Until xdpxceiver is either moved or re-writed into libxdp, suppress + * Until xskxceiver is either moved or re-writed into libxdp, suppress * deprecation warnings in this file */ #pragma GCC diagnostic ignored "-Wdeprecated-declarations" @@ -1085,6 +1085,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) { u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size; int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE; + LIBBPF_OPTS(bpf_xdp_query_opts, opts); int ret, ifindex; void *bufs; u32 i; @@ -1130,10 +1131,26 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifindex) exit_with_error(errno); - ret = xsk_setup_xdp_prog(ifindex, &ifobject->xsk_map_fd); + ret = xsk_setup_xdp_prog_xsk(ifobject->xsk->xsk, &ifobject->xsk_map_fd); if (ret) exit_with_error(-ret); + ret = bpf_xdp_query(ifindex, ifobject->xdp_flags, &opts); + if (ret) + exit_with_error(-ret); + + if (ifobject->xdp_flags & XDP_FLAGS_SKB_MODE) { + if (opts.attach_mode != XDP_ATTACHED_SKB) { + ksft_print_msg("ERROR: [%s] XDP prog not in SKB mode\n"); + exit_with_error(-EINVAL); + } + } else if (ifobject->xdp_flags & XDP_FLAGS_DRV_MODE) { + if (opts.attach_mode != XDP_ATTACHED_DRV) { + ksft_print_msg("ERROR: [%s] XDP prog not in DRV mode\n"); + exit_with_error(-EINVAL); + } + } + ret = xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd); if (ret) exit_with_error(-ret); diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 8f672b0fe0e1..3d17053f98e5 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -2,8 +2,8 @@ * Copyright(c) 2020 Intel Corporation. */ -#ifndef XDPXCEIVER_H_ -#define XDPXCEIVER_H_ +#ifndef XSKXCEIVER_H_ +#define XSKXCEIVER_H_ #ifndef SOL_XDP #define SOL_XDP 283 @@ -169,4 +169,4 @@ pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; int pkts_in_flight; -#endif /* XDPXCEIVER_H */ +#endif /* XSKXCEIVER_H_ */ diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh index 0189db81550b..0328ac0b5a5e 100644 --- a/tools/testing/selftests/damon/_chk_dependency.sh +++ b/tools/testing/selftests/damon/_chk_dependency.sh @@ -26,3 +26,13 @@ do exit 1 fi done + +permission_error="Operation not permitted" +for f in attrs target_ids monitor_on +do + status=$( cat "$DBGFS/$f" 2>&1 ) + if [ "${status#*$permission_error}" != "$status" ]; then + echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?" + exit $ksft_skip + fi +done diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile index aa8e8b5b3864..cd8c5ece1cba 100644 --- a/tools/testing/selftests/dma/Makefile +++ b/tools/testing/selftests/dma/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -I../../../../usr/include/ +CFLAGS += -I../../../../include/ TEST_GEN_PROGS := dma_map_benchmark diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index c3b3c09e995e..5c997f17fcbd 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -10,8 +10,8 @@ #include <unistd.h> #include <sys/ioctl.h> #include <sys/mman.h> -#include <linux/map_benchmark.h> #include <linux/types.h> +#include <linux/map_benchmark.h> #define NSEC_PER_MSEC 1000000L diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index de1c4e6de0b2..c812080e304e 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -32,7 +32,8 @@ int main(int argc, char *argv[]) devfd = open("/dev/udmabuf", O_RDWR); if (devfd < 0) { - printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX); + printf("%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); exit(77); } diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh index b789dc8257e6..09c76cd7661d 100755 --- a/tools/testing/selftests/drivers/gpu/drm_mm.sh +++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh @@ -3,7 +3,7 @@ # Runs API tests for struct drm_mm (DRM range manager) if ! /sbin/modprobe -n -q test-drm_mm; then - echo "drivers/gpu/drm_mm: [skip]" + echo "drivers/gpu/drm_mm: module test-drm_mm is not found in /lib/modules/`uname -r` [skip]" exit 77 fi @@ -11,6 +11,6 @@ if /sbin/modprobe -q test-drm_mm; then /sbin/modprobe -q -r test-drm_mm echo "drivers/gpu/drm_mm: ok" else - echo "drivers/gpu/drm_mm: [FAIL]" + echo "drivers/gpu/drm_mm: module test-drm_mm could not be removed [FAIL]" exit 1 fi diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile new file mode 100644 index 000000000000..2a731d5c6d85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = bridge_locked_port.sh \ + bridge_mdb.sh \ + bridge_mld.sh \ + bridge_vlan_aware.sh \ + bridge_vlan_mcast.sh \ + bridge_vlan_unaware.sh \ + local_termination.sh \ + no_forwarding.sh \ + test_bridge_fdb_stress.sh + +TEST_PROGS_EXTENDED := lib.sh + +TEST_FILES := forwarding.config + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh index 08a922d8b86a..224ca3695c89 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh @@ -84,6 +84,13 @@ lc_wait_until_port_count_is() busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc" } +lc_nested_devlink_dev_get() +{ + local lc=$1 + + devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink" +} + PROV_UNPROV_TIMEOUT=8000 # ms POST_PROV_ACT_TIMEOUT=2000 # ms PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms @@ -191,12 +198,30 @@ ports_check() check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)" } +lc_dev_info_provisioned_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_hw_revision + local running_ini_version + + fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.fixed."hw.revision"') + check_err $? "Failed to get linecard $lc fixed.hw.revision" + log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\"" + running_ini_version=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r '.[][].versions.running."ini.version"') + check_err $? "Failed to get linecard $lc running.ini.version" + log_info "Linecard $lc running.ini.version: \"$running_ini_version\"" +} + provision_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT supported_types_check $lc @@ -207,6 +232,11 @@ provision_test() fi provision_one $lc $LC_16X100G_TYPE ports_check $lc $LC_16X100G_PORT_COUNT + + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? "Failed to get nested devlink handle of linecard $lc" + lc_dev_info_provisioned_check $lc $nested_devlink_dev + log_test "Provision" } @@ -220,12 +250,32 @@ interface_check() setup_wait } +lc_dev_info_active_check() +{ + local lc=$1 + local nested_devlink_dev=$2 + local fixed_device_fw_psid + local running_device_fw + + fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.fixed" | \ + jq -e -r '."fw.psid"') + check_err $? "Failed to get linecard $lc fixed fw PSID" + log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\"" + + running_device_fw=$(devlink dev info $nested_devlink_dev -j | \ + jq -e -r ".[][].versions.running.fw") + check_err $? "Failed to get linecard $lc running.fw.version" + log_info "Linecard $lc running.fw: \"$running_device_fw\"" +} + activation_16x100G_test() { RET=0 local lc local type local state + local nested_devlink_dev lc=$LC_SLOT type=$LC_16X100G_TYPE @@ -238,6 +288,10 @@ activation_16x100G_test() interface_check + nested_devlink_dev=$(lc_nested_devlink_dev_get $lc) + check_err $? "Failed to get nested devlink handle of linecard $lc" + lc_dev_info_active_check $lc $nested_devlink_dev + log_test "Activation 16x100G" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh new file mode 100644 index 000000000000..a43a9926e690 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh @@ -0,0 +1,107 @@ +# SPDX-License-Identifier: GPL-2.0 + +RIF_COUNTER_NUM_NETIFS=2 + +rif_counter_addr4() +{ + local i=$1; shift + local p=$1; shift + + printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p)) +} + +rif_counter_addr4pfx() +{ + rif_counter_addr4 $@ + printf /30 +} + +rif_counter_h1_create() +{ + simple_if_init $h1 +} + +rif_counter_h1_destroy() +{ + simple_if_fini $h1 +} + +rif_counter_h2_create() +{ + simple_if_init $h2 +} + +rif_counter_h2_destroy() +{ + simple_if_fini $h2 +} + +rif_counter_setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + + vrf_prepare + + rif_counter_h1_create + rif_counter_h2_create +} + +rif_counter_cleanup() +{ + local count=$1; shift + + pre_cleanup + + for ((i = 1; i <= count; i++)); do + vlan_destroy $h2 $i + done + + rif_counter_h2_destroy + rif_counter_h1_destroy + + vrf_cleanup + + if [[ -v RIF_COUNTER_BATCH_FILE ]]; then + rm -f $RIF_COUNTER_BATCH_FILE + fi +} + + +rif_counter_test() +{ + local count=$1; shift + local should_fail=$1; shift + + RIF_COUNTER_BATCH_FILE="$(mktemp)" + + for ((i = 1; i <= count; i++)); do + vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2) + done + for ((i = 1; i <= count; i++)); do + cat >> $RIF_COUNTER_BATCH_FILE <<-EOF + stats set dev $h2.$i l3_stats on + EOF + done + + ip -b $RIF_COUNTER_BATCH_FILE + check_err_fail $should_fail $? "RIF counter enablement" +} + +rif_counter_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count; i > 0; i /= 2)); do + $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(rif_counter_addr4 $i 1) \ + -B $(rif_counter_addr4 $i 2) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count; i > 0; i /= 2)); do + busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \ + hw_stats_get l3_stats $h2.$i rx packets > /dev/null + check_err $? "Traffic not seen at RIF $h2.$i" + done +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index e9f65bd2e299..688338bbeb97 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -25,7 +25,16 @@ cleanup() trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh @@ -36,16 +45,32 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' should_fail=$should_fail test" + continue + fi + ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource changed + # following the test setup. + target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup - devlink_reload if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]]; then + $tt "$target" + log_test "'$current_test' $target traffic test" + fi + fi else log_test "'$current_test' overflow $target" fi + ${current_test}_cleanup $target + devlink_reload RET_FIN=$(( RET_FIN || RET )) done done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh new file mode 120000 index 000000000000..1f5752e8ffc0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh @@ -0,0 +1 @@ +../spectrum/rif_counter_scale.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh index efd798a85931..4444bbace1a9 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh @@ -4,17 +4,22 @@ source ../tc_flower_scale.sh tc_flower_get_target() { local should_fail=$1; shift + local max_cnts # The driver associates a counter with each tc filter, which means the # number of supported filters is bounded by the number of available # counters. - # Currently, the driver supports 30K (30,720) flow counters and six of - # these are used for multicast routing. - local target=30714 + max_cnts=$(devlink_resource_size_get counters flow) + + # Remove already allocated counters. + ((max_cnts -= $(devlink_resource_occ_get counters flow))) + + # Each rule uses two counters, for packets and bytes. + ((max_cnts /= 2)) if ((! should_fail)); then - echo $target + echo $max_cnts else - echo $((target + 1)) + echo $((max_cnts + 1)) fi } diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index dea33dc93790..95d9f710a630 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -22,7 +22,16 @@ cleanup() devlink_sp_read_kvd_defaults trap cleanup EXIT -ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile" +ALL_TESTS=" + router + tc_flower + mirror_gre + tc_police + port + rif_mac_profile + rif_counter +" + for current_test in ${TESTS:-$ALL_TESTS}; do RET_FIN=0 source ${current_test}_scale.sh @@ -41,15 +50,31 @@ for current_test in ${TESTS:-$ALL_TESTS}; do for should_fail in 0 1; do RET=0 target=$(${current_test}_get_target "$should_fail") + if ((target == 0)); then + log_test_skip "'$current_test' [$profile] should_fail=$should_fail test" + continue + fi ${current_test}_setup_prepare setup_wait $num_netifs + # Update target in case occupancy of a certain resource + # changed following the test setup. + target=$(${current_test}_get_target "$should_fail") ${current_test}_test "$target" "$should_fail" - ${current_test}_cleanup if [[ "$should_fail" -eq 0 ]]; then log_test "'$current_test' [$profile] $target" + + if ((!RET)); then + tt=${current_test}_traffic_test + if [[ $(type -t $tt) == "function" ]] + then + $tt "$target" + log_test "'$current_test' [$profile] $target traffic test" + fi + fi else log_test "'$current_test' [$profile] overflow $target" fi + ${current_test}_cleanup $target RET_FIN=$(( RET_FIN || RET )) done done diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh new file mode 100644 index 000000000000..d44536276e8a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../rif_counter_scale.sh + +rif_counter_get_target() +{ + local should_fail=$1; shift + local max_cnts + local max_rifs + local target + + max_rifs=$(devlink_resource_size_get rifs) + max_cnts=$(devlink_resource_size_get counters rif) + + # Remove already allocated RIFs. + ((max_rifs -= $(devlink_resource_occ_get rifs))) + + # 10 KVD slots per counter, ingress+egress counters per RIF + ((max_cnts /= 20)) + + # Pointless to run the overflow test if we don't have enough RIFs to + # host all the counters. + if ((max_cnts > max_rifs && should_fail)); then + echo 0 + return + fi + + target=$((max_rifs < max_cnts ? max_rifs : max_cnts)) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh index aa74be9f47c8..d3d9e60d6ddf 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh @@ -77,6 +77,7 @@ tc_flower_rules_create() filter add dev $h2 ingress \ prot ipv6 \ pref 1000 \ + handle 42$i \ flower $tcflags dst_ip $(tc_flower_addr $i) \ action drop EOF @@ -121,3 +122,19 @@ tc_flower_test() tcflags="skip_sw" __tc_flower_test $count $should_fail } + +tc_flower_traffic_test() +{ + local count=$1; shift + local i; + + for ((i = count - 1; i > 0; i /= 2)); do + $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \ + -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \ + -q -t udp sp=54321,dp=12345 + done + for ((i = count - 1; i > 0; i /= 2)); do + tc_check_packets "dev $h2 ingress" 42$i 1 + check_err $? "Traffic not seen at rule #$i" + done +} diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh index fc794cd30389..6800de816e8b 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh @@ -16,6 +16,7 @@ ALL_TESTS=" ipv4_replay ipv4_flush ipv4_error_path + ipv4_delete_fail ipv6_add ipv6_metric ipv6_append_single @@ -29,11 +30,13 @@ ALL_TESTS=" ipv6_replay_single ipv6_replay_multipath ipv6_error_path + ipv6_delete_fail " NETDEVSIM_PATH=/sys/bus/netdevsim/ DEV_ADDR=1337 DEV=netdevsim${DEV_ADDR} SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/ NUM_NETIFS=0 source $lib_dir/lib.sh source $lib_dir/fib_offload_lib.sh @@ -157,6 +160,27 @@ ipv4_error_path() ipv4_error_path_replay } +ipv4_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 192.0.2.0/24 dev dummy1 + ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv4 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + ipv6_add() { fib_ipv6_add_test "testns1" @@ -304,6 +328,27 @@ ipv6_error_path() ipv6_error_path_replay } +ipv6_delete_fail() +{ + RET=0 + + echo "y" > $DEBUGFS_DIR/fib/fail_route_delete + + ip -n testns1 link add name dummy1 type dummy + ip -n testns1 link set dev dummy1 up + + ip -n testns1 route add 2001:db8:1::/64 dev dummy1 + ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null + + # We should not be able to delete the netdev if we are leaking a + # reference. + ip -n testns1 link del dev dummy1 + + log_test "IPv6 route delete failure" + + echo "n" > $DEBUGFS_DIR/fib/fail_route_delete +} + fib_notify_on_flag_change_set() { local notify=$1; shift diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile index 5e701d2708d4..891215a7dc8a 100644 --- a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile +++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile @@ -11,7 +11,6 @@ else TEST_GEN_PROGS := test_uvdevice top_srcdir ?= ../../../../../.. -KSFT_KHDR_INSTALL := 1 khdr_dir = $(top_srcdir)/usr/include LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config index 02dd6cc9cf99..7b4fc6ee6205 100644 --- a/tools/testing/selftests/filesystems/binderfs/config +++ b/tools/testing/selftests/filesystems/binderfs/config @@ -1,3 +1,2 @@ -CONFIG_ANDROID=y CONFIG_ANDROID_BINDERFS=y CONFIG_ANDROID_BINDER_IPC=y diff --git a/tools/testing/selftests/filesystems/fat/.gitignore b/tools/testing/selftests/filesystems/fat/.gitignore new file mode 100644 index 000000000000..b89920ed841c --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +rename_exchange diff --git a/tools/testing/selftests/filesystems/fat/Makefile b/tools/testing/selftests/filesystems/fat/Makefile new file mode 100644 index 000000000000..902033f6ef09 --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_PROGS := run_fat_tests.sh +TEST_GEN_PROGS_EXTENDED := rename_exchange +CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES) + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/fat/config b/tools/testing/selftests/filesystems/fat/config new file mode 100644 index 000000000000..6cf95e787a17 --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/config @@ -0,0 +1,2 @@ +CONFIG_BLK_DEV_LOOP=y +CONFIG_VFAT_FS=y diff --git a/tools/testing/selftests/filesystems/fat/rename_exchange.c b/tools/testing/selftests/filesystems/fat/rename_exchange.c new file mode 100644 index 000000000000..e488ad354fce --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/rename_exchange.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Program that atomically exchanges two paths using + * the renameat2() system call RENAME_EXCHANGE flag. + * + * Copyright 2022 Red Hat Inc. + * Author: Javier Martinez Canillas <javierm@redhat.com> + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> + +void print_usage(const char *program) +{ + printf("Usage: %s [oldpath] [newpath]\n", program); + printf("Atomically exchange oldpath and newpath\n"); +} + +int main(int argc, char *argv[]) +{ + int ret; + + if (argc != 3) { + print_usage(argv[0]); + exit(EXIT_FAILURE); + } + + ret = renameat2(AT_FDCWD, argv[1], AT_FDCWD, argv[2], RENAME_EXCHANGE); + if (ret) { + perror("rename exchange failed"); + exit(EXIT_FAILURE); + } + + exit(EXIT_SUCCESS); +} diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh new file mode 100644 index 000000000000..7f35dc3d15df --- /dev/null +++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run filesystem operations tests on an 1 MiB disk image that is formatted with +# a vfat filesystem and mounted in a temporary directory using a loop device. +# +# Copyright 2022 Red Hat Inc. +# Author: Javier Martinez Canillas <javierm@redhat.com> + +set -e +set -u +set -o pipefail + +BASE_DIR="$(dirname $0)" +TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXX)" +IMG_PATH="${TMP_DIR}/fat.img" +MNT_PATH="${TMP_DIR}/mnt" + +cleanup() +{ + mountpoint -q "${MNT_PATH}" && unmount_image + rm -rf "${TMP_DIR}" +} +trap cleanup SIGINT SIGTERM EXIT + +create_loopback() +{ + touch "${IMG_PATH}" + chattr +C "${IMG_PATH}" >/dev/null 2>&1 || true + + truncate -s 1M "${IMG_PATH}" + mkfs.vfat "${IMG_PATH}" >/dev/null 2>&1 +} + +mount_image() +{ + mkdir -p "${MNT_PATH}" + sudo mount -o loop "${IMG_PATH}" "${MNT_PATH}" +} + +rename_exchange_test() +{ + local rename_exchange="${BASE_DIR}/rename_exchange" + local old_path="${MNT_PATH}/old_file" + local new_path="${MNT_PATH}/new_file" + + echo old | sudo tee "${old_path}" >/dev/null 2>&1 + echo new | sudo tee "${new_path}" >/dev/null 2>&1 + sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1 + sudo sync -f "${MNT_PATH}" + grep new "${old_path}" >/dev/null 2>&1 + grep old "${new_path}" >/dev/null 2>&1 +} + +rename_exchange_subdir_test() +{ + local rename_exchange="${BASE_DIR}/rename_exchange" + local dir_path="${MNT_PATH}/subdir" + local old_path="${MNT_PATH}/old_file" + local new_path="${dir_path}/new_file" + + sudo mkdir -p "${dir_path}" + echo old | sudo tee "${old_path}" >/dev/null 2>&1 + echo new | sudo tee "${new_path}" >/dev/null 2>&1 + sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1 + sudo sync -f "${MNT_PATH}" + grep new "${old_path}" >/dev/null 2>&1 + grep old "${new_path}" >/dev/null 2>&1 +} + +unmount_image() +{ + sudo umount "${MNT_PATH}" &> /dev/null +} + +create_loopback +mount_image +rename_exchange_test +rename_exchange_subdir_test +unmount_image + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc index 60c02b482be8..c300eb020262 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - add/remove eprobe events -# requires: dynamic_events events/syscalls/sys_enter_openat "e[:[<group>/]<event>] <attached-group>.<attached-event> [<args>]":README +# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README echo 0 > events/enable @@ -87,4 +87,11 @@ echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events ! grep -q "$EPROBE" dynamic_events ! test -d events/eprobes/$EPROBE +if grep -q "e\[:\[<group>/]\[<event>]]" README; then + echo "e:mygroup/ $SYSTEM/$EVENT $OPTIONS" >> dynamic_events + test -d events/mygroup + echo "-:mygroup/" >> dynamic_events + ! test -d events/mygroup +fi + clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index b4da41d126d5..13d43f40a6fc 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -23,4 +23,11 @@ grep -q myevent1 dynamic_events echo > dynamic_events +if grep -q "p\[:\[<group>/]\[<event>]]" README; then + echo "p:mygroup/ $PLACE" >> dynamic_events + test -d events/mygroup + echo "-:mygroup/" >> dynamic_events + ! test -d events/mygroup +fi + clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index fa928b431555..9e85d3019ff0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -21,7 +21,7 @@ check_error 'p:^/bar vfs_read' # NO_GROUP_NAME check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME -check_error 'p:foo/^ vfs_read' # NO_EVENT_NAME +check_error 'p:^ vfs_read' # NO_EVENT_NAME check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index b8152c573e8a..732149011692 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -22,7 +22,6 @@ TEST_GEN_FILES := \ TEST_PROGS := run.sh top_srcdir = ../../../../.. -KSFT_KHDR_INSTALL := 1 DEFAULT_INSTALL_HDR_PATH := 1 include ../../lib.mk diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile index 71b306602368..616ed4019655 100644 --- a/tools/testing/selftests/gpio/Makefile +++ b/tools/testing/selftests/gpio/Makefile @@ -3,6 +3,6 @@ TEST_PROGS := gpio-mockup.sh gpio-sim.sh TEST_FILES := gpio-mockup-sysfs.sh TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name -CFLAGS += -O2 -g -Wall -I../../../../usr/include/ +CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES) include ../lib.mk diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c index 6ea7b9f37a41..25110c7c0b3e 100644 --- a/tools/testing/selftests/kcmp/kcmp_test.c +++ b/tools/testing/selftests/kcmp/kcmp_test.c @@ -88,6 +88,9 @@ int main(int argc, char **argv) int pid2 = getpid(); int ret; + ksft_print_header(); + ksft_set_plan(3); + fd2 = open(kpath, O_RDWR, 0644); if (fd2 < 0) { perror("Can't open file"); @@ -152,7 +155,6 @@ int main(int argc, char **argv) ksft_inc_pass_cnt(); } - ksft_print_cnts(); if (ret) ksft_exit_fail(); @@ -162,5 +164,5 @@ int main(int argc, char **argv) waitpid(pid2, &status, P_ALL); - return ksft_exit_pass(); + return 0; } diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh index 0e114b34d5d7..641ef05863b2 100755 --- a/tools/testing/selftests/kexec/kexec_common_lib.sh +++ b/tools/testing/selftests/kexec/kexec_common_lib.sh @@ -65,32 +65,6 @@ get_efivarfs_secureboot_mode() return 0; } -get_efi_var_secureboot_mode() -{ - local efi_vars - local secure_boot_file - local setup_mode_file - local secureboot_mode - local setup_mode - - if [ ! -d "$efi_vars" ]; then - log_skip "efi_vars is not enabled\n" - fi - secure_boot_file=$(find "$efi_vars" -name SecureBoot-* 2>/dev/null) - setup_mode_file=$(find "$efi_vars" -name SetupMode-* 2>/dev/null) - if [ -f "$secure_boot_file/data" ] && \ - [ -f "$setup_mode_file/data" ]; then - secureboot_mode=`od -An -t u1 "$secure_boot_file/data"` - setup_mode=`od -An -t u1 "$setup_mode_file/data"` - - if [ $secureboot_mode -eq 1 ] && [ $setup_mode -eq 0 ]; then - log_info "secure boot mode enabled (CONFIG_EFI_VARS)" - return 1; - fi - fi - return 0; -} - # On powerpc platform, check device-tree property # /proc/device-tree/ibm,secureboot/os-secureboot-enforcing # to detect secureboot state. @@ -113,9 +87,8 @@ get_arch() } # Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID). -# The secure boot mode can be accessed either as the last integer -# of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from -# "od -An -t u1 /sys/firmware/efi/vars/SecureBoot-*/data". The efi +# The secure boot mode can be accessed as the last integer of +# "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*". The efi # SetupMode can be similarly accessed. # Return 1 for SecureBoot mode enabled and SetupMode mode disabled. get_secureboot_mode() @@ -129,11 +102,6 @@ get_secureboot_mode() else get_efivarfs_secureboot_mode secureboot_mode=$? - # fallback to using the efi_var files - if [ $secureboot_mode -eq 0 ]; then - get_efi_var_secureboot_mode - secureboot_mode=$? - fi fi if [ $secureboot_mode -eq 0 ]; then diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index 00e60d6eb16b..708cb5429633 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -26,7 +26,7 @@ echo " main Makefile when optional -p is specified." echo "- Prints pass/fail dependency check for each tests/sub-test." echo "- Prints pass/fail targets and libraries." echo "- Default: runs dependency checks on all tests." -echo "- Optional test name can be specified to check dependencies for it." +echo "- Optional: test name can be specified to check dependencies for it." exit 1 } diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h index e2ea41de3f35..63cd7487373f 100644 --- a/tools/testing/selftests/kselftest_module.h +++ b/tools/testing/selftests/kselftest_module.h @@ -3,6 +3,7 @@ #define __KSELFTEST_MODULE_H #include <linux/module.h> +#include <linux/panic.h> /* * Test framework for writing test modules to be loaded by kselftest. @@ -41,6 +42,7 @@ static inline int kstm_report(unsigned int total_tests, unsigned int failed_test static int __init __module##_init(void) \ { \ pr_info("loaded.\n"); \ + add_taint(TAINT_TEST, LOCKDEP_STILL_OK); \ selftest(); \ return kstm_report(total_tests, failed_tests, skipped_tests); \ } \ @@ -51,4 +53,6 @@ static void __exit __module##_exit(void) \ module_init(__module##_init); \ module_exit(__module##_exit) +MODULE_INFO(test, "Y"); + #endif /* __KSELFTEST_MODULE_H */ diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 4509a3a7eeae..d625a3f83780 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -25,8 +25,10 @@ /x86_64/hyperv_cpuid /x86_64/hyperv_features /x86_64/hyperv_svm_test +/x86_64/max_vcpuid_cap_test /x86_64/mmio_warning_test -/x86_64/mmu_role_test +/x86_64/monitor_mwait_test +/x86_64/nx_huge_pages_test /x86_64/platform_info_test /x86_64/pmu_event_filter_test /x86_64/set_boot_cpu_id @@ -36,9 +38,11 @@ /x86_64/state_test /x86_64/svm_vmcall_test /x86_64/svm_int_ctl_test -/x86_64/tsc_scaling_sync +/x86_64/svm_nested_soft_inject_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test +/x86_64/tsc_scaling_sync +/x86_64/ucna_injection_test /x86_64/userspace_io_test /x86_64/userspace_msr_exit_test /x86_64/vmx_apic_access_test @@ -46,6 +50,7 @@ /x86_64/vmx_dirty_log_test /x86_64/vmx_exception_with_invalid_guest_state /x86_64/vmx_invalid_nested_guest_state +/x86_64/vmx_msrs_test /x86_64/vmx_preemption_timer_test /x86_64/vmx_set_nested_state_test /x86_64/vmx_tsc_adjust_test @@ -56,6 +61,7 @@ /x86_64/xen_vmcall_test /x86_64/xss_msr_test /x86_64/vmx_pmu_caps_test +/x86_64/triple_fault_event_test /access_tracking_perf_test /demand_paging_test /dirty_log_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 22423c871ed6..c7f47429d6cd 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -4,7 +4,6 @@ include ../../../build/Build.include all: top_srcdir = ../../../.. -KSFT_KHDR_INSTALL := 1 # For cross-builds to work, UNAME_M has to map to ARCH and arch specific # directories and targets in this Makefile. "uname -m" doesn't map to @@ -70,6 +69,10 @@ LIBKVM_s390x += lib/s390x/ucall.c LIBKVM_riscv += lib/riscv/processor.c LIBKVM_riscv += lib/riscv/ucall.c +# Non-compiled test targets +TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh + +# Compiled test targets TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features @@ -83,7 +86,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test -TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test +TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id @@ -93,14 +96,17 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state +TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test @@ -115,6 +121,8 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests TEST_GEN_PROGS_x86_64 += x86_64/amx_test +TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test +TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test @@ -131,6 +139,9 @@ TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test TEST_GEN_PROGS_x86_64 += system_counter_offset_test +# Compiled outputs used by test targets +TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test + TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list @@ -170,7 +181,9 @@ TEST_GEN_PROGS_riscv += kvm_page_table_test TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += kvm_binary_stats_test +TEST_PROGS += $(TEST_PROGS_$(UNAME_M)) TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) +TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr @@ -217,6 +230,7 @@ $(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) $(TEST_GEN_PROGS): $(LIBKVM_OBJS) +$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS) cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib .. cscope: diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 3b940a101bc0..574eb73f0e90 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -76,13 +76,8 @@ struct test_vcpu_shared_data { uint64_t xcnt; }; -struct test_vcpu { - uint32_t vcpuid; - pthread_t pt_vcpu_run; - struct kvm_vm *vm; -}; - -static struct test_vcpu test_vcpu[KVM_MAX_VCPUS]; +static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; +static pthread_t pt_vcpu_run[KVM_MAX_VCPUS]; static struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS]; static int vtimer_irq, ptimer_irq; @@ -217,29 +212,32 @@ static void guest_code(void) static void *test_vcpu_run(void *arg) { + unsigned int vcpu_idx = (unsigned long)arg; struct ucall uc; - struct test_vcpu *vcpu = arg; + struct kvm_vcpu *vcpu = vcpus[vcpu_idx]; struct kvm_vm *vm = vcpu->vm; - uint32_t vcpuid = vcpu->vcpuid; - struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpuid]; + struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx]; - vcpu_run(vm, vcpuid); + vcpu_run(vcpu); /* Currently, any exit from guest is an indication of completion */ pthread_mutex_lock(&vcpu_done_map_lock); - set_bit(vcpuid, vcpu_done_map); + set_bit(vcpu_idx, vcpu_done_map); pthread_mutex_unlock(&vcpu_done_map_lock); - switch (get_ucall(vm, vcpuid, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: case UCALL_DONE: break; case UCALL_ABORT: sync_global_from_guest(vm, *shared_data); - TEST_FAIL("%s at %s:%ld\n\tvalues: %lu, %lu; %lu, vcpu: %u; stage: %u; iter: %u", - (const char *)uc.args[0], __FILE__, uc.args[1], - uc.args[2], uc.args[3], uc.args[4], vcpuid, - shared_data->guest_stage, shared_data->nr_iter); + REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u", + GUEST_ASSERT_ARG(uc, 0), + GUEST_ASSERT_ARG(uc, 1), + GUEST_ASSERT_ARG(uc, 2), + vcpu_idx, + shared_data->guest_stage, + shared_data->nr_iter); break; default: TEST_FAIL("Unexpected guest exit\n"); @@ -265,7 +263,7 @@ static uint32_t test_get_pcpu(void) return pcpu; } -static int test_migrate_vcpu(struct test_vcpu *vcpu) +static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; cpu_set_t cpuset; @@ -274,15 +272,15 @@ static int test_migrate_vcpu(struct test_vcpu *vcpu) CPU_ZERO(&cpuset); CPU_SET(new_pcpu, &cpuset); - pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu->vcpuid, new_pcpu); + pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - ret = pthread_setaffinity_np(vcpu->pt_vcpu_run, - sizeof(cpuset), &cpuset); + ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], + sizeof(cpuset), &cpuset); /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, - "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n", - vcpu->vcpuid, new_pcpu, ret); + "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n", + vcpu_idx, new_pcpu, ret); return ret; } @@ -305,7 +303,7 @@ static void *test_vcpu_migration(void *arg) continue; } - test_migrate_vcpu(&test_vcpu[i]); + test_migrate_vcpu(i); } } while (test_args.nr_vcpus != n_done); @@ -314,16 +312,17 @@ static void *test_vcpu_migration(void *arg) static void test_run(struct kvm_vm *vm) { - int i, ret; pthread_t pt_vcpu_migration; + unsigned int i; + int ret; pthread_mutex_init(&vcpu_done_map_lock, NULL); vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus); TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n"); - for (i = 0; i < test_args.nr_vcpus; i++) { - ret = pthread_create(&test_vcpu[i].pt_vcpu_run, NULL, - test_vcpu_run, &test_vcpu[i]); + for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) { + ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run, + (void *)(unsigned long)i); TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i); } @@ -338,7 +337,7 @@ static void test_run(struct kvm_vm *vm) for (i = 0; i < test_args.nr_vcpus; i++) - pthread_join(test_vcpu[i].pt_vcpu_run, NULL); + pthread_join(pt_vcpu_run[i], NULL); if (test_args.migration_freq_ms) pthread_join(pt_vcpu_migration, NULL); @@ -349,12 +348,10 @@ static void test_run(struct kvm_vm *vm) static void test_init_timer_irq(struct kvm_vm *vm) { /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */ - int vcpu0_fd = vcpu_get_fd(vm, 0); - - kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq, false); - kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL, - KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq, false); + vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); + vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); sync_global_to_guest(vm, ptimer_irq); sync_global_to_guest(vm, vtimer_irq); @@ -370,25 +367,18 @@ static struct kvm_vm *test_vm_create(void) unsigned int i; int nr_vcpus = test_args.nr_vcpus; - vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); vm_init_descriptor_tables(vm); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler); - for (i = 0; i < nr_vcpus; i++) { - vcpu_init_descriptor_tables(vm, i); - - test_vcpu[i].vcpuid = i; - test_vcpu[i].vm = vm; - } + for (i = 0; i < nr_vcpus; i++) + vcpu_init_descriptor_tables(vcpus[i]); ucall_init(vm, NULL); test_init_timer_irq(vm); gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA); - if (gic_fd < 0) { - print_skip("Failed to create vgic-v3"); - exit(KSFT_SKIP); - } + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); /* Make all the test's cmdline args visible to the guest */ sync_global_to_guest(vm, test_args); @@ -478,10 +468,8 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv)) exit(KSFT_SKIP); - if (test_args.migration_freq_ms && get_nprocs() < 2) { - print_skip("At least two physical CPUs needed for vCPU migration"); - exit(KSFT_SKIP); - } + __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2, + "At least two physical CPUs needed for vCPU migration"); vm = test_vm_create(); test_run(vm); diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 63b2178210c4..2ee35cf9801e 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -3,8 +3,6 @@ #include <kvm_util.h> #include <processor.h> -#define VCPU_ID 0 - #define MDSCR_KDE (1 << 13) #define MDSCR_MDE (1 << 15) #define MDSCR_SS (1 << 0) @@ -240,31 +238,29 @@ static void guest_svc_handler(struct ex_regs *regs) svc_addr = regs->pc; } -static int debug_version(struct kvm_vm *vm) +static int debug_version(struct kvm_vcpu *vcpu) { uint64_t id_aa64dfr0; - get_reg(vm, VCPU_ID, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0); + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0); return id_aa64dfr0 & 0xf; } int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; int stage; - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); ucall_init(vm, NULL); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); - if (debug_version(vm) < 6) { - print_skip("Armv8 debug architecture not supported."); - kvm_vm_free(vm); - exit(KSFT_SKIP); - } + __TEST_REQUIRE(debug_version(vcpu) >= 6, + "Armv8 debug architecture not supported."); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_EC_BRK_INS, guest_sw_bp_handler); @@ -278,18 +274,16 @@ int main(int argc, char *argv[]) ESR_EC_SVC64, guest_svc_handler); for (stage = 0; stage < 11; stage++) { - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == stage, "Stage %d: Unexpected sync ucall, got %lx", stage, (ulong)uc.args[1]); break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx", - (const char *)uc.args[0], - __FILE__, uc.args[1], uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index d3a7dbfcbb3d..d287dd2cac0a 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -377,7 +377,7 @@ static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init) init->features[s->feature / 32] |= 1 << (s->feature % 32); } -static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config *c) +static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c) { struct reg_sublist *s; int feature; @@ -385,7 +385,7 @@ static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid, struct vcpu_config for_each_sublist(c, s) { if (s->finalize) { feature = s->feature; - vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); } } } @@ -395,10 +395,12 @@ static void check_supported(struct vcpu_config *c) struct reg_sublist *s; for_each_sublist(c, s) { - if (s->capability && !kvm_check_cap(s->capability)) { - fprintf(stderr, "%s: %s not available, skipping tests\n", config_name(c), s->name); - exit(KSFT_SKIP); - } + if (!s->capability) + continue; + + __TEST_REQUIRE(kvm_has_cap(s->capability), + "%s: %s not available, skipping tests\n", + config_name(c), s->name); } } @@ -411,17 +413,19 @@ static void run_test(struct vcpu_config *c) struct kvm_vcpu_init init = { .target = -1, }; int new_regs = 0, missing_regs = 0, i, n; int failed_get = 0, failed_set = 0, failed_reject = 0; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct reg_sublist *s; check_supported(c); - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm = vm_create_barebones(); prepare_vcpu_init(c, &init); - aarch64_vcpu_add_default(vm, 0, &init, NULL); - finalize_vcpu(vm, 0, c); + vcpu = __vm_vcpu_add(vm, 0); + aarch64_vcpu_setup(vcpu, &init); + finalize_vcpu(vcpu, c); - reg_list = vcpu_get_reg_list(vm, 0); + reg_list = vcpu_get_reg_list(vcpu); if (fixup_core_regs) core_reg_fixup(); @@ -457,7 +461,7 @@ static void run_test(struct vcpu_config *c) bool reject_reg = false; int ret; - ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); + ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr); if (ret) { printf("%s: Failed to get ", config_name(c)); print_reg(c, reg.id); @@ -469,7 +473,7 @@ static void run_test(struct vcpu_config *c) for_each_sublist(c, s) { if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { reject_reg = true; - ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); if (ret != -1 || errno != EPERM) { printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); print_reg(c, reg.id); @@ -481,7 +485,7 @@ static void run_test(struct vcpu_config *c) } if (!reject_reg) { - ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); if (ret) { printf("%s: Failed to set ", config_name(c)); print_reg(c, reg.id); diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index 41e0210b7a5e..a39da3fe4952 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -141,26 +141,6 @@ static void guest_code(void) GUEST_DONE(); } -static int set_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg = { - .id = id, - .addr = (uint64_t)&val, - }; - - return _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); -} - -static void get_fw_reg(struct kvm_vm *vm, uint64_t id, uint64_t *addr) -{ - struct kvm_one_reg reg = { - .id = id, - .addr = (uint64_t)addr, - }; - - vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); -} - struct st_time { uint32_t rev; uint32_t attr; @@ -170,23 +150,19 @@ struct st_time { #define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63) #define ST_GPA_BASE (1 << 30) -static void steal_time_init(struct kvm_vm *vm) +static void steal_time_init(struct kvm_vcpu *vcpu) { uint64_t st_ipa = (ulong)ST_GPA_BASE; unsigned int gpages; - struct kvm_device_attr dev = { - .group = KVM_ARM_VCPU_PVTIME_CTRL, - .attr = KVM_ARM_VCPU_PVTIME_IPA, - .addr = (uint64_t)&st_ipa, - }; gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE); - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); + vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); - vcpu_ioctl(vm, 0, KVM_SET_DEVICE_ATTR, &dev); + vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL, + KVM_ARM_VCPU_PVTIME_IPA, &st_ipa); } -static void test_fw_regs_before_vm_start(struct kvm_vm *vm) +static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu) { uint64_t val; unsigned int i; @@ -196,18 +172,18 @@ static void test_fw_regs_before_vm_start(struct kvm_vm *vm) const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i]; /* First 'read' should be an upper limit of the features supported */ - get_fw_reg(vm, reg_info->reg, &val); + vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx\n", reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val); /* Test a 'write' by disabling all the features of the register map */ - ret = set_fw_reg(vm, reg_info->reg, 0); + ret = __vcpu_set_reg(vcpu, reg_info->reg, 0); TEST_ASSERT(ret == 0, "Failed to clear all the features of reg: 0x%lx; ret: %d\n", reg_info->reg, errno); - get_fw_reg(vm, reg_info->reg, &val); + vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg); @@ -216,7 +192,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vm *vm) * Avoid this check if all the bits are occupied. */ if (reg_info->max_feat_bit < 63) { - ret = set_fw_reg(vm, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); + ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1)); TEST_ASSERT(ret != 0 && errno == EINVAL, "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx\n", errno, reg_info->reg); @@ -224,7 +200,7 @@ static void test_fw_regs_before_vm_start(struct kvm_vm *vm) } } -static void test_fw_regs_after_vm_start(struct kvm_vm *vm) +static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu) { uint64_t val; unsigned int i; @@ -237,7 +213,7 @@ static void test_fw_regs_after_vm_start(struct kvm_vm *vm) * Before starting the VM, the test clears all the bits. * Check if that's still the case. */ - get_fw_reg(vm, reg_info->reg, &val); + vcpu_get_reg(vcpu, reg_info->reg, &val); TEST_ASSERT(val == 0, "Expected all the features to be cleared for reg: 0x%lx\n", reg_info->reg); @@ -247,77 +223,78 @@ static void test_fw_regs_after_vm_start(struct kvm_vm *vm) * the registers and should return EBUSY. Set the registers and check for * the expected errno. */ - ret = set_fw_reg(vm, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); + ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit)); TEST_ASSERT(ret != 0 && errno == EBUSY, "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx\n", errno, reg_info->reg); } } -static struct kvm_vm *test_vm_create(void) +static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu) { struct kvm_vm *vm; - vm = vm_create_default(0, 0, guest_code); + vm = vm_create_with_one_vcpu(vcpu, guest_code); ucall_init(vm, NULL); - steal_time_init(vm); + steal_time_init(*vcpu); return vm; } -static struct kvm_vm *test_guest_stage(struct kvm_vm *vm) +static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu) { - struct kvm_vm *ret_vm = vm; + int prev_stage = stage; + + pr_debug("Stage: %d\n", prev_stage); - pr_debug("Stage: %d\n", stage); + /* Sync the stage early, the VM might be freed below. */ + stage++; + sync_global_to_guest(*vm, stage); - switch (stage) { + switch (prev_stage) { case TEST_STAGE_REG_IFACE: - test_fw_regs_after_vm_start(vm); + test_fw_regs_after_vm_start(*vcpu); break; case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: /* Start a new VM so that all the features are now enabled by default */ - kvm_vm_free(vm); - ret_vm = test_vm_create(); + kvm_vm_free(*vm); + *vm = test_vm_create(vcpu); break; case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: case TEST_STAGE_HVC_IFACE_FALSE_INFO: break; default: - TEST_FAIL("Unknown test stage: %d\n", stage); + TEST_FAIL("Unknown test stage: %d\n", prev_stage); } - - stage++; - sync_global_to_guest(vm, stage); - - return ret_vm; } static void test_run(void) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct ucall uc; bool guest_done = false; - vm = test_vm_create(); + vm = test_vm_create(&vcpu); - test_fw_regs_before_vm_start(vm); + test_fw_regs_before_vm_start(vcpu); while (!guest_done) { - vcpu_run(vm, 0); + vcpu_run(vcpu); - switch (get_ucall(vm, 0, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: - vm = test_guest_stage(vm); + test_guest_stage(&vm, &vcpu); break; case UCALL_DONE: guest_done = true; break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld\n\tvalues: 0x%lx, 0x%lx; 0x%lx, stage: %u", - (const char *)uc.args[0], __FILE__, uc.args[1], - uc.args[2], uc.args[3], uc.args[4], stage); + REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u", + GUEST_ASSERT_ARG(uc, 0), + GUEST_ASSERT_ARG(uc, 1), + GUEST_ASSERT_ARG(uc, 2), stage); break; default: TEST_FAIL("Unexpected guest exit\n"); diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index 88541de21c41..f7621f6e938e 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -17,9 +17,6 @@ #include "processor.h" #include "test_util.h" -#define VCPU_ID_SOURCE 0 -#define VCPU_ID_TARGET 1 - #define CPU_ON_ENTRY_ADDR 0xfeedf00dul #define CPU_ON_CONTEXT_ID 0xdeadc0deul @@ -64,49 +61,48 @@ static uint64_t psci_features(uint32_t func_id) return res.a0; } -static void vcpu_power_off(struct kvm_vm *vm, uint32_t vcpuid) +static void vcpu_power_off(struct kvm_vcpu *vcpu) { struct kvm_mp_state mp_state = { .mp_state = KVM_MP_STATE_STOPPED, }; - vcpu_set_mp_state(vm, vcpuid, &mp_state); + vcpu_mp_state_set(vcpu, &mp_state); } -static struct kvm_vm *setup_vm(void *guest_code) +static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source, + struct kvm_vcpu **target) { struct kvm_vcpu_init init; struct kvm_vm *vm; - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name); + vm = vm_create(2); ucall_init(vm, NULL); vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2); - aarch64_vcpu_add_default(vm, VCPU_ID_SOURCE, &init, guest_code); - aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_code); + *source = aarch64_vcpu_add(vm, 0, &init, guest_code); + *target = aarch64_vcpu_add(vm, 1, &init, guest_code); return vm; } -static void enter_guest(struct kvm_vm *vm, uint32_t vcpuid) +static void enter_guest(struct kvm_vcpu *vcpu) { struct ucall uc; - vcpu_run(vm, vcpuid); - if (get_ucall(vm, vcpuid, &uc) == UCALL_ABORT) - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, - uc.args[1]); + vcpu_run(vcpu); + if (get_ucall(vcpu, &uc) == UCALL_ABORT) + REPORT_GUEST_ASSERT(uc); } -static void assert_vcpu_reset(struct kvm_vm *vm, uint32_t vcpuid) +static void assert_vcpu_reset(struct kvm_vcpu *vcpu) { uint64_t obs_pc, obs_x0; - get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &obs_pc); - get_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[0]), &obs_x0); + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc); + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0); TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR, "unexpected target cpu pc: %lx (expected: %lx)", @@ -134,37 +130,29 @@ static void guest_test_cpu_on(uint64_t target_cpu) static void host_test_cpu_on(void) { + struct kvm_vcpu *source, *target; uint64_t target_mpidr; struct kvm_vm *vm; struct ucall uc; - vm = setup_vm(guest_test_cpu_on); + vm = setup_vm(guest_test_cpu_on, &source, &target); /* * make sure the target is already off when executing the test. */ - vcpu_power_off(vm, VCPU_ID_TARGET); + vcpu_power_off(target); - get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); - vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK); - enter_guest(vm, VCPU_ID_SOURCE); + vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr); + vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK); + enter_guest(source); - if (get_ucall(vm, VCPU_ID_SOURCE, &uc) != UCALL_DONE) + if (get_ucall(source, &uc) != UCALL_DONE) TEST_FAIL("Unhandled ucall: %lu", uc.cmd); - assert_vcpu_reset(vm, VCPU_ID_TARGET); + assert_vcpu_reset(target); kvm_vm_free(vm); } -static void enable_system_suspend(struct kvm_vm *vm) -{ - struct kvm_enable_cap cap = { - .cap = KVM_CAP_ARM_SYSTEM_SUSPEND, - }; - - vm_enable_cap(vm, &cap); -} - static void guest_test_system_suspend(void) { uint64_t ret; @@ -179,16 +167,17 @@ static void guest_test_system_suspend(void) static void host_test_system_suspend(void) { + struct kvm_vcpu *source, *target; struct kvm_run *run; struct kvm_vm *vm; - vm = setup_vm(guest_test_system_suspend); - enable_system_suspend(vm); + vm = setup_vm(guest_test_system_suspend, &source, &target); + vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0); - vcpu_power_off(vm, VCPU_ID_TARGET); - run = vcpu_state(vm, VCPU_ID_SOURCE); + vcpu_power_off(target); + run = source->run; - enter_guest(vm, VCPU_ID_SOURCE); + enter_guest(source); TEST_ASSERT(run->exit_reason == KVM_EXIT_SYSTEM_EVENT, "Unhandled exit reason: %u (%s)", @@ -202,10 +191,7 @@ static void host_test_system_suspend(void) int main(void) { - if (!kvm_check_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)) { - print_skip("KVM_CAP_ARM_SYSTEM_SUSPEND not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); host_test_cpu_on(); host_test_system_suspend(); diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c index 6e9402679229..80b74c6f152b 100644 --- a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c +++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c @@ -15,24 +15,25 @@ /* - * Add a vCPU, run KVM_ARM_VCPU_INIT with @init1, and then - * add another vCPU, and run KVM_ARM_VCPU_INIT with @init2. + * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then + * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1. */ -static int add_init_2vcpus(struct kvm_vcpu_init *init1, - struct kvm_vcpu_init *init2) +static int add_init_2vcpus(struct kvm_vcpu_init *init0, + struct kvm_vcpu_init *init1) { + struct kvm_vcpu *vcpu0, *vcpu1; struct kvm_vm *vm; int ret; - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm = vm_create_barebones(); - vm_vcpu_add(vm, 0); - ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); + vcpu0 = __vm_vcpu_add(vm, 0); + ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0); if (ret) goto free_exit; - vm_vcpu_add(vm, 1); - ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); + vcpu1 = __vm_vcpu_add(vm, 1); + ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1); free_exit: kvm_vm_free(vm); @@ -40,25 +41,26 @@ free_exit: } /* - * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init1, - * and run KVM_ARM_VCPU_INIT for another vCPU with @init2. + * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0, + * and run KVM_ARM_VCPU_INIT for another vCPU with @init1. */ -static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init1, - struct kvm_vcpu_init *init2) +static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0, + struct kvm_vcpu_init *init1) { + struct kvm_vcpu *vcpu0, *vcpu1; struct kvm_vm *vm; int ret; - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm = vm_create_barebones(); - vm_vcpu_add(vm, 0); - vm_vcpu_add(vm, 1); + vcpu0 = __vm_vcpu_add(vm, 0); + vcpu1 = __vm_vcpu_add(vm, 1); - ret = _vcpu_ioctl(vm, 0, KVM_ARM_VCPU_INIT, init1); + ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0); if (ret) goto free_exit; - ret = _vcpu_ioctl(vm, 1, KVM_ARM_VCPU_INIT, init2); + ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1); free_exit: kvm_vm_free(vm); @@ -76,45 +78,42 @@ free_exit: */ int main(void) { - struct kvm_vcpu_init init1, init2; + struct kvm_vcpu_init init0, init1; struct kvm_vm *vm; int ret; - if (!kvm_check_cap(KVM_CAP_ARM_EL1_32BIT)) { - print_skip("KVM_CAP_ARM_EL1_32BIT is not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT)); - /* Get the preferred target type and copy that to init2 for later use */ - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init1); + /* Get the preferred target type and copy that to init1 for later use */ + vm = vm_create_barebones(); + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0); kvm_vm_free(vm); - init2 = init1; + init1 = init0; /* Test with 64bit vCPUs */ - ret = add_init_2vcpus(&init1, &init1); + ret = add_init_2vcpus(&init0, &init0); TEST_ASSERT(ret == 0, "Configuring 64bit EL1 vCPUs failed unexpectedly"); - ret = add_2vcpus_init_2vcpus(&init1, &init1); + ret = add_2vcpus_init_2vcpus(&init0, &init0); TEST_ASSERT(ret == 0, "Configuring 64bit EL1 vCPUs failed unexpectedly"); /* Test with 32bit vCPUs */ - init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); - ret = add_init_2vcpus(&init1, &init1); + init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init0, &init0); TEST_ASSERT(ret == 0, "Configuring 32bit EL1 vCPUs failed unexpectedly"); - ret = add_2vcpus_init_2vcpus(&init1, &init1); + ret = add_2vcpus_init_2vcpus(&init0, &init0); TEST_ASSERT(ret == 0, "Configuring 32bit EL1 vCPUs failed unexpectedly"); /* Test with mixed-width vCPUs */ - init1.features[0] = 0; - init2.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); - ret = add_init_2vcpus(&init1, &init2); + init0.features[0] = 0; + init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT); + ret = add_init_2vcpus(&init0, &init1); TEST_ASSERT(ret != 0, "Configuring mixed-width vCPUs worked unexpectedly"); - ret = add_2vcpus_init_2vcpus(&init1, &init2); + ret = add_2vcpus_init_2vcpus(&init0, &init1); TEST_ASSERT(ret != 0, "Configuring mixed-width vCPUs worked unexpectedly"); diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c index 34379c98d2f4..e05ecb31823f 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_init.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c @@ -32,14 +32,28 @@ struct vm_gic { static uint64_t max_phys_size; -/* helper to access a redistributor register */ -static int access_v3_redist_reg(int gicv3_fd, int vcpu, int offset, - uint32_t *val, bool write) +/* + * Helpers to access a redistributor register and verify the ioctl() failed or + * succeeded as expected, and provided the correct value on success. + */ +static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset, + int want, const char *msg) { - uint64_t attr = REG_OFFSET(vcpu, offset); + uint32_t ignored_val; + int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + REG_OFFSET(vcpu, offset), &ignored_val); - return _kvm_device_access(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, - attr, val, write); + TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want); +} + +static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want, + const char *msg) +{ + uint32_t val; + + kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS, + REG_OFFSET(vcpu, offset), &val); + TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val); } /* dummy guest code */ @@ -52,22 +66,22 @@ static void guest_code(void) } /* we don't want to assert on run execution, hence that helper */ -static int run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +static int run_vcpu(struct kvm_vcpu *vcpu) { - ucall_init(vm, NULL); - int ret = _vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); - if (ret) - return -errno; - return 0; + ucall_init(vcpu->vm, NULL); + + return __vcpu_run(vcpu) ? -errno : 0; } -static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, uint32_t nr_vcpus) +static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, + uint32_t nr_vcpus, + struct kvm_vcpu *vcpus[]) { struct vm_gic v; v.gic_dev_type = gic_dev_type; - v.vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); - v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false); + v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); return v; } @@ -129,63 +143,60 @@ static void subtest_dist_rdist(struct vm_gic *v) : gic_v2_dist_region; /* Check existing group/attributes */ - kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr); + kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr); - kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr); + kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr); /* check non existing attribute */ - ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1); + ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1); TEST_ASSERT(ret && errno == ENXIO, "attribute not supported"); /* misaligned DIST and REDIST address settings */ addr = dist.alignment / 0x10; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + dist.attr, &addr); TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned"); addr = rdist.alignment / 0x10; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned"); /* out of range address */ addr = max_phys_size; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + dist.attr, &addr); TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit"); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit"); /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */ addr = max_phys_size - dist.alignment; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); TEST_ASSERT(ret && errno == E2BIG, "half of the redist is beyond IPA limit"); /* set REDIST base address @0x0*/ addr = 0x00000; - kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); /* Attempt to create a second legacy redistributor region */ addr = 0xE0000; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - rdist.attr, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + rdist.attr, &addr); TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again"); - ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_REDIST); if (!ret) { /* Attempt to mix legacy and new redistributor regions */ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, - &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "attempt to mix GICv3 REDIST and REDIST_REGION"); } @@ -195,8 +206,8 @@ static void subtest_dist_rdist(struct vm_gic *v) * on first vcpu run instead. */ addr = rdist.size - rdist.alignment; - kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - dist.attr, &addr, true); + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + dist.attr, &addr); } /* Test the new REDIST region API */ @@ -205,71 +216,71 @@ static void subtest_v3_redist_regions(struct vm_gic *v) uint64_t addr, expected_addr; int ret; - ret = kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST); + ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST); TEST_ASSERT(!ret, "Multiple redist regions advertised"); addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0"); addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0"); addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "attempt to register the first rdist region with index != 0"); addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address"); addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); - kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index"); addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "register an rdist region overlapping with another one"); addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1"); addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); - kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == E2BIG, "register redist region with base address beyond IPA range"); /* The last redist is above the pa range. */ addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == E2BIG, "register redist region with top address beyond IPA range"); addr = 0x260000; - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); TEST_ASSERT(ret && errno == EINVAL, "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION"); @@ -282,28 +293,28 @@ static void subtest_v3_redist_regions(struct vm_gic *v) addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0); expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0"); addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1); expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1"); addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, false); + ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region"); addr = 0x260000; - kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr); addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2); - ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist"); } @@ -313,18 +324,19 @@ static void subtest_v3_redist_regions(struct vm_gic *v) */ static void test_vgic_then_vcpus(uint32_t gic_dev_type) { + struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; int ret, i; - v = vm_gic_create_with_vcpus(gic_dev_type, 1); + v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus); subtest_dist_rdist(&v); /* Add the rest of the VCPUs */ for (i = 1; i < NR_VCPUS; ++i) - vm_vcpu_add_default(v.vm, i, guest_code); + vcpus[i] = vm_vcpu_add(v.vm, i, guest_code); - ret = run_vcpu(v.vm, 3); + ret = run_vcpu(vcpus[3]); TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); vm_gic_destroy(&v); @@ -333,14 +345,15 @@ static void test_vgic_then_vcpus(uint32_t gic_dev_type) /* All the VCPUs are created before the VGIC KVM device gets initialized */ static void test_vcpus_then_vgic(uint32_t gic_dev_type) { + struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; int ret; - v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS); + v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus); subtest_dist_rdist(&v); - ret = run_vcpu(v.vm, 3); + ret = run_vcpu(vcpus[3]); TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run"); vm_gic_destroy(&v); @@ -348,52 +361,53 @@ static void test_vcpus_then_vgic(uint32_t gic_dev_type) static void test_v3_new_redist_regions(void) { + struct kvm_vcpu *vcpus[NR_VCPUS]; void *dummy = NULL; struct vm_gic v; uint64_t addr; int ret; - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); subtest_v3_redist_regions(&v); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - ret = run_vcpu(v.vm, 3); + ret = run_vcpu(vcpus[3]); TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists"); vm_gic_destroy(&v); /* step2 */ - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); subtest_v3_redist_regions(&v); addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - ret = run_vcpu(v.vm, 3); + ret = run_vcpu(vcpus[3]); TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init"); vm_gic_destroy(&v); /* step 3 */ - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); subtest_v3_redist_regions(&v); - _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy, true); + ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy); TEST_ASSERT(ret && errno == EFAULT, "register a third region allowing to cover the 4 vcpus"); addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - ret = run_vcpu(v.vm, 3); + ret = run_vcpu(vcpus[3]); TEST_ASSERT(!ret, "vcpu run"); vm_gic_destroy(&v); @@ -403,71 +417,77 @@ static void test_v3_typer_accesses(void) { struct vm_gic v; uint64_t addr; - uint32_t val; int ret, i; - v.vm = vm_create_default(0, 0, guest_code); + v.vm = vm_create(NR_VCPUS); + (void)vm_vcpu_add(v.vm, 0, guest_code); - v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3); - vm_vcpu_add_default(v.vm, 3, guest_code); + (void)vm_vcpu_add(v.vm, 3, guest_code); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); - TEST_ASSERT(ret && errno == EINVAL, "attempting to read GICR_TYPER of non created vcpu"); + v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL, + "attempting to read GICR_TYPER of non created vcpu"); - vm_vcpu_add_default(v.vm, 1, guest_code); + (void)vm_vcpu_add(v.vm, 1, guest_code); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); - TEST_ASSERT(ret && errno == EBUSY, "read GICR_TYPER before GIC initialized"); + v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY, + "read GICR_TYPER before GIC initialized"); - vm_vcpu_add_default(v.vm, 2, guest_code); + (void)vm_vcpu_add(v.vm, 2, guest_code); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); for (i = 0; i < NR_VCPUS ; i++) { - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && !val, "read GICR_TYPER before rdist region setting"); + v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100, + "read GICR_TYPER before rdist region setting"); } addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); /* The 2 first rdists should be put there (vcpu 0 and 3) */ - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && !val, "read typer of rdist #0"); - - ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #1"); + v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0"); + v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1"); addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1); - ret = _kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region"); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x100, - "no redist region attached to vcpu #1 yet, last cannot be returned"); - - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x200, - "no redist region attached to vcpu #2, last cannot be returned"); + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, + "no redist region attached to vcpu #1 yet, last cannot be returned"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, + "no redist region attached to vcpu #2, last cannot be returned"); addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); - - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x210, - "read typer of rdist #1, last properly returned"); + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, + "read typer of rdist #1, last properly returned"); vm_gic_destroy(&v); } +static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus, + uint32_t vcpuids[]) +{ + struct vm_gic v; + int i; + + v.vm = vm_create(nr_vcpus); + for (i = 0; i < nr_vcpus; i++) + vm_vcpu_add(v.vm, vcpuids[i], guest_code); + + v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3); + + return v; +} + /** * Test GICR_TYPER last bit with new redist regions * rdist regions #1 and #2 are contiguous @@ -483,45 +503,30 @@ static void test_v3_last_bit_redist_regions(void) uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; uint64_t addr; - uint32_t val; - int ret; - - v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids); - v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true); - - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0"); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1"); - - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x200, "read typer of rdist #2"); - - ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #3"); - - ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #5"); - - ret = access_v3_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x410, "read typer of rdist #4"); + v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0"); + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2"); + v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3"); + v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5"); + v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4"); vm_gic_destroy(&v); } @@ -532,34 +537,21 @@ static void test_v3_last_bit_single_rdist(void) uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 }; struct vm_gic v; uint64_t addr; - uint32_t val; - int ret; - - v.vm = vm_create_default_with_vcpus(6, 0, 0, guest_code, vcpuids); - v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false); + v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); addr = 0x10000; - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); - - ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0"); - - ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x300, "read typer of rdist #1"); - - ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #2"); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); - ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #3"); - - ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false); - TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #3"); + v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0"); + v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1"); + v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2"); + v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3"); + v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #3"); vm_gic_destroy(&v); } @@ -567,30 +559,31 @@ static void test_v3_last_bit_single_rdist(void) /* Uses the legacy REDIST region API. */ static void test_v3_redist_ipa_range_check_at_vcpu_run(void) { + struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; int ret, i; uint64_t addr; - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1); + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus); /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */ addr = max_phys_size - (3 * 2 * 0x10000); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr); addr = 0x00000; - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &addr); /* Add the rest of the VCPUs */ for (i = 1; i < NR_VCPUS; ++i) - vm_vcpu_add_default(v.vm, i, guest_code); + vcpus[i] = vm_vcpu_add(v.vm, i, guest_code); - kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); /* Attempt to run a vcpu without enough redist space. */ - ret = run_vcpu(v.vm, 2); + ret = run_vcpu(vcpus[2]); TEST_ASSERT(ret && errno == EINVAL, "redist base+size above PA range detected on 1st vcpu run"); @@ -599,39 +592,40 @@ static void test_v3_redist_ipa_range_check_at_vcpu_run(void) static void test_v3_its_region(void) { + struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; uint64_t addr; int its_fd, ret; - v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS); - its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS, false); + v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS); addr = 0x401000; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); TEST_ASSERT(ret && errno == EINVAL, "ITS region with misaligned address"); addr = max_phys_size; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); TEST_ASSERT(ret && errno == E2BIG, "register ITS region with base address beyond IPA range"); addr = max_phys_size - 0x10000; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); TEST_ASSERT(ret && errno == E2BIG, "Half of ITS region is beyond IPA range"); /* This one succeeds setting the ITS base */ addr = 0x400000; - kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); + kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); addr = 0x300000; - ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_ITS_ADDR_TYPE, &addr, true); + ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_ITS_ADDR_TYPE, &addr); TEST_ASSERT(ret && errno == EEXIST, "ITS base set again"); close(its_fd); @@ -643,34 +637,33 @@ static void test_v3_its_region(void) */ int test_kvm_device(uint32_t gic_dev_type) { + struct kvm_vcpu *vcpus[NR_VCPUS]; struct vm_gic v; - int ret, fd; uint32_t other; + int ret; - v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL); + v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus); /* try to create a non existing KVM device */ - ret = _kvm_create_device(v.vm, 0, true, &fd); + ret = __kvm_test_create_device(v.vm, 0); TEST_ASSERT(ret && errno == ENODEV, "unsupported device"); /* trial mode */ - ret = _kvm_create_device(v.vm, gic_dev_type, true, &fd); + ret = __kvm_test_create_device(v.vm, gic_dev_type); if (ret) return ret; - v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false); - - ret = _kvm_create_device(v.vm, gic_dev_type, false, &fd); - TEST_ASSERT(ret && errno == EEXIST, "create GIC device twice"); + v.gic_fd = kvm_create_device(v.vm, gic_dev_type); - kvm_create_device(v.vm, gic_dev_type, true); + ret = __kvm_create_device(v.vm, gic_dev_type); + TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice"); /* try to create the other gic_dev_type */ other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3 : KVM_DEV_TYPE_ARM_VGIC_V2; - if (!_kvm_create_device(v.vm, other, true, &fd)) { - ret = _kvm_create_device(v.vm, other, false, &fd); - TEST_ASSERT(ret && errno == EINVAL, + if (!__kvm_test_create_device(v.vm, other)) { + ret = __kvm_test_create_device(v.vm, other); + TEST_ASSERT(ret && (errno == EINVAL || errno == EEXIST), "create GIC device while other version exists"); } @@ -698,6 +691,7 @@ int main(int ac, char **av) { int ret; int pa_bits; + int cnt_impl = 0; pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits; max_phys_size = 1ULL << pa_bits; @@ -706,17 +700,19 @@ int main(int ac, char **av) if (!ret) { pr_info("Running GIC_v3 tests.\n"); run_tests(KVM_DEV_TYPE_ARM_VGIC_V3); - return 0; + cnt_impl++; } ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2); if (!ret) { pr_info("Running GIC_v2 tests.\n"); run_tests(KVM_DEV_TYPE_ARM_VGIC_V2); - return 0; + cnt_impl++; } - print_skip("No GICv2 nor GICv3 support"); - exit(KSFT_SKIP); + if (!cnt_impl) { + print_skip("No GICv2 nor GICv3 support"); + exit(KSFT_SKIP); + } return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 554ca649d470..17417220a083 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -22,7 +22,6 @@ #define GICD_BASE_GPA 0x08000000ULL #define GICR_BASE_GPA 0x080A0000ULL -#define VCPU_ID 0 /* * Stores the user specified args; it's passed to the guest and to every test @@ -589,7 +588,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm, } static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid, - uint32_t vcpu, bool expect_failure) + struct kvm_vcpu *vcpu, + bool expect_failure) { /* * Ignore this when expecting failure as invalid intids will lead to @@ -630,8 +630,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, - "eventfd failed, errno: %i\n", errno); + TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { @@ -647,7 +646,7 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, val = 1; ret = write(fd[f], &val, sizeof(uint64_t)); TEST_ASSERT(ret == sizeof(uint64_t), - "Write to KVM_IRQFD failed with ret: %d\n", ret); + __KVM_SYSCALL_ERROR("write()", ret)); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) @@ -660,15 +659,16 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, (tmp) < (uint64_t)(first) + (uint64_t)(num); \ (tmp)++, (i)++) -static void run_guest_cmd(struct kvm_vm *vm, int gic_fd, - struct kvm_inject_args *inject_args, - struct test_args *test_args) +static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd, + struct kvm_inject_args *inject_args, + struct test_args *test_args) { kvm_inject_cmd cmd = inject_args->cmd; uint32_t intid = inject_args->first_intid; uint32_t num = inject_args->num; int level = inject_args->level; bool expect_failure = inject_args->expect_failure; + struct kvm_vm *vm = vcpu->vm; uint64_t tmp; uint32_t i; @@ -706,12 +706,12 @@ static void run_guest_cmd(struct kvm_vm *vm, int gic_fd, break; case KVM_WRITE_ISPENDR: for (i = intid; i < intid + num; i++) - kvm_irq_write_ispendr_check(gic_fd, i, - VCPU_ID, expect_failure); + kvm_irq_write_ispendr_check(gic_fd, i, vcpu, + expect_failure); break; case KVM_WRITE_ISACTIVER: for (i = intid; i < intid + num; i++) - kvm_irq_write_isactiver(gic_fd, i, VCPU_ID); + kvm_irq_write_isactiver(gic_fd, i, vcpu); break; default: break; @@ -740,6 +740,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) { struct ucall uc; int gic_fd; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_inject_args inject_args; vm_vaddr_t args_gva; @@ -754,39 +755,34 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) print_args(&args); - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); ucall_init(vm, NULL); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); /* Setup the guest args page (so it gets the args). */ args_gva = vm_vaddr_alloc_page(vm); memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args)); - vcpu_args_set(vm, 0, 1, args_gva); + vcpu_args_set(vcpu, 1, args_gva); gic_fd = vgic_v3_setup(vm, 1, nr_irqs, GICD_BASE_GPA, GICR_BASE_GPA); - if (gic_fd < 0) { - print_skip("Failed to create vgic-v3, skipping"); - exit(KSFT_SKIP); - } + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping"); vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handlers[args.eoi_split][args.level_sensitive]); while (1) { - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: kvm_inject_get_call(vm, &uc, &inject_args); - run_guest_cmd(vm, gic_fd, &inject_args, &args); + run_guest_cmd(vcpu, gic_fd, &inject_args, &args); break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld\n\tvalues: %#lx, %#lx", - (const char *)uc.args[0], - __FILE__, uc.args[1], uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index d8909032317a..1c2749b1481a 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -74,7 +74,7 @@ struct test_params { uint64_t vcpu_memory_bytes; /* The number of vCPUs to create in the VM. */ - int vcpus; + int nr_vcpus; }; static uint64_t pread_uint64(int fd, const char *filename, uint64_t index) @@ -104,10 +104,7 @@ static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva) return 0; pfn = entry & PAGEMAP_PFN_MASK; - if (!pfn) { - print_skip("Looking up PFNs requires CAP_SYS_ADMIN"); - exit(KSFT_SKIP); - } + __TEST_REQUIRE(pfn, "Looking up PFNs requires CAP_SYS_ADMIN"); return pfn; } @@ -127,10 +124,12 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn) "Set page_idle bits for PFN 0x%" PRIx64, pfn); } -static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) +static void mark_vcpu_memory_idle(struct kvm_vm *vm, + struct perf_test_vcpu_args *vcpu_args) { - uint64_t base_gva = perf_test_args.vcpu_args[vcpu_id].gva; - uint64_t pages = perf_test_args.vcpu_args[vcpu_id].pages; + int vcpu_idx = vcpu_args->vcpu_idx; + uint64_t base_gva = vcpu_args->gva; + uint64_t pages = vcpu_args->pages; uint64_t page; uint64_t still_idle = 0; uint64_t no_pfn = 0; @@ -138,7 +137,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) int pagemap_fd; /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */ - if (overlap_memory_access && vcpu_id) + if (overlap_memory_access && vcpu_idx) return; page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); @@ -170,7 +169,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) */ TEST_ASSERT(no_pfn < pages / 100, "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.", - vcpu_id, no_pfn, pages); + vcpu_idx, no_pfn, pages); /* * Test that at least 90% of memory has been marked idle (the rest might @@ -183,17 +182,16 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm, int vcpu_id) TEST_ASSERT(still_idle < pages / 10, "vCPU%d: Too many pages still idle (%"PRIu64 " out of %" PRIu64 ").\n", - vcpu_id, still_idle, pages); + vcpu_idx, still_idle, pages); close(page_idle_fd); close(pagemap_fd); } -static void assert_ucall(struct kvm_vm *vm, uint32_t vcpu_id, - uint64_t expected_ucall) +static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall) { struct ucall uc; - uint64_t actual_ucall = get_ucall(vm, vcpu_id, &uc); + uint64_t actual_ucall = get_ucall(vcpu, &uc); TEST_ASSERT(expected_ucall == actual_ucall, "Guest exited unexpectedly (expected ucall %" PRIu64 @@ -217,28 +215,29 @@ static bool spin_wait_for_next_iteration(int *current_iteration) static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args) { + struct kvm_vcpu *vcpu = vcpu_args->vcpu; struct kvm_vm *vm = perf_test_args.vm; - int vcpu_id = vcpu_args->vcpu_id; + int vcpu_idx = vcpu_args->vcpu_idx; int current_iteration = 0; while (spin_wait_for_next_iteration(¤t_iteration)) { switch (READ_ONCE(iteration_work)) { case ITERATION_ACCESS_MEMORY: - vcpu_run(vm, vcpu_id); - assert_ucall(vm, vcpu_id, UCALL_SYNC); + vcpu_run(vcpu); + assert_ucall(vcpu, UCALL_SYNC); break; case ITERATION_MARK_IDLE: - mark_vcpu_memory_idle(vm, vcpu_id); + mark_vcpu_memory_idle(vm, vcpu_args); break; }; - vcpu_last_completed_iteration[vcpu_id] = current_iteration; + vcpu_last_completed_iteration[vcpu_idx] = current_iteration; } } -static void spin_wait_for_vcpu(int vcpu_id, int target_iteration) +static void spin_wait_for_vcpu(int vcpu_idx, int target_iteration) { - while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_idx]) != target_iteration) { continue; } @@ -250,12 +249,11 @@ enum access_type { ACCESS_WRITE, }; -static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) +static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *description) { struct timespec ts_start; struct timespec ts_elapsed; - int next_iteration; - int vcpu_id; + int next_iteration, i; /* Kick off the vCPUs by incrementing iteration. */ next_iteration = ++iteration; @@ -263,23 +261,23 @@ static void run_iteration(struct kvm_vm *vm, int vcpus, const char *description) clock_gettime(CLOCK_MONOTONIC, &ts_start); /* Wait for all vCPUs to finish the iteration. */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) - spin_wait_for_vcpu(vcpu_id, next_iteration); + for (i = 0; i < nr_vcpus; i++) + spin_wait_for_vcpu(i, next_iteration); ts_elapsed = timespec_elapsed(ts_start); pr_info("%-30s: %ld.%09lds\n", description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec); } -static void access_memory(struct kvm_vm *vm, int vcpus, enum access_type access, - const char *description) +static void access_memory(struct kvm_vm *vm, int nr_vcpus, + enum access_type access, const char *description) { perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1); iteration_work = ITERATION_ACCESS_MEMORY; - run_iteration(vm, vcpus, description); + run_iteration(vm, nr_vcpus, description); } -static void mark_memory_idle(struct kvm_vm *vm, int vcpus) +static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus) { /* * Even though this parallelizes the work across vCPUs, this is still a @@ -289,37 +287,37 @@ static void mark_memory_idle(struct kvm_vm *vm, int vcpus) */ pr_debug("Marking VM memory idle (slow)...\n"); iteration_work = ITERATION_MARK_IDLE; - run_iteration(vm, vcpus, "Mark memory idle"); + run_iteration(vm, nr_vcpus, "Mark memory idle"); } static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *params = arg; struct kvm_vm *vm; - int vcpus = params->vcpus; + int nr_vcpus = params->nr_vcpus; - vm = perf_test_create_vm(mode, vcpus, params->vcpu_memory_bytes, 1, + vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1, params->backing_src, !overlap_memory_access); - perf_test_start_vcpu_threads(vcpus, vcpu_thread_main); + perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main); pr_info("\n"); - access_memory(vm, vcpus, ACCESS_WRITE, "Populating memory"); + access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory"); /* As a control, read and write to the populated memory first. */ - access_memory(vm, vcpus, ACCESS_WRITE, "Writing to populated memory"); - access_memory(vm, vcpus, ACCESS_READ, "Reading from populated memory"); + access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory"); + access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory"); /* Repeat on memory that has been marked as idle. */ - mark_memory_idle(vm, vcpus); - access_memory(vm, vcpus, ACCESS_WRITE, "Writing to idle memory"); - mark_memory_idle(vm, vcpus); - access_memory(vm, vcpus, ACCESS_READ, "Reading from idle memory"); + mark_memory_idle(vm, nr_vcpus); + access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to idle memory"); + mark_memory_idle(vm, nr_vcpus); + access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory"); /* Set done to signal the vCPU threads to exit */ done = true; - perf_test_join_vcpu_threads(vcpus); + perf_test_join_vcpu_threads(nr_vcpus); perf_test_destroy_vm(vm); } @@ -347,7 +345,7 @@ int main(int argc, char *argv[]) struct test_params params = { .backing_src = DEFAULT_VM_MEM_SRC, .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, - .vcpus = 1, + .nr_vcpus = 1, }; int page_idle_fd; int opt; @@ -363,7 +361,7 @@ int main(int argc, char *argv[]) params.vcpu_memory_bytes = parse_size(optarg); break; case 'v': - params.vcpus = atoi(optarg); + params.nr_vcpus = atoi(optarg); break; case 'o': overlap_memory_access = true; @@ -379,10 +377,8 @@ int main(int argc, char *argv[]) } page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - if (page_idle_fd < 0) { - print_skip("CONFIG_IDLE_PAGE_TRACKING is not enabled"); - exit(KSFT_SKIP); - } + __TEST_REQUIRE(page_idle_fd >= 0, + "CONFIG_IDLE_PAGE_TRACKING is not enabled"); close(page_idle_fd); for_each_guest_mode(run_test, ¶ms); diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 6a719d065599..779ae54f89c4 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -44,28 +44,26 @@ static char *guest_data_prototype; static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { - int ret; - int vcpu_id = vcpu_args->vcpu_id; - struct kvm_vm *vm = perf_test_args.vm; - struct kvm_run *run; + struct kvm_vcpu *vcpu = vcpu_args->vcpu; + int vcpu_idx = vcpu_args->vcpu_idx; + struct kvm_run *run = vcpu->run; struct timespec start; struct timespec ts_diff; - - run = vcpu_state(vm, vcpu_id); + int ret; clock_gettime(CLOCK_MONOTONIC, &start); /* Let the guest access its memory */ - ret = _vcpu_run(vm, vcpu_id); + ret = _vcpu_run(vcpu); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) { + if (get_ucall(vcpu, NULL) != UCALL_SYNC) { TEST_ASSERT(false, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); } ts_diff = timespec_elapsed(start); - PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, + PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx, ts_diff.tv_sec, ts_diff.tv_nsec); } @@ -223,6 +221,7 @@ static void setup_demand_paging(struct kvm_vm *vm, struct uffdio_api uffdio_api; struct uffdio_register uffdio_register; uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY; + int ret; PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n", is_minor ? "MINOR" : "MISSING", @@ -242,19 +241,18 @@ static void setup_demand_paging(struct kvm_vm *vm, } uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); - TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno); + TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd)); uffdio_api.api = UFFD_API; uffdio_api.features = 0; - TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1, - "ioctl UFFDIO_API failed: %" PRIu64, - (uint64_t)uffdio_api.api); + ret = ioctl(uffd, UFFDIO_API, &uffdio_api); + TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret)); uffdio_register.range.start = (uint64_t)hva; uffdio_register.range.len = len; uffdio_register.mode = uffd_mode; - TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1, - "ioctl UFFDIO_REGISTER failed"); + ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register); + TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret)); TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) == expected_ioctls, "missing userfaultfd ioctls"); @@ -285,8 +283,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) struct timespec ts_diff; int *pipefds = NULL; struct kvm_vm *vm; - int vcpu_id; - int r; + int r, i; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, p->src_type, p->partition_vcpu_memory_access); @@ -309,12 +306,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) pipefds = malloc(sizeof(int) * nr_vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + for (i = 0; i < nr_vcpus; i++) { struct perf_test_vcpu_args *vcpu_args; void *vcpu_hva; void *vcpu_alias; - vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + vcpu_args = &perf_test_args.vcpu_args[i]; /* Cache the host addresses of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); @@ -324,13 +321,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) * Set up user fault fd to handle demand paging * requests. */ - r = pipe2(&pipefds[vcpu_id * 2], + r = pipe2(&pipefds[i * 2], O_CLOEXEC | O_NONBLOCK); TEST_ASSERT(!r, "Failed to set up pipefd"); - setup_demand_paging(vm, &uffd_handler_threads[vcpu_id], - pipefds[vcpu_id * 2], p->uffd_mode, - p->uffd_delay, &uffd_args[vcpu_id], + setup_demand_paging(vm, &uffd_handler_threads[i], + pipefds[i * 2], p->uffd_mode, + p->uffd_delay, &uffd_args[i], vcpu_hva, vcpu_alias, vcpu_args->pages * perf_test_args.guest_page_size); } @@ -350,11 +347,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) char c; /* Tell the user fault fd handler threads to quit */ - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - r = write(pipefds[vcpu_id * 2 + 1], &c, 1); + for (i = 0; i < nr_vcpus; i++) { + r = write(pipefds[i * 2 + 1], &c, 1); TEST_ASSERT(r == 1, "Unable to write to pipefd"); - pthread_join(uffd_handler_threads[vcpu_id], NULL); + pthread_join(uffd_handler_threads[i], NULL); } } diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index d60a34cdfaee..f99e39a672d3 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -59,6 +59,7 @@ static void arch_cleanup_vm(struct kvm_vm *vm) static int nr_vcpus = 1; static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; +static bool run_vcpus_while_disabling_dirty_logging; /* Host variables */ static u64 dirty_log_manual_caps; @@ -68,54 +69,59 @@ static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { - int ret; - struct kvm_vm *vm = perf_test_args.vm; + struct kvm_vcpu *vcpu = vcpu_args->vcpu; + int vcpu_idx = vcpu_args->vcpu_idx; uint64_t pages_count = 0; struct kvm_run *run; struct timespec start; struct timespec ts_diff; struct timespec total = (struct timespec){0}; struct timespec avg; - int vcpu_id = vcpu_args->vcpu_id; + int ret; - run = vcpu_state(vm, vcpu_id); + run = vcpu->run; while (!READ_ONCE(host_quit)) { int current_iteration = READ_ONCE(iteration); clock_gettime(CLOCK_MONOTONIC, &start); - ret = _vcpu_run(vm, vcpu_id); + ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); - pr_debug("Got sync event from vCPU %d\n", vcpu_id); - vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("Got sync event from vCPU %d\n", vcpu_idx); + vcpu_last_completed_iteration[vcpu_idx] = current_iteration; pr_debug("vCPU %d updated last completed iteration to %d\n", - vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]); if (current_iteration) { pages_count += vcpu_args->pages; total = timespec_add(total, ts_diff); pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n", - vcpu_id, current_iteration, ts_diff.tv_sec, + vcpu_idx, current_iteration, ts_diff.tv_sec, ts_diff.tv_nsec); } else { pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n", - vcpu_id, current_iteration, ts_diff.tv_sec, + vcpu_idx, current_iteration, ts_diff.tv_sec, ts_diff.tv_nsec); } + /* + * Keep running the guest while dirty logging is being disabled + * (iteration is negative) so that vCPUs are accessing memory + * for the entire duration of zapping collapsible SPTEs. + */ while (current_iteration == READ_ONCE(iteration) && - !READ_ONCE(host_quit)) {} + READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {} } - avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]); pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", - vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx], total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); } @@ -207,14 +213,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) uint64_t guest_num_pages; uint64_t host_num_pages; uint64_t pages_per_slot; - int vcpu_id; struct timespec start; struct timespec ts_diff; struct timespec get_dirty_log_total = (struct timespec){0}; struct timespec vcpu_dirty_total = (struct timespec){0}; struct timespec avg; - struct kvm_enable_cap cap = {}; struct timespec clear_dirty_log_total = (struct timespec){0}; + int i; vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, p->slots, p->backing_src, @@ -222,18 +227,16 @@ static void run_test(enum vm_guest_mode mode, void *arg) perf_test_set_wr_fract(vm, p->wr_fract); - guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages); pages_per_slot = host_num_pages / p->slots; bitmaps = alloc_bitmaps(p->slots, pages_per_slot); - if (dirty_log_manual_caps) { - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); - } + if (dirty_log_manual_caps) + vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, + dirty_log_manual_caps); arch_setup_vm(vm, nr_vcpus); @@ -242,15 +245,15 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_quit = false; clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - vcpu_last_completed_iteration[vcpu_id] = -1; + for (i = 0; i < nr_vcpus; i++) + vcpu_last_completed_iteration[i] = -1; perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker); /* Allow the vCPUs to populate memory */ pr_debug("Starting iteration %d - Populating\n", iteration); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != + for (i = 0; i < nr_vcpus; i++) { + while (READ_ONCE(vcpu_last_completed_iteration[i]) != iteration) ; } @@ -275,8 +278,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration++; pr_debug("Starting iteration %d\n", iteration); - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) + for (i = 0; i < nr_vcpus; i++) { + while (READ_ONCE(vcpu_last_completed_iteration[i]) != iteration) ; } @@ -305,6 +308,14 @@ static void run_test(enum vm_guest_mode mode, void *arg) } } + /* + * Run vCPUs while dirty logging is being disabled to stress disabling + * in terms of both performance and correctness. Opt-in via command + * line as this significantly increases time to disable dirty logging. + */ + if (run_vcpus_while_disabling_dirty_logging) + WRITE_ONCE(iteration, -1); + /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); disable_dirty_logging(vm, p->slots); @@ -312,7 +323,11 @@ static void run_test(enum vm_guest_mode mode, void *arg) pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); - /* Tell the vcpu thread to quit */ + /* + * Tell the vCPU threads to quit. No need to manually check that vCPUs + * have stopped running after disabling dirty logging, the join will + * wait for them to exit. + */ host_quit = true; perf_test_join_vcpu_threads(nr_vcpus); @@ -352,6 +367,9 @@ static void help(char *name) " Warning: a low offset can conflict with the loaded test code.\n"); guest_modes_help(); printf(" -n: Run the vCPUs in nested mode (L2)\n"); + printf(" -e: Run vCPUs while dirty logging is being disabled. This\n" + " can significantly increase runtime, especially if there\n" + " isn't a dedicated pCPU for the main thread.\n"); printf(" -b: specify the size of the memory region which should be\n" " dirtied by each vCPU. e.g. 10M or 3G.\n" " (default: 1G)\n"); @@ -388,8 +406,11 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) { + while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) { switch (opt) { + case 'e': + /* 'e' is for evil. */ + run_vcpus_while_disabling_dirty_logging = true; case 'g': dirty_log_manual_caps = 0; break; diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 3fcd89e195c7..9c883c94d478 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -23,8 +23,6 @@ #include "guest_modes.h" #include "processor.h" -#define VCPU_ID 1 - /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 @@ -212,34 +210,31 @@ static void sem_wait_until(sem_t *sem) static bool clear_log_supported(void) { - return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); } static void clear_log_create_vm_done(struct kvm_vm *vm) { - struct kvm_enable_cap cap = {}; u64 manual_caps; manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!"); manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | KVM_DIRTY_LOG_INITIALLY_SET); - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = manual_caps; - vm_enable_cap(vm, &cap); + vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, manual_caps); } -static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot, +static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages) { - kvm_vm_get_dirty_log(vm, slot, bitmap); + kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); } -static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, +static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages) { - kvm_vm_get_dirty_log(vm, slot, bitmap); - kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); + kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap); + kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages); } /* Should only be called after a GUEST_SYNC */ @@ -253,14 +248,14 @@ static void vcpu_handle_sync_stop(void) } } -static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) +static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR), "vcpu run failed: errno=%d", err); - TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, + TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", exit_reason_str(run->exit_reason)); @@ -269,7 +264,7 @@ static void default_after_vcpu_run(struct kvm_vm *vm, int ret, int err) static bool dirty_ring_supported(void) { - return kvm_check_cap(KVM_CAP_DIRTY_LOG_RING); + return kvm_has_cap(KVM_CAP_DIRTY_LOG_RING); } static void dirty_ring_create_vm_done(struct kvm_vm *vm) @@ -331,7 +326,7 @@ static void dirty_ring_continue_vcpu(void) sem_post(&sem_vcpu_cont); } -static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, +static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages) { /* We only have one vcpu */ @@ -351,10 +346,10 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, } /* Only have one vcpu */ - count = dirty_ring_collect_one(vcpu_map_dirty_ring(vm, VCPU_ID), + count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu), slot, bitmap, num_pages, &fetch_index); - cleared = kvm_vm_reset_dirty_ring(vm); + cleared = kvm_vm_reset_dirty_ring(vcpu->vm); /* Cleared pages should be the same as collected */ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " @@ -369,12 +364,12 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vm *vm, int slot, pr_info("Iteration %ld collected %u pages\n", iteration, count); } -static void dirty_ring_after_vcpu_run(struct kvm_vm *vm, int ret, int err) +static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; /* A ucall-sync or ring-full event is allowed */ - if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) { + if (get_ucall(vcpu, NULL) == UCALL_SYNC) { /* We should allow this to continue */ ; } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL || @@ -408,10 +403,10 @@ struct log_mode { /* Hook when the vm creation is done (before vcpu creation) */ void (*create_vm_done)(struct kvm_vm *vm); /* Hook to collect the dirty pages into the bitmap provided */ - void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, + void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages); /* Hook to call when after each vcpu run */ - void (*after_vcpu_run)(struct kvm_vm *vm, int ret, int err); + void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err); void (*before_vcpu_join) (void); } log_modes[LOG_MODE_NUM] = { { @@ -473,22 +468,22 @@ static void log_mode_create_vm_done(struct kvm_vm *vm) mode->create_vm_done(vm); } -static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, +static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot, void *bitmap, uint32_t num_pages) { struct log_mode *mode = &log_modes[host_log_mode]; TEST_ASSERT(mode->collect_dirty_pages != NULL, "collect_dirty_pages() is required for any log mode!"); - mode->collect_dirty_pages(vm, slot, bitmap, num_pages); + mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages); } -static void log_mode_after_vcpu_run(struct kvm_vm *vm, int ret, int err) +static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err) { struct log_mode *mode = &log_modes[host_log_mode]; if (mode->after_vcpu_run) - mode->after_vcpu_run(vm, ret, err); + mode->after_vcpu_run(vcpu, ret, err); } static void log_mode_before_vcpu_join(void) @@ -509,16 +504,15 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size) static void *vcpu_worker(void *data) { - int ret, vcpu_fd; - struct kvm_vm *vm = data; + int ret; + struct kvm_vcpu *vcpu = data; + struct kvm_vm *vm = vcpu->vm; uint64_t *guest_array; uint64_t pages_count = 0; struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset) + sizeof(sigset_t)); sigset_t *sigset = (sigset_t *) &sigmask->sigset; - vcpu_fd = vcpu_get_fd(vm, VCPU_ID); - /* * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the * ioctl to return with -EINTR, but it is still pending and we need @@ -527,7 +521,7 @@ static void *vcpu_worker(void *data) sigmask->len = 8; pthread_sigmask(0, NULL, sigset); sigdelset(sigset, SIG_IPI); - vcpu_ioctl(vm, VCPU_ID, KVM_SET_SIGNAL_MASK, sigmask); + vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask); sigemptyset(sigset); sigaddset(sigset, SIG_IPI); @@ -539,13 +533,13 @@ static void *vcpu_worker(void *data) generate_random_array(guest_array, TEST_PAGES_PER_LOOP); pages_count += TEST_PAGES_PER_LOOP; /* Let the guest dirty the random pages */ - ret = ioctl(vcpu_fd, KVM_RUN, NULL); + ret = __vcpu_run(vcpu); if (ret == -1 && errno == EINTR) { int sig = -1; sigwait(sigset, &sig); assert(sig == SIG_IPI); } - log_mode_after_vcpu_run(vm, ret, errno); + log_mode_after_vcpu_run(vcpu, ret, errno); } pr_info("Dirtied %"PRIu64" pages\n", pages_count); @@ -671,21 +665,17 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) } } -static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, +static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu, uint64_t extra_mem_pages, void *guest_code) { struct kvm_vm *vm; - uint64_t extra_pg_pages = extra_mem_pages / 512 * 2; pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif + vm = __vm_create(mode, 1, extra_mem_pages); + log_mode_create_vm_done(vm); - vm_vcpu_add_default(vm, vcpuid, guest_code); + *vcpu = vm_vcpu_add(vm, 0, guest_code); return vm; } @@ -701,6 +691,7 @@ struct test_params { static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; unsigned long *bmap; @@ -718,25 +709,23 @@ static void run_test(enum vm_guest_mode mode, void *arg) * (e.g., 64K page size guest will need even less memory for * page tables). */ - vm = create_vm(mode, VCPU_ID, - 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), - guest_code); + vm = create_vm(mode, &vcpu, + 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code); - guest_page_size = vm_get_page_size(vm); + guest_page_size = vm->page_size; /* * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages. */ - guest_num_pages = (1ul << (DIRTY_MEM_BITS - - vm_get_page_shift(vm))) + 3; + guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3; guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_page_size = getpagesize(); host_num_pages = vm_num_host_pages(mode, guest_num_pages); if (!p->phys_offset) { - guest_test_phys_mem = (vm_get_max_gfn(vm) - - guest_num_pages) * guest_page_size; + guest_test_phys_mem = (vm->max_gfn - guest_num_pages) * + guest_page_size; guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size); } else { guest_test_phys_mem = p->phys_offset; @@ -781,12 +770,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_clear_count = 0; host_track_next_count = 0; - pthread_create(&vcpu_thread, NULL, vcpu_worker, vm); + pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu); while (iteration < p->iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(p->interval * 1000); - log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, + log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX, bmap, host_num_pages); /* diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index b21c69a56daa..f5d59b9934f1 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -27,12 +27,6 @@ sem_t *sem; -/* Arguments for the pthreads */ -struct payload { - struct kvm_vm *vm; - uint32_t index; -}; - static void guest_code(void) { for (;;) @@ -42,14 +36,14 @@ static void guest_code(void) static void *run_vcpu(void *arg) { - struct payload *payload = (struct payload *)arg; - struct kvm_run *state = vcpu_state(payload->vm, payload->index); + struct kvm_vcpu *vcpu = arg; + struct kvm_run *run = vcpu->run; - vcpu_run(payload->vm, payload->index); + vcpu_run(vcpu); TEST_ASSERT(false, "%s: exited with reason %d: %s\n", - __func__, state->exit_reason, - exit_reason_str(state->exit_reason)); + __func__, run->exit_reason, + exit_reason_str(run->exit_reason)); pthread_exit(NULL); } @@ -92,11 +86,11 @@ static inline void check_join(pthread_t thread, void **retval) static void run_test(uint32_t run) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; cpu_set_t cpu_set; pthread_t threads[VCPU_NUM]; pthread_t throw_away; - struct payload payloads[VCPU_NUM]; void *b; uint32_t i, j; @@ -104,18 +98,13 @@ static void run_test(uint32_t run) for (i = 0; i < VCPU_NUM; i++) CPU_SET(i, &cpu_set); - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name); - vm_create_irqchip(vm); + vm = vm_create(VCPU_NUM); pr_debug("%s: [%d] start vcpus\n", __func__, run); for (i = 0; i < VCPU_NUM; ++i) { - vm_vcpu_add_default(vm, i, guest_code); - payloads[i].vm = vm; - payloads[i].index = i; + vcpu = vm_vcpu_add(vm, i, guest_code); - check_create_thread(&threads[i], NULL, run_vcpu, - (void *)&payloads[i]); + check_create_thread(&threads[i], NULL, run_vcpu, vcpu); check_set_affinity(threads[i], &cpu_set); for (j = 0; j < SLEEPING_THREAD_NUM; ++j) { diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 59ece9d4e0d1..a8124f9dd68a 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -19,7 +19,7 @@ /* * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert * SYS_* register definitions in asm/sysreg.h to use in KVM - * calls such as get_reg() and set_reg(). + * calls such as vcpu_get_reg() and vcpu_set_reg(). */ #define KVM_ARM64_SYS_REG(sys_reg_id) \ ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \ @@ -47,25 +47,9 @@ #define MPIDR_HWID_BITMASK (0xff00fffffful) -static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr) -{ - struct kvm_one_reg reg; - reg.id = id; - reg.addr = (uint64_t)addr; - vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, ®); -} - -static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val) -{ - struct kvm_one_reg reg; - reg.id = id; - reg.addr = (uint64_t)&val; - vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, ®); -} - -void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init); -void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_init *init, void *guest_code); +void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_vcpu_init *init, void *guest_code); struct ex_regs { u64 regs[31]; @@ -117,7 +101,7 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, bool *ps4k, bool *ps16k, bool *ps64k); void vm_init_descriptor_tables(struct kvm_vm *vm); -void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); typedef void(*handler_fn)(struct ex_regs *); void vm_install_exception_handler(struct kvm_vm *vm, @@ -207,4 +191,6 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res); +uint32_t guest_get_vcpuid(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h index 4442081221a0..0ac6f05c63f9 100644 --- a/tools/testing/selftests/kvm/include/aarch64/vgic.h +++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h @@ -8,6 +8,8 @@ #include <linux/kvm.h> +#include "kvm_util.h" + #define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \ (((uint64_t)(count) << 52) | \ ((uint64_t)((base) >> 16) << 16) | \ @@ -26,8 +28,8 @@ void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level); /* The vcpu arg only applies to private interrupts. */ -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu); -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu); +void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); +void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu); #define KVM_IRQCHIP_NUM_PINS (1020 - 32) diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index 92cef0ffb19e..24fde97f6121 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -9,9 +9,14 @@ #include "test_util.h" -#include "asm/kvm.h" +#include <linux/compiler.h> +#include "linux/hashtable.h" #include "linux/list.h" -#include "linux/kvm.h" +#include <linux/kernel.h> +#include <linux/kvm.h> +#include "linux/rbtree.h" + + #include <sys/ioctl.h> #include "sparsebit.h" @@ -21,20 +26,88 @@ #define NSEC_PER_SEC 1000000000L -/* - * Callers of kvm_util only have an incomplete/opaque description of the - * structure kvm_util is using to maintain the state of a VM. - */ -struct kvm_vm; - typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ +struct userspace_mem_region { + struct kvm_userspace_memory_region region; + struct sparsebit *unused_phy_pages; + int fd; + off_t offset; + void *host_mem; + void *host_alias; + void *mmap_start; + void *mmap_alias; + size_t mmap_size; + struct rb_node gpa_node; + struct rb_node hva_node; + struct hlist_node slot_node; +}; + +struct kvm_vcpu { + struct list_head list; + uint32_t id; + int fd; + struct kvm_vm *vm; + struct kvm_run *run; +#ifdef __x86_64__ + struct kvm_cpuid2 *cpuid; +#endif + struct kvm_dirty_gfn *dirty_gfns; + uint32_t fetch_index; + uint32_t dirty_gfns_count; +}; + +struct userspace_mem_regions { + struct rb_root gpa_tree; + struct rb_root hva_tree; + DECLARE_HASHTABLE(slot_hash, 9); +}; + +struct kvm_vm { + int mode; + unsigned long type; + int kvm_fd; + int fd; + unsigned int pgtable_levels; + unsigned int page_size; + unsigned int page_shift; + unsigned int pa_bits; + unsigned int va_bits; + uint64_t max_gfn; + struct list_head vcpus; + struct userspace_mem_regions regions; + struct sparsebit *vpages_valid; + struct sparsebit *vpages_mapped; + bool has_irqchip; + bool pgd_created; + vm_paddr_t pgd; + vm_vaddr_t gdt; + vm_vaddr_t tss; + vm_vaddr_t idt; + vm_vaddr_t handlers; + uint32_t dirty_ring_size; + + /* Cache of information for binary stats interface */ + int stats_fd; + struct kvm_stats_header stats_header; + struct kvm_stats_desc *stats_desc; +}; + + +#define kvm_for_each_vcpu(vm, i, vcpu) \ + for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ + if (!((vcpu) = vm->vcpus[i])) \ + continue; \ + else + +struct userspace_mem_region * +memslot2region(struct kvm_vm *vm, uint32_t memslot); + /* Minimum allocated guest virtual and physical addresses */ #define KVM_UTIL_MIN_VADDR 0x2000 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 -#define DEFAULT_GUEST_PHY_PAGES 512 #define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 #define DEFAULT_STACK_PGS 5 @@ -102,49 +175,194 @@ extern const struct vm_guest_mode_params vm_guest_mode_params[]; int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); -int kvm_check_cap(long cap); -int vm_check_cap(struct kvm_vm *vm, long cap); -int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); -int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, - struct kvm_enable_cap *cap); +unsigned int kvm_check_cap(long cap); + +static inline bool kvm_has_cap(long cap) +{ + return kvm_check_cap(cap); +} + +#define __KVM_SYSCALL_ERROR(_name, _ret) \ + "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno) + +#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret) +#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret) + +#define kvm_do_ioctl(fd, cmd, arg) \ +({ \ + static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), ""); \ + ioctl(fd, cmd, arg); \ +}) + +#define __kvm_ioctl(kvm_fd, cmd, arg) \ + kvm_do_ioctl(kvm_fd, cmd, arg) + + +#define _kvm_ioctl(kvm_fd, cmd, name, arg) \ +({ \ + int ret = __kvm_ioctl(kvm_fd, cmd, arg); \ + \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ +}) + +#define kvm_ioctl(kvm_fd, cmd, arg) \ + _kvm_ioctl(kvm_fd, cmd, #cmd, arg) + +static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { } + +#define __vm_ioctl(vm, cmd, arg) \ +({ \ + static_assert_is_vm(vm); \ + kvm_do_ioctl((vm)->fd, cmd, arg); \ +}) + +#define _vm_ioctl(vm, cmd, name, arg) \ +({ \ + int ret = __vm_ioctl(vm, cmd, arg); \ + \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ +}) + +#define vm_ioctl(vm, cmd, arg) \ + _vm_ioctl(vm, cmd, #cmd, arg) + + +static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { } + +#define __vcpu_ioctl(vcpu, cmd, arg) \ +({ \ + static_assert_is_vcpu(vcpu); \ + kvm_do_ioctl((vcpu)->fd, cmd, arg); \ +}) + +#define _vcpu_ioctl(vcpu, cmd, name, arg) \ +({ \ + int ret = __vcpu_ioctl(vcpu, cmd, arg); \ + \ + TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(name, ret)); \ +}) + +#define vcpu_ioctl(vcpu, cmd, arg) \ + _vcpu_ioctl(vcpu, cmd, #cmd, arg) + +/* + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +static inline int vm_check_cap(struct kvm_vm *vm, long cap) +{ + int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap); + + TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); + return ret; +} + +static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} +static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap); +} + void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); const char *vm_guest_mode_string(uint32_t i); -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); -void kvm_vm_restart(struct kvm_vm *vmp, int perm); +void kvm_vm_restart(struct kvm_vm *vmp); void kvm_vm_release(struct kvm_vm *vmp); -void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log); -void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages); -uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm); - int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); - void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); int kvm_memfd_alloc(size_t size, bool hugepages); void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); -/* - * VM VCPU Dump - * - * Input Args: - * stream - Output FILE stream - * vm - Virtual Machine - * vcpuid - VCPU ID - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps the current state of the VCPU specified by @vcpuid, within the VM - * given by @vm, to the FILE stream given by @stream. - */ -void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, - uint8_t indent); +static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) +{ + struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; + + vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args); +} + +static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, + uint64_t first_page, uint32_t num_pages) +{ + struct kvm_clear_dirty_log args = { + .dirty_bitmap = log, + .slot = slot, + .first_page = first_page, + .num_pages = num_pages + }; + + vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args); +} + +static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) +{ + return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); +} + +static inline int vm_get_stats_fd(struct kvm_vm *vm) +{ + int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); + return fd; +} + +static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) +{ + ssize_t ret; + + ret = read(stats_fd, header, sizeof(*header)); + TEST_ASSERT(ret == sizeof(*header), "Read stats header"); +} + +struct kvm_stats_desc *read_stats_descriptors(int stats_fd, + struct kvm_stats_header *header); + +static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header) +{ + /* + * The base size of the descriptor is defined by KVM's ABI, but the + * size of the name field is variable, as far as KVM's ABI is + * concerned. For a given instance of KVM, the name field is the same + * size for all stats and is provided in the overall stats header. + */ + return sizeof(struct kvm_stats_desc) + header->name_size; +} + +static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats, + int index, + struct kvm_stats_header *header) +{ + /* + * Note, size_desc includes the size of the name field, which is + * variable. i.e. this is NOT equivalent to &stats_desc[i]. + */ + return (void *)stats + index * get_stats_descriptor_size(header); +} + +void read_stat_data(int stats_fd, struct kvm_stats_header *header, + struct kvm_stats_desc *desc, uint64_t *data, + size_t max_elements); + +void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, + size_t max_elements); + +static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name) +{ + uint64_t data; + + __vm_get_stat(vm, stat_name, &data, 1); + return data; +} void vm_create_irqchip(struct kvm_vm *vm); @@ -157,18 +375,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, uint64_t guest_paddr, uint32_t slot, uint64_t npages, uint32_t flags); -void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, - void *arg); -int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl, - void *arg); -void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); -int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg); -void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); -int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa); void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot); -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id); vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min); vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages); vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm); @@ -180,42 +390,219 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa); -/* - * Address Guest Virtual to Guest Physical - * - * Input Args: - * vm - Virtual Machine - * gva - VM virtual address - * - * Output Args: None - * - * Return: - * Equivalent VM physical address - * - * Returns the VM physical address of the translated VM virtual - * address given by @gva. - */ -vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); - -struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); -int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); -int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_guest_debug *debug); -void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_mp_state *mp_state); -struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); -void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_run(struct kvm_vcpu *vcpu); +int _vcpu_run(struct kvm_vcpu *vcpu); + +static inline int __vcpu_run(struct kvm_vcpu *vcpu) +{ + return __vcpu_ioctl(vcpu, KVM_RUN, NULL); +} + +void vcpu_run_complete_io(struct kvm_vcpu *vcpu); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu); + +static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap, + uint64_t arg0) +{ + struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } }; + + vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap); +} + +static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu, + struct kvm_guest_debug *debug) +{ + vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug); +} + +static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state); +} +static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state); +} + +static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_GET_REGS, regs); +} + +static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_ioctl(vcpu, KVM_SET_REGS, regs); +} +static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs); + +} +static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) +{ + return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs); +} +static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_GET_FPU, fpu); +} +static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_ioctl(vcpu, KVM_SET_FPU, fpu); +} + +static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + + return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); +} +static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} +static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr }; + + vcpu_ioctl(vcpu, KVM_GET_ONE_REG, ®); +} +static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val) +{ + struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val }; + + vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); +} + +#ifdef __KVM_HAVE_VCPU_EVENTS +static inline void vcpu_events_get(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events); +} +static inline void vcpu_events_set(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events); +} +#endif +#ifdef __x86_64__ +static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state); +} +static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} + +static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu, + struct kvm_nested_state *state) +{ + vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state); +} +#endif +static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu) +{ + int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_GET_STATS_FD, fd)); + return fd; +} + +int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr); + +static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) +{ + int ret = __kvm_has_device_attr(dev_fd, group, attr); + + TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); +} + +int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_get(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_get(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret)); +} + +int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val); + +static inline void kvm_device_attr_set(int dev_fd, uint32_t group, + uint64_t attr, void *val) +{ + int ret = __kvm_device_attr_set(dev_fd, group, attr, val); + + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret)); +} + +static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + return __kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr) +{ + kvm_has_device_attr(vcpu->fd, group, attr); +} + +static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_get(vcpu->fd, group, attr, val); +} + +static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + return __kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group, + uint64_t attr, void *val) +{ + kvm_device_attr_set(vcpu->fd, group, attr, val); +} + +int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type); +int __kvm_create_device(struct kvm_vm *vm, uint64_t type); + +static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type) +{ + int fd = __kvm_create_device(vm, type); + + TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd)); + return fd; +} + +void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu); /* * VM VCPU Args Set * * Input Args: * vm - Virtual Machine - * vcpuid - VCPU ID * num - number of arguments * ... - arguments, each of type uint64_t * @@ -223,59 +610,16 @@ void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); * * Return: None * - * Sets the first @num function input registers of the VCPU with @vcpuid, - * per the C calling convention of the architecture, to the values given - * as variable args. Each of the variable args is expected to be of type - * uint64_t. The maximum @num can be is specific to the architecture. + * Sets the first @num input parameters for the function at @vcpu's entry point, + * per the C calling convention of the architecture, to the values given as + * variable args. Each of the variable args is expected to be of type uint64_t. + * The maximum @num can be is specific to the architecture. */ -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); - -void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_sregs *sregs); -void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_sregs *sregs); -int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_sregs *sregs); -void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_fpu *fpu); -void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_fpu *fpu); -void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); -void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg); -#ifdef __KVM_HAVE_VCPU_EVENTS -void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); -void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events); -#endif -#ifdef __x86_64__ -void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_nested_state *state); -int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_nested_state *state, bool ignore_error); -#endif -void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid); - -int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); -int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr); -int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd); -int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test); -int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, - void *val, bool write); -int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, - void *val, bool write); +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...); + void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level); -int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr); -int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr); -int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write); -int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write); - #define KVM_MAX_IRQ_ROUTES 4096 struct kvm_irq_routing *kvm_gsi_routing_create(void); @@ -286,26 +630,6 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing); const char *exit_reason_str(unsigned int exit_reason); -void virt_pgd_alloc(struct kvm_vm *vm); - -/* - * VM Virtual Page Map - * - * Input Args: - * vm - Virtual Machine - * vaddr - VM Virtual Address - * paddr - VM Physical Address - * memslot - Memory region slot for new virtual translation tables - * - * Output Args: None - * - * Return: None - * - * Within @vm, creates a virtual translation for the page starting - * at @vaddr to the page starting at @paddr. - */ -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); - vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min, uint32_t memslot); vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, @@ -313,55 +637,54 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num, vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm); /* - * Create a VM with reasonable defaults - * - * Input Args: - * vcpuid - The id of the single VCPU to add to the VM. - * extra_mem_pages - The number of extra pages to add (this will - * decide how much extra space we will need to - * setup the page tables using memslot 0) - * guest_code - The vCPU's entry point - * - * Output Args: None - * - * Return: - * Pointer to opaque structure that describes the created VM. + * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also + * loads the test binary into guest memory and creates an IRQ chip (x86 only). + * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to + * calculate the amount of memory needed for per-vCPU data, e.g. stacks. */ -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code); +struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages); +struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, + uint64_t nr_extra_pages); + +static inline struct kvm_vm *vm_create_barebones(void) +{ + return ____vm_create(VM_MODE_DEFAULT, 0); +} -/* Same as vm_create_default, but can be used for more than one vcpu */ -struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, - uint32_t num_percpu_pages, void *guest_code, - uint32_t vcpuids[]); +static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus) +{ + return __vm_create(VM_MODE_DEFAULT, nr_runnable_vcpus, 0); +} -/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */ -struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, - uint64_t slot0_mem_pages, uint64_t extra_mem_pages, - uint32_t num_percpu_pages, void *guest_code, - uint32_t vcpuids[]); +struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t extra_mem_pages, + void *guest_code, struct kvm_vcpu *vcpus[]); -/* Create a default VM without any vcpus. */ -struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages); +static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus, + void *guest_code, + struct kvm_vcpu *vcpus[]) +{ + return __vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, 0, + guest_code, vcpus); +} /* - * Adds a vCPU with reasonable defaults (e.g. a stack) - * - * Input Args: - * vm - Virtual Machine - * vcpuid - The id of the VCPU to add to the VM. - * guest_code - The vCPU's entry point + * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages + * additional pages of guest memory. Returns the VM and vCPU (via out param). */ -void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); +struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); +static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + void *guest_code) +{ + return __vm_create_with_one_vcpu(vcpu, 0, guest_code); +} -unsigned int vm_get_page_size(struct kvm_vm *vm); -unsigned int vm_get_page_shift(struct kvm_vm *vm); -unsigned long vm_compute_max_gfn(struct kvm_vm *vm); -uint64_t vm_get_max_gfn(struct kvm_vm *vm); -int vm_get_fd(struct kvm_vm *vm); +struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); +unsigned long vm_compute_max_gfn(struct kvm_vm *vm); unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size); unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages); unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages); @@ -381,11 +704,6 @@ struct kvm_userspace_memory_region * kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end); -struct kvm_dirty_log * -allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); - -int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); - #define sync_global_to_guest(vm, g) ({ \ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \ memcpy(_p, &(g), sizeof(g)); \ @@ -396,11 +714,124 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) -void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu); + +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent); + +static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu, + uint8_t indent) +{ + vcpu_arch_dump(stream, vcpu, indent); +} + +/* + * Adds a vCPU with reasonable defaults (e.g. a stack) + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code); + +static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) +{ + return vm_arch_vcpu_add(vm, vcpu_id, guest_code); +} + +/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */ +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id); + +static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm, + uint32_t vcpu_id) +{ + return vm_arch_vcpu_recreate(vm, vcpu_id); +} + +void vcpu_arch_free(struct kvm_vcpu *vcpu); + +void virt_arch_pgd_alloc(struct kvm_vm *vm); + +static inline void virt_pgd_alloc(struct kvm_vm *vm) +{ + virt_arch_pgd_alloc(vm); +} + +/* + * VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within @vm, creates a virtual translation for the page starting + * at @vaddr to the page starting at @paddr. + */ +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr); + +static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +{ + virt_arch_pg_map(vm, vaddr, paddr); +} + + +/* + * Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Returns the VM physical address of the translated VM virtual + * address given by @gva. + */ +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return addr_arch_gva2gpa(vm, gva); +} + +/* + * Virtual Translation Tables Dump + * + * Input Args: + * stream - Output FILE stream + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: None + * + * Return: None + * + * Dumps to the FILE stream given by @stream, the contents of all the + * virtual translation tables for the VM given by @vm. + */ +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); + +static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + virt_arch_dump(stream, vm, indent); +} -int vm_get_stats_fd(struct kvm_vm *vm); -int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid); -uint32_t guest_get_vcpuid(void); +static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm) +{ + return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0); +} #endif /* SELFTEST_KVM_UTIL_BASE_H */ diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h index d822cb670f1c..eaa88df0555a 100644 --- a/tools/testing/selftests/kvm/include/perf_test_util.h +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -25,7 +25,8 @@ struct perf_test_vcpu_args { uint64_t pages; /* Only used by the host userspace part of the vCPU thread */ - int vcpu_id; + struct kvm_vcpu *vcpu; + int vcpu_idx; }; struct perf_test_args { @@ -44,7 +45,7 @@ struct perf_test_args { extern struct perf_test_args perf_test_args; -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, uint64_t vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access); @@ -57,6 +58,6 @@ void perf_test_join_vcpu_threads(int vcpus); void perf_test_guest_code(uint32_t vcpu_id); uint64_t perf_test_nested_pages(int nr_vcpus); -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus); +void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); #endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index 4fcfd1c0389d..d00d213c3805 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -38,26 +38,6 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, KVM_REG_RISCV_TIMER_REG(name), \ KVM_REG_SIZE_U64) -static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, - unsigned long *addr) -{ - struct kvm_one_reg reg; - - reg.id = id; - reg.addr = (unsigned long)addr; - vcpu_get_reg(vm, vcpuid, ®); -} - -static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, - unsigned long val) -{ - struct kvm_one_reg reg; - - reg.id = id; - reg.addr = (unsigned long)&val; - vcpu_set_reg(vm, vcpuid, ®); -} - /* L3 index Bit[47:39] */ #define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL #define PGTBL_L3_INDEX_SHIFT 39 diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 99e0dcdc923f..5c5a88180b6c 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -34,6 +34,13 @@ static inline int _no_printf(const char *format, ...) { return 0; } #endif void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2))); +#define __TEST_REQUIRE(f, fmt, ...) \ +do { \ + if (!(f)) \ + ksft_exit_skip("- " fmt "\n", ##__VA_ARGS__); \ +} while (0) + +#define TEST_REQUIRE(f) __TEST_REQUIRE(f, "Requirement not met: %s", #f) ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 9eecc9d40b79..ee79d180e07e 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -6,6 +6,7 @@ */ #ifndef SELFTEST_KVM_UCALL_COMMON_H #define SELFTEST_KVM_UCALL_COMMON_H +#include "test_util.h" /* Common ucalls */ enum { @@ -16,7 +17,7 @@ enum { UCALL_UNHANDLED, }; -#define UCALL_MAX_ARGS 6 +#define UCALL_MAX_ARGS 7 struct ucall { uint64_t cmd; @@ -26,17 +27,26 @@ struct ucall { void ucall_init(struct kvm_vm *vm, void *arg); void ucall_uninit(struct kvm_vm *vm); void ucall(uint64_t cmd, int nargs, ...); -uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) #define GUEST_DONE() ucall(UCALL_DONE, 0) -#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, 2 + _nargs, \ - "Failed guest assert: " \ - _condstr, __LINE__, _args); \ + +enum guest_assert_builtin_args { + GUEST_ERROR_STRING, + GUEST_FILE, + GUEST_LINE, + GUEST_ASSERT_BUILTIN_NARGS +}; + +#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) \ +do { \ + if (!(_condition)) \ + ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \ + "Failed guest assert: " _condstr, \ + __FILE__, __LINE__, ##_args); \ } while (0) #define GUEST_ASSERT(_condition) \ @@ -56,4 +66,45 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc); #define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) +#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...) \ + TEST_FAIL("%s at %s:%ld\n" fmt, \ + (const char *)(_ucall).args[GUEST_ERROR_STRING], \ + (const char *)(_ucall).args[GUEST_FILE], \ + (_ucall).args[GUEST_LINE], \ + ##_args) + +#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i]) + +#define REPORT_GUEST_ASSERT(ucall) \ + __REPORT_GUEST_ASSERT((ucall), "") + +#define REPORT_GUEST_ASSERT_1(ucall, fmt) \ + __REPORT_GUEST_ASSERT((ucall), \ + fmt, \ + GUEST_ASSERT_ARG((ucall), 0)) + +#define REPORT_GUEST_ASSERT_2(ucall, fmt) \ + __REPORT_GUEST_ASSERT((ucall), \ + fmt, \ + GUEST_ASSERT_ARG((ucall), 0), \ + GUEST_ASSERT_ARG((ucall), 1)) + +#define REPORT_GUEST_ASSERT_3(ucall, fmt) \ + __REPORT_GUEST_ASSERT((ucall), \ + fmt, \ + GUEST_ASSERT_ARG((ucall), 0), \ + GUEST_ASSERT_ARG((ucall), 1), \ + GUEST_ASSERT_ARG((ucall), 2)) + +#define REPORT_GUEST_ASSERT_4(ucall, fmt) \ + __REPORT_GUEST_ASSERT((ucall), \ + fmt, \ + GUEST_ASSERT_ARG((ucall), 0), \ + GUEST_ASSERT_ARG((ucall), 1), \ + GUEST_ASSERT_ARG((ucall), 2), \ + GUEST_ASSERT_ARG((ucall), 3)) + +#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...) \ + __REPORT_GUEST_ASSERT((ucall), fmt, ##args) + #endif /* SELFTEST_KVM_UCALL_COMMON_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h index ac88557dcc9a..bed316fdecd5 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -35,6 +35,7 @@ #define APIC_SPIV_APIC_ENABLED (1 << 8) #define APIC_IRR 0x200 #define APIC_ICR 0x300 +#define APIC_LVTCMCI 0x2f0 #define APIC_DEST_SELF 0x40000 #define APIC_DEST_ALLINC 0x80000 #define APIC_DEST_ALLBUT 0xC0000 diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h index cc5d14a45702..3c9260f8e116 100644 --- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h +++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h @@ -241,7 +241,7 @@ struct hv_enlightened_vmcs { extern struct hv_enlightened_vmcs *current_evmcs; extern struct hv_vp_assist_page *current_vp_assist; -int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id); +int vcpu_enable_evmcs(struct kvm_vcpu *vcpu); static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist) { diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86_64/mce.h new file mode 100644 index 000000000000..6119321f3f5d --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86_64/mce.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * tools/testing/selftests/kvm/include/x86_64/mce.h + * + * Copyright (C) 2022, Google LLC. + */ + +#ifndef SELFTEST_KVM_MCE_H +#define SELFTEST_KVM_MCE_H + +#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */ +#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */ +#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */ +#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */ +#define KVM_MAX_MCE_BANKS 32 +#define MCG_CAP_BANKS_MASK 0xff /* Bit 0-7 of the MCG_CAP register are #banks */ +#define MCI_STATUS_VAL (1ULL << 63) /* valid error */ +#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */ +#define MCI_STATUS_EN (1ULL << 60) /* error enabled */ +#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */ +#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */ +#define MCM_ADDR_PHYS 2 /* physical address */ +#define MCI_CTL2_CMCI_EN BIT_ULL(30) + +#endif /* SELFTEST_KVM_MCE_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 6ce185449259..45edf45821d0 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -15,8 +15,12 @@ #include <asm/msr-index.h> #include <asm/prctl.h> +#include <linux/stringify.h> + #include "../kvm_util.h" +#define NMI_VECTOR 0x02 + #define X86_EFLAGS_FIXED (1u << 1) #define X86_CR4_VME (1ul << 0) @@ -41,24 +45,122 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) -/* CPUID.1.ECX */ -#define CPUID_VMX (1ul << 5) -#define CPUID_SMX (1ul << 6) -#define CPUID_PCID (1ul << 17) -#define CPUID_XSAVE (1ul << 26) +/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */ +enum cpuid_output_regs { + KVM_CPUID_EAX, + KVM_CPUID_EBX, + KVM_CPUID_ECX, + KVM_CPUID_EDX +}; -/* CPUID.7.EBX */ -#define CPUID_FSGSBASE (1ul << 0) -#define CPUID_SMEP (1ul << 7) -#define CPUID_SMAP (1ul << 20) +/* + * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be + * passed by value with no overhead. + */ +struct kvm_x86_cpu_feature { + u32 function; + u16 index; + u8 reg; + u8 bit; +}; +#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \ +({ \ + struct kvm_x86_cpu_feature feature = { \ + .function = fn, \ + .index = idx, \ + .reg = KVM_CPUID_##gpr, \ + .bit = __bit, \ + }; \ + \ + feature; \ +}) -/* CPUID.7.ECX */ -#define CPUID_UMIP (1ul << 2) -#define CPUID_PKU (1ul << 3) -#define CPUID_LA57 (1ul << 16) +/* + * Basic Leafs, a.k.a. Intel defined + */ +#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3) +#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5) +#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6) +#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15) +#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17) +#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21) +#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22) +#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24) +#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26) +#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27) +#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30) +#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7) +#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9) +#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19) +#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25) +#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26) +#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0) +#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1) +#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4) +#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7) +#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10) +#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11) +#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14) +#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20) +#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22) +#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23) +#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24) +#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2) +#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3) +#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16) +#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22) +#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7) +#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20) +#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24) +#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26) +#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29) +#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31) +#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17) +#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18) +#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3) +#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4) -/* CPUID.0x8000_0001.EDX */ -#define CPUID_GBPAGES (1ul << 26) +/* + * Extended Leafs, a.k.a. AMD defined + */ +#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2) +#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20) +#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26) +#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27) +#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29) +#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4) +#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12) +#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0) +#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1) +#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3) +#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4) +#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10) +#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12) +#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16) +#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1) +#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3) + +/* + * KVM defined paravirt features. + */ +#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0) +#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1) +#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2) +#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3) +#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4) +#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5) +#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6) +#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7) +/* Bit 8 apparently isn't used?!?! */ +#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9) +#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10) +#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11) +#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12) +#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13) +#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14) +#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15) +#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16) +#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17) /* Page table bitfield declarations */ #define PTE_PRESENT_MASK BIT_ULL(0) @@ -300,10 +402,13 @@ static inline void outl(uint16_t port, uint32_t value) __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); } -static inline void cpuid(uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static inline void __cpuid(uint32_t function, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) { - /* ecx is often an input as well as an output. */ + *eax = function; + *ecx = index; + asm volatile("cpuid" : "=a" (*eax), "=b" (*ebx), @@ -313,6 +418,24 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, : "memory"); } +static inline void cpuid(uint32_t function, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + return __cpuid(function, 0, eax, ebx, ecx, edx); +} + +static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature) +{ + uint32_t gprs[4]; + + __cpuid(feature.function, feature.index, + &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX], + &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]); + + return gprs[feature.reg] & BIT(feature.bit); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -385,6 +508,21 @@ static inline void cpu_relax(void) asm volatile("rep; nop" ::: "memory"); } +#define vmmcall() \ + __asm__ __volatile__( \ + "vmmcall\n" \ + ) + +#define ud2() \ + __asm__ __volatile__( \ + "ud2\n" \ + ) + +#define hlt() \ + __asm__ __volatile__( \ + "hlt\n" \ + ) + bool is_intel_cpu(void); bool is_amd_cpu(void); @@ -405,39 +543,198 @@ static inline unsigned int x86_model(unsigned int eax) return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f); } -struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid); -void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_x86_state *state); +struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu); +void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state); void kvm_x86_state_cleanup(struct kvm_x86_state *state); -struct kvm_msr_list *kvm_get_msr_index_list(void); +const struct kvm_msr_list *kvm_get_msr_index_list(void); +const struct kvm_msr_list *kvm_get_feature_msr_index_list(void); +bool kvm_msr_is_in_save_restore_list(uint32_t msr_index); uint64_t kvm_get_feature_msr(uint64_t msr_index); -struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid); -int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_cpuid2 *cpuid); -void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_cpuid2 *cpuid); +static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu, + struct kvm_msrs *msrs) +{ + int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs); + + TEST_ASSERT(r == msrs->nmsrs, + "KVM_GET_MSRS failed, r: %i (failed on MSR %x)", + r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index); +} +static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs) +{ + int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs); + + TEST_ASSERT(r == msrs->nmsrs, + "KVM_GET_MSRS failed, r: %i (failed on MSR %x)", + r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index); +} +static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu, + struct kvm_debugregs *debugregs) +{ + vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs); +} +static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu, + struct kvm_debugregs *debugregs) +{ + vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs); +} +static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu, + struct kvm_xsave *xsave) +{ + vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave); +} +static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu, + struct kvm_xsave *xsave) +{ + vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave); +} +static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu, + struct kvm_xsave *xsave) +{ + vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave); +} +static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu, + struct kvm_xcrs *xcrs) +{ + vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs); +} +static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) +{ + vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs); +} + +const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); -struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature); -static inline struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_entry(uint32_t function) +static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature) { - return kvm_get_supported_cpuid_index(function, 0); + return kvm_cpuid_has(kvm_get_supported_cpuid(), feature); +} + +static inline size_t kvm_cpuid2_size(int nr_entries) +{ + return sizeof(struct kvm_cpuid2) + + sizeof(struct kvm_cpuid_entry2) * nr_entries; +} + +/* + * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of + * entries sized to hold @nr_entries. The caller is responsible for freeing + * the struct. + */ +static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) +{ + struct kvm_cpuid2 *cpuid; + + cpuid = malloc(kvm_cpuid2_size(nr_entries)); + TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2"); + + cpuid->nent = nr_entries; + + return cpuid; } -uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index); -int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value); -void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value); +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index); +void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, + uint32_t function, + uint32_t index) +{ + return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid, + function, index); +} + +static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, + uint32_t function) +{ + return __vcpu_get_cpuid_entry(vcpu, function, 0); +} + +static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu) +{ + int r; + + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first"); + r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); + if (r) + return r; + + /* On success, refresh the cache to pick up adjustments made by KVM. */ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); + return 0; +} + +static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) +{ + TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first"); + vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid); + + /* Refresh the cache to pick up adjustments made by KVM. */ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + +void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr); + +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function); +void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature, + bool set); + +static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + vcpu_set_or_clear_cpuid_feature(vcpu, feature, true); + +} + +static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature) +{ + vcpu_set_or_clear_cpuid_feature(vcpu, feature, false); +} + +static inline const struct kvm_cpuid_entry2 *__kvm_get_supported_cpuid_entry(uint32_t function, + uint32_t index) +{ + return get_cpuid_entry(kvm_get_supported_cpuid(), function, index); +} + +static inline const struct kvm_cpuid_entry2 *kvm_get_supported_cpuid_entry(uint32_t function) +{ + return __kvm_get_supported_cpuid_entry(function, 0); +} + +uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); +int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); + +static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, + uint64_t msr_value) +{ + int r = _vcpu_set_msr(vcpu, msr_index, msr_value); + + TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); +} + +static inline uint32_t kvm_get_cpuid_max_basic(void) +{ + return kvm_get_supported_cpuid_entry(0)->eax; +} + +static inline uint32_t kvm_get_cpuid_max_extended(void) +{ + return kvm_get_supported_cpuid_entry(0x80000000)->eax; +} -uint32_t kvm_get_cpuid_max_basic(void); -uint32_t kvm_get_cpuid_max_extended(void); void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +bool vm_is_unrestricted_guest(struct kvm_vm *vm); struct ex_regs { uint64_t rax, rcx, rdx, rbx; @@ -452,35 +749,94 @@ struct ex_regs { }; void vm_init_descriptor_tables(struct kvm_vm *vm); -void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu); void vm_install_exception_handler(struct kvm_vm *vm, int vector, void (*handler)(struct ex_regs *)); -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr); -void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, - uint64_t pte); +/* If a toddler were to say "abracadabra". */ +#define KVM_EXCEPTION_MAGIC 0xabacadabaull /* - * get_cpuid() - find matching CPUID entry and return pointer to it. - */ -struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index); -/* - * set_cpuid() - overwrites a matching cpuid entry with the provided value. - * matches based on ent->function && ent->index. returns true - * if a match was found and successfully overwritten. - * @cpuid: the kvm cpuid list to modify. - * @ent: cpuid entry to insert + * KVM selftest exception fixup uses registers to coordinate with the exception + * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory + * per-CPU data. Using only registers avoids having to map memory into the + * guest, doesn't require a valid, stable GS.base, and reduces the risk of + * for recursive faults when accessing memory in the handler. The downside to + * using registers is that it restricts what registers can be used by the actual + * instruction. But, selftests are 64-bit only, making register* pressure a + * minor concern. Use r9-r11 as they are volatile, i.e. don't need* to be saved + * by the callee, and except for r11 are not implicit parameters to any + * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit + * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V + * is higher priority than testing non-faulting SYSCALL/SYSRET. + * + * Note, the fixup handler deliberately does not handle #DE, i.e. the vector + * is guaranteed to be non-zero on fault. + * + * REGISTER INPUTS: + * r9 = MAGIC + * r10 = RIP + * r11 = new RIP on fault + * + * REGISTER OUTPUTS: + * r9 = exception vector (non-zero) */ -bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); +#define KVM_ASM_SAFE(insn) \ + "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \ + "lea 1f(%%rip), %%r10\n\t" \ + "lea 2f(%%rip), %%r11\n\t" \ + "1: " insn "\n\t" \ + "mov $0, %[vector]\n\t" \ + "jmp 3f\n\t" \ + "2:\n\t" \ + "mov %%r9b, %[vector]\n\t" \ + "3:\n\t" + +#define KVM_ASM_SAFE_OUTPUTS(v) [vector] "=qm"(v) +#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11" + +#define kvm_asm_safe(insn, inputs...) \ +({ \ + uint8_t vector; \ + \ + asm volatile(KVM_ASM_SAFE(insn) \ + : KVM_ASM_SAFE_OUTPUTS(vector) \ + : inputs \ + : KVM_ASM_SAFE_CLOBBERS); \ + vector; \ +}) + +static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val) +{ + uint8_t vector; + uint32_t a, d; + + asm volatile(KVM_ASM_SAFE("rdmsr") + : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector) + : "c"(msr) + : KVM_ASM_SAFE_CLOBBERS); + + *val = (uint64_t)a | ((uint64_t)d << 32); + return vector; +} + +static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val) +{ + return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr)); +} + +uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t vaddr); +void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t vaddr, uint64_t pte); uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3); -struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); -void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); -struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid); -void vm_xsave_req_perm(int bit); +void __vm_xsave_require_permission(int bit, const char *name); + +#define vm_xsave_require_permission(perm) \ + __vm_xsave_require_permission(perm, #perm) enum pg_level { PG_LEVEL_NONE, diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h index 2225e5077350..c8343ff84f7f 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm.h @@ -218,8 +218,6 @@ struct __attribute__ ((__packed__)) vmcb { struct vmcb_save_area save; }; -#define SVM_CPUID_FUNC 0x8000000a - #define SVM_VM_CR_SVM_DISABLE 4 #define SVM_SELECTOR_S_SHIFT 4 diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h index a25aabd8f5e7..a339b537a575 100644 --- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h +++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h @@ -13,9 +13,8 @@ #include "svm.h" #include "processor.h" -#define CPUID_SVM_BIT 2 -#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT) - +#define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_HLT 0x078 #define SVM_EXIT_MSR 0x07c #define SVM_EXIT_VMMCALL 0x081 @@ -36,21 +35,19 @@ struct svm_test_data { uint64_t msr_gpa; }; +#define stgi() \ + __asm__ __volatile__( \ + "stgi\n" \ + ) + +#define clgi() \ + __asm__ __volatile__( \ + "clgi\n" \ + ) + struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva); void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp); void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa); -bool nested_svm_supported(void); -void nested_svm_check_supported(void); - -static inline bool cpu_has_svm(void) -{ - u32 eax = 0x80000001, ecx; - - asm("cpuid" : - "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx"); - - return ecx & CPUID_SVM; -} int open_sev_dev_path_or_exit(void); diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index cc3604f8f1d3..99fa1410964c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -607,8 +607,6 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx); void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp); bool load_vmcs(struct vmx_pages *vmx); -bool nested_vmx_supported(void); -void nested_vmx_check_supported(void); bool ept_1g_pages_supported(void); void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm, diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index 17f65d514915..0b45ac593387 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -26,163 +26,167 @@ static void stats_test(int stats_fd) int i; size_t size_desc; size_t size_data = 0; - struct kvm_stats_header *header; + struct kvm_stats_header header; char *id; struct kvm_stats_desc *stats_desc; u64 *stats_data; struct kvm_stats_desc *pdesc; + u32 type, unit, base; /* Read kvm stats header */ - header = malloc(sizeof(*header)); - TEST_ASSERT(header, "Allocate memory for stats header"); + read_stats_header(stats_fd, &header); - ret = read(stats_fd, header, sizeof(*header)); - TEST_ASSERT(ret == sizeof(*header), "Read stats header"); - size_desc = sizeof(*stats_desc) + header->name_size; + size_desc = get_stats_descriptor_size(&header); /* Read kvm stats id string */ - id = malloc(header->name_size); + id = malloc(header.name_size); TEST_ASSERT(id, "Allocate memory for id string"); - ret = read(stats_fd, id, header->name_size); - TEST_ASSERT(ret == header->name_size, "Read id string"); + + ret = read(stats_fd, id, header.name_size); + TEST_ASSERT(ret == header.name_size, "Read id string"); /* Check id string, that should start with "kvm" */ - TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header->name_size, - "Invalid KVM stats type, id: %s", id); + TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size, + "Invalid KVM stats type, id: %s", id); /* Sanity check for other fields in header */ - if (header->num_desc == 0) { + if (header.num_desc == 0) { printf("No KVM stats defined!"); return; } - /* Check overlap */ - TEST_ASSERT(header->desc_offset > 0 && header->data_offset > 0 - && header->desc_offset >= sizeof(*header) - && header->data_offset >= sizeof(*header), - "Invalid offset fields in header"); - TEST_ASSERT(header->desc_offset > header->data_offset || - (header->desc_offset + size_desc * header->num_desc <= - header->data_offset), - "Descriptor block is overlapped with data block"); - - /* Allocate memory for stats descriptors */ - stats_desc = calloc(header->num_desc, size_desc); - TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors"); + /* + * The descriptor and data offsets must be valid, they must not overlap + * the header, and the descriptor and data blocks must not overlap each + * other. Note, the data block is rechecked after its size is known. + */ + TEST_ASSERT(header.desc_offset && header.desc_offset >= sizeof(header) && + header.data_offset && header.data_offset >= sizeof(header), + "Invalid offset fields in header"); + + TEST_ASSERT(header.desc_offset > header.data_offset || + (header.desc_offset + size_desc * header.num_desc <= header.data_offset), + "Descriptor block is overlapped with data block"); + /* Read kvm stats descriptors */ - ret = pread(stats_fd, stats_desc, - size_desc * header->num_desc, header->desc_offset); - TEST_ASSERT(ret == size_desc * header->num_desc, - "Read KVM stats descriptors"); + stats_desc = read_stats_descriptors(stats_fd, &header); /* Sanity check for fields in descriptors */ - for (i = 0; i < header->num_desc; ++i) { - pdesc = (void *)stats_desc + i * size_desc; + for (i = 0; i < header.num_desc; ++i) { + pdesc = get_stats_descriptor(stats_desc, i, &header); + type = pdesc->flags & KVM_STATS_TYPE_MASK; + unit = pdesc->flags & KVM_STATS_UNIT_MASK; + base = pdesc->flags & KVM_STATS_BASE_MASK; + + /* Check name string */ + TEST_ASSERT(strlen(pdesc->name) < header.name_size, + "KVM stats name (index: %d) too long", i); + /* Check type,unit,base boundaries */ - TEST_ASSERT((pdesc->flags & KVM_STATS_TYPE_MASK) - <= KVM_STATS_TYPE_MAX, "Unknown KVM stats type"); - TEST_ASSERT((pdesc->flags & KVM_STATS_UNIT_MASK) - <= KVM_STATS_UNIT_MAX, "Unknown KVM stats unit"); - TEST_ASSERT((pdesc->flags & KVM_STATS_BASE_MASK) - <= KVM_STATS_BASE_MAX, "Unknown KVM stats base"); - /* Check exponent for stats unit + TEST_ASSERT(type <= KVM_STATS_TYPE_MAX, + "Unknown KVM stats (%s) type: %u", pdesc->name, type); + TEST_ASSERT(unit <= KVM_STATS_UNIT_MAX, + "Unknown KVM stats (%s) unit: %u", pdesc->name, unit); + TEST_ASSERT(base <= KVM_STATS_BASE_MAX, + "Unknown KVM stats (%s) base: %u", pdesc->name, base); + + /* + * Check exponent for stats unit * Exponent for counter should be greater than or equal to 0 * Exponent for unit bytes should be greater than or equal to 0 * Exponent for unit seconds should be less than or equal to 0 * Exponent for unit clock cycles should be greater than or * equal to 0 + * Exponent for unit boolean should be 0 */ switch (pdesc->flags & KVM_STATS_UNIT_MASK) { case KVM_STATS_UNIT_NONE: case KVM_STATS_UNIT_BYTES: case KVM_STATS_UNIT_CYCLES: TEST_ASSERT(pdesc->exponent >= 0, - "Unsupported KVM stats unit"); + "Unsupported KVM stats (%s) exponent: %i", + pdesc->name, pdesc->exponent); break; case KVM_STATS_UNIT_SECONDS: TEST_ASSERT(pdesc->exponent <= 0, - "Unsupported KVM stats unit"); + "Unsupported KVM stats (%s) exponent: %i", + pdesc->name, pdesc->exponent); + break; + case KVM_STATS_UNIT_BOOLEAN: + TEST_ASSERT(pdesc->exponent == 0, + "Unsupported KVM stats (%s) exponent: %d", + pdesc->name, pdesc->exponent); break; } - /* Check name string */ - TEST_ASSERT(strlen(pdesc->name) < header->name_size, - "KVM stats name(%s) too long", pdesc->name); + /* Check size field, which should not be zero */ - TEST_ASSERT(pdesc->size, "KVM descriptor(%s) with size of 0", - pdesc->name); + TEST_ASSERT(pdesc->size, + "KVM descriptor(%s) with size of 0", pdesc->name); /* Check bucket_size field */ switch (pdesc->flags & KVM_STATS_TYPE_MASK) { case KVM_STATS_TYPE_LINEAR_HIST: TEST_ASSERT(pdesc->bucket_size, - "Bucket size of Linear Histogram stats (%s) is zero", - pdesc->name); + "Bucket size of Linear Histogram stats (%s) is zero", + pdesc->name); break; default: TEST_ASSERT(!pdesc->bucket_size, - "Bucket size of stats (%s) is not zero", - pdesc->name); + "Bucket size of stats (%s) is not zero", + pdesc->name); } size_data += pdesc->size * sizeof(*stats_data); } - /* Check overlap */ - TEST_ASSERT(header->data_offset >= header->desc_offset - || header->data_offset + size_data <= header->desc_offset, - "Data block is overlapped with Descriptor block"); + + /* + * Now that the size of the data block is known, verify the data block + * doesn't overlap the descriptor block. + */ + TEST_ASSERT(header.data_offset >= header.desc_offset || + header.data_offset + size_data <= header.desc_offset, + "Data block is overlapped with Descriptor block"); + /* Check validity of all stats data size */ - TEST_ASSERT(size_data >= header->num_desc * sizeof(*stats_data), - "Data size is not correct"); + TEST_ASSERT(size_data >= header.num_desc * sizeof(*stats_data), + "Data size is not correct"); + /* Check stats offset */ - for (i = 0; i < header->num_desc; ++i) { - pdesc = (void *)stats_desc + i * size_desc; + for (i = 0; i < header.num_desc; ++i) { + pdesc = get_stats_descriptor(stats_desc, i, &header); TEST_ASSERT(pdesc->offset < size_data, - "Invalid offset (%u) for stats: %s", - pdesc->offset, pdesc->name); + "Invalid offset (%u) for stats: %s", + pdesc->offset, pdesc->name); } /* Allocate memory for stats data */ stats_data = malloc(size_data); TEST_ASSERT(stats_data, "Allocate memory for stats data"); /* Read kvm stats data as a bulk */ - ret = pread(stats_fd, stats_data, size_data, header->data_offset); + ret = pread(stats_fd, stats_data, size_data, header.data_offset); TEST_ASSERT(ret == size_data, "Read KVM stats data"); /* Read kvm stats data one by one */ - size_data = 0; - for (i = 0; i < header->num_desc; ++i) { - pdesc = (void *)stats_desc + i * size_desc; - ret = pread(stats_fd, stats_data, - pdesc->size * sizeof(*stats_data), - header->data_offset + size_data); - TEST_ASSERT(ret == pdesc->size * sizeof(*stats_data), - "Read data of KVM stats: %s", pdesc->name); - size_data += pdesc->size * sizeof(*stats_data); + for (i = 0; i < header.num_desc; ++i) { + pdesc = get_stats_descriptor(stats_desc, i, &header); + read_stat_data(stats_fd, &header, pdesc, stats_data, + pdesc->size); } free(stats_data); free(stats_desc); free(id); - free(header); } static void vm_stats_test(struct kvm_vm *vm) { - int stats_fd; - - /* Get fd for VM stats */ - stats_fd = vm_get_stats_fd(vm); - TEST_ASSERT(stats_fd >= 0, "Get VM stats fd"); + int stats_fd = vm_get_stats_fd(vm); stats_test(stats_fd); close(stats_fd); TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed"); } -static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id) +static void vcpu_stats_test(struct kvm_vcpu *vcpu) { - int stats_fd; - - /* Get fd for VCPU stats */ - stats_fd = vcpu_get_stats_fd(vm, vcpu_id); - TEST_ASSERT(stats_fd >= 0, "Get VCPU stats fd"); + int stats_fd = vcpu_get_stats_fd(vcpu); stats_test(stats_fd); close(stats_fd); @@ -203,6 +207,7 @@ static void vcpu_stats_test(struct kvm_vm *vm, int vcpu_id) int main(int argc, char *argv[]) { int i, j; + struct kvm_vcpu **vcpus; struct kvm_vm **vms; int max_vm = DEFAULT_NUM_VM; int max_vcpu = DEFAULT_NUM_VCPU; @@ -220,26 +225,26 @@ int main(int argc, char *argv[]) } /* Check the extension for binary stats */ - if (kvm_check_cap(KVM_CAP_BINARY_STATS_FD) <= 0) { - print_skip("Binary form statistics interface is not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD)); /* Create VMs and VCPUs */ vms = malloc(sizeof(vms[0]) * max_vm); TEST_ASSERT(vms, "Allocate memory for storing VM pointers"); + + vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu); + TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers"); + for (i = 0; i < max_vm; ++i) { - vms[i] = vm_create(VM_MODE_DEFAULT, - DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vms[i] = vm_create_barebones(); for (j = 0; j < max_vcpu; ++j) - vm_vcpu_add(vms[i], j); + vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j); } /* Check stats read for every VM and VCPU */ for (i = 0; i < max_vm; ++i) { vm_stats_test(vms[i]); for (j = 0; j < max_vcpu; ++j) - vcpu_stats_test(vms[i], j); + vcpu_stats_test(vcpus[i * max_vcpu + j]); } for (i = 0; i < max_vm; ++i) diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index aed9dc3ca1e9..31b3cb24b9a7 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -28,11 +28,11 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus) pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n", num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1); - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + vm = vm_create_barebones(); for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++) /* This asserts that the vCPU was created. */ - vm_vcpu_add(vm, i); + __vm_vcpu_add(vm, i); kvm_vm_free(vm); } @@ -64,11 +64,9 @@ int main(int argc, char *argv[]) rl.rlim_max = nr_fds_wanted; int r = setrlimit(RLIMIT_NOFILE, &rl); - if (r < 0) { - printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + __TEST_REQUIRE(r >= 0, + "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", old_rlim_max, nr_fds_wanted); - exit(KSFT_SKIP); - } } else { TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); } diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 2c4a7563a4f8..f42c6ac6d71d 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -46,11 +46,6 @@ static const char * const test_stage_string[] = { "KVM_ADJUST_MAPPINGS", }; -struct vcpu_args { - int vcpu_id; - bool vcpu_write; -}; - struct test_args { struct kvm_vm *vm; uint64_t guest_test_virt_mem; @@ -60,7 +55,7 @@ struct test_args { uint64_t large_num_pages; uint64_t host_pages_per_lpage; enum vm_mem_backing_src_type src_type; - struct vcpu_args vcpu_args[KVM_MAX_VCPUS]; + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; }; /* @@ -92,17 +87,13 @@ static uint64_t guest_test_phys_mem; */ static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; -static void guest_code(int vcpu_id) +static void guest_code(bool do_write) { struct test_args *p = &test_args; - struct vcpu_args *vcpu_args = &p->vcpu_args[vcpu_id]; enum test_stage *current_stage = &guest_test_stage; uint64_t addr; int i, j; - /* Make sure vCPU args data structure is not corrupt */ - GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); - while (true) { addr = p->guest_test_virt_mem; @@ -123,7 +114,7 @@ static void guest_code(int vcpu_id) */ case KVM_CREATE_MAPPINGS: for (i = 0; i < p->large_num_pages; i++) { - if (vcpu_args->vcpu_write) + if (do_write) *(uint64_t *)addr = 0x0123456789ABCDEF; else READ_ONCE(*(uint64_t *)addr); @@ -193,17 +184,14 @@ static void guest_code(int vcpu_id) static void *vcpu_worker(void *data) { - int ret; - struct vcpu_args *vcpu_args = data; - struct kvm_vm *vm = test_args.vm; - int vcpu_id = vcpu_args->vcpu_id; - struct kvm_run *run; + struct kvm_vcpu *vcpu = data; + bool do_write = !(vcpu->id % 2); struct timespec start; struct timespec ts_diff; enum test_stage stage; + int ret; - vcpu_args_set(vm, vcpu_id, 1, vcpu_id); - run = vcpu_state(vm, vcpu_id); + vcpu_args_set(vcpu, 1, do_write); while (!READ_ONCE(host_quit)) { ret = sem_wait(&test_stage_updated); @@ -213,15 +201,15 @@ static void *vcpu_worker(void *data) return NULL; clock_gettime(CLOCK_MONOTONIC_RAW, &start); - ret = _vcpu_run(vm, vcpu_id); + ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Invalid guest sync status: exit_reason=%s\n", - exit_reason_str(run->exit_reason)); + exit_reason_str(vcpu->run->exit_reason)); - pr_debug("Got sync event from vCPU %d\n", vcpu_id); + pr_debug("Got sync event from vCPU %d\n", vcpu->id); stage = READ_ONCE(*current_stage); /* @@ -230,7 +218,7 @@ static void *vcpu_worker(void *data) */ pr_debug("vCPU %d has completed stage %s\n" "execution time is: %ld.%.9lds\n\n", - vcpu_id, test_stage_string[stage], + vcpu->id, test_stage_string[stage], ts_diff.tv_sec, ts_diff.tv_nsec); ret = sem_post(&test_stage_completed); @@ -250,7 +238,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) { int ret; struct test_params *p = arg; - struct vcpu_args *vcpu_args; enum vm_mem_backing_src_type src_type = p->src_type; uint64_t large_page_size = get_backing_src_pagesz(src_type); uint64_t guest_page_size = vm_guest_mode_params[mode].page_size; @@ -260,7 +247,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) uint64_t alignment; void *host_test_mem; struct kvm_vm *vm; - int vcpu_id; /* Align up the test memory size */ alignment = max(large_page_size, guest_page_size); @@ -268,12 +254,12 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) /* Create a VM with enough guest pages */ guest_num_pages = test_mem_size / guest_page_size; - vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES, - guest_num_pages, 0, guest_code, NULL); + vm = __vm_create_with_vcpus(mode, nr_vcpus, guest_num_pages, + guest_code, test_args.vcpus); /* Align down GPA of the testing memslot */ if (!p->phys_offset) - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + guest_test_phys_mem = (vm->max_gfn - guest_num_pages) * guest_page_size; else guest_test_phys_mem = p->phys_offset; @@ -292,12 +278,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) test_args.host_pages_per_lpage = large_page_size / host_page_size; test_args.src_type = src_type; - for (vcpu_id = 0; vcpu_id < KVM_MAX_VCPUS; vcpu_id++) { - vcpu_args = &test_args.vcpu_args[vcpu_id]; - vcpu_args->vcpu_id = vcpu_id; - vcpu_args->vcpu_write = !(vcpu_id % 2); - } - /* Add an extra memory slot with specified backing src type */ vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem, TEST_MEM_SLOT_INDEX, guest_num_pages, 0); @@ -363,12 +343,11 @@ static void vcpus_complete_new_stage(enum test_stage stage) static void run_test(enum vm_guest_mode mode, void *arg) { - int ret; pthread_t *vcpu_threads; struct kvm_vm *vm; - int vcpu_id; struct timespec start; struct timespec ts_diff; + int ret, i; /* Create VM with vCPUs and make some pre-initialization */ vm = pre_init_before_test(mode, arg); @@ -379,10 +358,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_quit = false; *current_stage = KVM_BEFORE_MAPPINGS; - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { - pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &test_args.vcpu_args[vcpu_id]); - } + for (i = 0; i < nr_vcpus; i++) + pthread_create(&vcpu_threads[i], NULL, vcpu_worker, + test_args.vcpus[i]); vcpus_complete_new_stage(*current_stage); pr_info("Started all vCPUs successfully\n"); @@ -424,13 +402,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Tell the vcpu thread to quit */ host_quit = true; - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + for (i = 0; i < nr_vcpus; i++) { ret = sem_post(&test_stage_updated); TEST_ASSERT(ret == 0, "Error in sem_post"); } - for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id], NULL); + for (i = 0; i < nr_vcpus; i++) + pthread_join(vcpu_threads[i], NULL); ret = sem_destroy(&test_stage_updated); TEST_ASSERT(ret == 0, "Error in sem_destroy"); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 6a041289fa80..6f5551368944 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -10,7 +10,6 @@ #include "guest_modes.h" #include "kvm_util.h" -#include "../kvm_util_internal.h" #include "processor.h" #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000 @@ -75,7 +74,7 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm) return 1 << (vm->page_shift - 3); } -void virt_pgd_alloc(struct kvm_vm *vm) +void virt_arch_pgd_alloc(struct kvm_vm *vm) { if (!vm->pgd_created) { vm_paddr_t paddr = vm_phy_pages_alloc(vm, @@ -132,14 +131,14 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */; } -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */ _virt_pg_map(vm, vaddr, paddr, attr_idx); } -vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep; @@ -196,7 +195,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p #endif } -void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { int level = 4 - (vm->pgtable_levels - 1); uint64_t pgd, *ptep; @@ -213,9 +212,10 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } -void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init) +void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) { struct kvm_vcpu_init default_init = { .target = -1, }; + struct kvm_vm *vm = vcpu->vm; uint64_t sctlr_el1, tcr_el1; if (!init) @@ -227,16 +227,16 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init init->target = preferred.target; } - vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init); + vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init); /* * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15 * registers, which the variable argument list macros do. */ - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20); - get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); - get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1); + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1); /* Configure base granule size */ switch (vm->mode) { @@ -297,46 +297,49 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd); - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpuid); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id); } -void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { uint64_t pstate, pc; - get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate); - get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc); + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate); + vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc); fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n", indent, "", pstate, pc); } -void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_init *init, void *guest_code) +struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_vcpu_init *init, void *guest_code) { size_t stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size : vm->page_size; uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size, DEFAULT_ARM64_GUEST_STACK_VADDR_MIN); + struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); - vm_vcpu_add(vm, vcpuid); - aarch64_vcpu_setup(vm, vcpuid, init); + aarch64_vcpu_setup(vcpu, init); - set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); - set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code); + + return vcpu; } -void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) { - aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code); + return aarch64_vcpu_add(vm, vcpu_id, NULL, guest_code); } -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { va_list ap; int i; @@ -347,8 +350,8 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_start(ap, num); for (i = 0; i < num; i++) { - set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]), - va_arg(ap, uint64_t)); + vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]), + va_arg(ap, uint64_t)); } va_end(ap); @@ -361,11 +364,11 @@ void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec) ; } -void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vm, vcpuid, &uc) != UCALL_UNHANDLED) + if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED) return; if (uc.args[2]) /* valid_ec */ { @@ -383,11 +386,11 @@ struct handlers { handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; }; -void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { extern char vectors; - set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); + vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors); } void route_exception(struct ex_regs *regs, int vector) @@ -469,19 +472,19 @@ void aarch64_get_supported_page_sizes(uint32_t ipa, }; kvm_fd = open_kvm_dev_path_or_exit(); - vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, ipa); - TEST_ASSERT(vm_fd >= 0, "Can't create VM"); + vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa); + TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd)); vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); - TEST_ASSERT(vcpu_fd >= 0, "Can't create vcpu"); + TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd)); err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init); - TEST_ASSERT(err == 0, "Can't get target"); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err)); err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init); - TEST_ASSERT(err == 0, "Can't get init vcpu"); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err)); err = ioctl(vcpu_fd, KVM_GET_ONE_REG, ®); - TEST_ASSERT(err == 0, "Can't get MMFR0"); + TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd)); *ps4k = ((val >> 28) & 0xf) != 0xf; *ps64k = ((val >> 24) & 0xf) == 0; diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index e0b0164e9af8..ed237b744690 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -5,7 +5,6 @@ * Copyright (C) 2018, Red Hat, Inc. */ #include "kvm_util.h" -#include "../kvm_util_internal.h" static vm_vaddr_t *ucall_exit_mmio_addr; @@ -52,7 +51,7 @@ void ucall_init(struct kvm_vm *vm, void *arg) * lower and won't match physical addresses. */ bits = vm->va_bits - 1; - bits = vm->pa_bits < bits ? vm->pa_bits : bits; + bits = min(vm->pa_bits, bits); end = 1ul << bits; start = end * 5 / 8; step = end / 16; @@ -73,25 +72,24 @@ void ucall_uninit(struct kvm_vm *vm) void ucall(uint64_t cmd, int nargs, ...) { - struct ucall uc = { - .cmd = cmd, - }; + struct ucall uc = {}; va_list va; int i; - nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + WRITE_ONCE(uc.cmd, cmd); + nargs = min(nargs, UCALL_MAX_ARGS); va_start(va, nargs); for (i = 0; i < nargs; ++i) - uc.args[i] = va_arg(va, uint64_t); + WRITE_ONCE(uc.args[i], va_arg(va, uint64_t)); va_end(va); - *ucall_exit_mmio_addr = (vm_vaddr_t)&uc; + WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc); } -uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { - struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct kvm_run *run = vcpu->run; struct ucall ucall = {}; if (uc) @@ -104,9 +102,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8, "Unexpected ucall exit mmio address access"); memcpy(&gva, run->mmio.data, sizeof(gva)); - memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall)); + memcpy(&ucall, addr_gva2hva(vcpu->vm, gva), sizeof(ucall)); - vcpu_run_complete_io(vm, vcpu_id); + vcpu_run_complete_io(vcpu); if (uc) memcpy(uc, &ucall, sizeof(ucall)); } diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c index 5d45046c1b80..b5f28d21a947 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c +++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c @@ -9,7 +9,6 @@ #include <asm/kvm.h> #include "kvm_util.h" -#include "../kvm_util_internal.h" #include "vgic.h" #include "gic.h" #include "gic_v3.h" @@ -52,31 +51,30 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs, nr_vcpus, nr_vcpus_created); /* Distributor setup */ - if (_kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, - false, &gic_fd) != 0) - return -1; + gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (gic_fd < 0) + return gic_fd; - kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, - 0, &nr_irqs, true); + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs); - kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); - kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true); + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE); virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages); /* Redistributor setup */ redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0); - kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, - KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr, true); + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, + KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr); nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_REDIST_SIZE * nr_vcpus); virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages); - kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, - KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true); + kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); return gic_fd; } @@ -89,14 +87,14 @@ int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) uint64_t val; int ret; - ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, - attr, &val, false); + ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val); if (ret != 0) return ret; val |= 1U << index; - ret = _kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, - attr, &val, true); + ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, + attr, &val); return ret; } @@ -104,8 +102,7 @@ void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level) { int ret = _kvm_irq_set_level_info(gic_fd, intid, level); - TEST_ASSERT(ret == 0, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO failed, " - "rc: %i errno: %i", ret, errno); + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret)); } int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) @@ -127,12 +124,11 @@ void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level) { int ret = _kvm_arm_irq_line(vm, intid, level); - TEST_ASSERT(ret == 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", - ret, errno); + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); } -static void vgic_poke_irq(int gic_fd, uint32_t intid, - uint32_t vcpu, uint64_t reg_off) +static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu, + uint64_t reg_off) { uint64_t reg = intid / 32; uint64_t index = intid % 32; @@ -145,7 +141,7 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, if (intid_is_private) { /* TODO: only vcpu 0 implemented for now. */ - assert(vcpu == 0); + assert(vcpu->id == 0); attr += SZ_64K; } @@ -158,17 +154,17 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid, * intid will just make the read/writes point to above the intended * register space (i.e., ICPENDR after ISPENDR). */ - kvm_device_access(gic_fd, group, attr, &val, false); + kvm_device_attr_get(gic_fd, group, attr, &val); val |= 1ULL << index; - kvm_device_access(gic_fd, group, attr, &val, true); + kvm_device_attr_set(gic_fd, group, attr, &val); } -void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, uint32_t vcpu) +void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR); } -void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, uint32_t vcpu) +void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu) { vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER); } diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c index 13e8e3dcf984..9f54c098d9d0 100644 --- a/tools/testing/selftests/kvm/lib/elf.c +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -11,7 +11,6 @@ #include <linux/elf.h> #include "kvm_util.h" -#include "kvm_util_internal.h" static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp) { diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c index 8784013b747c..99a575bbbc52 100644 --- a/tools/testing/selftests/kvm/lib/guest_modes.c +++ b/tools/testing/selftests/kvm/lib/guest_modes.c @@ -65,9 +65,9 @@ void guest_modes_append_default(void) struct kvm_s390_vm_cpu_processor info; kvm_fd = open_kvm_dev_path_or_exit(); - vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, 0); - kvm_device_access(vm_fd, KVM_S390_VM_CPU_MODEL, - KVM_S390_VM_CPU_PROCESSOR, &info, false); + vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL); + kvm_device_attr_get(vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info); close(vm_fd); close(kvm_fd); /* Starting with z13 we have 47bits of physical address */ diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 1665a220abcb..9889fe0d8919 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -8,7 +8,6 @@ #define _GNU_SOURCE /* for program_invocation_name */ #include "test_util.h" #include "kvm_util.h" -#include "kvm_util_internal.h" #include "processor.h" #include <assert.h> @@ -27,10 +26,7 @@ int open_path_or_exit(const char *path, int flags) int fd; fd = open(path, flags); - if (fd < 0) { - print_skip("%s not available (errno: %d)", path, errno); - exit(KSFT_SKIP); - } + __TEST_REQUIRE(fd >= 0, "%s not available (errno: %d)", path, errno); return fd; } @@ -70,121 +66,34 @@ int open_kvm_dev_path_or_exit(void) * Looks up and returns the value corresponding to the capability * (KVM_CAP_*) given by cap. */ -int kvm_check_cap(long cap) +unsigned int kvm_check_cap(long cap) { int ret; int kvm_fd; kvm_fd = open_kvm_dev_path_or_exit(); - ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); - TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n" - " rc: %i errno: %i", ret, errno); + ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap); + TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret)); close(kvm_fd); - return ret; -} - -/* VM Check Capability - * - * Input Args: - * vm - Virtual Machine - * cap - Capability - * - * Output Args: None - * - * Return: - * On success, the Value corresponding to the capability (KVM_CAP_*) - * specified by the value of cap. On failure a TEST_ASSERT failure - * is produced. - * - * Looks up and returns the value corresponding to the capability - * (KVM_CAP_*) given by cap. - */ -int vm_check_cap(struct kvm_vm *vm, long cap) -{ - int ret; - - ret = ioctl(vm->fd, KVM_CHECK_EXTENSION, cap); - TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION VM IOCTL failed,\n" - " rc: %i errno: %i", ret, errno); - - return ret; -} - -/* VM Enable Capability - * - * Input Args: - * vm - Virtual Machine - * cap - Capability - * - * Output Args: None - * - * Return: On success, 0. On failure a TEST_ASSERT failure is produced. - * - * Enables a capability (KVM_CAP_*) on the VM. - */ -int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) -{ - int ret; - - ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap); - TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n" - " rc: %i errno: %i", ret, errno); - - return ret; -} - -/* VCPU Enable Capability - * - * Input Args: - * vm - Virtual Machine - * vcpu_id - VCPU - * cap - Capability - * - * Output Args: None - * - * Return: On success, 0. On failure a TEST_ASSERT failure is produced. - * - * Enables a capability (KVM_CAP_*) on the VCPU. - */ -int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, - struct kvm_enable_cap *cap) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpu_id); - int r; - - TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id); - - r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap); - TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n" - " rc: %i, errno: %i", r, errno); - - return r; + return (unsigned int)ret; } void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size) { - struct kvm_enable_cap cap = { 0 }; - - cap.cap = KVM_CAP_DIRTY_LOG_RING; - cap.args[0] = ring_size; - vm_enable_cap(vm, &cap); + vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size); vm->dirty_ring_size = ring_size; } -static void vm_open(struct kvm_vm *vm, int perm) +static void vm_open(struct kvm_vm *vm) { - vm->kvm_fd = _open_kvm_dev_path_or_exit(perm); + vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR); - if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { - print_skip("immediate_exit not available"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT)); - vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type); - TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " - "rc: %i errno: %i", vm->fd, errno); + vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type); + TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd)); } const char *vm_guest_mode_string(uint32_t i) @@ -234,31 +143,12 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = { _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES, "Missing new mode params?"); -/* - * VM Create - * - * Input Args: - * mode - VM Mode (e.g. VM_MODE_P52V48_4K) - * phy_pages - Physical memory pages - * perm - permission - * - * Output Args: None - * - * Return: - * Pointer to opaque structure that describes the created VM. - * - * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). - * When phy_pages is non-zero, a memory region of phy_pages physical pages - * is created and mapped starting at guest physical address 0. The file - * descriptor to control the created VM is created with the permissions - * given by perm (e.g. O_RDWR). - */ -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages) { struct kvm_vm *vm; - pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__, - vm_guest_mode_string(mode), phy_pages, perm); + pr_debug("%s: mode='%s' pages='%ld'\n", __func__, + vm_guest_mode_string(mode), nr_pages); vm = calloc(1, sizeof(*vm)); TEST_ASSERT(vm != NULL, "Insufficient Memory"); @@ -340,7 +230,7 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits); #endif - vm_open(vm, perm); + vm_open(vm); /* Limit to VA-bit canonical virtual addresses. */ vm->vpages_valid = sparsebit_alloc(); @@ -355,18 +245,56 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) /* Allocate and setup memory for guest. */ vm->vpages_mapped = sparsebit_alloc(); - if (phy_pages != 0) + if (nr_pages != 0) vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - 0, 0, phy_pages, 0); + 0, 0, nr_pages, 0); return vm; } -struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages) +static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, + uint32_t nr_runnable_vcpus, + uint64_t extra_mem_pages) +{ + uint64_t nr_pages; + + TEST_ASSERT(nr_runnable_vcpus, + "Use vm_create_barebones() for VMs that _never_ have vCPUs\n"); + + TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), + "nr_vcpus = %d too large for host, max-vcpus = %d", + nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); + + /* + * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the + * test code and other per-VM assets that will be loaded into memslot0. + */ + nr_pages = 512; + + /* Account for the per-vCPU stacks on behalf of the test. */ + nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS; + + /* + * Account for the number of pages needed for the page tables. The + * maximum page table size for a memory region will be when the + * smallest page size is used. Considering each page contains x page + * table descriptors, the total extra size for page tables (for extra + * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller + * than N/x*2. + */ + nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; + + return vm_adjust_num_guest_pages(mode, nr_pages); +} + +struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, + uint64_t nr_extra_pages) { + uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus, + nr_extra_pages); struct kvm_vm *vm; - vm = vm_create(mode, pages, O_RDWR); + vm = ____vm_create(mode, nr_pages); kvm_vm_elf_load(vm, program_invocation_name); @@ -382,9 +310,7 @@ struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages) * Input Args: * mode - VM Mode (e.g. VM_MODE_P52V48_4K) * nr_vcpus - VCPU count - * slot0_mem_pages - Slot0 physical memory size * extra_mem_pages - Non-slot0 physical memory total size - * num_percpu_pages - Per-cpu physical memory pages * guest_code - Guest entry point * vcpuids - VCPU IDs * @@ -393,64 +319,39 @@ struct kvm_vm *vm_create_without_vcpus(enum vm_guest_mode mode, uint64_t pages) * Return: * Pointer to opaque structure that describes the created VM. * - * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K), - * with customized slot0 memory size, at least 512 pages currently. + * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K). * extra_mem_pages is only used to calculate the maximum page table size, * no real memory allocation for non-slot0 memory in this function. */ -struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, - uint64_t slot0_mem_pages, uint64_t extra_mem_pages, - uint32_t num_percpu_pages, void *guest_code, - uint32_t vcpuids[]) +struct kvm_vm *__vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus, + uint64_t extra_mem_pages, + void *guest_code, struct kvm_vcpu *vcpus[]) { - uint64_t vcpu_pages, extra_pg_pages, pages; struct kvm_vm *vm; int i; - /* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */ - if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES) - slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES; - - /* The maximum page table size for a memory region will be when the - * smallest pages are used. Considering each page contains x page - * table descriptors, the total extra size for page tables (for extra - * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller - * than N/x*2. - */ - vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus; - extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2; - pages = slot0_mem_pages + vcpu_pages + extra_pg_pages; - - TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS), - "nr_vcpus = %d too large for host, max-vcpus = %d", - nr_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS)); - - pages = vm_adjust_num_guest_pages(mode, pages); - - vm = vm_create_without_vcpus(mode, pages); + TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array"); - for (i = 0; i < nr_vcpus; ++i) { - uint32_t vcpuid = vcpuids ? vcpuids[i] : i; + vm = __vm_create(mode, nr_vcpus, extra_mem_pages); - vm_vcpu_add_default(vm, vcpuid, guest_code); - } + for (i = 0; i < nr_vcpus; ++i) + vcpus[i] = vm_vcpu_add(vm, i, guest_code); return vm; } -struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_mem_pages, - uint32_t num_percpu_pages, void *guest_code, - uint32_t vcpuids[]) +struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, + uint64_t extra_mem_pages, + void *guest_code) { - return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES, - extra_mem_pages, num_percpu_pages, guest_code, vcpuids); -} + struct kvm_vcpu *vcpus[1]; + struct kvm_vm *vm; -struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, - void *guest_code) -{ - return vm_create_default_with_vcpus(1, extra_mem_pages, 0, guest_code, - (uint32_t []){ vcpuid }); + vm = __vm_create_with_vcpus(VM_MODE_DEFAULT, 1, extra_mem_pages, + guest_code, vcpus); + + *vcpu = vcpus[0]; + return vm; } /* @@ -458,7 +359,6 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, * * Input Args: * vm - VM that has been released before - * perm - permission * * Output Args: None * @@ -466,12 +366,12 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages, * global state, such as the irqchip and the memory regions that are mapped * into the guest. */ -void kvm_vm_restart(struct kvm_vm *vmp, int perm) +void kvm_vm_restart(struct kvm_vm *vmp) { int ctr; struct userspace_mem_region *region; - vm_open(vmp, perm); + vm_open(vmp); if (vmp->has_irqchip) vm_create_irqchip(vmp); @@ -488,34 +388,17 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm) } } -void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log) +__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, + uint32_t vcpu_id) { - struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot }; - int ret; - - ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args); - TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s", - __func__, strerror(-ret)); + return __vm_vcpu_add(vm, vcpu_id); } -void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log, - uint64_t first_page, uint32_t num_pages) +struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) { - struct kvm_clear_dirty_log args = { - .dirty_bitmap = log, .slot = slot, - .first_page = first_page, - .num_pages = num_pages - }; - int ret; - - ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args); - TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s", - __func__, strerror(-ret)); -} + kvm_vm_restart(vm); -uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) -{ - return ioctl(vm->fd, KVM_RESET_DIRTY_RINGS); + return vm_vcpu_recreate(vm, 0); } /* @@ -589,32 +472,9 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, return ®ion->region; } -/* - * VCPU Find - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * - * Output Args: None - * - * Return: - * Pointer to VCPU structure - * - * Locates a vcpu structure that describes the VCPU specified by vcpuid and - * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU - * for the specified vcpuid. - */ -struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid) +__weak void vcpu_arch_free(struct kvm_vcpu *vcpu) { - struct vcpu *vcpu; - - list_for_each_entry(vcpu, &vm->vcpus, list) { - if (vcpu->id == vcpuid) - return vcpu; - } - return NULL; } /* @@ -629,43 +489,41 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid) * * Removes a vCPU from a VM and frees its resources. */ -static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) +static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { int ret; if (vcpu->dirty_gfns) { ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size); - TEST_ASSERT(ret == 0, "munmap of VCPU dirty ring failed, " - "rc: %i errno: %i", ret, errno); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->state, vcpu_mmap_sz()); - TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " - "errno: %i", ret, errno); + ret = munmap(vcpu->run, vcpu_mmap_sz()); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); + ret = close(vcpu->fd); - TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " - "errno: %i", ret, errno); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); list_del(&vcpu->list); + + vcpu_arch_free(vcpu); free(vcpu); } void kvm_vm_release(struct kvm_vm *vmp) { - struct vcpu *vcpu, *tmp; + struct kvm_vcpu *vcpu, *tmp; int ret; list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list) vm_vcpu_rm(vmp, vcpu); ret = close(vmp->fd); - TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" - " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); ret = close(vmp->kvm_fd); - TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n" - " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret)); } static void __vm_mem_region_delete(struct kvm_vm *vm, @@ -681,13 +539,11 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, } region->region.memory_size = 0; - ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); - TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, " - "rc: %i errno: %i", ret, errno); + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); sparsebit_free(®ion->unused_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); - TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno); + TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret)); free(region); } @@ -704,6 +560,12 @@ void kvm_vm_free(struct kvm_vm *vmp) if (vmp == NULL) return; + /* Free cached stats metadata and close FD */ + if (vmp->stats_fd) { + free(vmp->stats_desc); + close(vmp->stats_fd); + } + /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) __vm_mem_region_delete(vmp, region, false); @@ -727,14 +589,13 @@ int kvm_memfd_alloc(size_t size, bool hugepages) memfd_flags |= MFD_HUGETLB; fd = memfd_create("kvm_selftest", memfd_flags); - TEST_ASSERT(fd != -1, "memfd_create() failed, errno: %i (%s)", - errno, strerror(errno)); + TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd)); r = ftruncate(fd, size); - TEST_ASSERT(!r, "ftruncate() failed, errno: %i (%s)", errno, strerror(errno)); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r)); r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size); - TEST_ASSERT(!r, "fallocate() failed, errno: %i (%s)", errno, strerror(errno)); + TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r)); return fd; } @@ -1000,8 +861,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm_mem_backing_src_alias(src_type)->flag, region->fd, 0); TEST_ASSERT(region->mmap_start != MAP_FAILED, - "test_malloc failed, mmap_start: %p errno: %i", - region->mmap_start, errno); + __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); TEST_ASSERT(!is_backing_src_hugetlb(src_type) || region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz), @@ -1029,7 +889,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, region->region.guest_phys_addr = guest_paddr; region->region.memory_size = npages * vm->page_size; region->region.userspace_addr = (uintptr_t) region->host_mem; - ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" " rc: %i errno: %i\n" " slot: %u flags: 0x%x\n" @@ -1049,7 +909,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, vm_mem_backing_src_alias(src_type)->flag, region->fd, 0); TEST_ASSERT(region->mmap_alias != MAP_FAILED, - "mmap of alias failed, errno: %i", errno); + __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); /* Align host alias address */ region->host_alias = align_ptr_up(region->mmap_alias, alignment); @@ -1112,7 +972,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) region->region.flags = flags; - ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" " rc: %i errno: %i slot: %u flags: 0x%x", @@ -1142,7 +1002,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) region->region.guest_phys_addr = new_gpa; - ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION, ®ion->region); TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n" "ret: %i errno: %i slot: %u new_gpa: 0x%lx", @@ -1167,19 +1027,7 @@ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) __vm_mem_region_delete(vm, memslot2region(vm, slot), true); } -/* - * VCPU mmap Size - * - * Input Args: None - * - * Output Args: None - * - * Return: - * Size of VCPU state - * - * Returns the size of the structure pointed to by the return value - * of vcpu_state(). - */ +/* Returns the size of a vCPU's kvm_run structure. */ static int vcpu_mmap_sz(void) { int dev_fd, ret; @@ -1188,59 +1036,57 @@ static int vcpu_mmap_sz(void) ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); TEST_ASSERT(ret >= sizeof(struct kvm_run), - "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i", - __func__, ret, errno); + KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); return ret; } +static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id) +{ + struct kvm_vcpu *vcpu; + + list_for_each_entry(vcpu, &vm->vcpus, list) { + if (vcpu->id == vcpu_id) + return true; + } + + return false; +} + /* - * VM VCPU Add - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * - * Output Args: None - * - * Return: None - * - * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid. - * No additional VCPU setup is done. + * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id. + * No additional vCPU setup is done. Returns the vCPU. */ -void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) +struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) { - struct vcpu *vcpu; + struct kvm_vcpu *vcpu; /* Confirm a vcpu with the specified id doesn't already exist. */ - vcpu = vcpu_find(vm, vcpuid); - if (vcpu != NULL) - TEST_FAIL("vcpu with the specified id " - "already exists,\n" - " requested vcpuid: %u\n" - " existing vcpuid: %u state: %p", - vcpuid, vcpu->id, vcpu->state); + TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists\n", vcpu_id); /* Allocate and initialize new vcpu structure. */ vcpu = calloc(1, sizeof(*vcpu)); TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); - vcpu->id = vcpuid; - vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid); - TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i", - vcpu->fd, errno); - TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " + vcpu->vm = vm; + vcpu->id = vcpu_id; + vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id); + TEST_ASSERT(vcpu->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu->fd)); + + TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", - vcpu_mmap_sz(), sizeof(*vcpu->state)); - vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), + vcpu_mmap_sz(), sizeof(*vcpu->run)); + vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); - TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " - "vcpu id: %u errno: %i", vcpuid, errno); + TEST_ASSERT(vcpu->run != MAP_FAILED, + __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED)); /* Add to linked-list of VCPUs. */ list_add(&vcpu->list, &vm->vcpus); + + return vcpu; } /* @@ -1336,8 +1182,6 @@ va_found: * vm - Virtual Machine * sz - Size in bytes * vaddr_min - Minimum starting virtual address - * data_memslot - Memory region slot for data pages - * pgd_memslot - Memory region slot for new virtual translation tables * * Output Args: None * @@ -1423,7 +1267,6 @@ vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm) * vaddr - Virtuall address to map * paddr - VM Physical Address * npages - The number of pages to map - * pgd_memslot - Memory region slot for new virtual translation tables * * Output Args: None * @@ -1534,11 +1377,10 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) * (without failing the test) if the guest memory is not shared (so * no alias exists). * - * When vm_create() and related functions are called with a shared memory - * src_type, we also create a writable, shared alias mapping of the - * underlying guest memory. This allows the host to manipulate guest memory - * without mapping that memory in the guest's address space. And, for - * userfaultfd-based demand paging, we can do so without triggering userfaults. + * Create a writable, shared virtual=>physical alias for the specific GPA. + * The primary use case is to allow the host selftest to manipulate guest + * memory without mapping said memory in the guest's address space. And, for + * userfaultfd-based demand paging, to do so without triggering userfaults. */ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) { @@ -1556,452 +1398,90 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) return (void *) ((uintptr_t) region->host_alias + offset); } -/* - * VM Create IRQ Chip - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: None - * - * Creates an interrupt controller chip for the VM specified by vm. - */ +/* Create an interrupt controller chip for the specified VM. */ void vm_create_irqchip(struct kvm_vm *vm) { - int ret; - - ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); - TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " - "rc: %i errno: %i", ret, errno); + vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); vm->has_irqchip = true; } -/* - * VM VCPU State - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * - * Output Args: None - * - * Return: - * Pointer to structure that describes the state of the VCPU. - * - * Locates and returns a pointer to a structure that describes the - * state of the VCPU with the given vcpuid. - */ -struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - return vcpu->state; -} - -/* - * VM VCPU Run - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * - * Output Args: None - * - * Return: None - * - * Switch to executing the code for the VCPU given by vcpuid, within the VM - * given by vm. - */ -void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) -{ - int ret = _vcpu_run(vm, vcpuid); - TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " - "rc: %i errno: %i", ret, errno); -} - -int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) +int _vcpu_run(struct kvm_vcpu *vcpu) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); int rc; - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); do { - rc = ioctl(vcpu->fd, KVM_RUN, NULL); + rc = __vcpu_run(vcpu); } while (rc == -1 && errno == EINTR); - assert_on_unhandled_exception(vm, vcpuid); + assert_on_unhandled_exception(vcpu); return rc; } -int vcpu_get_fd(struct kvm_vm *vm, uint32_t vcpuid) +/* + * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR. + * Assert if the KVM returns an error (other than -EINTR). + */ +void vcpu_run(struct kvm_vcpu *vcpu) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret = _vcpu_run(vcpu); - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - return vcpu->fd; + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret)); } -void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid) +void vcpu_run_complete_io(struct kvm_vcpu *vcpu) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); int ret; - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - vcpu->state->immediate_exit = 1; - ret = ioctl(vcpu->fd, KVM_RUN, NULL); - vcpu->state->immediate_exit = 0; + vcpu->run->immediate_exit = 1; + ret = __vcpu_run(vcpu); + vcpu->run->immediate_exit = 0; TEST_ASSERT(ret == -1 && errno == EINTR, "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", ret, errno); } -void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_guest_debug *debug) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug); - - TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret); -} - -/* - * VM VCPU Set MP State - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * mp_state - mp_state to be set - * - * Output Args: None - * - * Return: None - * - * Sets the MP state of the VCPU given by vcpuid, to the state given - * by mp_state. - */ -void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_mp_state *mp_state) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); - TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, " - "rc: %i errno: %i", ret, errno); -} - /* - * VM VCPU Get Reg List - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * - * Output Args: - * None - * - * Return: - * A pointer to an allocated struct kvm_reg_list - * * Get the list of guest registers which are supported for - * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls + * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer, + * it is the caller's responsibility to free the list. */ -struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid) +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu) { struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; int ret; - ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, ®_list_n); + ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, ®_list_n); TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); + reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); reg_list->n = reg_list_n.n; - vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list); + vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list); return reg_list; } -/* - * VM VCPU Regs Get - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * - * Output Args: - * regs - current state of VCPU regs - * - * Return: None - * - * Obtains the current register state for the VCPU specified by vcpuid - * and stores it at the location given by regs. - */ -void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); - TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", - ret, errno); -} - -/* - * VM VCPU Regs Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * regs - Values to set VCPU regs to - * - * Output Args: None - * - * Return: None - * - * Sets the regs of the VCPU specified by vcpuid to the values - * given by regs. - */ -void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs) +void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); - TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", - ret, errno); -} - -#ifdef __KVM_HAVE_VCPU_EVENTS -void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); - TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i", - ret, errno); -} - -void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_vcpu_events *events) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); - TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", - ret, errno); -} -#endif - -#ifdef __x86_64__ -void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_nested_state *state) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state); - TEST_ASSERT(ret == 0, - "KVM_SET_NESTED_STATE failed, ret: %i errno: %i", - ret, errno); -} - -int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_nested_state *state, bool ignore_error) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state); - if (!ignore_error) { - TEST_ASSERT(ret == 0, - "KVM_SET_NESTED_STATE failed, ret: %i errno: %i", - ret, errno); - } - - return ret; -} -#endif - -/* - * VM VCPU System Regs Get - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * - * Output Args: - * sregs - current state of VCPU system regs - * - * Return: None - * - * Obtains the current system register state for the VCPU specified by - * vcpuid and stores it at the location given by sregs. - */ -void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); - TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", - ret, errno); -} - -/* - * VM VCPU System Regs Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * sregs - Values to set VCPU system regs to - * - * Output Args: None - * - * Return: None - * - * Sets the system regs of the VCPU specified by vcpuid to the values - * given by sregs. - */ -void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) -{ - int ret = _vcpu_sregs_set(vm, vcpuid, sregs); - TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, " - "rc: %i errno: %i", ret, errno); -} - -int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); -} - -void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu) -{ - int ret; - - ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu); - TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)", - ret, errno, strerror(errno)); -} - -void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu) -{ - int ret; - - ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu); - TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)", - ret, errno, strerror(errno)); -} - -void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg) -{ - int ret; - - ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg); - TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)", - ret, errno, strerror(errno)); -} - -void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg) -{ - int ret; - - ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg); - TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)", - ret, errno, strerror(errno)); -} - -/* - * VCPU Ioctl - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * cmd - Ioctl number - * arg - Argument to pass to the ioctl - * - * Return: None - * - * Issues an arbitrary ioctl on a VCPU fd. - */ -void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, - unsigned long cmd, void *arg) -{ - int ret; - - ret = _vcpu_ioctl(vm, vcpuid, cmd, arg); - TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", - cmd, ret, errno, strerror(errno)); -} - -int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, - unsigned long cmd, void *arg) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int ret; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - ret = ioctl(vcpu->fd, cmd, arg); - - return ret; -} - -void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid) -{ - struct vcpu *vcpu; - uint32_t size = vm->dirty_ring_size; + uint32_t page_size = vcpu->vm->page_size; + uint32_t size = vcpu->vm->dirty_ring_size; TEST_ASSERT(size > 0, "Should enable dirty ring first"); - vcpu = vcpu_find(vm, vcpuid); - - TEST_ASSERT(vcpu, "Cannot find vcpu %u", vcpuid); - if (!vcpu->dirty_gfns) { void *addr; - addr = mmap(NULL, size, PROT_READ, - MAP_PRIVATE, vcpu->fd, - vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private"); - addr = mmap(NULL, size, PROT_READ | PROT_EXEC, - MAP_PRIVATE, vcpu->fd, - vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec"); - addr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_SHARED, vcpu->fd, - vm->page_size * KVM_DIRTY_LOG_PAGE_OFFSET); + addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, + page_size * KVM_DIRTY_LOG_PAGE_OFFSET); TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed"); vcpu->dirty_gfns = addr; @@ -2012,62 +1492,10 @@ void *vcpu_map_dirty_ring(struct kvm_vm *vm, uint32_t vcpuid) } /* - * VM Ioctl - * - * Input Args: - * vm - Virtual Machine - * cmd - Ioctl number - * arg - Argument to pass to the ioctl - * - * Return: None - * - * Issues an arbitrary ioctl on a VM fd. - */ -void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) -{ - int ret; - - ret = _vm_ioctl(vm, cmd, arg); - TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", - cmd, ret, errno, strerror(errno)); -} - -int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) -{ - return ioctl(vm->fd, cmd, arg); -} - -/* - * KVM system ioctl - * - * Input Args: - * vm - Virtual Machine - * cmd - Ioctl number - * arg - Argument to pass to the ioctl - * - * Return: None - * - * Issues an arbitrary ioctl on a KVM fd. - */ -void kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) -{ - int ret; - - ret = ioctl(vm->kvm_fd, cmd, arg); - TEST_ASSERT(ret == 0, "KVM ioctl %lu failed, rc: %i errno: %i (%s)", - cmd, ret, errno, strerror(errno)); -} - -int _kvm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) -{ - return ioctl(vm->kvm_fd, cmd, arg); -} - -/* * Device Ioctl */ -int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) +int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr) { struct kvm_device_attr attribute = { .group = group, @@ -2078,43 +1506,31 @@ int _kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute); } -int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr) -{ - int ret = _kvm_device_check_attr(dev_fd, group, attr); - - TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno); - return ret; -} - -int _kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test, int *fd) +int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type) { - struct kvm_create_device create_dev; - int ret; + struct kvm_create_device create_dev = { + .type = type, + .flags = KVM_CREATE_DEVICE_TEST, + }; - create_dev.type = type; - create_dev.fd = -1; - create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0; - ret = ioctl(vm_get_fd(vm), KVM_CREATE_DEVICE, &create_dev); - *fd = create_dev.fd; - return ret; + return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); } -int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test) +int __kvm_create_device(struct kvm_vm *vm, uint64_t type) { - int fd, ret; - - ret = _kvm_create_device(vm, type, test, &fd); + struct kvm_create_device create_dev = { + .type = type, + .fd = -1, + .flags = 0, + }; + int err; - if (!test) { - TEST_ASSERT(!ret, - "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno); - return fd; - } - return ret; + err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev); + TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value"); + return err ? : create_dev.fd; } -int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, - void *val, bool write) +int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val) { struct kvm_device_attr kvmattr = { .group = group, @@ -2122,58 +1538,20 @@ int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, .flags = 0, .addr = (uintptr_t)val, }; - int ret; - - ret = ioctl(dev_fd, write ? KVM_SET_DEVICE_ATTR : KVM_GET_DEVICE_ATTR, - &kvmattr); - return ret; -} - -int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr, - void *val, bool write) -{ - int ret = _kvm_device_access(dev_fd, group, attr, val, write); - - TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); - return ret; -} - -int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid); - - return _kvm_device_check_attr(vcpu->fd, group, attr); -} - -int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr) -{ - int ret = _vcpu_has_device_attr(vm, vcpuid, group, attr); - - TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); - return ret; -} -int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write) -{ - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid); - - return _kvm_device_access(vcpu->fd, group, attr, val, write); + return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr); } -int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group, - uint64_t attr, void *val, bool write) +int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val) { - int ret = _vcpu_access_device_attr(vm, vcpuid, group, attr, val, write); + struct kvm_device_attr kvmattr = { + .group = group, + .attr = attr, + .flags = 0, + .addr = (uintptr_t)val, + }; - TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno); - return ret; + return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr); } /* @@ -2187,14 +1565,14 @@ int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) .level = level, }; - return _vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); + return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level); } void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level) { int ret = _kvm_irq_line(vm, irq, level); - TEST_ASSERT(ret >= 0, "KVM_IRQ_LINE failed, rc: %i errno: %i", ret, errno); + TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret)); } struct kvm_irq_routing *kvm_gsi_routing_create(void) @@ -2233,7 +1611,7 @@ int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) int ret; assert(routing); - ret = ioctl(vm_get_fd(vm), KVM_SET_GSI_ROUTING, routing); + ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing); free(routing); return ret; @@ -2244,8 +1622,7 @@ void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing) int ret; ret = _kvm_gsi_routing_write(vm, routing); - TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING failed, rc: %i errno: %i", - ret, errno); + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret)); } /* @@ -2267,7 +1644,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { int ctr; struct userspace_mem_region *region; - struct vcpu *vcpu; + struct kvm_vcpu *vcpu; fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); @@ -2292,8 +1669,9 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) virt_dump(stream, vm, indent + 4); } fprintf(stream, "%*sVCPUs:\n", indent, ""); + list_for_each_entry(vcpu, &vm->vcpus, list) - vcpu_dump(stream, vm, vcpu->id, indent + 2); + vcpu_dump(stream, vcpu, indent + 2); } /* Known KVM exit reasons */ @@ -2447,64 +1825,11 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); } -/* - * Is Unrestricted Guest - * - * Input Args: - * vm - Virtual Machine - * - * Output Args: None - * - * Return: True if the unrestricted guest is set to 'Y', otherwise return false. - * - * Check if the unrestricted guest flag is enabled. - */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - char val = 'N'; - size_t count; - FILE *f; - - if (vm == NULL) { - /* Ensure that the KVM vendor-specific module is loaded. */ - close(open_kvm_dev_path_or_exit()); - } - - f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r"); - if (f) { - count = fread(&val, sizeof(char), 1, f); - TEST_ASSERT(count == 1, "Unable to read from param file."); - fclose(f); - } - - return val == 'Y'; -} - -unsigned int vm_get_page_size(struct kvm_vm *vm) -{ - return vm->page_size; -} - -unsigned int vm_get_page_shift(struct kvm_vm *vm) -{ - return vm->page_shift; -} - -unsigned long __attribute__((weak)) vm_compute_max_gfn(struct kvm_vm *vm) +unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm) { return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1; } -uint64_t vm_get_max_gfn(struct kvm_vm *vm) -{ - return vm->max_gfn; -} - -int vm_get_fd(struct kvm_vm *vm) -{ - return vm->fd; -} - static unsigned int vm_calc_num_pages(unsigned int num_pages, unsigned int page_shift, unsigned int new_page_shift, @@ -2545,14 +1870,112 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size) return vm_adjust_num_guest_pages(mode, n); } -int vm_get_stats_fd(struct kvm_vm *vm) +/* + * Read binary stats descriptors + * + * Input Args: + * stats_fd - the file descriptor for the binary stats file from which to read + * header - the binary stats metadata header corresponding to the given FD + * + * Output Args: None + * + * Return: + * A pointer to a newly allocated series of stat descriptors. + * Caller is responsible for freeing the returned kvm_stats_desc. + * + * Read the stats descriptors from the binary stats interface. + */ +struct kvm_stats_desc *read_stats_descriptors(int stats_fd, + struct kvm_stats_header *header) { - return ioctl(vm->fd, KVM_GET_STATS_FD, NULL); + struct kvm_stats_desc *stats_desc; + ssize_t desc_size, total_size, ret; + + desc_size = get_stats_descriptor_size(header); + total_size = header->num_desc * desc_size; + + stats_desc = calloc(header->num_desc, desc_size); + TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors"); + + ret = pread(stats_fd, stats_desc, total_size, header->desc_offset); + TEST_ASSERT(ret == total_size, "Read KVM stats descriptors"); + + return stats_desc; } -int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid) +/* + * Read stat data for a particular stat + * + * Input Args: + * stats_fd - the file descriptor for the binary stats file from which to read + * header - the binary stats metadata header corresponding to the given FD + * desc - the binary stat metadata for the particular stat to be read + * max_elements - the maximum number of 8-byte values to read into data + * + * Output Args: + * data - the buffer into which stat data should be read + * + * Read the data values of a specified stat from the binary stats interface. + */ +void read_stat_data(int stats_fd, struct kvm_stats_header *header, + struct kvm_stats_desc *desc, uint64_t *data, + size_t max_elements) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); + size_t nr_elements = min_t(ssize_t, desc->size, max_elements); + size_t size = nr_elements * sizeof(*data); + ssize_t ret; - return ioctl(vcpu->fd, KVM_GET_STATS_FD, NULL); + TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name); + TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name); + + ret = pread(stats_fd, data, size, + header->data_offset + desc->offset); + + TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)", + desc->name, errno, strerror(errno)); + TEST_ASSERT(ret == size, + "pread() on stat '%s' read %ld bytes, wanted %lu bytes", + desc->name, size, ret); +} + +/* + * Read the data of the named stat + * + * Input Args: + * vm - the VM for which the stat should be read + * stat_name - the name of the stat to read + * max_elements - the maximum number of 8-byte values to read into data + * + * Output Args: + * data - the buffer into which stat data should be read + * + * Read the data values of a specified stat from the binary stats interface. + */ +void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data, + size_t max_elements) +{ + struct kvm_stats_desc *desc; + size_t size_desc; + int i; + + if (!vm->stats_fd) { + vm->stats_fd = vm_get_stats_fd(vm); + read_stats_header(vm->stats_fd, &vm->stats_header); + vm->stats_desc = read_stats_descriptors(vm->stats_fd, + &vm->stats_header); + } + + size_desc = get_stats_descriptor_size(&vm->stats_header); + + for (i = 0; i < vm->stats_header.num_desc; ++i) { + desc = (void *)vm->stats_desc + (i * size_desc); + + if (strcmp(desc->name, stat_name)) + continue; + + read_stat_data(vm->stats_fd, &vm->stats_header, desc, + data, max_elements); + + break; + } } diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h deleted file mode 100644 index a03febc24ba6..000000000000 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * tools/testing/selftests/kvm/lib/kvm_util_internal.h - * - * Copyright (C) 2018, Google LLC. - */ - -#ifndef SELFTEST_KVM_UTIL_INTERNAL_H -#define SELFTEST_KVM_UTIL_INTERNAL_H - -#include "linux/hashtable.h" -#include "linux/rbtree.h" - -#include "sparsebit.h" - -struct userspace_mem_region { - struct kvm_userspace_memory_region region; - struct sparsebit *unused_phy_pages; - int fd; - off_t offset; - void *host_mem; - void *host_alias; - void *mmap_start; - void *mmap_alias; - size_t mmap_size; - struct rb_node gpa_node; - struct rb_node hva_node; - struct hlist_node slot_node; -}; - -struct vcpu { - struct list_head list; - uint32_t id; - int fd; - struct kvm_run *state; - struct kvm_dirty_gfn *dirty_gfns; - uint32_t fetch_index; - uint32_t dirty_gfns_count; -}; - -struct userspace_mem_regions { - struct rb_root gpa_tree; - struct rb_root hva_tree; - DECLARE_HASHTABLE(slot_hash, 9); -}; - -struct kvm_vm { - int mode; - unsigned long type; - int kvm_fd; - int fd; - unsigned int pgtable_levels; - unsigned int page_size; - unsigned int page_shift; - unsigned int pa_bits; - unsigned int va_bits; - uint64_t max_gfn; - struct list_head vcpus; - struct userspace_mem_regions regions; - struct sparsebit *vpages_valid; - struct sparsebit *vpages_mapped; - bool has_irqchip; - bool pgd_created; - vm_paddr_t pgd; - vm_vaddr_t gdt; - vm_vaddr_t tss; - vm_vaddr_t idt; - vm_vaddr_t handlers; - uint32_t dirty_ring_size; -}; - -struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); - -/* - * Virtual Translation Tables Dump - * - * Input Args: - * stream - Output FILE stream - * vm - Virtual Machine - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps to the FILE stream given by @stream, the contents of all the - * virtual translation tables for the VM given by @vm. - */ -void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); - -/* - * Register Dump - * - * Input Args: - * stream - Output FILE stream - * regs - Registers - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps the state of the registers given by @regs, to the FILE stream - * given by @stream. - */ -void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent); - -/* - * System Register Dump - * - * Input Args: - * stream - Output FILE stream - * sregs - System registers - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps the state of the system registers given by @sregs, to the FILE stream - * given by @stream. - */ -void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent); - -struct userspace_mem_region * -memslot2region(struct kvm_vm *vm, uint32_t memslot); - -#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */ diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c index f989ff91f022..9618b37c66f7 100644 --- a/tools/testing/selftests/kvm/lib/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/perf_test_util.c @@ -17,8 +17,8 @@ struct perf_test_args perf_test_args; static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; struct vcpu_thread { - /* The id of the vCPU. */ - int vcpu_id; + /* The index of the vCPU. */ + int vcpu_idx; /* The pthread backing the vCPU. */ pthread_t thread; @@ -36,24 +36,26 @@ static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *); /* Set to true once all vCPU threads are up and running. */ static bool all_vcpu_threads_running; +static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + /* * Continuously write to the first 8 bytes of each page in the * specified region. */ -void perf_test_guest_code(uint32_t vcpu_id) +void perf_test_guest_code(uint32_t vcpu_idx) { struct perf_test_args *pta = &perf_test_args; - struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id]; + struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx]; uint64_t gva; uint64_t pages; int i; - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); - gva = vcpu_args->gva; pages = vcpu_args->pages; + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx); + while (true) { for (i = 0; i < pages; i++) { uint64_t addr = gva + (i * pta->guest_page_size); @@ -68,47 +70,50 @@ void perf_test_guest_code(uint32_t vcpu_id) } } -void perf_test_setup_vcpus(struct kvm_vm *vm, int vcpus, +void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus, + struct kvm_vcpu *vcpus[], uint64_t vcpu_memory_bytes, bool partition_vcpu_memory_access) { struct perf_test_args *pta = &perf_test_args; struct perf_test_vcpu_args *vcpu_args; - int vcpu_id; + int i; + + for (i = 0; i < nr_vcpus; i++) { + vcpu_args = &pta->vcpu_args[i]; - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vcpu_args = &pta->vcpu_args[vcpu_id]; + vcpu_args->vcpu = vcpus[i]; + vcpu_args->vcpu_idx = i; - vcpu_args->vcpu_id = vcpu_id; if (partition_vcpu_memory_access) { vcpu_args->gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); + (i * vcpu_memory_bytes); vcpu_args->pages = vcpu_memory_bytes / pta->guest_page_size; - vcpu_args->gpa = pta->gpa + (vcpu_id * vcpu_memory_bytes); + vcpu_args->gpa = pta->gpa + (i * vcpu_memory_bytes); } else { vcpu_args->gva = guest_test_virt_mem; - vcpu_args->pages = (vcpus * vcpu_memory_bytes) / + vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size; vcpu_args->gpa = pta->gpa; } - vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + vcpu_args_set(vcpus[i], 1, i); pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_args->gpa, vcpu_args->gpa + + i, vcpu_args->gpa, vcpu_args->gpa + (vcpu_args->pages * pta->guest_page_size)); } } -struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, +struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus, uint64_t vcpu_memory_bytes, int slots, enum vm_mem_backing_src_type backing_src, bool partition_vcpu_memory_access) { struct perf_test_args *pta = &perf_test_args; struct kvm_vm *vm; - uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES; + uint64_t guest_num_pages, slot0_pages = 0; uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src); uint64_t region_end_gfn; int i; @@ -125,7 +130,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, pta->guest_page_size = vm_guest_mode_params[mode].page_size; guest_num_pages = vm_adjust_num_guest_pages(mode, - (vcpus * vcpu_memory_bytes) / pta->guest_page_size); + (nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size); TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0, "Guest memory size is not host page size aligned."); @@ -140,20 +145,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, * in-memory data structures. */ if (pta->nested) - slot0_pages += perf_test_nested_pages(vcpus); + slot0_pages += perf_test_nested_pages(nr_vcpus); /* * Pass guest_num_pages to populate the page tables for test memory. * The memory is also added to memslot 0, but that's a benign side * effect as KVM allows aliasing HVAs in meslots. */ - vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0, - perf_test_guest_code, NULL); + vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages, + perf_test_guest_code, vcpus); pta->vm = vm; /* Put the test region at the top guest physical memory. */ - region_end_gfn = vm_get_max_gfn(vm) + 1; + region_end_gfn = vm->max_gfn + 1; #ifdef __x86_64__ /* @@ -170,11 +175,10 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, TEST_ASSERT(guest_num_pages < region_end_gfn, "Requested more guest memory than address space allows.\n" " guest pages: %" PRIx64 " max gfn: %" PRIx64 - " vcpus: %d wss: %" PRIx64 "]\n", - guest_num_pages, region_end_gfn - 1, vcpus, - vcpu_memory_bytes); + " nr_vcpus: %d wss: %" PRIx64 "]\n", + guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes); - pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size; + pta->gpa = (region_end_gfn - guest_num_pages - 1) * pta->guest_page_size; pta->gpa = align_down(pta->gpa, backing_src_pagesz); #ifdef __s390x__ /* Align to 1M (segment size) */ @@ -197,11 +201,12 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus, /* Do mapping for the demand paging memory slot */ virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages); - perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access); + perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes, + partition_vcpu_memory_access); if (pta->nested) { pr_info("Configuring vCPUs to run in L2 (nested).\n"); - perf_test_setup_nested(vm, vcpus); + perf_test_setup_nested(vm, nr_vcpus, vcpus); } ucall_init(vm, NULL); @@ -229,7 +234,7 @@ uint64_t __weak perf_test_nested_pages(int nr_vcpus) return 0; } -void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus) +void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus) { pr_info("%s() not support on this architecture, skipping.\n", __func__); exit(KSFT_SKIP); @@ -250,39 +255,40 @@ static void *vcpu_thread_main(void *data) while (!READ_ONCE(all_vcpu_threads_running)) ; - vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_id]); + vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]); return NULL; } -void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *)) +void perf_test_start_vcpu_threads(int nr_vcpus, + void (*vcpu_fn)(struct perf_test_vcpu_args *)) { - int vcpu_id; + int i; vcpu_thread_fn = vcpu_fn; WRITE_ONCE(all_vcpu_threads_running, false); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - struct vcpu_thread *vcpu = &vcpu_threads[vcpu_id]; + for (i = 0; i < nr_vcpus; i++) { + struct vcpu_thread *vcpu = &vcpu_threads[i]; - vcpu->vcpu_id = vcpu_id; + vcpu->vcpu_idx = i; WRITE_ONCE(vcpu->running, false); pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu); } - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - while (!READ_ONCE(vcpu_threads[vcpu_id].running)) + for (i = 0; i < nr_vcpus; i++) { + while (!READ_ONCE(vcpu_threads[i].running)) ; } WRITE_ONCE(all_vcpu_threads_running, true); } -void perf_test_join_vcpu_threads(int vcpus) +void perf_test_join_vcpu_threads(int nr_vcpus) { - int vcpu_id; + int i; - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) - pthread_join(vcpu_threads[vcpu_id].thread, NULL); + for (i = 0; i < nr_vcpus; i++) + pthread_join(vcpu_threads[i].thread, NULL); } diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c index abc0ae5a4fe1..604478151212 100644 --- a/tools/testing/selftests/kvm/lib/riscv/processor.c +++ b/tools/testing/selftests/kvm/lib/riscv/processor.c @@ -9,7 +9,6 @@ #include <assert.h> #include "kvm_util.h" -#include "../kvm_util_internal.h" #include "processor.h" #define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000 @@ -54,7 +53,7 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level) return (gva & pte_index_mask[level]) >> pte_index_shift[level]; } -void virt_pgd_alloc(struct kvm_vm *vm) +void virt_arch_pgd_alloc(struct kvm_vm *vm) { if (!vm->pgd_created) { vm_paddr_t paddr = vm_phy_pages_alloc(vm, @@ -65,7 +64,7 @@ void virt_pgd_alloc(struct kvm_vm *vm) } } -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { uint64_t *ptep, next_ppn; int level = vm->pgtable_levels - 1; @@ -109,7 +108,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK; } -vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint64_t *ptep; int level = vm->pgtable_levels - 1; @@ -160,7 +159,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, #endif } -void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { int level = vm->pgtable_levels - 1; uint64_t pgd, *ptep; @@ -179,8 +178,9 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) } } -void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid) +void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu) { + struct kvm_vm *vm = vcpu->vm; unsigned long satp; /* @@ -199,46 +199,46 @@ void riscv_vcpu_mmu_setup(struct kvm_vm *vm, int vcpuid) satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN; satp |= SATP_MODE_48; - set_reg(vm, vcpuid, RISCV_CSR_REG(satp), satp); + vcpu_set_reg(vcpu, RISCV_CSR_REG(satp), satp); } -void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { struct kvm_riscv_core core; - get_reg(vm, vcpuid, RISCV_CORE_REG(mode), &core.mode); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), &core.regs.pc); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.ra), &core.regs.ra); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), &core.regs.sp); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), &core.regs.gp); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.tp), &core.regs.tp); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t0), &core.regs.t0); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t1), &core.regs.t1); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t2), &core.regs.t2); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s0), &core.regs.s0); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s1), &core.regs.s1); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a0), &core.regs.a0); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a1), &core.regs.a1); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a2), &core.regs.a2); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a3), &core.regs.a3); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a4), &core.regs.a4); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a5), &core.regs.a5); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a6), &core.regs.a6); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.a7), &core.regs.a7); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s2), &core.regs.s2); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s3), &core.regs.s3); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s4), &core.regs.s4); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s5), &core.regs.s5); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s6), &core.regs.s6); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s7), &core.regs.s7); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s8), &core.regs.s8); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s9), &core.regs.s9); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s10), &core.regs.s10); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.s11), &core.regs.s11); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t3), &core.regs.t3); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t4), &core.regs.t4); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t5), &core.regs.t5); - get_reg(vm, vcpuid, RISCV_CORE_REG(regs.t6), &core.regs.t6); + vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5); + vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6); fprintf(stream, " MODE: 0x%lx\n", core.mode); @@ -275,7 +275,8 @@ static void __aligned(16) guest_unexp_trap(void) 0, 0, 0, 0, 0, 0); } -void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) { int r; size_t stack_size = vm->page_size == 4096 ? @@ -285,9 +286,10 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) DEFAULT_RISCV_GUEST_STACK_VADDR_MIN); unsigned long current_gp = 0; struct kvm_mp_state mps; + struct kvm_vcpu *vcpu; - vm_vcpu_add(vm, vcpuid); - riscv_vcpu_mmu_setup(vm, vcpuid); + vcpu = __vm_vcpu_add(vm, vcpu_id); + riscv_vcpu_mmu_setup(vcpu); /* * With SBI HSM support in KVM RISC-V, all secondary VCPUs are @@ -295,26 +297,25 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) * are powered-on using KVM_SET_MP_STATE ioctl(). */ mps.mp_state = KVM_MP_STATE_RUNNABLE; - r = _vcpu_ioctl(vm, vcpuid, KVM_SET_MP_STATE, &mps); + r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps); TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r); /* Setup global pointer of guest to be same as the host */ asm volatile ( "add %0, gp, zero" : "=r" (current_gp) : : "memory"); - set_reg(vm, vcpuid, RISCV_CORE_REG(regs.gp), current_gp); + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp); /* Setup stack pointer and program counter of guest */ - set_reg(vm, vcpuid, RISCV_CORE_REG(regs.sp), - stack_vaddr + stack_size); - set_reg(vm, vcpuid, RISCV_CORE_REG(regs.pc), - (unsigned long)guest_code); + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size); + vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code); /* Setup default exception vector of guest */ - set_reg(vm, vcpuid, RISCV_CSR_REG(stvec), - (unsigned long)guest_unexp_trap); + vcpu_set_reg(vcpu, RISCV_CSR_REG(stvec), (unsigned long)guest_unexp_trap); + + return vcpu; } -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { va_list ap; uint64_t id = RISCV_CORE_REG(regs.a0); @@ -352,12 +353,12 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) id = RISCV_CORE_REG(regs.a7); break; } - set_reg(vm, vcpuid, id, va_arg(ap, uint64_t)); + vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t)); } va_end(ap); } -void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 8550f424d093..087b9740bc8f 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -8,7 +8,6 @@ #include <linux/kvm.h> #include "kvm_util.h" -#include "../kvm_util_internal.h" #include "processor.h" void ucall_init(struct kvm_vm *vm, void *arg) @@ -53,7 +52,7 @@ void ucall(uint64_t cmd, int nargs, ...) va_list va; int i; - nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + nargs = min(nargs, UCALL_MAX_ARGS); va_start(va, nargs); for (i = 0; i < nargs; ++i) @@ -65,9 +64,9 @@ void ucall(uint64_t cmd, int nargs, ...) (vm_vaddr_t)&uc, 0, 0, 0, 0, 0); } -uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { - struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct kvm_run *run = vcpu->run; struct ucall ucall = {}; if (uc) @@ -77,16 +76,17 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) { switch (run->riscv_sbi.function_id) { case KVM_RISCV_SELFTESTS_SBI_UCALL: - memcpy(&ucall, addr_gva2hva(vm, - run->riscv_sbi.args[0]), sizeof(ucall)); + memcpy(&ucall, + addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]), + sizeof(ucall)); - vcpu_run_complete_io(vm, vcpu_id); + vcpu_run_complete_io(vcpu); if (uc) memcpy(uc, &ucall, sizeof(ucall)); break; case KVM_RISCV_SELFTESTS_SBI_UNEXP: - vcpu_dump(stderr, vm, vcpu_id, 2); + vcpu_dump(stderr, vcpu, 2); TEST_ASSERT(0, "Unexpected trap taken by guest"); break; default: diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c index 86b9e611ad87..cdb7daeed5fd 100644 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c @@ -8,8 +8,6 @@ #include "test_util.h" #include "kvm_util.h" -#define VCPU_ID 6 - #define ICPT_INSTRUCTION 0x04 #define IPA0_DIAG 0x8300 @@ -27,14 +25,15 @@ static void guest_code(void) */ static uint64_t diag318_handler(void) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; uint64_t reg; uint64_t diag318_info; - vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_run(vm, VCPU_ID); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_run(vcpu); + run = vcpu->run; TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, "DIAGNOSE 0x0318 instruction was not intercepted"); @@ -62,7 +61,7 @@ uint64_t get_diag318_info(void) * If KVM does not support diag318, then return 0 to * ensure tests do not break. */ - if (!kvm_check_cap(KVM_CAP_S390_DIAG318)) { + if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) { if (!printed_skip) { fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. " "Skipping diag318 test.\n"); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index f87c7137598e..89d7340d9cbd 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -7,11 +7,10 @@ #include "processor.h" #include "kvm_util.h" -#include "../kvm_util_internal.h" #define PAGES_PER_REGION 4 -void virt_pgd_alloc(struct kvm_vm *vm) +void virt_arch_pgd_alloc(struct kvm_vm *vm) { vm_paddr_t paddr; @@ -47,7 +46,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri) | ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH); } -void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) { int ri, idx; uint64_t *entry; @@ -86,7 +85,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) entry[idx] = gpa; } -vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { int ri, idx; uint64_t *entry; @@ -147,7 +146,7 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent, } } -void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { if (!vm->pgd_created) return; @@ -155,12 +154,14 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) virt_dump_region(stream, vm, indent, vm->pgd); } -void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) { size_t stack_size = DEFAULT_STACK_PGS * getpagesize(); uint64_t stack_vaddr; struct kvm_regs regs; struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; struct kvm_run *run; TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", @@ -169,24 +170,26 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) stack_vaddr = vm_vaddr_alloc(vm, stack_size, DEFAULT_GUEST_STACK_VADDR_MIN); - vm_vcpu_add(vm, vcpuid); + vcpu = __vm_vcpu_add(vm, vcpu_id); /* Setup guest registers */ - vcpu_regs_get(vm, vcpuid, ®s); + vcpu_regs_get(vcpu, ®s); regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160; - vcpu_regs_set(vm, vcpuid, ®s); + vcpu_regs_set(vcpu, ®s); - vcpu_sregs_get(vm, vcpuid, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs.crs[0] |= 0x00040000; /* Enable floating point regs */ sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */ - vcpu_sregs_set(vm, vcpuid, &sregs); + vcpu_sregs_set(vcpu, &sregs); - run = vcpu_state(vm, vcpuid); + run = vcpu->run; run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */ run->psw_addr = (uintptr_t)guest_code; + + return vcpu; } -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { va_list ap; struct kvm_regs regs; @@ -197,26 +200,21 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) num); va_start(ap, num); - vcpu_regs_get(vm, vcpuid, ®s); + vcpu_regs_get(vcpu, ®s); for (i = 0; i < num; i++) regs.gprs[i + 2] = va_arg(ap, uint64_t); - vcpu_regs_set(vm, vcpuid, ®s); + vcpu_regs_set(vcpu, ®s); va_end(ap); } -void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - if (!vcpu) - return; - fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", - indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); + indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr); } -void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { } diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index 9d3b0f15249a..73dc4e21190f 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -22,7 +22,7 @@ void ucall(uint64_t cmd, int nargs, ...) va_list va; int i; - nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + nargs = min(nargs, UCALL_MAX_ARGS); va_start(va, nargs); for (i = 0; i < nargs; ++i) @@ -33,9 +33,9 @@ void ucall(uint64_t cmd, int nargs, ...) asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory"); } -uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { - struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct kvm_run *run = vcpu->run; struct ucall ucall = {}; if (uc) @@ -47,10 +47,10 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) (run->s390_sieic.ipb >> 16) == 0x501) { int reg = run->s390_sieic.ipa & 0xf; - memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]), + memcpy(&ucall, addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]), sizeof(ucall)); - vcpu_run_complete_io(vm, vcpu_id); + vcpu_run_complete_io(vcpu); if (uc) memcpy(uc, &ucall, sizeof(ucall)); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c index e258524435a0..0f344a7c89c4 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c +++ b/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c @@ -12,7 +12,6 @@ #include "test_util.h" #include "kvm_util.h" #include "perf_test_util.h" -#include "../kvm_util_internal.h" #include "processor.h" #include "vmx.h" @@ -78,14 +77,14 @@ void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm) nested_identity_map_1g(vmx, vm, start, end - start); } -void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus) +void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]) { struct vmx_pages *vmx, *vmx0 = NULL; struct kvm_regs regs; vm_vaddr_t vmx_gva; int vcpu_id; - nested_vmx_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { vmx = vcpu_alloc_vmx(vm, &vmx_gva); @@ -104,9 +103,9 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus) * Override the vCPU to run perf_test_l1_guest_code() which will * bounce it into L2 before calling perf_test_guest_code(). */ - vcpu_regs_get(vm, vcpu_id, ®s); + vcpu_regs_get(vcpus[vcpu_id], ®s); regs.rip = (unsigned long) perf_test_l1_guest_code; - vcpu_regs_set(vm, vcpu_id, ®s); - vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id); + vcpu_regs_set(vcpus[vcpu_id], ®s); + vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id); } } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index ead7011ee8f6..2e6e61bbe81b 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -7,7 +7,6 @@ #include "test_util.h" #include "kvm_util.h" -#include "../kvm_util_internal.h" #include "processor.h" #ifndef NUM_INTERRUPTS @@ -17,10 +16,11 @@ #define DEFAULT_CODE_SELECTOR 0x8 #define DEFAULT_DATA_SELECTOR 0x10 +#define MAX_NR_CPUID_ENTRIES 100 + vm_vaddr_t exception_handlers; -void regs_dump(FILE *stream, struct kvm_regs *regs, - uint8_t indent) +static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent) { fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " "rcx: 0x%.16llx rdx: 0x%.16llx\n", @@ -43,21 +43,6 @@ void regs_dump(FILE *stream, struct kvm_regs *regs, regs->rip, regs->rflags); } -/* - * Segment Dump - * - * Input Args: - * stream - Output FILE stream - * segment - KVM segment - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps the state of the KVM segment given by @segment, to the FILE stream - * given by @stream. - */ static void segment_dump(FILE *stream, struct kvm_segment *segment, uint8_t indent) { @@ -75,21 +60,6 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment, segment->unusable, segment->padding); } -/* - * dtable Dump - * - * Input Args: - * stream - Output FILE stream - * dtable - KVM dtable - * indent - Left margin indent amount - * - * Output Args: None - * - * Return: None - * - * Dumps the state of the KVM dtable given by @dtable, to the FILE stream - * given by @stream. - */ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, uint8_t indent) { @@ -99,8 +69,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, dtable->padding[0], dtable->padding[1], dtable->padding[2]); } -void sregs_dump(FILE *stream, struct kvm_sregs *sregs, - uint8_t indent) +static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent) { unsigned int i; @@ -142,7 +111,7 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs, } } -void virt_pgd_alloc(struct kvm_vm *vm) +void virt_arch_pgd_alloc(struct kvm_vm *vm) { TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use " "unknown or unsupported guest mode, mode: 0x%x", vm->mode); @@ -240,27 +209,24 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); } -void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) +void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) { __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K); } -static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, - uint64_t vaddr) +static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, + struct kvm_vcpu *vcpu, + uint64_t vaddr) { uint16_t index[4]; uint64_t *pml4e, *pdpe, *pde; uint64_t *pte; - struct kvm_cpuid_entry2 *entry; struct kvm_sregs sregs; - int max_phy_addr; uint64_t rsvd_mask = 0; - entry = kvm_get_supported_cpuid_index(0x80000008, 0); - max_phy_addr = entry->eax & 0x000000ff; /* Set the high bits in the reserved mask. */ - if (max_phy_addr < 52) - rsvd_mask = GENMASK_ULL(51, max_phy_addr); + if (vm->pa_bits < 52) + rsvd_mask = GENMASK_ULL(51, vm->pa_bits); /* * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries @@ -268,7 +234,7 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1, * the XD flag (bit 63) is reserved. */ - vcpu_sregs_get(vm, vcpuid, &sregs); + vcpu_sregs_get(vcpu, &sregs); if ((sregs.efer & EFER_NX) == 0) { rsvd_mask |= PTE_NX_MASK; } @@ -320,22 +286,23 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, return &pte[index[0]]; } -uint64_t vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr) +uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t vaddr) { - uint64_t *pte = _vm_get_page_table_entry(vm, vcpuid, vaddr); + uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr); return *(uint64_t *)pte; } -void vm_set_page_table_entry(struct kvm_vm *vm, int vcpuid, uint64_t vaddr, - uint64_t pte) +void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu, + uint64_t vaddr, uint64_t pte) { - uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpuid, vaddr); + uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr); *(uint64_t *)new_pte = pte; } -void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) { uint64_t *pml4e, *pml4e_start; uint64_t *pdpe, *pdpe_start; @@ -516,7 +483,7 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector, kvm_seg_fill_gdt_64bit(vm, segp); } -vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) { uint16_t index[4]; uint64_t *pml4e, *pdpe, *pde; @@ -579,12 +546,12 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp, kvm_seg_fill_gdt_64bit(vm, segp); } -static void vcpu_setup(struct kvm_vm *vm, int vcpuid) +static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { struct kvm_sregs sregs; /* Set mode specific system register values. */ - vcpu_sregs_get(vm, vcpuid, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs.idt.limit = 0; @@ -608,25 +575,10 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid) } sregs.cr3 = vm->pgd; - vcpu_sregs_set(vm, vcpuid, &sregs); + vcpu_sregs_set(vcpu, &sregs); } -#define CPUID_XFD_BIT (1 << 4) -static bool is_xfd_supported(void) -{ - int eax, ebx, ecx, edx; - const int leaf = 0xd, subleaf = 0x1; - - __asm__ __volatile__( - "cpuid" - : /* output */ "=a"(eax), "=b"(ebx), - "=c"(ecx), "=d"(edx) - : /* input */ "0"(leaf), "2"(subleaf)); - - return !!(eax & CPUID_XFD_BIT); -} - -void vm_xsave_req_perm(int bit) +void __vm_xsave_require_permission(int bit, const char *name) { int kvm_fd; u64 bitmask; @@ -637,26 +589,21 @@ void vm_xsave_req_perm(int bit) .addr = (unsigned long) &bitmask }; + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD)); + kvm_fd = open_kvm_dev_path_or_exit(); - rc = ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); + rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr); close(kvm_fd); + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) - exit(KSFT_SKIP); - TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); - if (!(bitmask & (1ULL << bit))) - exit(KSFT_SKIP); + __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported"); - if (!is_xfd_supported()) - exit(KSFT_SKIP); + TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc); - rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); + __TEST_REQUIRE(bitmask & (1ULL << bit), + "Required XSAVE feature '%s' not supported", name); - /* - * The older kernel version(<5.15) can't support - * ARCH_REQ_XCOMP_GUEST_PERM and directly return. - */ - if (rc) - return; + TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit)); rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask); TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc); @@ -665,108 +612,89 @@ void vm_xsave_req_perm(int bit) bitmask); } -void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, + void *guest_code) { struct kvm_mp_state mp_state; struct kvm_regs regs; vm_vaddr_t stack_vaddr; + struct kvm_vcpu *vcpu; + stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), DEFAULT_GUEST_STACK_VADDR_MIN); - /* Create VCPU */ - vm_vcpu_add(vm, vcpuid); - vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); - vcpu_setup(vm, vcpuid); + vcpu = __vm_vcpu_add(vm, vcpu_id); + vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); + vcpu_setup(vm, vcpu); /* Setup guest general purpose registers */ - vcpu_regs_get(vm, vcpuid, ®s); + vcpu_regs_get(vcpu, ®s); regs.rflags = regs.rflags | 0x2; regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); regs.rip = (unsigned long) guest_code; - vcpu_regs_set(vm, vcpuid, ®s); + vcpu_regs_set(vcpu, ®s); /* Setup the MP state */ mp_state.mp_state = 0; - vcpu_set_mp_state(vm, vcpuid, &mp_state); + vcpu_mp_state_set(vcpu, &mp_state); + + return vcpu; } -/* - * Allocate an instance of struct kvm_cpuid2 - * - * Input Args: None - * - * Output Args: None - * - * Return: A pointer to the allocated struct. The caller is responsible - * for freeing this struct. - * - * Since kvm_cpuid2 uses a 0-length array to allow a the size of the - * array to be decided at allocation time, allocation is slightly - * complicated. This function uses a reasonable default length for - * the array and performs the appropriate allocation. - */ -static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id) { - struct kvm_cpuid2 *cpuid; - int nent = 100; - size_t size; - - size = sizeof(*cpuid); - size += nent * sizeof(struct kvm_cpuid_entry2); - cpuid = malloc(size); - if (!cpuid) { - perror("malloc"); - abort(); - } + struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id); - cpuid->nent = nent; + vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); - return cpuid; + return vcpu; } -/* - * KVM Supported CPUID Get - * - * Input Args: None - * - * Output Args: - * - * Return: The supported KVM CPUID - * - * Get the guest CPUID supported by KVM. - */ -struct kvm_cpuid2 *kvm_get_supported_cpuid(void) +void vcpu_arch_free(struct kvm_vcpu *vcpu) +{ + if (vcpu->cpuid) + free(vcpu->cpuid); +} + +const struct kvm_cpuid2 *kvm_get_supported_cpuid(void) { static struct kvm_cpuid2 *cpuid; - int ret; int kvm_fd; if (cpuid) return cpuid; - cpuid = allocate_kvm_cpuid2(); + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); kvm_fd = open_kvm_dev_path_or_exit(); - ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); - TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", - ret, errno); + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); close(kvm_fd); return cpuid; } -/* - * KVM Get MSR - * - * Input Args: - * msr_index - Index of MSR - * - * Output Args: None - * - * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. - * - * Get value of MSR for VCPU. - */ +bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid, + struct kvm_x86_cpu_feature feature) +{ + const struct kvm_cpuid_entry2 *entry; + int i; + + for (i = 0; i < cpuid->nent; i++) { + entry = &cpuid->entries[i]; + + /* + * The output registers in kvm_cpuid_entry2 are in alphabetical + * order, but kvm_x86_cpu_feature matches that mess, so yay + * pointer shenanigans! + */ + if (entry->function == feature.function && + entry->index == feature.index) + return (&entry->eax)[feature.reg] & BIT(feature.bit); + } + + return false; +} + uint64_t kvm_get_feature_msr(uint64_t msr_index) { struct { @@ -779,218 +707,98 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index) buffer.entry.index = msr_index; kvm_fd = open_kvm_dev_path_or_exit(); - r = ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); - TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" - " rc: %i errno: %i", r, errno); + r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header); + TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r)); close(kvm_fd); return buffer.entry.data; } -/* - * VM VCPU CPUID Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU id - * - * Output Args: None - * - * Return: KVM CPUID (KVM_GET_CPUID2) - * - * Set the VCPU's CPUID. - */ -struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid) +void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - struct kvm_cpuid2 *cpuid; - int max_ent; - int rc = -1; - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); - - cpuid = allocate_kvm_cpuid2(); - max_ent = cpuid->nent; - - for (cpuid->nent = 1; cpuid->nent <= max_ent; cpuid->nent++) { - rc = ioctl(vcpu->fd, KVM_GET_CPUID2, cpuid); - if (!rc) - break; + TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID"); - TEST_ASSERT(rc == -1 && errno == E2BIG, - "KVM_GET_CPUID2 should either succeed or give E2BIG: %d %d", - rc, errno); + /* Allow overriding the default CPUID. */ + if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) { + free(vcpu->cpuid); + vcpu->cpuid = NULL; } - TEST_ASSERT(rc == 0, "KVM_GET_CPUID2 failed, rc: %i errno: %i", - rc, errno); + if (!vcpu->cpuid) + vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent); - return cpuid; + memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent)); + vcpu_set_cpuid(vcpu); } - - -/* - * Locate a cpuid entry. - * - * Input Args: - * function: The function of the cpuid entry to find. - * index: The index of the cpuid entry. - * - * Output Args: None - * - * Return: A pointer to the cpuid entry. Never returns NULL. - */ -struct kvm_cpuid_entry2 * -kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) +void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr) { - struct kvm_cpuid2 *cpuid; - struct kvm_cpuid_entry2 *entry = NULL; - int i; - - cpuid = kvm_get_supported_cpuid(); - for (i = 0; i < cpuid->nent; i++) { - if (cpuid->entries[i].function == function && - cpuid->entries[i].index == index) { - entry = &cpuid->entries[i]; - break; - } - } + struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, 0x80000008); - TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", - function, index); - return entry; + entry->eax = (entry->eax & ~0xff) | maxphyaddr; + vcpu_set_cpuid(vcpu); } - -int __vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid, - struct kvm_cpuid2 *cpuid) +void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function); - return ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); + entry->eax = 0; + entry->ebx = 0; + entry->ecx = 0; + entry->edx = 0; + vcpu_set_cpuid(vcpu); } -/* - * VM VCPU CPUID Set - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU id - * cpuid - The CPUID values to set. - * - * Output Args: None - * - * Return: void - * - * Set the VCPU's CPUID. - */ -void vcpu_set_cpuid(struct kvm_vm *vm, - uint32_t vcpuid, struct kvm_cpuid2 *cpuid) +void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu, + struct kvm_x86_cpu_feature feature, + bool set) { - int rc; + struct kvm_cpuid_entry2 *entry; + u32 *reg; - rc = __vcpu_set_cpuid(vm, vcpuid, cpuid); - TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", - rc, errno); + entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index); + reg = (&entry->eax) + feature.reg; + if (set) + *reg |= BIT(feature.bit); + else + *reg &= ~BIT(feature.bit); + + vcpu_set_cpuid(vcpu); } -/* - * VCPU Get MSR - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * msr_index - Index of MSR - * - * Output Args: None - * - * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced. - * - * Get value of MSR for VCPU. - */ -uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index) +uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); struct { struct kvm_msrs header; struct kvm_msr_entry entry; } buffer = {}; - int r; - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); buffer.header.nmsrs = 1; buffer.entry.index = msr_index; - r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header); - TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n" - " rc: %i errno: %i", r, errno); + + vcpu_msrs_get(vcpu, &buffer.header); return buffer.entry.data; } -/* - * _VCPU Set MSR - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * msr_index - Index of MSR - * msr_value - New value of MSR - * - * Output Args: None - * - * Return: The result of KVM_SET_MSRS. - * - * Sets the value of an MSR for the given VCPU. - */ -int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value) +int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); struct { struct kvm_msrs header; struct kvm_msr_entry entry; } buffer = {}; - int r; - TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); memset(&buffer, 0, sizeof(buffer)); buffer.header.nmsrs = 1; buffer.entry.index = msr_index; buffer.entry.data = msr_value; - r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header); - return r; -} - -/* - * VCPU Set MSR - * - * Input Args: - * vm - Virtual Machine - * vcpuid - VCPU ID - * msr_index - Index of MSR - * msr_value - New value of MSR - * - * Output Args: None - * - * Return: On success, nothing. On failure a TEST_ASSERT is produced. - * - * Set value of MSR for VCPU. - */ -void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index, - uint64_t msr_value) -{ - int r; - r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value); - TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n" - " rc: %i errno: %i", r, errno); + return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header); } -void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...) { va_list ap; struct kvm_regs regs; @@ -1000,7 +808,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) num); va_start(ap, num); - vcpu_regs_get(vm, vcpuid, ®s); + vcpu_regs_get(vcpu, ®s); if (num >= 1) regs.rdi = va_arg(ap, uint64_t); @@ -1020,85 +828,112 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) if (num >= 6) regs.r9 = va_arg(ap, uint64_t); - vcpu_regs_set(vm, vcpuid, ®s); + vcpu_regs_set(vcpu, ®s); va_end(ap); } -void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) +void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent) { struct kvm_regs regs; struct kvm_sregs sregs; - fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); + fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id); fprintf(stream, "%*sregs:\n", indent + 2, ""); - vcpu_regs_get(vm, vcpuid, ®s); + vcpu_regs_get(vcpu, ®s); regs_dump(stream, ®s, indent + 4); fprintf(stream, "%*ssregs:\n", indent + 2, ""); - vcpu_sregs_get(vm, vcpuid, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs_dump(stream, &sregs, indent + 4); } -static int kvm_get_num_msrs_fd(int kvm_fd) +static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs) { + struct kvm_msr_list *list; struct kvm_msr_list nmsrs; - int r; + int kvm_fd, r; + + kvm_fd = open_kvm_dev_path_or_exit(); nmsrs.nmsrs = 0; - r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); - TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", - r); + if (!feature_msrs) + r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs); + else + r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs); - return nmsrs.nmsrs; -} + TEST_ASSERT(r == -1 && errno == E2BIG, + "Expected -E2BIG, got rc: %i errno: %i (%s)", + r, errno, strerror(errno)); -static int kvm_get_num_msrs(struct kvm_vm *vm) -{ - return kvm_get_num_msrs_fd(vm->kvm_fd); + list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0])); + TEST_ASSERT(list, "-ENOMEM when allocating MSR index list"); + list->nmsrs = nmsrs.nmsrs; + + if (!feature_msrs) + kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); + else + kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); + close(kvm_fd); + + TEST_ASSERT(list->nmsrs == nmsrs.nmsrs, + "Number of MSRs in list changed, was %d, now %d", + nmsrs.nmsrs, list->nmsrs); + return list; } -struct kvm_msr_list *kvm_get_msr_index_list(void) +const struct kvm_msr_list *kvm_get_msr_index_list(void) { - struct kvm_msr_list *list; - int nmsrs, r, kvm_fd; + static const struct kvm_msr_list *list; - kvm_fd = open_kvm_dev_path_or_exit(); + if (!list) + list = __kvm_get_msr_index_list(false); + return list; +} - nmsrs = kvm_get_num_msrs_fd(kvm_fd); - list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); - list->nmsrs = nmsrs; - r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - close(kvm_fd); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", - r); +const struct kvm_msr_list *kvm_get_feature_msr_index_list(void) +{ + static const struct kvm_msr_list *list; + if (!list) + list = __kvm_get_msr_index_list(true); return list; } -static int vcpu_save_xsave_state(struct kvm_vm *vm, struct vcpu *vcpu, - struct kvm_x86_state *state) +bool kvm_msr_is_in_save_restore_list(uint32_t msr_index) { - int size; + const struct kvm_msr_list *list = kvm_get_msr_index_list(); + int i; - size = vm_check_cap(vm, KVM_CAP_XSAVE2); - if (!size) - size = sizeof(struct kvm_xsave); + for (i = 0; i < list->nmsrs; ++i) { + if (list->indices[i] == msr_index) + return true; + } - state->xsave = malloc(size); - if (size == sizeof(struct kvm_xsave)) - return ioctl(vcpu->fd, KVM_GET_XSAVE, state->xsave); - else - return ioctl(vcpu->fd, KVM_GET_XSAVE2, state->xsave); + return false; } -struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) +static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu, + struct kvm_x86_state *state) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - struct kvm_msr_list *list; + int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2); + + if (size) { + state->xsave = malloc(size); + vcpu_xsave2_get(vcpu, state->xsave); + } else { + state->xsave = malloc(sizeof(struct kvm_xsave)); + vcpu_xsave_get(vcpu, state->xsave); + } +} + +struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu) +{ + const struct kvm_msr_list *msr_list = kvm_get_msr_index_list(); struct kvm_x86_state *state; - int nmsrs, r, i; + int i; + static int nested_size = -1; if (nested_size == -1) { @@ -1114,113 +949,57 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid) * kernel with KVM_RUN. Complete IO prior to migrating state * to a new VM. */ - vcpu_run_complete_io(vm, vcpuid); - - nmsrs = kvm_get_num_msrs(vm); - list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); - list->nmsrs = nmsrs; - r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i", - r); - - state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0])); - r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i", - r); - - r = vcpu_save_xsave_state(vm, vcpu, state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i", - r); - - if (kvm_check_cap(KVM_CAP_XCRS)) { - r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i", - r); - } + vcpu_run_complete_io(vcpu); + + state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0])); + + vcpu_events_get(vcpu, &state->events); + vcpu_mp_state_get(vcpu, &state->mp_state); + vcpu_regs_get(vcpu, &state->regs); + vcpu_save_xsave_state(vcpu, state); - r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i", - r); + if (kvm_has_cap(KVM_CAP_XCRS)) + vcpu_xcrs_get(vcpu, &state->xcrs); + + vcpu_sregs_get(vcpu, &state->sregs); if (nested_size) { state->nested.size = sizeof(state->nested_); - r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i", - r); + + vcpu_nested_state_get(vcpu, &state->nested); TEST_ASSERT(state->nested.size <= nested_size, "Nested state size too big, %i (KVM_CHECK_CAP gave %i)", state->nested.size, nested_size); - } else + } else { state->nested.size = 0; + } - state->msrs.nmsrs = nmsrs; - for (i = 0; i < nmsrs; i++) - state->msrs.entries[i].index = list->indices[i]; - r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs); - TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)", - r, r == nmsrs ? -1 : list->indices[r]); + state->msrs.nmsrs = msr_list->nmsrs; + for (i = 0; i < msr_list->nmsrs; i++) + state->msrs.entries[i].index = msr_list->indices[i]; + vcpu_msrs_get(vcpu, &state->msrs); - r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i", - r); + vcpu_debugregs_get(vcpu, &state->debugregs); - free(list); return state; } -void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state) +void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state) { - struct vcpu *vcpu = vcpu_find(vm, vcpuid); - int r; - - r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs); - TEST_ASSERT(r == state->msrs.nmsrs, - "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)", - r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index); - - if (kvm_check_cap(KVM_CAP_XCRS)) { - r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i", - r); - } - - r = ioctl(vcpu->fd, KVM_SET_XSAVE, state->xsave); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i", - r); - - r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i", - r); + vcpu_sregs_set(vcpu, &state->sregs); + vcpu_msrs_set(vcpu, &state->msrs); - r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i", - r); + if (kvm_has_cap(KVM_CAP_XCRS)) + vcpu_xcrs_set(vcpu, &state->xcrs); - r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i", - r); + vcpu_xsave_set(vcpu, state->xsave); + vcpu_events_set(vcpu, &state->events); + vcpu_mp_state_set(vcpu, &state->mp_state); + vcpu_debugregs_set(vcpu, &state->debugregs); + vcpu_regs_set(vcpu, &state->regs); - r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i", - r); - - if (state->nested.size) { - r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested); - TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i", - r); - } + if (state->nested.size) + vcpu_nested_state_set(vcpu, &state->nested); } void kvm_x86_state_cleanup(struct kvm_x86_state *state) @@ -1232,15 +1011,9 @@ void kvm_x86_state_cleanup(struct kvm_x86_state *state) static bool cpu_vendor_string_is(const char *vendor) { const uint32_t *chunk = (const uint32_t *)vendor; - int eax, ebx, ecx, edx; - const int leaf = 0; - - __asm__ __volatile__( - "cpuid" - : /* output */ "=a"(eax), "=b"(ebx), - "=c"(ecx), "=d"(edx) - : /* input */ "0"(leaf), "2"(0)); + uint32_t eax, ebx, ecx, edx; + cpuid(0, &eax, &ebx, &ecx, &edx); return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]); } @@ -1257,19 +1030,9 @@ bool is_amd_cpu(void) return cpu_vendor_string_is("AuthenticAMD"); } -uint32_t kvm_get_cpuid_max_basic(void) -{ - return kvm_get_supported_cpuid_entry(0)->eax; -} - -uint32_t kvm_get_cpuid_max_extended(void) -{ - return kvm_get_supported_cpuid_entry(0x80000000)->eax; -} - void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) { - struct kvm_cpuid_entry2 *entry; + const struct kvm_cpuid_entry2 *entry; bool pae; /* SDM 4.1.4 */ @@ -1315,6 +1078,20 @@ static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, e->offset2 = addr >> 32; } + +static bool kvm_fixup_exception(struct ex_regs *regs) +{ + if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10) + return false; + + if (regs->vector == DE_VECTOR) + return false; + + regs->rip = regs->r11; + regs->r9 = regs->vector; + return true; +} + void kvm_exit_unexpected_vector(uint32_t value) { ucall(UCALL_UNHANDLED, 1, value); @@ -1330,6 +1107,9 @@ void route_exception(struct ex_regs *regs) return; } + if (kvm_fixup_exception(regs)) + return; + kvm_exit_unexpected_vector(regs->vector); } @@ -1346,17 +1126,18 @@ void vm_init_descriptor_tables(struct kvm_vm *vm) DEFAULT_CODE_SELECTOR); } -void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) { + struct kvm_vm *vm = vcpu->vm; struct kvm_sregs sregs; - vcpu_sregs_get(vm, vcpuid, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs.idt.base = vm->idt; sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; sregs.gdt.base = vm->gdt; sregs.gdt.limit = getpagesize() - 1; kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); - vcpu_sregs_set(vm, vcpuid, &sregs); + vcpu_sregs_set(vcpu, &sregs); *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; } @@ -1368,11 +1149,11 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector, handlers[vector] = (vm_vaddr_t)handler; } -void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vm, vcpuid, &uc) == UCALL_UNHANDLED) { + if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { uint64_t vector = uc.args[0]; TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", @@ -1380,16 +1161,15 @@ void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) } } -struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, - uint32_t index) +const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, + uint32_t function, uint32_t index) { int i; for (i = 0; i < cpuid->nent; i++) { - struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; - - if (cur->function == function && cur->index == index) - return cur; + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) + return &cpuid->entries[i]; } TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index); @@ -1397,24 +1177,6 @@ struct kvm_cpuid_entry2 *get_cpuid(struct kvm_cpuid2 *cpuid, uint32_t function, return NULL; } -bool set_cpuid(struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 *ent) -{ - int i; - - for (i = 0; i < cpuid->nent; i++) { - struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; - - if (cur->function != ent->function || cur->index != ent->index) - continue; - - memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); - return true; - } - - return false; -} - uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) { @@ -1422,43 +1184,38 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, asm volatile("vmcall" : "=a"(r) - : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); return r; } -struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) { static struct kvm_cpuid2 *cpuid; - int ret; int kvm_fd; if (cpuid) return cpuid; - cpuid = allocate_kvm_cpuid2(); + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); kvm_fd = open_kvm_dev_path_or_exit(); - ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_HV_CPUID failed %d %d\n", - ret, errno); + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); close(kvm_fd); return cpuid; } -void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid) +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) { static struct kvm_cpuid2 *cpuid_full; - struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; int i, nent = 0; if (!cpuid_full) { cpuid_sys = kvm_get_supported_cpuid(); cpuid_hv = kvm_get_supported_hv_cpuid(); - cpuid_full = malloc(sizeof(*cpuid_full) + - (cpuid_sys->nent + cpuid_hv->nent) * - sizeof(struct kvm_cpuid_entry2)); + cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); if (!cpuid_full) { perror("malloc"); abort(); @@ -1478,16 +1235,14 @@ void vcpu_set_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid) cpuid_full->nent = nent + cpuid_hv->nent; } - vcpu_set_cpuid(vm, vcpuid, cpuid_full); + vcpu_init_cpuid(vcpu, cpuid_full); } -struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid) +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) { - static struct kvm_cpuid2 *cpuid; + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - cpuid = allocate_kvm_cpuid2(); - - vcpu_ioctl(vm, vcpuid, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); return cpuid; } @@ -1510,9 +1265,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) /* Before family 17h, the HyperTransport area is just below 1T. */ ht_gfn = (1 << 28) - num_ht_pages; - eax = 1; - ecx = 0; - cpuid(&eax, &ebx, &ecx, &edx); + cpuid(1, &eax, &ebx, &ecx, &edx); if (x86_family(eax) < 0x17) goto done; @@ -1521,18 +1274,15 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use * the old conservative value if MAXPHYADDR is not enumerated. */ - eax = 0x80000000; - cpuid(&eax, &ebx, &ecx, &edx); + cpuid(0x80000000, &eax, &ebx, &ecx, &edx); max_ext_leaf = eax; if (max_ext_leaf < 0x80000008) goto done; - eax = 0x80000008; - cpuid(&eax, &ebx, &ecx, &edx); + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1; if (max_ext_leaf >= 0x8000001f) { - eax = 0x8000001f; - cpuid(&eax, &ebx, &ecx, &edx); + cpuid(0x8000001f, &eax, &ebx, &ecx, &edx); max_pfn >>= (ebx >> 6) & 0x3f; } @@ -1540,3 +1290,24 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm) done: return min(max_gfn, ht_gfn - 1); } + +/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ +bool vm_is_unrestricted_guest(struct kvm_vm *vm) +{ + char val = 'N'; + size_t count; + FILE *f; + + /* Ensure that a KVM vendor-specific module is loaded. */ + if (vm == NULL) + close(open_kvm_dev_path_or_exit()); + + f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r"); + if (f) { + count = fread(&val, sizeof(char), 1, f); + TEST_ASSERT(count == 1, "Unable to read from param file."); + fclose(f); + } + + return val == 'Y'; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 736ee4a23df6..6d445886e16c 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -9,7 +9,6 @@ #include "test_util.h" #include "kvm_util.h" -#include "../kvm_util_internal.h" #include "processor.h" #include "svm_util.h" @@ -165,22 +164,6 @@ void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) : "r15", "memory"); } -bool nested_svm_supported(void) -{ - struct kvm_cpuid_entry2 *entry = - kvm_get_supported_cpuid_entry(0x80000001); - - return entry->ecx & CPUID_SVM; -} - -void nested_svm_check_supported(void) -{ - if (!nested_svm_supported()) { - print_skip("nested SVM not enabled"); - exit(KSFT_SKIP); - } -} - /* * Open SEV_DEV_PATH if available, otherwise exit the entire program. * diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index a3489973e290..e5f0f9e0d3ee 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -24,7 +24,7 @@ void ucall(uint64_t cmd, int nargs, ...) va_list va; int i; - nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS; + nargs = min(nargs, UCALL_MAX_ARGS); va_start(va, nargs); for (i = 0; i < nargs; ++i) @@ -35,9 +35,9 @@ void ucall(uint64_t cmd, int nargs, ...) : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory"); } -uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) +uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc) { - struct kvm_run *run = vcpu_state(vm, vcpu_id); + struct kvm_run *run = vcpu->run; struct ucall ucall = {}; if (uc) @@ -46,11 +46,11 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; - vcpu_regs_get(vm, vcpu_id, ®s); - memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), + vcpu_regs_get(vcpu, ®s); + memcpy(&ucall, addr_gva2hva(vcpu->vm, (vm_vaddr_t)regs.rdi), sizeof(ucall)); - vcpu_run_complete_io(vm, vcpu_id); + vcpu_run_complete_io(vcpu); if (uc) memcpy(uc, &ucall, sizeof(ucall)); } diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index b77a01d0a271..80a568c439b8 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -7,7 +7,6 @@ #include "test_util.h" #include "kvm_util.h" -#include "../kvm_util_internal.h" #include "processor.h" #include "vmx.h" @@ -43,16 +42,12 @@ struct eptPageTablePointer { uint64_t address:40; uint64_t reserved_63_52:12; }; -int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id) +int vcpu_enable_evmcs(struct kvm_vcpu *vcpu) { uint16_t evmcs_ver; - struct kvm_enable_cap enable_evmcs_cap = { - .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS, - .args[0] = (unsigned long)&evmcs_ver - }; - - vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap); + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, + (unsigned long)&evmcs_ver); /* KVM should return supported EVMCS version range */ TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) && @@ -387,21 +382,6 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp) init_vmcs_guest_state(guest_rip, guest_rsp); } -bool nested_vmx_supported(void) -{ - struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); - - return entry->ecx & CPUID_VMX; -} - -void nested_vmx_check_supported(void) -{ - if (!nested_vmx_supported()) { - print_skip("nested VMX not enabled"); - exit(KSFT_SKIP); - } -} - static void nested_create_pte(struct kvm_vm *vm, struct eptPageTableEntry *pte, uint64_t nested_paddr, diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 15f046e19cb2..9a6e4f3ad6b5 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -28,8 +28,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) } struct vcpu_info { - struct kvm_vm *vm; - uint32_t id; + struct kvm_vcpu *vcpu; uint64_t start_gpa; uint64_t end_gpa; }; @@ -52,45 +51,45 @@ static void rendezvous_with_boss(void) } } -static void run_vcpu(struct kvm_vm *vm, uint32_t vcpu_id) +static void run_vcpu(struct kvm_vcpu *vcpu) { - vcpu_run(vm, vcpu_id); - ASSERT_EQ(get_ucall(vm, vcpu_id, NULL), UCALL_DONE); + vcpu_run(vcpu); + ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); } static void *vcpu_worker(void *data) { - struct vcpu_info *vcpu = data; + struct vcpu_info *info = data; + struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; struct kvm_sregs sregs; struct kvm_regs regs; - vcpu_args_set(vm, vcpu->id, 3, vcpu->start_gpa, vcpu->end_gpa, - vm_get_page_size(vm)); + vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size); /* Snapshot regs before the first run. */ - vcpu_regs_get(vm, vcpu->id, ®s); + vcpu_regs_get(vcpu, ®s); rendezvous_with_boss(); - run_vcpu(vm, vcpu->id); + run_vcpu(vcpu); rendezvous_with_boss(); - vcpu_regs_set(vm, vcpu->id, ®s); - vcpu_sregs_get(vm, vcpu->id, &sregs); + vcpu_regs_set(vcpu, ®s); + vcpu_sregs_get(vcpu, &sregs); #ifdef __x86_64__ /* Toggle CR0.WP to trigger a MMU context reset. */ sregs.cr0 ^= X86_CR0_WP; #endif - vcpu_sregs_set(vm, vcpu->id, &sregs); + vcpu_sregs_set(vcpu, &sregs); rendezvous_with_boss(); - run_vcpu(vm, vcpu->id); + run_vcpu(vcpu); rendezvous_with_boss(); return NULL; } -static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa, - uint64_t end_gpa) +static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, + uint64_t start_gpa, uint64_t end_gpa) { struct vcpu_info *info; uint64_t gpa, nr_bytes; @@ -104,12 +103,11 @@ static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa, TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges"); nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) & - ~((uint64_t)vm_get_page_size(vm) - 1); + ~((uint64_t)vm->page_size - 1); TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus); for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) { - info[i].vm = vm; - info[i].id = i; + info[i].vcpu = vcpus[i]; info[i].start_gpa = gpa; info[i].end_gpa = gpa + nr_bytes; pthread_create(&threads[i], NULL, vcpu_worker, &info[i]); @@ -172,6 +170,7 @@ int main(int argc, char *argv[]) uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; + struct kvm_vcpu **vcpus; pthread_t *threads; struct kvm_vm *vm; void *mem; @@ -215,9 +214,12 @@ int main(int argc, char *argv[]) } } - vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); + vcpus = malloc(nr_vcpus * sizeof(*vcpus)); + TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); - max_gpa = vm_get_max_gfn(vm) << vm_get_page_shift(vm); + max_gpa = vm->max_gfn << vm->page_shift; TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); fd = kvm_memfd_alloc(slot_size, hugepages); @@ -227,7 +229,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed"); /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */ - for (i = 0; i < slot_size; i += vm_get_page_size(vm)) + for (i = 0; i < slot_size; i += vm->page_size) ((uint8_t *)mem)[i] = 0xaa; gpa = 0; @@ -246,13 +248,16 @@ int main(int argc, char *argv[]) for (i = 0; i < slot_size; i += size_1gb) __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G); #else - for (i = 0; i < slot_size; i += vm_get_page_size(vm)) + for (i = 0; i < slot_size; i += vm->page_size) virt_pg_map(vm, gpa + i, gpa + i); #endif } atomic_set(&rendezvous, nr_vcpus + 1); - threads = spawn_workers(vm, start_gpa, gpa); + threads = spawn_workers(vm, vcpus, start_gpa, gpa); + + free(vcpus); + vcpus = NULL; pr_info("Running with %lugb of guest memory and %u vCPUs\n", (gpa - start_gpa) / size_1gb, nr_vcpus); diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 1410d0a9141a..6ee7e1dde404 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -38,19 +38,18 @@ static bool run_vcpus = true; static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args) { - int ret; - int vcpu_id = vcpu_args->vcpu_id; - struct kvm_vm *vm = perf_test_args.vm; + struct kvm_vcpu *vcpu = vcpu_args->vcpu; struct kvm_run *run; + int ret; - run = vcpu_state(vm, vcpu_id); + run = vcpu->run; /* Let the guest access its memory until a stop signal is received */ while (READ_ONCE(run_vcpus)) { - ret = _vcpu_run(vm, vcpu_id); + ret = _vcpu_run(vcpu); TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); - if (get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC) + if (get_ucall(vcpu, NULL) == UCALL_SYNC) continue; TEST_ASSERT(false, @@ -76,7 +75,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay, * Add the dummy memslot just below the perf_test_util memslot, which is * at the top of the guest physical address space. */ - gpa = perf_test_args.gpa - pages * vm_get_page_size(vm); + gpa = perf_test_args.gpa - pages * vm->page_size; for (i = 0; i < nr_modifications; i++) { usleep(delay); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 1727f75e0c2c..44995446d942 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -25,8 +25,6 @@ #include <kvm_util.h> #include <processor.h> -#define VCPU_ID 0 - #define MEM_SIZE ((512U << 20) + 4096) #define MEM_SIZE_PAGES (MEM_SIZE / 4096) #define MEM_GPA 0x10000000UL @@ -90,6 +88,7 @@ static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE, struct vm_data { struct kvm_vm *vm; + struct kvm_vcpu *vcpu; pthread_t vcpu_thread; uint32_t nslots; uint64_t npages; @@ -127,29 +126,29 @@ static bool verbose; pr_info(__VA_ARGS__); \ } while (0) -static void check_mmio_access(struct vm_data *vm, struct kvm_run *run) +static void check_mmio_access(struct vm_data *data, struct kvm_run *run) { - TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit"); + TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit"); TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read"); TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len); - TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min && - run->mmio.phys_addr <= vm->mmio_gpa_max, + TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min && + run->mmio.phys_addr <= data->mmio_gpa_max, "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr); } -static void *vcpu_worker(void *data) +static void *vcpu_worker(void *__data) { - struct vm_data *vm = data; - struct kvm_run *run; + struct vm_data *data = __data; + struct kvm_vcpu *vcpu = data->vcpu; + struct kvm_run *run = vcpu->run; struct ucall uc; - run = vcpu_state(vm->vm, VCPU_ID); while (1) { - vcpu_run(vm->vm, VCPU_ID); + vcpu_run(vcpu); - switch (get_ucall(vm->vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: TEST_ASSERT(uc.args[1] == 0, "Unexpected sync ucall, got %lx", @@ -158,14 +157,12 @@ static void *vcpu_worker(void *data) continue; case UCALL_NONE: if (run->exit_reason == KVM_EXIT_MMIO) - check_mmio_access(vm, run); + check_mmio_access(data, run); else goto done; break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld, val = %lu", - (const char *)uc.args[0], - __FILE__, uc.args[1], uc.args[2]); + REPORT_GUEST_ASSERT_1(uc, "val = %lu"); break; case UCALL_DONE: goto done; @@ -238,6 +235,7 @@ static struct vm_data *alloc_vm(void) TEST_ASSERT(data, "malloc(vmdata) failed"); data->vm = NULL; + data->vcpu = NULL; data->hva_slots = NULL; return data; @@ -278,7 +276,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots, data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots); TEST_ASSERT(data->hva_slots, "malloc() fail"); - data->vm = vm_create_default(VCPU_ID, mempages, guest_code); + data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code); ucall_init(data->vm, NULL); pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n", diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index 4158da0da2bb..a54d4d05a058 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -20,8 +20,6 @@ #include "processor.h" #include "test_util.h" -#define VCPU_ID 0 - static __thread volatile struct rseq __rseq = { .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, }; @@ -82,8 +80,9 @@ static int next_cpu(int cpu) return cpu; } -static void *migration_worker(void *ign) +static void *migration_worker(void *__rseq_tid) { + pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid; cpu_set_t allowed_mask; int r, i, cpu; @@ -106,7 +105,7 @@ static void *migration_worker(void *ign) * stable, i.e. while changing affinity is in-progress. */ smp_wmb(); - r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask); + r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask); TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", errno, strerror(errno)); smp_wmb(); @@ -173,12 +172,11 @@ static void *migration_worker(void *ign) return NULL; } -static int calc_min_max_cpu(void) +static void calc_min_max_cpu(void) { int i, cnt, nproc; - if (CPU_COUNT(&possible_mask) < 2) - return -EINVAL; + TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2); /* * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that @@ -200,13 +198,15 @@ static int calc_min_max_cpu(void) cnt++; } - return (cnt < 2) ? -EINVAL : 0; + __TEST_REQUIRE(cnt >= 2, + "Only one usable CPU, task migration not possible"); } int main(int argc, char *argv[]) { int r, i, snapshot; struct kvm_vm *vm; + struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; /* Tell stdout not to buffer its content */ @@ -216,10 +216,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, strerror(errno)); - if (calc_min_max_cpu()) { - print_skip("Only one usable CPU, task migration not possible"); - exit(KSFT_SKIP); - } + calc_min_max_cpu(); sys_rseq(0); @@ -228,14 +225,15 @@ int main(int argc, char *argv[]) * GUEST_SYNC, while concurrently migrating the process by setting its * CPU affinity. */ - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); ucall_init(vm, NULL); - pthread_create(&migration_thread, NULL, migration_worker, 0); + pthread_create(&migration_thread, NULL, migration_worker, + (void *)(unsigned long)gettid()); for (i = 0; !done; i++) { - vcpu_run(vm, VCPU_ID); - TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, + vcpu_run(vcpu); + TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Guest failed?"); /* diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 49f26f544127..9113696d5178 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -14,6 +14,7 @@ #include "test_util.h" #include "kvm_util.h" +#include "kselftest.h" enum mop_target { LOGICAL, @@ -98,21 +99,18 @@ static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc desc) return ksmo; } -/* vcpu dummy id signifying that vm instead of vcpu ioctl is to occur */ -const uint32_t VM_VCPU_ID = (uint32_t)-1; - -struct test_vcpu { +struct test_info { struct kvm_vm *vm; - uint32_t id; + struct kvm_vcpu *vcpu; }; #define PRINT_MEMOP false -static void print_memop(uint32_t vcpu_id, const struct kvm_s390_mem_op *ksmo) +static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo) { if (!PRINT_MEMOP) return; - if (vcpu_id == VM_VCPU_ID) + if (!vcpu) printf("vm memop("); else printf("vcpu memop("); @@ -147,25 +145,29 @@ static void print_memop(uint32_t vcpu_id, const struct kvm_s390_mem_op *ksmo) puts(")"); } -static void memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) +static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) { - if (vcpu.id == VM_VCPU_ID) - vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo); + struct kvm_vcpu *vcpu = info.vcpu; + + if (!vcpu) + vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); else - vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo); + vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); } -static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) +static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) { - if (vcpu.id == VM_VCPU_ID) - return _vm_ioctl(vcpu.vm, KVM_S390_MEM_OP, ksmo); + struct kvm_vcpu *vcpu = info.vcpu; + + if (!vcpu) + return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); else - return _vcpu_ioctl(vcpu.vm, vcpu.id, KVM_S390_MEM_OP, ksmo); + return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); } -#define MEMOP(err, vcpu_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ +#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \ ({ \ - struct test_vcpu __vcpu = (vcpu_p); \ + struct test_info __info = (info_p); \ struct mop_desc __desc = { \ .target = (mop_target_p), \ .mode = (access_mode_p), \ @@ -177,13 +179,13 @@ static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) \ if (__desc._gaddr_v) { \ if (__desc.target == ABSOLUTE) \ - __desc.gaddr = addr_gva2gpa(__vcpu.vm, __desc.gaddr_v); \ + __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \ else \ __desc.gaddr = __desc.gaddr_v; \ } \ __ksmo = ksmo_from_desc(__desc); \ - print_memop(__vcpu.id, &__ksmo); \ - err##memop_ioctl(__vcpu, &__ksmo); \ + print_memop(__info.vcpu, &__ksmo); \ + err##memop_ioctl(__info, &__ksmo); \ }) #define MOP(...) MEMOP(, __VA_ARGS__) @@ -200,7 +202,6 @@ static int err_memop_ioctl(struct test_vcpu vcpu, struct kvm_s390_mem_op *ksmo) #define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) -#define VCPU_ID 1 #define PAGE_SHIFT 12 #define PAGE_SIZE (1ULL << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE - 1)) @@ -212,21 +213,22 @@ static uint8_t mem2[65536]; struct test_default { struct kvm_vm *kvm_vm; - struct test_vcpu vm; - struct test_vcpu vcpu; + struct test_info vm; + struct test_info vcpu; struct kvm_run *run; int size; }; static struct test_default test_default_init(void *guest_code) { + struct kvm_vcpu *vcpu; struct test_default t; t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1)); - t.kvm_vm = vm_create_default(VCPU_ID, 0, guest_code); - t.vm = (struct test_vcpu) { t.kvm_vm, VM_VCPU_ID }; - t.vcpu = (struct test_vcpu) { t.kvm_vm, VCPU_ID }; - t.run = vcpu_state(t.kvm_vm, VCPU_ID); + t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code); + t.vm = (struct test_info) { t.kvm_vm, NULL }; + t.vcpu = (struct test_info) { t.kvm_vm, vcpu }; + t.run = vcpu->run; return t; } @@ -241,14 +243,15 @@ enum stage { STAGE_COPIED, }; -#define HOST_SYNC(vcpu_p, stage) \ +#define HOST_SYNC(info_p, stage) \ ({ \ - struct test_vcpu __vcpu = (vcpu_p); \ + struct test_info __info = (info_p); \ + struct kvm_vcpu *__vcpu = __info.vcpu; \ struct ucall uc; \ int __stage = (stage); \ \ - vcpu_run(__vcpu.vm, __vcpu.id); \ - get_ucall(__vcpu.vm, __vcpu.id, &uc); \ + vcpu_run(__vcpu); \ + get_ucall(__vcpu, &uc); \ ASSERT_EQ(uc.cmd, UCALL_SYNC); \ ASSERT_EQ(uc.args[1], __stage); \ }) \ @@ -267,7 +270,7 @@ static void prepare_mem12(void) #define DEFAULT_WRITE_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \ ({ \ - struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ + struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ enum mop_target __target = (mop_target_p); \ uint32_t __size = (size); \ \ @@ -282,7 +285,7 @@ static void prepare_mem12(void) #define DEFAULT_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \ ({ \ - struct test_vcpu __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ + struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ enum mop_target __target = (mop_target_p); \ uint32_t __size = (size); \ \ @@ -623,34 +626,34 @@ static void guest_idle(void) GUEST_SYNC(STAGE_IDLED); } -static void _test_errors_common(struct test_vcpu vcpu, enum mop_target target, int size) +static void _test_errors_common(struct test_info info, enum mop_target target, int size) { int rv; /* Bad size: */ - rv = ERR_MOP(vcpu, target, WRITE, mem1, -1, GADDR_V(mem1)); + rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1)); TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes"); /* Zero size: */ - rv = ERR_MOP(vcpu, target, WRITE, mem1, 0, GADDR_V(mem1)); + rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1)); TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM), "ioctl allows 0 as size"); /* Bad flags: */ - rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1)); + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1)); TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags"); /* Bad guest address: */ - rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY); + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY); TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access"); /* Bad host address: */ - rv = ERR_MOP(vcpu, target, WRITE, 0, size, GADDR_V(mem1)); + rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1)); TEST_ASSERT(rv == -1 && errno == EFAULT, "ioctl does not report bad host memory address"); /* Bad key: */ - rv = ERR_MOP(vcpu, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17)); + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17)); TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key"); } @@ -691,34 +694,89 @@ static void test_errors(void) kvm_vm_free(t.kvm_vm); } +struct testdef { + const char *name; + void (*test)(void); + int extension; +} testlist[] = { + { + .name = "simple copy", + .test = test_copy, + }, + { + .name = "generic error checks", + .test = test_errors, + }, + { + .name = "copy with storage keys", + .test = test_copy_key, + .extension = 1, + }, + { + .name = "copy with key storage protection override", + .test = test_copy_key_storage_prot_override, + .extension = 1, + }, + { + .name = "copy with key fetch protection", + .test = test_copy_key_fetch_prot, + .extension = 1, + }, + { + .name = "copy with key fetch protection override", + .test = test_copy_key_fetch_prot_override, + .extension = 1, + }, + { + .name = "error checks with key", + .test = test_errors_key, + .extension = 1, + }, + { + .name = "termination", + .test = test_termination, + .extension = 1, + }, + { + .name = "error checks with key storage protection override", + .test = test_errors_key_storage_prot_override, + .extension = 1, + }, + { + .name = "error checks without key fetch prot override", + .test = test_errors_key_fetch_prot_override_not_enabled, + .extension = 1, + }, + { + .name = "error checks with key fetch prot override", + .test = test_errors_key_fetch_prot_override_enabled, + .extension = 1, + }, +}; + int main(int argc, char *argv[]) { - int memop_cap, extension_cap; + int extension_cap, idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - memop_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP); - extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); - if (!memop_cap) { - print_skip("CAP_S390_MEM_OP not supported"); - exit(KSFT_SKIP); - } + ksft_print_header(); - test_copy(); - if (extension_cap > 0) { - test_copy_key(); - test_copy_key_storage_prot_override(); - test_copy_key_fetch_prot(); - test_copy_key_fetch_prot_override(); - test_errors_key(); - test_termination(); - test_errors_key_storage_prot_override(); - test_errors_key_fetch_prot_override_not_enabled(); - test_errors_key_fetch_prot_override_enabled(); - } else { - print_skip("storage key memop extension not supported"); + ksft_set_plan(ARRAY_SIZE(testlist)); + + extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (extension_cap >= testlist[idx].extension) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } else { + ksft_test_result_skip("%s - extension level %d not supported\n", + testlist[idx].name, + testlist[idx].extension); + } } - test_errors(); - return 0; + ksft_finished(); /* Print results and exit() accordingly */ } diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c index b143db6d8693..19486084eb30 100644 --- a/tools/testing/selftests/kvm/s390x/resets.c +++ b/tools/testing/selftests/kvm/s390x/resets.c @@ -12,15 +12,14 @@ #include "test_util.h" #include "kvm_util.h" +#include "kselftest.h" -#define VCPU_ID 3 #define LOCAL_IRQS 32 -struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS]; +#define ARBITRARY_NON_ZERO_VCPU_ID 3 + +struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS]; -struct kvm_vm *vm; -struct kvm_run *run; -struct kvm_sync_regs *sync_regs; static uint8_t regs_null[512]; static void guest_code_initial(void) @@ -58,25 +57,22 @@ static void guest_code_initial(void) ); } -static void test_one_reg(uint64_t id, uint64_t value) +static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value) { - struct kvm_one_reg reg; uint64_t eval_reg; - reg.addr = (uintptr_t)&eval_reg; - reg.id = id; - vcpu_get_reg(vm, VCPU_ID, ®); + vcpu_get_reg(vcpu, id, &eval_reg); TEST_ASSERT(eval_reg == value, "value == 0x%lx", value); } -static void assert_noirq(void) +static void assert_noirq(struct kvm_vcpu *vcpu) { struct kvm_s390_irq_state irq_state; int irqs; irq_state.len = sizeof(buf); irq_state.buf = (unsigned long)buf; - irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state); + irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state); /* * irqs contains the number of retrieved interrupts. Any interrupt * (notably, the emergency call interrupt we have injected) should @@ -86,19 +82,20 @@ static void assert_noirq(void) TEST_ASSERT(!irqs, "IRQ pending"); } -static void assert_clear(void) +static void assert_clear(struct kvm_vcpu *vcpu) { + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; struct kvm_sregs sregs; struct kvm_regs regs; struct kvm_fpu fpu; - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); TEST_ASSERT(!memcmp(®s.gprs, regs_null, sizeof(regs.gprs)), "grs == 0"); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0"); - vcpu_fpu_get(vm, VCPU_ID, &fpu); + vcpu_fpu_get(vcpu, &fpu); TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0"); /* sync regs */ @@ -112,8 +109,10 @@ static void assert_clear(void) "vrs0-15 == 0 (sync_regs)"); } -static void assert_initial_noclear(void) +static void assert_initial_noclear(struct kvm_vcpu *vcpu) { + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; + TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL, "gpr0 == 0xffff000000000000 (sync_regs)"); TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL, @@ -127,13 +126,14 @@ static void assert_initial_noclear(void) TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)"); } -static void assert_initial(void) +static void assert_initial(struct kvm_vcpu *vcpu) { + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; struct kvm_sregs sregs; struct kvm_fpu fpu; /* KVM_GET_SREGS */ - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)"); TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000 (KVM_GET_SREGS)"); @@ -156,36 +156,38 @@ static void assert_initial(void) TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)"); /* kvm_run */ - TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)"); - TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)"); + TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)"); + TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)"); - vcpu_fpu_get(vm, VCPU_ID, &fpu); + vcpu_fpu_get(vcpu, &fpu); TEST_ASSERT(!fpu.fpc, "fpc == 0"); - test_one_reg(KVM_REG_S390_GBEA, 1); - test_one_reg(KVM_REG_S390_PP, 0); - test_one_reg(KVM_REG_S390_TODPR, 0); - test_one_reg(KVM_REG_S390_CPU_TIMER, 0); - test_one_reg(KVM_REG_S390_CLOCK_COMP, 0); + test_one_reg(vcpu, KVM_REG_S390_GBEA, 1); + test_one_reg(vcpu, KVM_REG_S390_PP, 0); + test_one_reg(vcpu, KVM_REG_S390_TODPR, 0); + test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0); + test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0); } -static void assert_normal_noclear(void) +static void assert_normal_noclear(struct kvm_vcpu *vcpu) { + struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs; + TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)"); TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)"); TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)"); TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)"); } -static void assert_normal(void) +static void assert_normal(struct kvm_vcpu *vcpu) { - test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); - TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID, + test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID); + TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID, "pft == 0xff..... (sync_regs)"); - assert_noirq(); + assert_noirq(vcpu); } -static void inject_irq(int cpu_id) +static void inject_irq(struct kvm_vcpu *vcpu) { struct kvm_s390_irq_state irq_state; struct kvm_s390_irq *irq = &buf[0]; @@ -195,85 +197,119 @@ static void inject_irq(int cpu_id) irq_state.len = sizeof(struct kvm_s390_irq); irq_state.buf = (unsigned long)buf; irq->type = KVM_S390_INT_EMERGENCY; - irq->u.emerg.code = cpu_id; - irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state); + irq->u.emerg.code = vcpu->id; + irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state); TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno); } +static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu) +{ + struct kvm_vm *vm; + + vm = vm_create(1); + + *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial); + + return vm; +} + static void test_normal(void) { - pr_info("Testing normal reset\n"); - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code_initial); - run = vcpu_state(vm, VCPU_ID); - sync_regs = &run->s.regs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + ksft_print_msg("Testing normal reset\n"); + vm = create_vm(&vcpu); - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - inject_irq(VCPU_ID); + inject_irq(vcpu); - vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0); + vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL); /* must clears */ - assert_normal(); + assert_normal(vcpu); /* must not clears */ - assert_normal_noclear(); - assert_initial_noclear(); + assert_normal_noclear(vcpu); + assert_initial_noclear(vcpu); kvm_vm_free(vm); } static void test_initial(void) { - pr_info("Testing initial reset\n"); - vm = vm_create_default(VCPU_ID, 0, guest_code_initial); - run = vcpu_state(vm, VCPU_ID); - sync_regs = &run->s.regs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; - vcpu_run(vm, VCPU_ID); + ksft_print_msg("Testing initial reset\n"); + vm = create_vm(&vcpu); - inject_irq(VCPU_ID); + vcpu_run(vcpu); - vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0); + inject_irq(vcpu); + + vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL); /* must clears */ - assert_normal(); - assert_initial(); + assert_normal(vcpu); + assert_initial(vcpu); /* must not clears */ - assert_initial_noclear(); + assert_initial_noclear(vcpu); kvm_vm_free(vm); } static void test_clear(void) { - pr_info("Testing clear reset\n"); - vm = vm_create_default(VCPU_ID, 0, guest_code_initial); - run = vcpu_state(vm, VCPU_ID); - sync_regs = &run->s.regs; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + ksft_print_msg("Testing clear reset\n"); + vm = create_vm(&vcpu); - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - inject_irq(VCPU_ID); + inject_irq(vcpu); - vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0); + vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL); /* must clears */ - assert_normal(); - assert_initial(); - assert_clear(); + assert_normal(vcpu); + assert_initial(vcpu); + assert_clear(vcpu); kvm_vm_free(vm); } +struct testdef { + const char *name; + void (*test)(void); + bool needs_cap; +} testlist[] = { + { "initial", test_initial, false }, + { "normal", test_normal, true }, + { "clear", test_clear, true }, +}; + int main(int argc, char *argv[]) { + bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS); + int idx; + setbuf(stdout, NULL); /* Tell stdout not to buffer its content */ - test_initial(); - if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) { - test_normal(); - test_clear(); + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (!testlist[idx].needs_cap || has_s390_vcpu_resets) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } else { + ksft_test_result_skip("%s - no VCPU_RESETS capability\n", + testlist[idx].name); + } } - return 0; + + ksft_finished(); /* Print results and exit() accordingly */ } diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c index caf7b8859a94..3fdb6e2598eb 100644 --- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c +++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c @@ -21,8 +21,7 @@ #include "test_util.h" #include "kvm_util.h" #include "diag318_test_handler.h" - -#define VCPU_ID 5 +#include "kselftest.h" static void guest_code(void) { @@ -74,61 +73,58 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right) #define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318) #define INVALID_SYNC_FIELD 0x80000000 -int main(int argc, char *argv[]) +void test_read_invalid(struct kvm_vcpu *vcpu) { - struct kvm_vm *vm; - struct kvm_run *run; - struct kvm_regs regs; - struct kvm_sregs sregs; - int rv, cap; - - /* Tell stdout not to buffer its content */ - setbuf(stdout, NULL); - - cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - if (!cap) { - print_skip("CAP_SYNC_REGS not supported"); - exit(KSFT_SKIP); - } - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - - run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; + int rv; /* Request reading invalid register set from VCPU. */ run->kvm_valid_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + run->kvm_valid_regs = 0; +} + +void test_set_invalid(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int rv; /* Request setting invalid register set into VCPU. */ run->kvm_dirty_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + run->kvm_dirty_regs = 0; +} + +void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_sregs sregs; + struct kvm_regs regs; + int rv; /* Request and verify all valid register sets. */ run->kvm_valid_regs = TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, "Unexpected exit reason: %u (%s)\n", @@ -141,11 +137,19 @@ int main(int argc, char *argv[]) run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); compare_regs(®s, &run->s.regs); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); compare_sregs(&sregs, &run->s.regs); +} + +void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + struct kvm_sregs sregs; + struct kvm_regs regs; + int rv; /* Set and verify various register values */ run->s.regs.gprs[11] = 0xBAD1DEA; @@ -159,7 +163,7 @@ int main(int argc, char *argv[]) run->kvm_dirty_regs |= KVM_SYNC_DIAG318; } - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, "Unexpected exit reason: %u (%s)\n", @@ -175,11 +179,17 @@ int main(int argc, char *argv[]) "diag318 sync regs value incorrect 0x%llx.", run->s.regs.diag318); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); compare_regs(®s, &run->s.regs); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); compare_sregs(&sregs, &run->s.regs); +} + +void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int rv; /* Clear kvm_dirty_regs bits, verify new s.regs values are * overwritten with existing guest values. @@ -188,7 +198,7 @@ int main(int argc, char *argv[]) run->kvm_dirty_regs = 0; run->s.regs.gprs[11] = 0xDEADBEEF; run->s.regs.diag318 = 0x4B1D; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv); TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, "Unexpected exit reason: %u (%s)\n", @@ -200,8 +210,43 @@ int main(int argc, char *argv[]) TEST_ASSERT(run->s.regs.diag318 != 0x4B1D, "diag318 sync regs value incorrect 0x%llx.", run->s.regs.diag318); +} + +struct testdef { + const char *name; + void (*test)(struct kvm_vcpu *vcpu); +} testlist[] = { + { "read invalid", test_read_invalid }, + { "set invalid", test_set_invalid }, + { "request+verify all valid regs", test_req_and_verify_all_valid_regs }, + { "set+verify various regs", test_set_and_verify_various_reg_values }, + { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + ksft_print_header(); + + ksft_set_plan(ARRAY_SIZE(testlist)); + + /* Create VM */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(vcpu); + ksft_test_result_pass("%s\n", testlist[idx].name); + } kvm_vm_free(vm); - return 0; + ksft_finished(); /* Print results and exit() accordingly */ } diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index c097b9db495e..a9a0b76e5fa4 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -8,14 +8,13 @@ #include <sys/mman.h> #include "test_util.h" #include "kvm_util.h" +#include "kselftest.h" #define PAGE_SHIFT 12 #define PAGE_SIZE (1 << PAGE_SHIFT) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) -#define VCPU_ID 1 - static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE]; static uint8_t *const page_store_prot = pages[0]; static uint8_t *const page_fetch_prot = pages[1]; @@ -63,12 +62,12 @@ static enum permission test_protection(void *addr, uint8_t key) } enum stage { - STAGE_END, STAGE_INIT_SIMPLE, TEST_SIMPLE, STAGE_INIT_FETCH_PROT_OVERRIDE, TEST_FETCH_PROT_OVERRIDE, TEST_STORAGE_PROT_OVERRIDE, + STAGE_END /* must be the last entry (it's the amount of tests) */ }; struct test { @@ -182,46 +181,63 @@ static void guest_code(void) GUEST_SYNC(perform_next_stage(&i, mapped_0)); } -#define HOST_SYNC(vmp, stage) \ -({ \ - struct kvm_vm *__vm = (vmp); \ - struct ucall uc; \ - int __stage = (stage); \ - \ - vcpu_run(__vm, VCPU_ID); \ - get_ucall(__vm, VCPU_ID, &uc); \ - if (uc.cmd == UCALL_ABORT) { \ - TEST_FAIL("line %lu: %s, hints: %lu, %lu", uc.args[1], \ - (const char *)uc.args[0], uc.args[2], uc.args[3]); \ - } \ - ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - ASSERT_EQ(uc.args[1], __stage); \ +#define HOST_SYNC_NO_TAP(vcpup, stage) \ +({ \ + struct kvm_vcpu *__vcpu = (vcpup); \ + struct ucall uc; \ + int __stage = (stage); \ + \ + vcpu_run(__vcpu); \ + get_ucall(__vcpu, &uc); \ + if (uc.cmd == UCALL_ABORT) \ + REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ + ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + ASSERT_EQ(uc.args[1], __stage); \ +}) + +#define HOST_SYNC(vcpu, stage) \ +({ \ + HOST_SYNC_NO_TAP(vcpu, stage); \ + ksft_test_result_pass("" #stage "\n"); \ }) int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; vm_vaddr_t guest_0_page; - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); + ksft_print_header(); + ksft_set_plan(STAGE_END); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; - HOST_SYNC(vm, STAGE_INIT_SIMPLE); + HOST_SYNC(vcpu, STAGE_INIT_SIMPLE); mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ); - HOST_SYNC(vm, TEST_SIMPLE); + HOST_SYNC(vcpu, TEST_SIMPLE); guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0); - if (guest_0_page != 0) - print_skip("Did not allocate page at 0 for fetch protection override tests"); - HOST_SYNC(vm, STAGE_INIT_FETCH_PROT_OVERRIDE); + if (guest_0_page != 0) { + /* Use NO_TAP so we don't get a PASS print */ + HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); + ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - " + "Did not allocate page at 0\n"); + } else { + HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE); + } if (guest_0_page == 0) mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ); run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE; run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(vm, TEST_FETCH_PROT_OVERRIDE); + HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE); run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE; run->kvm_dirty_regs = KVM_SYNC_CRS; - HOST_SYNC(vm, TEST_STORAGE_PROT_OVERRIDE); + HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE); + + kvm_vm_free(vm); + + ksft_finished(); /* Print results and exit() accordingly */ } diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index 73bc297dabe6..0d55f508d595 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -17,8 +17,6 @@ #include <kvm_util.h> #include <processor.h> -#define VCPU_ID 0 - /* * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a * 2MB sized and aligned region so that the initial region corresponds to @@ -54,8 +52,8 @@ static inline uint64_t guest_spin_on_val(uint64_t spin_val) static void *vcpu_worker(void *data) { - struct kvm_vm *vm = data; - struct kvm_run *run; + struct kvm_vcpu *vcpu = data; + struct kvm_run *run = vcpu->run; struct ucall uc; uint64_t cmd; @@ -64,13 +62,11 @@ static void *vcpu_worker(void *data) * which will occur if the guest attempts to access a memslot after it * has been deleted or while it is being moved . */ - run = vcpu_state(vm, VCPU_ID); - while (1) { - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); if (run->exit_reason == KVM_EXIT_IO) { - cmd = get_ucall(vm, VCPU_ID, &uc); + cmd = get_ucall(vcpu, &uc); if (cmd != UCALL_SYNC) break; @@ -92,8 +88,7 @@ static void *vcpu_worker(void *data) } if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT) - TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0], - __FILE__, uc.args[1], uc.args[2]); + REPORT_GUEST_ASSERT_1(uc, "val = %lu"); return NULL; } @@ -113,13 +108,14 @@ static void wait_for_vcpu(void) usleep(100000); } -static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code) +static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread, + void *guest_code) { struct kvm_vm *vm; uint64_t *hva; uint64_t gpa; - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(vcpu, guest_code); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP, MEM_REGION_GPA, MEM_REGION_SLOT, @@ -138,7 +134,7 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code) hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, 2 * 4096); - pthread_create(vcpu_thread, NULL, vcpu_worker, vm); + pthread_create(vcpu_thread, NULL, vcpu_worker, *vcpu); /* Ensure the guest thread is spun up. */ wait_for_vcpu(); @@ -180,10 +176,11 @@ static void guest_code_move_memory_region(void) static void test_move_memory_region(void) { pthread_t vcpu_thread; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; uint64_t *hva; - vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region); + vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); hva = addr_gpa2hva(vm, MEM_REGION_GPA); @@ -258,11 +255,12 @@ static void guest_code_delete_memory_region(void) static void test_delete_memory_region(void) { pthread_t vcpu_thread; + struct kvm_vcpu *vcpu; struct kvm_regs regs; struct kvm_run *run; struct kvm_vm *vm; - vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region); + vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region); /* Delete the memory region, the guest should not die. */ vm_mem_region_delete(vm, MEM_REGION_SLOT); @@ -286,13 +284,13 @@ static void test_delete_memory_region(void) pthread_join(vcpu_thread, NULL); - run = vcpu_state(vm, VCPU_ID); + run = vcpu->run; TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN || run->exit_reason == KVM_EXIT_INTERNAL_ERROR, "Unexpected exit reason = %d", run->exit_reason); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); /* * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already, @@ -309,19 +307,19 @@ static void test_delete_memory_region(void) static void test_zero_memory_regions(void) { + struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; pr_info("Testing KVM_RUN with zero added memory regions\n"); - vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); - TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64), - "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno); - vcpu_run(vm, VCPU_ID); + vm_ioctl(vm, KVM_SET_NR_MMU_PAGES, (void *)64ul); + vcpu_run(vcpu); - run = vcpu_state(vm, VCPU_ID); + run = vcpu->run; TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, "Unexpected exit_reason = %u\n", run->exit_reason); @@ -354,7 +352,7 @@ static void test_add_max_memory_regions(void) "KVM_CAP_NR_MEMSLOTS should be greater than 0"); pr_info("Allowed number of memory slots: %i\n", max_mem_slots); - vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + vm = vm_create_barebones(); /* Check it can be added memory slots up to the maximum allowed */ pr_info("Adding slots 0..%i, each memory region with %dK size\n", diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index 8c4e811bd586..db8967f1a17b 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -58,36 +58,32 @@ static void guest_code(int cpu) GUEST_DONE(); } -static void steal_time_init(struct kvm_vm *vm) +static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { - int i; - - if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax & - KVM_FEATURE_STEAL_TIME)) { - print_skip("steal-time not supported"); - exit(KSFT_SKIP); - } + return kvm_cpu_has(X86_FEATURE_KVM_STEAL_TIME); +} - for (i = 0; i < NR_VCPUS; ++i) { - int ret; +static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +{ + int ret; - /* ST_GPA_BASE is identity mapped */ - st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); - sync_global_to_guest(vm, st_gva[i]); + /* ST_GPA_BASE is identity mapped */ + st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); + sync_global_to_guest(vcpu->vm, st_gva[i]); - ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK); - TEST_ASSERT(ret == 0, "Bad GPA didn't fail"); + ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, + (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK); + TEST_ASSERT(ret == 0, "Bad GPA didn't fail"); - vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED); - } + vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid) +static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { - struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]); + struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); int i; - pr_info("VCPU%d:\n", vcpuid); + pr_info("VCPU%d:\n", vcpu_idx); pr_info(" steal: %lld\n", st->steal); pr_info(" version: %d\n", st->version); pr_info(" flags: %d\n", st->flags); @@ -158,49 +154,50 @@ static void guest_code(int cpu) GUEST_DONE(); } -static void steal_time_init(struct kvm_vm *vm) +static bool is_steal_time_supported(struct kvm_vcpu *vcpu) { struct kvm_device_attr dev = { .group = KVM_ARM_VCPU_PVTIME_CTRL, .attr = KVM_ARM_VCPU_PVTIME_IPA, }; - int i, ret; - - ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev); - if (ret != 0 && errno == ENXIO) { - print_skip("steal-time not supported"); - exit(KSFT_SKIP); - } - for (i = 0; i < NR_VCPUS; ++i) { - uint64_t st_ipa; + return !__vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); +} - vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev); +static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i) +{ + struct kvm_vm *vm = vcpu->vm; + uint64_t st_ipa; + int ret; - dev.addr = (uint64_t)&st_ipa; + struct kvm_device_attr dev = { + .group = KVM_ARM_VCPU_PVTIME_CTRL, + .attr = KVM_ARM_VCPU_PVTIME_IPA, + .addr = (uint64_t)&st_ipa, + }; - /* ST_GPA_BASE is identity mapped */ - st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); - sync_global_to_guest(vm, st_gva[i]); + vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev); - st_ipa = (ulong)st_gva[i] | 1; - ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev); - TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL"); + /* ST_GPA_BASE is identity mapped */ + st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE); + sync_global_to_guest(vm, st_gva[i]); - st_ipa = (ulong)st_gva[i]; - vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev); + st_ipa = (ulong)st_gva[i] | 1; + ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); + TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL"); - ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev); - TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST"); + st_ipa = (ulong)st_gva[i]; + vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); - } + ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev); + TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST"); } -static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid) +static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx) { - struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]); + struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]); - pr_info("VCPU%d:\n", vcpuid); + pr_info("VCPU%d:\n", vcpu_idx); pr_info(" rev: %d\n", st->rev); pr_info(" attr: %d\n", st->attr); pr_info(" st_time: %ld\n", st->st_time); @@ -224,29 +221,27 @@ static void *do_steal_time(void *arg) return NULL; } -static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +static void run_vcpu(struct kvm_vcpu *vcpu) { struct ucall uc; - vcpu_args_set(vm, vcpuid, 1, vcpuid); - - vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); + vcpu_run(vcpu); - switch (get_ucall(vm, vcpuid, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: case UCALL_DONE: break; case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", - exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + exit_reason_str(vcpu->run->exit_reason)); } } int main(int ac, char **av) { + struct kvm_vcpu *vcpus[NR_VCPUS]; struct kvm_vm *vm; pthread_attr_t attr; pthread_t thread; @@ -266,26 +261,26 @@ int main(int ac, char **av) pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - /* Create a one VCPU guest and an identity mapped memslot for the steal time structure */ - vm = vm_create_default(0, 0, guest_code); + /* Create a VM and an identity mapped memslot for the steal time structure */ + vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus); gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0); virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages); ucall_init(vm, NULL); - /* Add the rest of the VCPUs */ - for (i = 1; i < NR_VCPUS; ++i) - vm_vcpu_add_default(vm, i, guest_code); - - steal_time_init(vm); + TEST_REQUIRE(is_steal_time_supported(vcpus[0])); /* Run test on each VCPU */ for (i = 0; i < NR_VCPUS; ++i) { + steal_time_init(vcpus[i], i); + + vcpu_args_set(vcpus[i], 1, i); + /* First VCPU run initializes steal-time */ - run_vcpu(vm, i); + run_vcpu(vcpus[i]); /* Second VCPU run, expect guest stolen time to be <= run_delay */ - run_vcpu(vm, i); + run_vcpu(vcpus[i]); sync_global_from_guest(vm, guest_stolen_time[i]); stolen_time = guest_stolen_time[i]; run_delay = get_run_delay(); @@ -306,7 +301,7 @@ int main(int ac, char **av) MIN_RUN_DELAY_NS, run_delay); /* Run VCPU again to confirm stolen time is consistent with run_delay */ - run_vcpu(vm, i); + run_vcpu(vcpus[i]); sync_global_from_guest(vm, guest_stolen_time[i]); stolen_time = guest_stolen_time[i] - stolen_time; TEST_ASSERT(stolen_time >= run_delay, diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c index b337bbbfa41f..1c274933912b 100644 --- a/tools/testing/selftests/kvm/system_counter_offset_test.c +++ b/tools/testing/selftests/kvm/system_counter_offset_test.c @@ -14,8 +14,6 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 0 - #ifdef __x86_64__ struct test_case { @@ -28,19 +26,17 @@ static struct test_case test_cases[] = { { -180 * NSEC_PER_SEC }, }; -static void check_preconditions(struct kvm_vm *vm) +static void check_preconditions(struct kvm_vcpu *vcpu) { - if (!_vcpu_has_device_attr(vm, VCPU_ID, KVM_VCPU_TSC_CTRL, KVM_VCPU_TSC_OFFSET)) - return; - - print_skip("KVM_VCPU_TSC_OFFSET not supported; skipping test"); - exit(KSFT_SKIP); + __TEST_REQUIRE(!__vcpu_has_device_attr(vcpu, KVM_VCPU_TSC_CTRL, + KVM_VCPU_TSC_OFFSET), + "KVM_VCPU_TSC_OFFSET not supported; skipping test"); } -static void setup_system_counter(struct kvm_vm *vm, struct test_case *test) +static void setup_system_counter(struct kvm_vcpu *vcpu, struct test_case *test) { - vcpu_access_device_attr(vm, VCPU_ID, KVM_VCPU_TSC_CTRL, - KVM_VCPU_TSC_OFFSET, &test->tsc_offset, true); + vcpu_device_attr_set(vcpu, KVM_VCPU_TSC_CTRL, KVM_VCPU_TSC_OFFSET, + &test->tsc_offset); } static uint64_t guest_read_system_counter(struct test_case *test) @@ -87,11 +83,10 @@ static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end) static void handle_abort(struct ucall *uc) { - TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], - __FILE__, uc->args[1]); + REPORT_GUEST_ASSERT(*uc); } -static void enter_guest(struct kvm_vm *vm) +static void enter_guest(struct kvm_vcpu *vcpu) { uint64_t start, end; struct ucall uc; @@ -100,12 +95,12 @@ static void enter_guest(struct kvm_vm *vm) for (i = 0; i < ARRAY_SIZE(test_cases); i++) { struct test_case *test = &test_cases[i]; - setup_system_counter(vm, test); + setup_system_counter(vcpu, test); start = host_read_guest_system_counter(test); - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); end = host_read_guest_system_counter(test); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: handle_sync(&uc, start, end); break; @@ -114,19 +109,20 @@ static void enter_guest(struct kvm_vm *vm) return; default: TEST_ASSERT(0, "unhandled ucall %ld\n", - get_ucall(vm, VCPU_ID, &uc)); + get_ucall(vcpu, &uc)); } } } int main(void) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm = vm_create_default(VCPU_ID, 0, guest_main); - check_preconditions(vm); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + check_preconditions(vcpu); ucall_init(vm, NULL); - enter_guest(vm); + enter_guest(vcpu); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 76f65c22796f..dadcbad10a1d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -25,10 +25,6 @@ # error This test is 64-bit only #endif -#define VCPU_ID 0 -#define X86_FEATURE_XSAVE (1 << 26) -#define X86_FEATURE_OSXSAVE (1 << 27) - #define NUM_TILES 8 #define TILE_SIZE 1024 #define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE) @@ -124,15 +120,8 @@ static inline void __xsavec(struct xsave_data *data, uint64_t rfbm) static inline void check_cpuid_xsave(void) { - uint32_t eax, ebx, ecx, edx; - - eax = 1; - ecx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - if (!(ecx & X86_FEATURE_XSAVE)) - GUEST_ASSERT(!"cpuid: no CPU xsave support!"); - if (!(ecx & X86_FEATURE_OSXSAVE)) - GUEST_ASSERT(!"cpuid: no OS xsave support!"); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE)); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); } static bool check_xsave_supports_xtile(void) @@ -144,10 +133,7 @@ static bool enum_xtile_config(void) { u32 eax, ebx, ecx, edx; - eax = TILE_CPUID; - ecx = TILE_PALETTE_CPUID_SUBLEAVE; - - cpuid(&eax, &ebx, &ecx, &edx); + __cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx); if (!eax || !ebx || !ecx) return false; @@ -169,10 +155,7 @@ static bool enum_xsave_tile(void) { u32 eax, ebx, ecx, edx; - eax = XSTATE_CPUID; - ecx = XFEATURE_XTILEDATA; - - cpuid(&eax, &ebx, &ecx, &edx); + __cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx); if (!eax || !ebx) return false; @@ -187,10 +170,7 @@ static bool check_xsave_size(void) u32 eax, ebx, ecx, edx; bool valid = false; - eax = XSTATE_CPUID; - ecx = XSTATE_USER_STATE_SUBLEAVE; - - cpuid(&eax, &ebx, &ecx, &edx); + __cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx); if (ebx && ebx <= XSAVE_SIZE) valid = true; @@ -316,46 +296,36 @@ void guest_nm_handler(struct ex_regs *regs) int main(int argc, char *argv[]) { - struct kvm_cpuid_entry2 *entry; struct kvm_regs regs1, regs2; - bool amx_supported = false; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; struct kvm_x86_state *state; - int xsave_restore_size = 0; + int xsave_restore_size; vm_vaddr_t amx_cfg, tiledata, xsavedata; struct ucall uc; u32 amx_offset; int stage, ret; - vm_xsave_req_perm(XSTATE_XTILE_DATA_BIT); + vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT); /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); - entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & X86_FEATURE_XSAVE)) { - print_skip("XSAVE feature not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA)); - if (kvm_get_cpuid_max_basic() >= 0xd) { - entry = kvm_get_supported_cpuid_index(0xd, 0); - amx_supported = entry && !!(entry->eax & XFEATURE_MASK_XTILE); - if (!amx_supported) { - print_skip("AMX is not supported by the vCPU (eax=0x%x)", entry->eax); - exit(KSFT_SKIP); - } - /* Get xsave/restore max size */ - xsave_restore_size = entry->ecx; - } + /* Get xsave/restore max size */ + xsave_restore_size = kvm_get_supported_cpuid_entry(0xd)->ecx; - run = vcpu_state(vm, VCPU_ID); - vcpu_regs_get(vm, VCPU_ID, ®s1); + run = vcpu->run; + vcpu_regs_get(vcpu, ®s1); /* Register #NM handler */ vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler); /* amx cfg for guest_code */ @@ -369,19 +339,18 @@ int main(int argc, char *argv[]) /* xsave data for guest_code */ xsavedata = vm_vaddr_alloc_pages(vm, 3); memset(addr_gva2hva(vm, xsavedata), 0, 3 * getpagesize()); - vcpu_args_set(vm, VCPU_ID, 3, amx_cfg, tiledata, xsavedata); + vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xsavedata); for (stage = 1; ; stage++) { - _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", stage, run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: switch (uc.args[1]) { @@ -403,7 +372,7 @@ int main(int argc, char *argv[]) * size subtract 8K amx size. */ amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE; - state = vcpu_save_state(vm, VCPU_ID); + state = vcpu_save_state(vcpu); void *amx_start = (void *)state->xsave + amx_offset; void *tiles_data = (void *)addr_gva2hva(vm, tiledata); /* Only check TMM0 register, 1 tile */ @@ -424,22 +393,20 @@ int main(int argc, char *argv[]) TEST_FAIL("Unknown ucall %lu", uc.cmd); } - state = vcpu_save_state(vm, VCPU_ID); + state = vcpu_save_state(vcpu); memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); + vcpu_regs_get(vcpu, ®s1); kvm_vm_release(vm); /* Restore state in a new VM. */ - kvm_vm_restart(vm, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vcpu_load_state(vm, VCPU_ID, state); - run = vcpu_state(vm, VCPU_ID); + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + run = vcpu->run; kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vm, VCPU_ID, ®s2); + vcpu_regs_get(vcpu, ®s2); TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", (ulong) regs2.rdi, (ulong) regs2.rsi); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 16d2465c5634..a6aeee2e62e4 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -12,8 +12,6 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 0 - /* CPUIDs known to differ */ struct { u32 function; @@ -33,10 +31,9 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) u32 eax, ebx, ecx, edx; for (i = 0; i < guest_cpuid->nent; i++) { - eax = guest_cpuid->entries[i].function; - ecx = guest_cpuid->entries[i].index; - - cpuid(&eax, &ebx, &ecx, &edx); + __cpuid(guest_cpuid->entries[i].function, + guest_cpuid->entries[i].index, + &eax, &ebx, &ecx, &edx); GUEST_ASSERT(eax == guest_cpuid->entries[i].eax && ebx == guest_cpuid->entries[i].ebx && @@ -48,9 +45,9 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid) { - u32 eax = 0x40000000, ebx, ecx = 0, edx; + u32 eax, ebx, ecx, edx; - cpuid(&eax, &ebx, &ecx, &edx); + cpuid(0x40000000, &eax, &ebx, &ecx, &edx); GUEST_ASSERT(eax == 0x40000001); } @@ -68,7 +65,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_DONE(); } -static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie) +static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie) { int i; @@ -81,50 +78,44 @@ static bool is_cpuid_mangled(struct kvm_cpuid_entry2 *entrie) return false; } -static void check_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *entrie) +static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, + const struct kvm_cpuid2 *cpuid2) { + const struct kvm_cpuid_entry2 *e1, *e2; int i; - for (i = 0; i < cpuid->nent; i++) { - if (cpuid->entries[i].function == entrie->function && - cpuid->entries[i].index == entrie->index) { - if (is_cpuid_mangled(entrie)) - return; - - TEST_ASSERT(cpuid->entries[i].eax == entrie->eax && - cpuid->entries[i].ebx == entrie->ebx && - cpuid->entries[i].ecx == entrie->ecx && - cpuid->entries[i].edx == entrie->edx, - "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", - entrie->function, entrie->index, - cpuid->entries[i].eax, cpuid->entries[i].ebx, - cpuid->entries[i].ecx, cpuid->entries[i].edx, - entrie->eax, entrie->ebx, entrie->ecx, entrie->edx); - return; - } - } + TEST_ASSERT(cpuid1->nent == cpuid2->nent, + "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); - TEST_ASSERT(false, "CPUID 0x%x.%x not found", entrie->function, entrie->index); -} + for (i = 0; i < cpuid1->nent; i++) { + e1 = &cpuid1->entries[i]; + e2 = &cpuid2->entries[i]; -static void compare_cpuids(struct kvm_cpuid2 *cpuid1, struct kvm_cpuid2 *cpuid2) -{ - int i; + TEST_ASSERT(e1->function == e2->function && + e1->index == e2->index && e1->flags == e2->flags, + "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x\n", + i, e1->function, e1->index, e1->flags, + e2->function, e2->index, e2->flags); - for (i = 0; i < cpuid1->nent; i++) - check_cpuid(cpuid2, &cpuid1->entries[i]); + if (is_cpuid_mangled(e1)) + continue; - for (i = 0; i < cpuid2->nent; i++) - check_cpuid(cpuid1, &cpuid2->entries[i]); + TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx && + e1->ecx == e2->ecx && e1->edx == e2->edx, + "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", + e1->function, e1->index, + e1->eax, e1->ebx, e1->ecx, e1->edx, + e2->eax, e2->ebx, e2->ecx, e2->edx); + } } -static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) { struct ucall uc; - _vcpu_run(vm, vcpuid); + vcpu_run(vcpu); - switch (get_ucall(vm, vcpuid, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && uc.args[1] == stage + 1, @@ -134,11 +125,10 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) case UCALL_DONE: return; case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", (const char *)uc.args[0], - __FILE__, uc.args[1], uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); default: TEST_ASSERT(false, "Unexpected exit: %s", - exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + exit_reason_str(vcpu->run->exit_reason)); } } @@ -154,56 +144,53 @@ struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct return guest_cpuids; } -static void set_cpuid_after_run(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid) +static void set_cpuid_after_run(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *ent; int rc; u32 eax, ebx, x; /* Setting unmodified CPUID is allowed */ - rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + rc = __vcpu_set_cpuid(vcpu); TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc); /* Changing CPU features is forbidden */ - ent = get_cpuid(cpuid, 0x7, 0); + ent = vcpu_get_cpuid_entry(vcpu, 0x7); ebx = ent->ebx; ent->ebx--; - rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + rc = __vcpu_set_cpuid(vcpu); TEST_ASSERT(rc, "Changing CPU features should fail"); ent->ebx = ebx; /* Changing MAXPHYADDR is forbidden */ - ent = get_cpuid(cpuid, 0x80000008, 0); + ent = vcpu_get_cpuid_entry(vcpu, 0x80000008); eax = ent->eax; x = eax & 0xff; ent->eax = (eax & ~0xffu) | (x - 1); - rc = __vcpu_set_cpuid(vm, VCPU_ID, cpuid); + rc = __vcpu_set_cpuid(vcpu); TEST_ASSERT(rc, "Changing MAXPHYADDR should fail"); ent->eax = eax; } int main(void) { - struct kvm_cpuid2 *supp_cpuid, *cpuid2; + struct kvm_vcpu *vcpu; vm_vaddr_t cpuid_gva; struct kvm_vm *vm; int stage; - vm = vm_create_default(VCPU_ID, 0, guest_main); - - supp_cpuid = kvm_get_supported_cpuid(); - cpuid2 = vcpu_get_cpuid(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); - compare_cpuids(supp_cpuid, cpuid2); + compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid); - vcpu_alloc_cpuid(vm, &cpuid_gva, cpuid2); + vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid); - vcpu_args_set(vm, VCPU_ID, 1, cpuid_gva); + vcpu_args_set(vcpu, 1, cpuid_gva); for (stage = 0; stage < 3; stage++) - run_vcpu(vm, VCPU_ID, stage); + run_vcpu(vcpu, stage); - set_cpuid_after_run(vm, cpuid2); + set_cpuid_after_run(vcpu); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 6f6fd189dda3..4208487652f8 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -19,25 +19,11 @@ #include "kvm_util.h" #include "processor.h" -#define X86_FEATURE_XSAVE (1<<26) -#define X86_FEATURE_OSXSAVE (1<<27) -#define VCPU_ID 1 - static inline bool cr4_cpuid_is_sync(void) { - int func, subfunc; - uint32_t eax, ebx, ecx, edx; - uint64_t cr4; - - func = 0x1; - subfunc = 0x0; - __asm__ __volatile__("cpuid" - : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx) - : "a"(func), "c"(subfunc)); + uint64_t cr4 = get_cr4(); - cr4 = get_cr4(); - - return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE)); + return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE)); } static void guest_code(void) @@ -63,44 +49,37 @@ static void guest_code(void) int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; struct kvm_sregs sregs; - struct kvm_cpuid_entry2 *entry; struct ucall uc; - int rc; - entry = kvm_get_supported_cpuid_entry(1); - if (!(entry->ecx & X86_FEATURE_XSAVE)) { - print_skip("XSAVE feature not supported"); - return 0; - } + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE)); /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; while (1) { - rc = _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vm, VCPU_ID, &sregs); + vcpu_sregs_set(vcpu, &sregs); break; case UCALL_ABORT: - TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 5f078db1bcba..7ef99c3359a0 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -10,8 +10,6 @@ #include "processor.h" #include "apic.h" -#define VCPU_ID 0 - #define DR6_BD (1 << 13) #define DR7_GD (1 << 13) @@ -66,21 +64,22 @@ static void guest_code(void) GUEST_DONE(); } -#define CLEAR_DEBUG() memset(&debug, 0, sizeof(debug)) -#define APPLY_DEBUG() vcpu_set_guest_debug(vm, VCPU_ID, &debug) #define CAST_TO_RIP(v) ((unsigned long long)&(v)) -#define SET_RIP(v) do { \ - vcpu_regs_get(vm, VCPU_ID, ®s); \ - regs.rip = (v); \ - vcpu_regs_set(vm, VCPU_ID, ®s); \ - } while (0) -#define MOVE_RIP(v) SET_RIP(regs.rip + (v)); + +static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len) +{ + struct kvm_regs regs; + + vcpu_regs_get(vcpu, ®s); + regs.rip += insn_len; + vcpu_regs_set(vcpu, ®s); +} int main(void) { struct kvm_guest_debug debug; unsigned long long target_dr6, target_rip; - struct kvm_regs regs; + struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; struct ucall uc; @@ -96,35 +95,32 @@ int main(void) 1, /* cli */ }; - if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) { - print_skip("KVM_CAP_SET_GUEST_DEBUG not supported"); - return 0; - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG)); - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; /* Test software BPs - int3 */ - CLEAR_DEBUG(); + memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; - APPLY_DEBUG(); - vcpu_run(vm, VCPU_ID); + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == BP_VECTOR && run->debug.arch.pc == CAST_TO_RIP(sw_bp), "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)", run->exit_reason, run->debug.arch.exception, run->debug.arch.pc, CAST_TO_RIP(sw_bp)); - MOVE_RIP(1); + vcpu_skip_insn(vcpu, 1); /* Test instruction HW BP over DR[0-3] */ for (i = 0; i < 4; i++) { - CLEAR_DEBUG(); + memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp); debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1)); - APPLY_DEBUG(); - vcpu_run(vm, VCPU_ID); + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); target_dr6 = 0xffff0ff0 | (1UL << i); TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && @@ -137,17 +133,17 @@ int main(void) run->debug.arch.dr6, target_dr6); } /* Skip "nop" */ - MOVE_RIP(1); + vcpu_skip_insn(vcpu, 1); /* Test data access HW BP over DR[0-3] */ for (i = 0; i < 4; i++) { - CLEAR_DEBUG(); + memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; debug.arch.debugreg[i] = CAST_TO_RIP(guest_value); debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) | (0x000d0000UL << (4*i)); - APPLY_DEBUG(); - vcpu_run(vm, VCPU_ID); + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); target_dr6 = 0xffff0ff0 | (1UL << i); TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && @@ -159,23 +155,22 @@ int main(void) run->debug.arch.pc, CAST_TO_RIP(write_data), run->debug.arch.dr6, target_dr6); /* Rollback the 4-bytes "mov" */ - MOVE_RIP(-7); + vcpu_skip_insn(vcpu, -7); } /* Skip the 4-bytes "mov" */ - MOVE_RIP(7); + vcpu_skip_insn(vcpu, 7); /* Test single step */ target_rip = CAST_TO_RIP(ss_start); target_dr6 = 0xffff4ff0ULL; - vcpu_regs_get(vm, VCPU_ID, ®s); for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { target_rip += ss_size[i]; - CLEAR_DEBUG(); + memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_BLOCKIRQ; debug.arch.debugreg[7] = 0x00000400; - APPLY_DEBUG(); - vcpu_run(vm, VCPU_ID); + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && run->debug.arch.pc == target_rip && @@ -188,11 +183,11 @@ int main(void) } /* Finally test global disable */ - CLEAR_DEBUG(); + memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; debug.arch.debugreg[7] = 0x400 | DR7_GD; - APPLY_DEBUG(); - vcpu_run(vm, VCPU_ID); + vcpu_guest_debug_set(vcpu, &debug); + vcpu_run(vcpu); target_dr6 = 0xffff0ff0 | DR6_BD; TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG && run->debug.arch.exception == DB_VECTOR && @@ -205,12 +200,12 @@ int main(void) target_dr6); /* Disable all debug controls, run to the end */ - CLEAR_DEBUG(); - APPLY_DEBUG(); + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO"); - cmd = get_ucall(vm, VCPU_ID, &uc); + cmd = get_ucall(vcpu, &uc); TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE"); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c index aeb3850f81bd..236e11755ba6 100644 --- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c +++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c @@ -11,7 +11,6 @@ #include "kvm_util.h" #include "vmx.h" -#define VCPU_ID 1 #define MAXPHYADDR 36 #define MEM_REGION_GVA 0x0000123456789000 @@ -27,14 +26,6 @@ static void guest_code(void) GUEST_DONE(); } -static void run_guest(struct kvm_vm *vm) -{ - int rc; - - rc = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); -} - /* * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2, * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)". @@ -56,9 +47,9 @@ static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size) GET_RM(insn_bytes[1]) != 0x5; } -static void process_exit_on_emulation_error(struct kvm_vm *vm) +static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; struct kvm_regs regs; uint8_t *insn_bytes; uint8_t insn_size; @@ -92,50 +83,48 @@ static void process_exit_on_emulation_error(struct kvm_vm *vm) * contained an flds instruction that is 2-bytes in * length (ie: no prefix, no SIB, no displacement). */ - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); regs.rip += 2; - vcpu_regs_set(vm, VCPU_ID, ®s); + vcpu_regs_set(vcpu, ®s); } } } -static void do_guest_assert(struct kvm_vm *vm, struct ucall *uc) +static void do_guest_assert(struct ucall *uc) { - TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], __FILE__, - uc->args[1]); + REPORT_GUEST_ASSERT(*uc); } -static void check_for_guest_assert(struct kvm_vm *vm) +static void check_for_guest_assert(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; - if (run->exit_reason == KVM_EXIT_IO && - get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) { - do_guest_assert(vm, &uc); + if (vcpu->run->exit_reason == KVM_EXIT_IO && + get_ucall(vcpu, &uc) == UCALL_ABORT) { + do_guest_assert(&uc); } } -static void process_ucall_done(struct kvm_vm *vm) +static void process_ucall_done(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; struct ucall uc; - check_for_guest_assert(vm); + check_for_guest_assert(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s)", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE, + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", uc.cmd, UCALL_DONE); } -static uint64_t process_ucall(struct kvm_vm *vm) +static uint64_t process_ucall(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; struct ucall uc; TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -143,14 +132,14 @@ static uint64_t process_ucall(struct kvm_vm *vm) run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: break; case UCALL_ABORT: - do_guest_assert(vm, &uc); + do_guest_assert(&uc); break; case UCALL_DONE: - process_ucall_done(vm); + process_ucall_done(vcpu); break; default: TEST_ASSERT(false, "Unexpected ucall"); @@ -161,12 +150,7 @@ static uint64_t process_ucall(struct kvm_vm *vm) int main(int argc, char *argv[]) { - struct kvm_enable_cap emul_failure_cap = { - .cap = KVM_CAP_EXIT_ON_EMULATION_FAILURE, - .args[0] = 1, - }; - struct kvm_cpuid_entry2 *entry; - struct kvm_cpuid2 *cpuid; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; uint64_t gpa, pte; uint64_t *hva; @@ -175,24 +159,15 @@ int main(int argc, char *argv[]) /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - vm = vm_create_default(VCPU_ID, 0, guest_code); - - if (!kvm_check_cap(KVM_CAP_SMALLER_MAXPHYADDR)) { - printf("module parameter 'allow_smaller_maxphyaddr' is not set. Skipping test.\n"); - return 0; - } - - cpuid = kvm_get_supported_cpuid(); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR)); - entry = kvm_get_supported_cpuid_index(0x80000008, 0); - entry->eax = (entry->eax & 0xffffff00) | MAXPHYADDR; - set_cpuid(cpuid, entry); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_set_cpuid(vm, VCPU_ID, cpuid); + vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR); rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE); TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable"); - vm_enable_cap(vm, &emul_failure_cap); + vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1); vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, MEM_REGION_GPA, MEM_REGION_SLOT, @@ -203,14 +178,14 @@ int main(int argc, char *argv[]) virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1); hva = addr_gpa2hva(vm, MEM_REGION_GPA); memset(hva, 0, PAGE_SIZE); - pte = vm_get_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA); - vm_set_page_table_entry(vm, VCPU_ID, MEM_REGION_GVA, pte | (1ull << 36)); + pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA); + vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << 36)); - run_guest(vm); - process_exit_on_emulation_error(vm); - run_guest(vm); + vcpu_run(vcpu); + process_exit_on_emulation_error(vcpu); + vcpu_run(vcpu); - TEST_ASSERT(process_ucall(vm) == UCALL_DONE, "Expected UCALL_DONE"); + TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE"); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c index d12e043aa2ee..99bc202243d2 100644 --- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c +++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c @@ -18,9 +18,6 @@ #include "vmx.h" -#define VCPU_ID 5 -#define NMI_VECTOR 2 - static int ud_count; static void guest_ud_handler(struct ex_regs *regs) @@ -160,88 +157,86 @@ void guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -void inject_nmi(struct kvm_vm *vm) +void inject_nmi(struct kvm_vcpu *vcpu) { struct kvm_vcpu_events events; - vcpu_events_get(vm, VCPU_ID, &events); + vcpu_events_get(vcpu, &events); events.nmi.pending = 1; events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; - vcpu_events_set(vm, VCPU_ID, &events); + vcpu_events_set(vcpu, &events); } -static void save_restore_vm(struct kvm_vm *vm) +static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, + struct kvm_vcpu *vcpu) { struct kvm_regs regs1, regs2; struct kvm_x86_state *state; - state = vcpu_save_state(vm, VCPU_ID); + state = vcpu_save_state(vcpu); memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); + vcpu_regs_get(vcpu, ®s1); kvm_vm_release(vm); /* Restore state in a new VM. */ - kvm_vm_restart(vm, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); - vcpu_set_hv_cpuid(vm, VCPU_ID); - vcpu_enable_evmcs(vm, VCPU_ID); - vcpu_load_state(vm, VCPU_ID, state); + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_set_hv_cpuid(vcpu); + vcpu_enable_evmcs(vcpu); + vcpu_load_state(vcpu, state); kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vm, VCPU_ID, ®s2); + vcpu_regs_get(vcpu, ®s2); TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", (ulong) regs2.rdi, (ulong) regs2.rsi); + return vcpu; } int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva = 0; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; struct ucall uc; int stage; - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); - if (!nested_vmx_supported() || - !kvm_check_cap(KVM_CAP_NESTED_STATE) || - !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { - print_skip("Enlightened VMCS is unsupported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); - vcpu_set_hv_cpuid(vm, VCPU_ID); - vcpu_enable_evmcs(vm, VCPU_ID); + vcpu_set_hv_cpuid(vcpu); + vcpu_enable_evmcs(vcpu); vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); pr_info("Running L1 which uses EVMCS to run L2\n"); for (stage = 1;; stage++) { - run = vcpu_state(vm, VCPU_ID); - _vcpu_run(vm, VCPU_ID); + run = vcpu->run; + + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", stage, run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: break; @@ -256,12 +251,12 @@ int main(int argc, char *argv[]) uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", stage, (ulong)uc.args[1]); - save_restore_vm(vm); + vcpu = save_restore_vm(vm, vcpu); /* Force immediate L2->L1 exit before resuming */ if (stage == 8) { pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); - inject_nmi(vm); + inject_nmi(vcpu); } /* @@ -271,7 +266,7 @@ int main(int argc, char *argv[]) */ if (stage == 9) { pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n"); - save_restore_vm(vm); + vcpu = save_restore_vm(vm, vcpu); } } diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c index 1f5c32146f3d..b1905d280ef5 100644 --- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c +++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c @@ -14,8 +14,6 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 0 - static bool ud_expected; static void guest_ud_handler(struct ex_regs *regs) @@ -94,29 +92,27 @@ static void guest_main(void) GUEST_DONE(); } -static void setup_ud_vector(struct kvm_vm *vm) +static void setup_ud_vector(struct kvm_vcpu *vcpu) { - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + vm_init_descriptor_tables(vcpu->vm); + vcpu_init_descriptor_tables(vcpu); + vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler); } -static void enter_guest(struct kvm_vm *vm) +static void enter_guest(struct kvm_vcpu *vcpu) { - struct kvm_run *run; + struct kvm_run *run = vcpu->run; struct ucall uc; - run = vcpu_state(vm, VCPU_ID); - - vcpu_run(vm, VCPU_ID); - switch (get_ucall(vm, VCPU_ID, &uc)) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]); break; case UCALL_DONE: return; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); default: TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)", uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason)); @@ -125,45 +121,42 @@ static void enter_guest(struct kvm_vm *vm) static void test_fix_hypercall(void) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm = vm_create_default(VCPU_ID, 0, guest_main); - setup_ud_vector(vm); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + setup_ud_vector(vcpu); ud_expected = false; sync_global_to_guest(vm, ud_expected); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - enter_guest(vm); + enter_guest(vcpu); } static void test_fix_hypercall_disabled(void) { - struct kvm_enable_cap cap = {0}; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; - vm = vm_create_default(VCPU_ID, 0, guest_main); - setup_ud_vector(vm); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + setup_ud_vector(vcpu); - cap.cap = KVM_CAP_DISABLE_QUIRKS2; - cap.args[0] = KVM_X86_QUIRK_FIX_HYPERCALL_INSN; - vm_enable_cap(vm, &cap); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, + KVM_X86_QUIRK_FIX_HYPERCALL_INSN); ud_expected = true; sync_global_to_guest(vm, ud_expected); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - enter_guest(vm); + enter_guest(vcpu); } int main(void) { - if (!(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN)) { - print_skip("KVM_X86_QUIRK_HYPERCALL_INSN not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN); test_fix_hypercall(); test_fix_hypercall_disabled(); diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c index 8aed0db1331d..d09b3cbcadc6 100644 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c @@ -15,116 +15,21 @@ #include "kvm_util.h" #include "processor.h" -static int kvm_num_index_msrs(int kvm_fd, int nmsrs) -{ - struct kvm_msr_list *list; - int r; - - list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); - list->nmsrs = nmsrs; - r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - TEST_ASSERT(r == -1 && errno == E2BIG, - "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i", - r); - - r = list->nmsrs; - free(list); - return r; -} - -static void test_get_msr_index(void) -{ - int old_res, res, kvm_fd, r; - struct kvm_msr_list *list; - - kvm_fd = open_kvm_dev_path_or_exit(); - - old_res = kvm_num_index_msrs(kvm_fd, 0); - TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); - - if (old_res != 1) { - res = kvm_num_index_msrs(kvm_fd, 1); - TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); - TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); - } - - list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0])); - list->nmsrs = old_res; - r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list); - - TEST_ASSERT(r == 0, - "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", - r); - TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical"); - free(list); - - close(kvm_fd); -} - -static int kvm_num_feature_msrs(int kvm_fd, int nmsrs) -{ - struct kvm_msr_list *list; - int r; - - list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); - list->nmsrs = nmsrs; - r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); - TEST_ASSERT(r == -1 && errno == E2BIG, - "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i", - r); - - r = list->nmsrs; - free(list); - return r; -} - -struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs) -{ - struct kvm_msr_list *list; - int r; - - list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0])); - list->nmsrs = nmsrs; - r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list); - - TEST_ASSERT(r == 0, - "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i", - r); - - return list; -} - -static void test_get_msr_feature(void) +int main(int argc, char *argv[]) { - int res, old_res, i, kvm_fd; - struct kvm_msr_list *feature_list; + const struct kvm_msr_list *feature_list; + int i; - kvm_fd = open_kvm_dev_path_or_exit(); + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - old_res = kvm_num_feature_msrs(kvm_fd, 0); - TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0"); - - if (old_res != 1) { - res = kvm_num_feature_msrs(kvm_fd, 1); - TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1"); - TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical"); - } - - feature_list = kvm_get_msr_feature_list(kvm_fd, old_res); - TEST_ASSERT(old_res == feature_list->nmsrs, - "Unmatching number of msr indexes"); + (void)kvm_get_msr_index_list(); + feature_list = kvm_get_feature_msr_index_list(); for (i = 0; i < feature_list->nmsrs; i++) kvm_get_feature_msr(feature_list->indices[i]); - - free(feature_list); - close(kvm_fd); -} - -int main(int argc, char *argv[]) -{ - if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES)) - test_get_msr_feature(); - - test_get_msr_index(); } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c index 3330fb183c68..d576bc8ce823 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c @@ -173,23 +173,21 @@ static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_ GUEST_DONE(); } -#define VCPU_ID 0 - -static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) +static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu) { u64 tsc_freq, r1, r2, t1, t2; s64 delta_ns; - tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY); + tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY); TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero"); /* For increased accuracy, take mean rdtsc() before and afrer ioctl */ r1 = rdtsc(); - t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT); r1 = (r1 + rdtsc()) / 2; nop_loop(); r2 = rdtsc(); - t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT); + t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT); r2 = (r2 + rdtsc()) / 2; TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2); @@ -207,36 +205,36 @@ static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm) int main(void) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; struct ucall uc; vm_vaddr_t tsc_page_gva; int stage; - vm = vm_create_default(VCPU_ID, 0, guest_main); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); + run = vcpu->run; - vcpu_set_hv_cpuid(vm, VCPU_ID); + vcpu_set_hv_cpuid(vcpu); tsc_page_gva = vm_vaddr_alloc_page(vm); memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize()); TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0, "TSC page has to be page aligned\n"); - vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); + vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva)); - host_check_tsc_msr_rdtsc(vm); + host_check_tsc_msr_rdtsc(vcpu); for (stage = 1;; stage++) { - _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", stage, run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: break; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c index 8c245ab2d98a..e804eb08dff9 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c @@ -20,8 +20,6 @@ #include "processor.h" #include "vmx.h" -#define VCPU_ID 0 - static void guest_code(void) { } @@ -45,7 +43,7 @@ static bool smt_possible(void) return res; } -static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, +static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries, bool evmcs_expected) { int i; @@ -58,7 +56,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, nent_expected, hv_cpuid_entries->nent); for (i = 0; i < hv_cpuid_entries->nent; i++) { - struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i]; + const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i]; TEST_ASSERT((entry->function >= 0x40000000) && (entry->function <= 0x40000082), @@ -115,64 +113,62 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries, } } -void test_hv_cpuid_e2big(struct kvm_vm *vm, bool system) +void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu) { static struct kvm_cpuid2 cpuid = {.nent = 0}; int ret; - if (!system) - ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); + if (vcpu) + ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); else - ret = _kvm_ioctl(vm, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); + ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid); TEST_ASSERT(ret == -1 && errno == E2BIG, "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when" - " it should have: %d %d", system ? "KVM" : "vCPU", ret, errno); + " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno); } int main(int argc, char *argv[]) { struct kvm_vm *vm; - struct kvm_cpuid2 *hv_cpuid_entries; + const struct kvm_cpuid2 *hv_cpuid_entries; + struct kvm_vcpu *vcpu; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - if (!kvm_check_cap(KVM_CAP_HYPERV_CPUID)) { - print_skip("KVM_CAP_HYPERV_CPUID not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID)); - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); /* Test vCPU ioctl version */ - test_hv_cpuid_e2big(vm, false); + test_hv_cpuid_e2big(vm, vcpu); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID); + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); test_hv_cpuid(hv_cpuid_entries, false); - free(hv_cpuid_entries); + free((void *)hv_cpuid_entries); - if (!nested_vmx_supported() || - !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { + if (!kvm_cpu_has(X86_FEATURE_VMX) || + !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) { print_skip("Enlightened VMCS is unsupported"); goto do_sys; } - vcpu_enable_evmcs(vm, VCPU_ID); - hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vm, VCPU_ID); + vcpu_enable_evmcs(vcpu); + hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu); test_hv_cpuid(hv_cpuid_entries, true); - free(hv_cpuid_entries); + free((void *)hv_cpuid_entries); do_sys: /* Test system ioctl version */ - if (!kvm_check_cap(KVM_CAP_SYS_HYPERV_CPUID)) { + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) { print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported"); goto out; } - test_hv_cpuid_e2big(vm, true); + test_hv_cpuid_e2big(vm, NULL); hv_cpuid_entries = kvm_get_supported_hv_cpuid(); - test_hv_cpuid(hv_cpuid_entries, nested_vmx_supported()); + test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX)); out: kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 672915ce73d8..79ab0152d281 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -13,78 +13,22 @@ #include "processor.h" #include "hyperv.h" -#define VCPU_ID 0 #define LINUX_OS_ID ((u64)0x8100 << 48) -extern unsigned char rdmsr_start; -extern unsigned char rdmsr_end; - -static u64 do_rdmsr(u32 idx) -{ - u32 lo, hi; - - asm volatile("rdmsr_start: rdmsr;" - "rdmsr_end:" - : "=a"(lo), "=c"(hi) - : "c"(idx)); - - return (((u64) hi) << 32) | lo; -} - -extern unsigned char wrmsr_start; -extern unsigned char wrmsr_end; - -static void do_wrmsr(u32 idx, u64 val) -{ - u32 lo, hi; - - lo = val; - hi = val >> 32; - - asm volatile("wrmsr_start: wrmsr;" - "wrmsr_end:" - : : "a"(lo), "c"(idx), "d"(hi)); -} - -static int nr_gp; -static int nr_ud; - -static inline u64 hypercall(u64 control, vm_vaddr_t input_address, - vm_vaddr_t output_address) -{ - u64 hv_status; - - asm volatile("mov %3, %%r8\n" - "vmcall" - : "=a" (hv_status), - "+c" (control), "+d" (input_address) - : "r" (output_address) - : "cc", "memory", "r8", "r9", "r10", "r11"); - - return hv_status; -} - -static void guest_gp_handler(struct ex_regs *regs) +static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address, + vm_vaddr_t output_address, uint64_t *hv_status) { - unsigned char *rip = (unsigned char *)regs->rip; - bool r, w; - - r = rip == &rdmsr_start; - w = rip == &wrmsr_start; - GUEST_ASSERT(r || w); - - nr_gp++; - - if (r) - regs->rip = (uint64_t)&rdmsr_end; - else - regs->rip = (uint64_t)&wrmsr_end; -} - -static void guest_ud_handler(struct ex_regs *regs) -{ - nr_ud++; - regs->rip += 3; + uint8_t vector; + + /* Note both the hypercall and the "asm safe" clobber r9-r11. */ + asm volatile("mov %[output_address], %%r8\n\t" + KVM_ASM_SAFE("vmcall") + : "=a" (*hv_status), + "+c" (control), "+d" (input_address), + KVM_ASM_SAFE_OUTPUTS(vector) + : [output_address] "r"(output_address) + : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS); + return vector; } struct msr_data { @@ -102,111 +46,105 @@ struct hcall_data { static void guest_msr(struct msr_data *msr) { - int i = 0; - - while (msr->idx) { - WRITE_ONCE(nr_gp, 0); - if (!msr->write) - do_rdmsr(msr->idx); - else - do_wrmsr(msr->idx, msr->write_val); + uint64_t ignored; + uint8_t vector; - if (msr->available) - GUEST_ASSERT(READ_ONCE(nr_gp) == 0); - else - GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + GUEST_ASSERT(msr->idx); - GUEST_SYNC(i++); - } + if (!msr->write) + vector = rdmsr_safe(msr->idx, &ignored); + else + vector = wrmsr_safe(msr->idx, msr->write_val); + if (msr->available) + GUEST_ASSERT_2(!vector, msr->idx, vector); + else + GUEST_ASSERT_2(vector == GP_VECTOR, msr->idx, vector); GUEST_DONE(); } static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) { - int i = 0; u64 res, input, output; + uint8_t vector; + + GUEST_ASSERT(hcall->control); wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); - while (hcall->control) { - nr_ud = 0; - if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { - input = pgs_gpa; - output = pgs_gpa + 4096; - } else { - input = output = 0; - } - - res = hypercall(hcall->control, input, output); - if (hcall->ud_expected) - GUEST_ASSERT(nr_ud == 1); - else - GUEST_ASSERT(res == hcall->expect); - - GUEST_SYNC(i++); + if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) { + input = pgs_gpa; + output = pgs_gpa + 4096; + } else { + input = output = 0; } + vector = hypercall(hcall->control, input, output, &res); + if (hcall->ud_expected) + GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); + else + GUEST_ASSERT_2(!vector, hcall->control, vector); + + GUEST_ASSERT_2(!hcall->ud_expected || res == hcall->expect, + hcall->expect, res); GUEST_DONE(); } -static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid, - struct kvm_cpuid_entry2 *feat, - struct kvm_cpuid_entry2 *recomm, - struct kvm_cpuid_entry2 *dbg) +static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu) { - TEST_ASSERT(set_cpuid(cpuid, feat), - "failed to set KVM_CPUID_FEATURES leaf"); - TEST_ASSERT(set_cpuid(cpuid, recomm), - "failed to set HYPERV_CPUID_ENLIGHTMENT_INFO leaf"); - TEST_ASSERT(set_cpuid(cpuid, dbg), - "failed to set HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES leaf"); - vcpu_set_cpuid(vm, VCPU_ID, cpuid); + /* + * Enable all supported Hyper-V features, then clear the leafs holding + * the features that will be tested one by one. + */ + vcpu_set_hv_cpuid(vcpu); + + vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); + vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO); + vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); } static void guest_test_msrs_access(void) { + struct kvm_cpuid2 *prev_cpuid = NULL; + struct kvm_cpuid_entry2 *feat, *dbg; + struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; struct ucall uc; - int stage = 0, r; - struct kvm_cpuid_entry2 feat = { - .function = HYPERV_CPUID_FEATURES - }; - struct kvm_cpuid_entry2 recomm = { - .function = HYPERV_CPUID_ENLIGHTMENT_INFO - }; - struct kvm_cpuid_entry2 dbg = { - .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES - }; - struct kvm_cpuid2 *best; + int stage = 0; vm_vaddr_t msr_gva; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, - .args = {1} - }; struct msr_data *msr; while (true) { - vm = vm_create_default(VCPU_ID, 0, guest_msr); + vm = vm_create_with_one_vcpu(&vcpu, guest_msr); msr_gva = vm_vaddr_alloc_page(vm); memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); msr = addr_gva2hva(vm, msr_gva); - vcpu_args_set(vm, VCPU_ID, 1, msr_gva); - vcpu_enable_cap(vm, VCPU_ID, &cap); + vcpu_args_set(vcpu, 1, msr_gva); + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1); - vcpu_set_hv_cpuid(vm, VCPU_ID); + if (!prev_cpuid) { + vcpu_reset_hv_cpuid(vcpu); - best = kvm_get_supported_hv_cpuid(); + prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent); + } else { + vcpu_init_cpuid(vcpu, prev_cpuid); + } + + feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); + dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + vcpu_init_descriptor_tables(vcpu); - run = vcpu_state(vm, VCPU_ID); + run = vcpu->run; + + /* TODO: Make this entire test easier to maintain. */ + if (stage >= 21) + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0); switch (stage) { case 0: @@ -223,7 +161,7 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 2: - feat.eax |= HV_MSR_HYPERCALL_AVAILABLE; + feat->eax |= HV_MSR_HYPERCALL_AVAILABLE; /* * HV_X64_MSR_GUEST_OS_ID has to be written first to make * HV_X64_MSR_HYPERCALL available. @@ -250,12 +188,14 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 6: - feat.eax |= HV_MSR_VP_RUNTIME_AVAILABLE; + feat->eax |= HV_MSR_VP_RUNTIME_AVAILABLE; + msr->idx = HV_X64_MSR_VP_RUNTIME; msr->write = 0; msr->available = 1; break; case 7: /* Read only */ + msr->idx = HV_X64_MSR_VP_RUNTIME; msr->write = 1; msr->write_val = 1; msr->available = 0; @@ -267,12 +207,14 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 9: - feat.eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; + feat->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE; + msr->idx = HV_X64_MSR_TIME_REF_COUNT; msr->write = 0; msr->available = 1; break; case 10: /* Read only */ + msr->idx = HV_X64_MSR_TIME_REF_COUNT; msr->write = 1; msr->write_val = 1; msr->available = 0; @@ -284,12 +226,14 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 12: - feat.eax |= HV_MSR_VP_INDEX_AVAILABLE; + feat->eax |= HV_MSR_VP_INDEX_AVAILABLE; + msr->idx = HV_X64_MSR_VP_INDEX; msr->write = 0; msr->available = 1; break; case 13: /* Read only */ + msr->idx = HV_X64_MSR_VP_INDEX; msr->write = 1; msr->write_val = 1; msr->available = 0; @@ -301,11 +245,13 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 15: - feat.eax |= HV_MSR_RESET_AVAILABLE; + feat->eax |= HV_MSR_RESET_AVAILABLE; + msr->idx = HV_X64_MSR_RESET; msr->write = 0; msr->available = 1; break; case 16: + msr->idx = HV_X64_MSR_RESET; msr->write = 1; msr->write_val = 0; msr->available = 1; @@ -317,11 +263,13 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 18: - feat.eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; + feat->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE; + msr->idx = HV_X64_MSR_REFERENCE_TSC; msr->write = 0; msr->available = 1; break; case 19: + msr->idx = HV_X64_MSR_REFERENCE_TSC; msr->write = 1; msr->write_val = 0; msr->available = 1; @@ -337,16 +285,18 @@ static void guest_test_msrs_access(void) * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2 * capability enabled and guest visible CPUID bit unset. */ - cap.cap = KVM_CAP_HYPERV_SYNIC2; - cap.args[0] = 0; - vcpu_enable_cap(vm, VCPU_ID, &cap); + msr->idx = HV_X64_MSR_EOM; + msr->write = 0; + msr->available = 0; break; case 22: - feat.eax |= HV_MSR_SYNIC_AVAILABLE; + feat->eax |= HV_MSR_SYNIC_AVAILABLE; + msr->idx = HV_X64_MSR_EOM; msr->write = 0; msr->available = 1; break; case 23: + msr->idx = HV_X64_MSR_EOM; msr->write = 1; msr->write_val = 0; msr->available = 1; @@ -358,23 +308,29 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 25: - feat.eax |= HV_MSR_SYNTIMER_AVAILABLE; + feat->eax |= HV_MSR_SYNTIMER_AVAILABLE; + msr->idx = HV_X64_MSR_STIMER0_CONFIG; msr->write = 0; msr->available = 1; break; case 26: + msr->idx = HV_X64_MSR_STIMER0_CONFIG; msr->write = 1; msr->write_val = 0; msr->available = 1; break; case 27: /* Direct mode test */ + msr->idx = HV_X64_MSR_STIMER0_CONFIG; msr->write = 1; msr->write_val = 1 << 12; msr->available = 0; break; case 28: - feat.edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; + feat->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE; + msr->idx = HV_X64_MSR_STIMER0_CONFIG; + msr->write = 1; + msr->write_val = 1 << 12; msr->available = 1; break; @@ -384,7 +340,8 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 30: - feat.eax |= HV_MSR_APIC_ACCESS_AVAILABLE; + feat->eax |= HV_MSR_APIC_ACCESS_AVAILABLE; + msr->idx = HV_X64_MSR_EOI; msr->write = 1; msr->write_val = 1; msr->available = 1; @@ -396,12 +353,14 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 32: - feat.eax |= HV_ACCESS_FREQUENCY_MSRS; + feat->eax |= HV_ACCESS_FREQUENCY_MSRS; + msr->idx = HV_X64_MSR_TSC_FREQUENCY; msr->write = 0; msr->available = 1; break; case 33: /* Read only */ + msr->idx = HV_X64_MSR_TSC_FREQUENCY; msr->write = 1; msr->write_val = 1; msr->available = 0; @@ -413,11 +372,13 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 35: - feat.eax |= HV_ACCESS_REENLIGHTENMENT; + feat->eax |= HV_ACCESS_REENLIGHTENMENT; + msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; msr->write = 0; msr->available = 1; break; case 36: + msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL; msr->write = 1; msr->write_val = 1; msr->available = 1; @@ -436,11 +397,13 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 39: - feat.edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + feat->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + msr->idx = HV_X64_MSR_CRASH_P0; msr->write = 0; msr->available = 1; break; case 40: + msr->idx = HV_X64_MSR_CRASH_P0; msr->write = 1; msr->write_val = 1; msr->available = 1; @@ -452,48 +415,44 @@ static void guest_test_msrs_access(void) msr->available = 0; break; case 42: - feat.edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; - dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; + dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + msr->idx = HV_X64_MSR_SYNDBG_STATUS; msr->write = 0; msr->available = 1; break; case 43: + msr->idx = HV_X64_MSR_SYNDBG_STATUS; msr->write = 1; msr->write_val = 0; msr->available = 1; break; case 44: - /* END */ - msr->idx = 0; - break; + kvm_vm_free(vm); + return; } - hv_set_cpuid(vm, best, &feat, &recomm, &dbg); + vcpu_set_cpuid(vcpu); + + memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent)); - if (msr->idx) - pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage, - msr->idx, msr->write ? "write" : "read"); - else - pr_debug("Stage %d: finish\n", stage); + pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage, + msr->idx, msr->write ? "write" : "read"); - r = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "unexpected exit reason: %u (%s)", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == 0, - "Unexpected stage: %ld (0 expected)\n", - uc.args[1]); - break; + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT_2(uc, "MSR = %lx, vector = %lx"); return; case UCALL_DONE: + break; + default: + TEST_FAIL("Unhandled ucall: %ld", uc.cmd); return; } @@ -504,54 +463,50 @@ static void guest_test_msrs_access(void) static void guest_test_hcalls_access(void) { + struct kvm_cpuid_entry2 *feat, *recomm, *dbg; + struct kvm_cpuid2 *prev_cpuid = NULL; + struct kvm_vcpu *vcpu; struct kvm_run *run; struct kvm_vm *vm; struct ucall uc; - int stage = 0, r; - struct kvm_cpuid_entry2 feat = { - .function = HYPERV_CPUID_FEATURES, - .eax = HV_MSR_HYPERCALL_AVAILABLE - }; - struct kvm_cpuid_entry2 recomm = { - .function = HYPERV_CPUID_ENLIGHTMENT_INFO - }; - struct kvm_cpuid_entry2 dbg = { - .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES - }; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, - .args = {1} - }; + int stage = 0; vm_vaddr_t hcall_page, hcall_params; struct hcall_data *hcall; - struct kvm_cpuid2 *best; while (true) { - vm = vm_create_default(VCPU_ID, 0, guest_hcall); + vm = vm_create_with_one_vcpu(&vcpu, guest_hcall); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + vcpu_init_descriptor_tables(vcpu); /* Hypercall input/output */ hcall_page = vm_vaddr_alloc_pages(vm, 2); - hcall = addr_gva2hva(vm, hcall_page); memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); hcall_params = vm_vaddr_alloc_page(vm); memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); + hcall = addr_gva2hva(vm, hcall_params); + + vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params); + vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1); - vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); - vcpu_enable_cap(vm, VCPU_ID, &cap); + if (!prev_cpuid) { + vcpu_reset_hv_cpuid(vcpu); - vcpu_set_hv_cpuid(vm, VCPU_ID); + prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent); + } else { + vcpu_init_cpuid(vcpu, prev_cpuid); + } - best = kvm_get_supported_hv_cpuid(); + feat = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES); + recomm = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO); + dbg = vcpu_get_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); - run = vcpu_state(vm, VCPU_ID); + run = vcpu->run; switch (stage) { case 0: + feat->eax |= HV_MSR_HYPERCALL_AVAILABLE; hcall->control = 0xdeadbeef; hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -561,7 +516,8 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 2: - feat.ebx |= HV_POST_MESSAGES; + feat->ebx |= HV_POST_MESSAGES; + hcall->control = HVCALL_POST_MESSAGE; hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; break; @@ -570,7 +526,8 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 4: - feat.ebx |= HV_SIGNAL_EVENTS; + feat->ebx |= HV_SIGNAL_EVENTS; + hcall->control = HVCALL_SIGNAL_EVENT; hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; break; @@ -579,11 +536,13 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE; break; case 6: - dbg.eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + dbg->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + hcall->control = HVCALL_RESET_DEBUG_SESSION; hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 7: - feat.ebx |= HV_DEBUGGING; + feat->ebx |= HV_DEBUGGING; + hcall->control = HVCALL_RESET_DEBUG_SESSION; hcall->expect = HV_STATUS_OPERATION_DENIED; break; @@ -592,7 +551,8 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 9: - recomm.eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + recomm->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE; hcall->expect = HV_STATUS_SUCCESS; break; case 10: @@ -600,7 +560,8 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 11: - recomm.eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; + recomm->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED; + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX; hcall->expect = HV_STATUS_SUCCESS; break; @@ -609,7 +570,8 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 13: - recomm.eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + recomm->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED; + hcall->control = HVCALL_SEND_IPI; hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT; break; case 14: @@ -623,7 +585,8 @@ static void guest_test_hcalls_access(void) hcall->expect = HV_STATUS_ACCESS_DENIED; break; case 16: - recomm.ebx = 0xfff; + recomm->ebx = 0xfff; + hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT; hcall->expect = HV_STATUS_SUCCESS; break; case 17: @@ -632,42 +595,35 @@ static void guest_test_hcalls_access(void) hcall->ud_expected = true; break; case 18: - feat.edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + feat->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE; + hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT; hcall->ud_expected = false; hcall->expect = HV_STATUS_SUCCESS; break; - case 19: - /* END */ - hcall->control = 0; - break; + kvm_vm_free(vm); + return; } - hv_set_cpuid(vm, best, &feat, &recomm, &dbg); + vcpu_set_cpuid(vcpu); + + memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent)); - if (hcall->control) - pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, - hcall->control); - else - pr_debug("Stage %d: finish\n", stage); + pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control); - r = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "unexpected exit reason: %u (%s)", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == 0, - "Unexpected stage: %ld (0 expected)\n", - uc.args[1]); - break; + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx"); return; case UCALL_DONE: + break; + default: + TEST_FAIL("Unhandled ucall: %ld", uc.cmd); return; } diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index 21f5ca9197da..a380ad7bb9b3 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -21,7 +21,6 @@ #include "svm_util.h" #include "hyperv.h" -#define VCPU_ID 1 #define L2_GUEST_STACK_SIZE 256 struct hv_enlightenments { @@ -42,11 +41,6 @@ struct hv_enlightenments { */ #define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31) -static inline void vmmcall(void) -{ - __asm__ __volatile__("vmmcall"); -} - void l2_guest_code(void) { GUEST_SYNC(3); @@ -127,33 +121,31 @@ int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; struct ucall uc; int stage; - if (!nested_svm_supported()) { - print_skip("Nested SVM not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_set_hv_cpuid(vm, VCPU_ID); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_set_hv_cpuid(vcpu); + run = vcpu->run; vcpu_alloc_svm(vm, &nested_gva); - vcpu_args_set(vm, VCPU_ID, 1, nested_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (stage = 1;; stage++) { - _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", stage, run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: break; diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c index 97731454f3f3..813ce282cf56 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c @@ -16,8 +16,6 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 0 - struct test_case { uint64_t kvmclock_base; int64_t realtime_offset; @@ -73,8 +71,7 @@ static void handle_sync(struct ucall *uc, struct kvm_clock_data *start, static void handle_abort(struct ucall *uc) { - TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], - __FILE__, uc->args[1]); + REPORT_GUEST_ASSERT(*uc); } static void setup_clock(struct kvm_vm *vm, struct test_case *test_case) @@ -105,29 +102,27 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case) vm_ioctl(vm, KVM_SET_CLOCK, &data); } -static void enter_guest(struct kvm_vm *vm) +static void enter_guest(struct kvm_vcpu *vcpu) { struct kvm_clock_data start, end; - struct kvm_run *run; + struct kvm_run *run = vcpu->run; + struct kvm_vm *vm = vcpu->vm; struct ucall uc; - int i, r; - - run = vcpu_state(vm, VCPU_ID); + int i; for (i = 0; i < ARRAY_SIZE(test_cases); i++) { setup_clock(vm, &test_cases[i]); vm_ioctl(vm, KVM_GET_CLOCK, &start); - r = _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); vm_ioctl(vm, KVM_GET_CLOCK, &end); - TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "unexpected exit reason: %u (%s)", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: handle_sync(&uc, &start, &end); break; @@ -178,26 +173,23 @@ out: int main(void) { + struct kvm_vcpu *vcpu; vm_vaddr_t pvti_gva; vm_paddr_t pvti_gpa; struct kvm_vm *vm; int flags; flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK); - if (!(flags & KVM_CLOCK_REALTIME)) { - print_skip("KVM_CLOCK_REALTIME not supported; flags: %x", - flags); - exit(KSFT_SKIP); - } + TEST_REQUIRE(flags & KVM_CLOCK_REALTIME); check_clocksource(); - vm = vm_create_default(VCPU_ID, 0, guest_main); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000); pvti_gpa = addr_gva2gpa(vm, pvti_gva); - vcpu_args_set(vm, VCPU_ID, 2, pvti_gpa, pvti_gva); + vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva); - enter_guest(vm); + enter_guest(vcpu); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index 04ed975662c9..619655c1a1f3 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -12,55 +12,6 @@ #include "kvm_util.h" #include "processor.h" -extern unsigned char rdmsr_start; -extern unsigned char rdmsr_end; - -static u64 do_rdmsr(u32 idx) -{ - u32 lo, hi; - - asm volatile("rdmsr_start: rdmsr;" - "rdmsr_end:" - : "=a"(lo), "=c"(hi) - : "c"(idx)); - - return (((u64) hi) << 32) | lo; -} - -extern unsigned char wrmsr_start; -extern unsigned char wrmsr_end; - -static void do_wrmsr(u32 idx, u64 val) -{ - u32 lo, hi; - - lo = val; - hi = val >> 32; - - asm volatile("wrmsr_start: wrmsr;" - "wrmsr_end:" - : : "a"(lo), "c"(idx), "d"(hi)); -} - -static int nr_gp; - -static void guest_gp_handler(struct ex_regs *regs) -{ - unsigned char *rip = (unsigned char *)regs->rip; - bool r, w; - - r = rip == &rdmsr_start; - w = rip == &wrmsr_start; - GUEST_ASSERT(r || w); - - nr_gp++; - - if (r) - regs->rip = (uint64_t)&rdmsr_end; - else - regs->rip = (uint64_t)&wrmsr_end; -} - struct msr_data { uint32_t idx; const char *name; @@ -89,14 +40,16 @@ static struct msr_data msrs_to_test[] = { static void test_msr(struct msr_data *msr) { + uint64_t ignored; + uint8_t vector; + PR_MSR(msr); - do_rdmsr(msr->idx); - GUEST_ASSERT(READ_ONCE(nr_gp) == 1); - nr_gp = 0; - do_wrmsr(msr->idx, 0); - GUEST_ASSERT(READ_ONCE(nr_gp) == 1); - nr_gp = 0; + vector = rdmsr_safe(msr->idx, &ignored); + GUEST_ASSERT_1(vector == GP_VECTOR, vector); + + vector = wrmsr_safe(msr->idx, 0); + GUEST_ASSERT_1(vector == GP_VECTOR, vector); } struct hcall_data { @@ -142,15 +95,6 @@ static void guest_main(void) GUEST_DONE(); } -static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid) -{ - struct kvm_cpuid_entry2 ent = {0}; - - ent.function = KVM_CPUID_FEATURES; - TEST_ASSERT(set_cpuid(cpuid, &ent), - "failed to clear KVM_CPUID_FEATURES leaf"); -} - static void pr_msr(struct ucall *uc) { struct msr_data *msr = (struct msr_data *)uc->args[0]; @@ -165,30 +109,18 @@ static void pr_hcall(struct ucall *uc) pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); } -static void handle_abort(struct ucall *uc) +static void enter_guest(struct kvm_vcpu *vcpu) { - TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], - __FILE__, uc->args[1]); -} - -#define VCPU_ID 0 - -static void enter_guest(struct kvm_vm *vm) -{ - struct kvm_run *run; + struct kvm_run *run = vcpu->run; struct ucall uc; - int r; - - run = vcpu_state(vm, VCPU_ID); while (true) { - r = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "unexpected exit reason: %u (%s)", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_PR_MSR: pr_msr(&uc); break; @@ -196,7 +128,7 @@ static void enter_guest(struct kvm_vm *vm) pr_hcall(&uc); break; case UCALL_ABORT: - handle_abort(&uc); + REPORT_GUEST_ASSERT_1(uc, "vector = %lu"); return; case UCALL_DONE: return; @@ -206,29 +138,20 @@ static void enter_guest(struct kvm_vm *vm) int main(void) { - struct kvm_enable_cap cap = {0}; - struct kvm_cpuid2 *best; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; - if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { - print_skip("KVM_CAP_ENFORCE_PV_FEATURE_CPUID not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)); - vm = vm_create_default(VCPU_ID, 0, guest_main); + vm = vm_create_with_one_vcpu(&vcpu, guest_main); - cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID; - cap.args[0] = 1; - vcpu_enable_cap(vm, VCPU_ID, &cap); + vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1); - best = kvm_get_supported_cpuid(); - clear_kvm_cpuid_features(best); - vcpu_set_cpuid(vm, VCPU_ID, best); + vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + vcpu_init_descriptor_tables(vcpu); - enter_guest(vm); + enter_guest(vcpu); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c new file mode 100644 index 000000000000..3cc4b86832fe --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * maximum APIC ID capability tests + * + * Copyright (C) 2022, Intel, Inc. + * + * Tests for getting/setting maximum APIC ID capability + */ + +#include "kvm_util.h" + +#define MAX_VCPU_ID 2 + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int ret; + + vm = vm_create_barebones(); + + /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */ + ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID); + + /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail"); + + /* Set KVM_CAP_MAX_VCPU_ID */ + vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID); + + + /* Try to set KVM_CAP_MAX_VCPU_ID again */ + ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1); + TEST_ASSERT(ret < 0, + "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail"); + + /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/ + ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID); + TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail"); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index 9f55ccd169a1..fb02581953a3 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -59,10 +59,10 @@ void test(void) kvm = open("/dev/kvm", O_RDWR); TEST_ASSERT(kvm != -1, "failed to open /dev/kvm"); - kvmvm = ioctl(kvm, KVM_CREATE_VM, 0); - TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed"); + kvmvm = __kvm_ioctl(kvm, KVM_CREATE_VM, NULL); + TEST_ASSERT(kvmvm > 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, kvmvm)); kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0); - TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed"); + TEST_ASSERT(kvmcpu != -1, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, kvmcpu)); run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, kvmcpu, 0); tc.kvmcpu = kvmcpu; @@ -93,15 +93,9 @@ int main(void) { int warnings_before, warnings_after; - if (!is_intel_cpu()) { - print_skip("Must be run on an Intel CPU"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(is_intel_cpu()); - if (vm_is_unrestricted_guest(NULL)) { - print_skip("Unrestricted guest must be disabled"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); warnings_before = get_warnings_count(); diff --git a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c b/tools/testing/selftests/kvm/x86_64/mmu_role_test.c deleted file mode 100644 index bdecd532f935..000000000000 --- a/tools/testing/selftests/kvm/x86_64/mmu_role_test.c +++ /dev/null @@ -1,147 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include "kvm_util.h" -#include "processor.h" - -#define VCPU_ID 1 - -#define MMIO_GPA 0x100000000ull - -static void guest_code(void) -{ - (void)READ_ONCE(*((uint64_t *)MMIO_GPA)); - (void)READ_ONCE(*((uint64_t *)MMIO_GPA)); - - GUEST_ASSERT(0); -} - -static void guest_pf_handler(struct ex_regs *regs) -{ - /* PFEC == RSVD | PRESENT (read, kernel). */ - GUEST_ASSERT(regs->error_code == 0x9); - GUEST_DONE(); -} - -static void mmu_role_test(u32 *cpuid_reg, u32 evil_cpuid_val) -{ - u32 good_cpuid_val = *cpuid_reg; - struct kvm_run *run; - struct kvm_vm *vm; - uint64_t cmd; - int r; - - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); - - /* Map 1gb page without a backing memlot. */ - __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G); - - r = _vcpu_run(vm, VCPU_ID); - - /* Guest access to the 1gb page should trigger MMIO. */ - TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r); - TEST_ASSERT(run->exit_reason == KVM_EXIT_MMIO, - "Unexpected exit reason: %u (%s), expected MMIO exit (1gb page w/o memslot)\n", - run->exit_reason, exit_reason_str(run->exit_reason)); - - TEST_ASSERT(run->mmio.len == 8, "Unexpected exit mmio size = %u", run->mmio.len); - - TEST_ASSERT(run->mmio.phys_addr == MMIO_GPA, - "Unexpected exit mmio address = 0x%llx", run->mmio.phys_addr); - - /* - * Effect the CPUID change for the guest and re-enter the guest. Its - * access should now #PF due to the PAGE_SIZE bit being reserved or - * the resulting GPA being invalid. Note, kvm_get_supported_cpuid() - * returns the struct that contains the entry being modified. Eww. - */ - *cpuid_reg = evil_cpuid_val; - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - - /* - * Add a dummy memslot to coerce KVM into bumping the MMIO generation. - * KVM does not "officially" support mucking with CPUID after KVM_RUN, - * and will incorrectly reuse MMIO SPTEs. Don't delete the memslot! - * KVM x86 zaps all shadow pages on memslot deletion. - */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - MMIO_GPA << 1, 10, 1, 0); - - /* Set up a #PF handler to eat the RSVD #PF and signal all done! */ - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, PF_VECTOR, guest_pf_handler); - - r = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(r == 0, "vcpu_run failed: %d\n", r); - - cmd = get_ucall(vm, VCPU_ID, NULL); - TEST_ASSERT(cmd == UCALL_DONE, - "Unexpected guest exit, exit_reason=%s, ucall.cmd = %lu\n", - exit_reason_str(run->exit_reason), cmd); - - /* - * Restore the happy CPUID value for the next test. Yes, changes are - * indeed persistent across VM destruction. - */ - *cpuid_reg = good_cpuid_val; - - kvm_vm_free(vm); -} - -int main(int argc, char *argv[]) -{ - struct kvm_cpuid_entry2 *entry; - int opt; - - /* - * All tests are opt-in because TDP doesn't play nice with reserved #PF - * in the GVA->GPA translation. The hardware page walker doesn't let - * software change GBPAGES or MAXPHYADDR, and KVM doesn't manually walk - * the GVA on fault for performance reasons. - */ - bool do_gbpages = false; - bool do_maxphyaddr = false; - - setbuf(stdout, NULL); - - while ((opt = getopt(argc, argv, "gm")) != -1) { - switch (opt) { - case 'g': - do_gbpages = true; - break; - case 'm': - do_maxphyaddr = true; - break; - case 'h': - default: - printf("usage: %s [-g (GBPAGES)] [-m (MAXPHYADDR)]\n", argv[0]); - break; - } - } - - if (!do_gbpages && !do_maxphyaddr) { - print_skip("No sub-tests selected"); - return 0; - } - - entry = kvm_get_supported_cpuid_entry(0x80000001); - if (!(entry->edx & CPUID_GBPAGES)) { - print_skip("1gb hugepages not supported"); - return 0; - } - - if (do_gbpages) { - pr_info("Test MMIO after toggling CPUID.GBPAGES\n\n"); - mmu_role_test(&entry->edx, entry->edx & ~CPUID_GBPAGES); - } - - if (do_maxphyaddr) { - pr_info("Test MMIO after changing CPUID.MAXPHYADDR\n\n"); - entry = kvm_get_supported_cpuid_entry(0x80000008); - mmu_role_test(&entry->eax, (entry->eax & ~0xff) | 0x20); - } - - return 0; -} diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c new file mode 100644 index 000000000000..016070cad36e --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "kvm_util.h" +#include "processor.h" + +#define CPUID_MWAIT (1u << 3) + +enum monitor_mwait_testcases { + MWAIT_QUIRK_DISABLED = BIT(0), + MISC_ENABLES_QUIRK_DISABLED = BIT(1), + MWAIT_DISABLED = BIT(2), +}; + +static void guest_monitor_wait(int testcase) +{ + /* + * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, + * in all other scenarios KVM should emulate them as nops. + */ + bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) && + (testcase & MWAIT_DISABLED); + u8 vector; + + GUEST_SYNC(testcase); + + /* + * Arbitrarily MONITOR this function, SVM performs fault checks before + * intercept checks, so the inputs for MONITOR and MWAIT must be valid. + */ + vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0)); + if (fault_wanted) + GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); + else + GUEST_ASSERT_2(!vector, testcase, vector); + + vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); + if (fault_wanted) + GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); + else + GUEST_ASSERT_2(!vector, testcase, vector); +} + +static void guest_code(void) +{ + guest_monitor_wait(MWAIT_DISABLED); + + guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); + + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); + + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); + guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + uint64_t disabled_quirks; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + int testcase; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + + run = vcpu->run; + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + while (1) { + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + testcase = uc.args[1]; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld"); + goto done; + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + goto done; + } + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + if (testcase & MISC_ENABLES_QUIRK_DISABLED) + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + /* + * If the MISC_ENABLES quirk (KVM neglects to update CPUID to + * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT + * bit in MISC_ENABLES accordingly. If the quirk is enabled, + * the only valid configuration is MWAIT disabled, as CPUID + * can't be manually changed after running the vCPU. + */ + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { + TEST_ASSERT(testcase & MWAIT_DISABLED, + "Can't toggle CPUID features after running vCPU"); + continue; + } + + vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, + (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); + } + +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c new file mode 100644 index 000000000000..cc6421716400 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * tools/testing/selftests/kvm/nx_huge_page_test.c + * + * Usage: to be run via nx_huge_page_test.sh, which does the necessary + * environment setup and teardown + * + * Copyright (C) 2022, Google LLC. + */ + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <stdint.h> +#include <time.h> + +#include <test_util.h> +#include "kvm_util.h" +#include "processor.h" + +#define HPAGE_SLOT 10 +#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */ +#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */ +#define PAGES_PER_2MB_HUGE_PAGE 512 +#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE) + +/* + * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is + * being run without it. + */ +#define MAGIC_TOKEN 887563923 + +/* + * x86 opcode for the return instruction. Used to call into, and then + * immediately return from, memory backed with hugepages. + */ +#define RETURN_OPCODE 0xC3 + +/* Call the specified memory address. */ +static void guest_do_CALL(uint64_t target) +{ + ((void (*)(void)) target)(); +} + +/* + * Exit the VM after each memory access so that the userspace component of the + * test can make assertions about the pages backing the VM. + * + * See the below for an explanation of how each access should affect the + * backing mappings. + */ +void guest_code(void) +{ + uint64_t hpage_1 = HPAGE_GVA; + uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512); + uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512); + + READ_ONCE(*(uint64_t *)hpage_1); + GUEST_SYNC(1); + + READ_ONCE(*(uint64_t *)hpage_2); + GUEST_SYNC(2); + + guest_do_CALL(hpage_1); + GUEST_SYNC(3); + + guest_do_CALL(hpage_3); + GUEST_SYNC(4); + + READ_ONCE(*(uint64_t *)hpage_1); + GUEST_SYNC(5); + + READ_ONCE(*(uint64_t *)hpage_3); + GUEST_SYNC(6); +} + +static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m) +{ + int actual_pages_2m; + + actual_pages_2m = vm_get_stat(vm, "pages_2m"); + + TEST_ASSERT(actual_pages_2m == expected_pages_2m, + "Unexpected 2m page count. Expected %d, got %d", + expected_pages_2m, actual_pages_2m); +} + +static void check_split_count(struct kvm_vm *vm, int expected_splits) +{ + int actual_splits; + + actual_splits = vm_get_stat(vm, "nx_lpage_splits"); + + TEST_ASSERT(actual_splits == expected_splits, + "Unexpected NX huge page split count. Expected %d, got %d", + expected_splits, actual_splits); +} + +static void wait_for_reclaim(int reclaim_period_ms) +{ + long reclaim_wait_ms; + struct timespec ts; + + reclaim_wait_ms = reclaim_period_ms * 5; + ts.tv_sec = reclaim_wait_ms / 1000; + ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000; + nanosleep(&ts, NULL); +} + +void run_test(int reclaim_period_ms, bool disable_nx_huge_pages, + bool reboot_permissions) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + void *hva; + int r; + + vm = vm_create(1); + + if (disable_nx_huge_pages) { + /* + * Cannot run the test without NX huge pages if the kernel + * does not support it. + */ + if (!kvm_check_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES)) + return; + + r = __vm_disable_nx_huge_pages(vm); + if (reboot_permissions) { + TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions"); + } else { + TEST_ASSERT(r == -1 && errno == EPERM, + "This process should not have permission to disable NX huge pages"); + return; + } + } + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB, + HPAGE_GPA, HPAGE_SLOT, + HPAGE_SLOT_NPAGES, 0); + + virt_map(vm, HPAGE_GVA, HPAGE_GPA, HPAGE_SLOT_NPAGES); + + hva = addr_gpa2hva(vm, HPAGE_GPA); + memset(hva, RETURN_OPCODE, HPAGE_SLOT_NPAGES * PAGE_SIZE); + + check_2m_page_count(vm, 0); + check_split_count(vm, 0); + + /* + * The guest code will first read from the first hugepage, resulting + * in a huge page mapping being created. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, 1); + check_split_count(vm, 0); + + /* + * Then the guest code will read from the second hugepage, resulting + * in another huge page mapping being created. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, 2); + check_split_count(vm, 0); + + /* + * Next, the guest will execute from the first huge page, causing it + * to be remapped at 4k. + * + * If NX huge pages are disabled, this should have no effect. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1); + check_split_count(vm, disable_nx_huge_pages ? 0 : 1); + + /* + * Executing from the third huge page (previously unaccessed) will + * cause part to be mapped at 4k. + * + * If NX huge pages are disabled, it should be mapped at 2M. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); + check_split_count(vm, disable_nx_huge_pages ? 0 : 2); + + /* Reading from the first huge page again should have no effect. */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); + check_split_count(vm, disable_nx_huge_pages ? 0 : 2); + + /* Give recovery thread time to run. */ + wait_for_reclaim(reclaim_period_ms); + + /* + * Now that the reclaimer has run, all the split pages should be gone. + * + * If NX huge pages are disabled, the relaimer will not run, so + * nothing should change from here on. + */ + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1); + check_split_count(vm, 0); + + /* + * The 4k mapping on hpage 3 should have been removed, so check that + * reading from it causes a huge page mapping to be installed. + */ + vcpu_run(vcpu); + check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2); + check_split_count(vm, 0); + + kvm_vm_free(vm); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-p period_ms] [-t token]\n", name); + puts(""); + printf(" -p: The NX reclaim period in miliseconds.\n"); + printf(" -t: The magic token to indicate environment setup is done.\n"); + printf(" -r: The test has reboot permissions and can disable NX huge pages.\n"); + puts(""); + exit(0); +} + +int main(int argc, char **argv) +{ + int reclaim_period_ms = 0, token = 0, opt; + bool reboot_permissions = false; + + while ((opt = getopt(argc, argv, "hp:t:r")) != -1) { + switch (opt) { + case 'p': + reclaim_period_ms = atoi(optarg); + break; + case 't': + token = atoi(optarg); + break; + case 'r': + reboot_permissions = true; + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + if (token != MAGIC_TOKEN) { + print_skip("This test must be run with the magic token %d.\n" + "This is done by nx_huge_pages_test.sh, which\n" + "also handles environment setup for the test.", + MAGIC_TOKEN); + exit(KSFT_SKIP); + } + + if (!reclaim_period_ms) { + print_skip("The NX reclaim period must be specified and non-zero"); + exit(KSFT_SKIP); + } + + run_test(reclaim_period_ms, false, reboot_permissions); + run_test(reclaim_period_ms, true, reboot_permissions); + + return 0; +} + diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh new file mode 100755 index 000000000000..0560149e66ed --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only */ +# +# Wrapper script which performs setup and cleanup for nx_huge_pages_test. +# Makes use of root privileges to set up huge pages and KVM module parameters. +# +# tools/testing/selftests/kvm/nx_huge_page_test.sh +# Copyright (C) 2022, Google LLC. + +set -e + +NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages) +NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio) +NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms) +HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages) + +set +e + +function sudo_echo () { + echo "$1" | sudo tee -a "$2" > /dev/null +} + +NXECUTABLE="$(dirname $0)/nx_huge_pages_test" + +sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4 + +( + set -e + + sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages + sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio + sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms + sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + + # Test with reboot permissions + if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then + echo Running test with CAP_SYS_BOOT enabled + $NXECUTABLE -t 887563923 -p 100 -r + test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE + else + echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled + fi + + # Test without reboot permissions + if [ $(whoami) != "root" ] ; then + echo Running test with CAP_SYS_BOOT disabled + $NXECUTABLE -t 887563923 -p 100 + else + echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled + fi +) +RET=$? + +sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages +sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio +sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms +sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages + +exit $RET diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index 1e89688cbbbf..76417c7d687b 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -21,7 +21,6 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 0 #define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00 static void guest_code(void) @@ -35,28 +34,18 @@ static void guest_code(void) } } -static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable) +static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu) { - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MSR_PLATFORM_INFO; - cap.flags = 0; - cap.args[0] = (int)enable; - vm_enable_cap(vm, &cap); -} - -static void test_msr_platform_info_enabled(struct kvm_vm *vm) -{ - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; struct ucall uc; - set_msr_platform_info_enabled(vm, true); - vcpu_run(vm, VCPU_ID); + vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Exit_reason other than KVM_EXIT_IO: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - get_ucall(vm, VCPU_ID, &uc); + get_ucall(vcpu, &uc); TEST_ASSERT(uc.cmd == UCALL_SYNC, "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd); TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) == @@ -65,12 +54,12 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm) MSR_PLATFORM_INFO_MAX_TURBO_RATIO); } -static void test_msr_platform_info_disabled(struct kvm_vm *vm) +static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; - set_msr_platform_info_enabled(vm, false); - vcpu_run(vm, VCPU_ID); + vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN, "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n", run->exit_reason, @@ -79,27 +68,23 @@ static void test_msr_platform_info_disabled(struct kvm_vm *vm) int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; - int rv; uint64_t msr_platform_info; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO); - if (!rv) { - print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO)); - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); - msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO); - vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, - msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); - test_msr_platform_info_enabled(vm); - test_msr_platform_info_disabled(vm); - vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info); + msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO); + vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, + msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO); + test_msr_platform_info_enabled(vcpu); + test_msr_platform_info_disabled(vcpu); + vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 93d77574b255..ea4e259a1e2e 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -49,7 +49,6 @@ union cpuid10_ebx { /* Oddly, this isn't in perf_event.h. */ #define ARCH_PERFMON_BRANCHES_RETIRED 5 -#define VCPU_ID 0 #define NUM_BRANCHES 42 /* @@ -173,17 +172,17 @@ static void amd_guest_code(void) * Run the VM to the next GUEST_SYNC(value), and return the value passed * to the sync. Any other exit from the guest is fatal. */ -static uint64_t run_vm_to_sync(struct kvm_vm *vm) +static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - get_ucall(vm, VCPU_ID, &uc); + get_ucall(vcpu, &uc); TEST_ASSERT(uc.cmd == UCALL_SYNC, "Received ucall other than UCALL_SYNC: %lu", uc.cmd); return uc.args[1]; @@ -197,13 +196,13 @@ static uint64_t run_vm_to_sync(struct kvm_vm *vm) * a sanity check and then GUEST_SYNC(success). In the case of failure, * the behavior of the guest on resumption is undefined. */ -static bool sanity_check_pmu(struct kvm_vm *vm) +static bool sanity_check_pmu(struct kvm_vcpu *vcpu) { bool success; - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); - success = run_vm_to_sync(vm); - vm_install_exception_handler(vm, GP_VECTOR, NULL); + vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); + success = run_vcpu_to_sync(vcpu); + vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL); return success; } @@ -264,9 +263,9 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, return f; } -static void test_without_filter(struct kvm_vm *vm) +static void test_without_filter(struct kvm_vcpu *vcpu) { - uint64_t count = run_vm_to_sync(vm); + uint64_t count = run_vcpu_to_sync(vcpu); if (count != NUM_BRANCHES) pr_info("%s: Branch instructions retired = %lu (expected %u)\n", @@ -274,21 +273,21 @@ static void test_without_filter(struct kvm_vm *vm) TEST_ASSERT(count, "Allowed PMU event is not counting"); } -static uint64_t test_with_filter(struct kvm_vm *vm, +static uint64_t test_with_filter(struct kvm_vcpu *vcpu, struct kvm_pmu_event_filter *f) { - vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f); - return run_vm_to_sync(vm); + vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); + return run_vcpu_to_sync(vcpu); } -static void test_amd_deny_list(struct kvm_vm *vm) +static void test_amd_deny_list(struct kvm_vcpu *vcpu) { uint64_t event = EVENT(0x1C2, 0); struct kvm_pmu_event_filter *f; uint64_t count; f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY); - count = test_with_filter(vm, f); + count = test_with_filter(vcpu, f); free(f); if (count != NUM_BRANCHES) @@ -297,10 +296,10 @@ static void test_amd_deny_list(struct kvm_vm *vm) TEST_ASSERT(count, "Allowed PMU event is not counting"); } -static void test_member_deny_list(struct kvm_vm *vm) +static void test_member_deny_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); - uint64_t count = test_with_filter(vm, f); + uint64_t count = test_with_filter(vcpu, f); free(f); if (count) @@ -309,10 +308,10 @@ static void test_member_deny_list(struct kvm_vm *vm) TEST_ASSERT(!count, "Disallowed PMU Event is counting"); } -static void test_member_allow_list(struct kvm_vm *vm) +static void test_member_allow_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); - uint64_t count = test_with_filter(vm, f); + uint64_t count = test_with_filter(vcpu, f); free(f); if (count != NUM_BRANCHES) @@ -321,14 +320,14 @@ static void test_member_allow_list(struct kvm_vm *vm) TEST_ASSERT(count, "Allowed PMU event is not counting"); } -static void test_not_member_deny_list(struct kvm_vm *vm) +static void test_not_member_deny_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); uint64_t count; remove_event(f, INTEL_BR_RETIRED); remove_event(f, AMD_ZEN_BR_RETIRED); - count = test_with_filter(vm, f); + count = test_with_filter(vcpu, f); free(f); if (count != NUM_BRANCHES) pr_info("%s: Branch instructions retired = %lu (expected %u)\n", @@ -336,14 +335,14 @@ static void test_not_member_deny_list(struct kvm_vm *vm) TEST_ASSERT(count, "Allowed PMU event is not counting"); } -static void test_not_member_allow_list(struct kvm_vm *vm) +static void test_not_member_allow_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); uint64_t count; remove_event(f, INTEL_BR_RETIRED); remove_event(f, AMD_ZEN_BR_RETIRED); - count = test_with_filter(vm, f); + count = test_with_filter(vcpu, f); free(f); if (count) pr_info("%s: Branch instructions retired = %lu (expected 0)\n", @@ -358,25 +357,23 @@ static void test_not_member_allow_list(struct kvm_vm *vm) */ static void test_pmu_config_disable(void (*guest_code)(void)) { + struct kvm_vcpu *vcpu; int r; struct kvm_vm *vm; - struct kvm_enable_cap cap = { 0 }; r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY); if (!(r & KVM_PMU_CAP_DISABLE)) return; - vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES); + vm = vm_create(1); - cap.cap = KVM_CAP_PMU_CAPABILITY; - cap.args[0] = KVM_PMU_CAP_DISABLE; - TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE."); + vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE); - vm_vcpu_add_default(vm, VCPU_ID, guest_code); + vcpu = vm_vcpu_add(vm, 0, guest_code); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); - TEST_ASSERT(!sanity_check_pmu(vm), + TEST_ASSERT(!sanity_check_pmu(vcpu), "Guest should not be able to use disabled PMU."); kvm_vm_free(vm); @@ -387,7 +384,7 @@ static void test_pmu_config_disable(void (*guest_code)(void)) * counter per logical processor, an EBX bit vector of length greater * than 5, and EBX[5] clear. */ -static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry) +static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry) { union cpuid10_eax eax = { .full = entry->eax }; union cpuid10_ebx ebx = { .full = entry->ebx }; @@ -403,10 +400,10 @@ static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry) */ static bool use_intel_pmu(void) { - struct kvm_cpuid_entry2 *entry; + const struct kvm_cpuid_entry2 *entry; - entry = kvm_get_supported_cpuid_index(0xa, 0); - return is_intel_cpu() && entry && check_intel_pmu_leaf(entry); + entry = kvm_get_supported_cpuid_entry(0xa); + return is_intel_cpu() && check_intel_pmu_leaf(entry); } static bool is_zen1(uint32_t eax) @@ -435,10 +432,10 @@ static bool is_zen3(uint32_t eax) */ static bool use_amd_pmu(void) { - struct kvm_cpuid_entry2 *entry; + const struct kvm_cpuid_entry2 *entry; - entry = kvm_get_supported_cpuid_index(1, 0); - return is_amd_cpu() && entry && + entry = kvm_get_supported_cpuid_entry(1); + return is_amd_cpu() && (is_zen1(entry->eax) || is_zen2(entry->eax) || is_zen3(entry->eax)); @@ -446,47 +443,33 @@ static bool use_amd_pmu(void) int main(int argc, char *argv[]) { - void (*guest_code)(void) = NULL; + void (*guest_code)(void); + struct kvm_vcpu *vcpu; struct kvm_vm *vm; - int r; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER); - if (!r) { - print_skip("KVM_CAP_PMU_EVENT_FILTER not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER)); - if (use_intel_pmu()) - guest_code = intel_guest_code; - else if (use_amd_pmu()) - guest_code = amd_guest_code; + TEST_REQUIRE(use_intel_pmu() || use_amd_pmu()); + guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code; - if (!guest_code) { - print_skip("Don't know how to test this guest PMU"); - exit(KSFT_SKIP); - } - - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); - if (!sanity_check_pmu(vm)) { - print_skip("Guest PMU is not functional"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(sanity_check_pmu(vcpu)); if (use_amd_pmu()) - test_amd_deny_list(vm); + test_amd_deny_list(vcpu); - test_without_filter(vm); - test_member_deny_list(vm); - test_member_allow_list(vm); - test_not_member_deny_list(vm); - test_not_member_allow_list(vm); + test_without_filter(vcpu); + test_member_deny_list(vcpu); + test_member_allow_list(vcpu); + test_not_member_deny_list(vcpu); + test_not_member_allow_list(vcpu); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index ae76436af0cc..b25d7556b638 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -16,10 +16,6 @@ #include "processor.h" #include "apic.h" -#define N_VCPU 2 -#define VCPU_ID0 0 -#define VCPU_ID1 1 - static void guest_bsp_vcpu(void *arg) { GUEST_SYNC(1); @@ -38,31 +34,30 @@ static void guest_not_bsp_vcpu(void *arg) GUEST_DONE(); } -static void test_set_boot_busy(struct kvm_vm *vm) +static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg) { - int res; + int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID, + (void *)(unsigned long)vcpu->id); - res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0); - TEST_ASSERT(res == -1 && errno == EBUSY, - "KVM_SET_BOOT_CPU_ID set while running vm"); + TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg); } -static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +static void run_vcpu(struct kvm_vcpu *vcpu) { struct ucall uc; int stage; for (stage = 0; stage < 2; stage++) { - vcpu_run(vm, vcpuid); + vcpu_run(vcpu); - switch (get_ucall(vm, vcpuid, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", stage + 1, (ulong)uc.args[1]); - test_set_boot_busy(vm); + test_set_bsp_busy(vcpu, "while running vm"); break; case UCALL_DONE: TEST_ASSERT(stage == 1, @@ -70,91 +65,67 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid) stage); break; case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx", - (const char *)uc.args[0], __FILE__, - uc.args[1], uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); default: TEST_ASSERT(false, "Unexpected exit: %s", - exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + exit_reason_str(vcpu->run->exit_reason)); } } } -static struct kvm_vm *create_vm(void) +static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id, + struct kvm_vcpu *vcpus[]) { struct kvm_vm *vm; - uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2; - uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages; + uint32_t i; - pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages); - vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR); + vm = vm_create(nr_vcpus); - kvm_vm_elf_load(vm, program_invocation_name); - vm_create_irqchip(vm); + vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id); + for (i = 0; i < nr_vcpus; i++) + vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu : + guest_not_bsp_vcpu); return vm; } -static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code) -{ - if (bsp_code) - vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu); - else - vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu); -} - -static void run_vm_bsp(uint32_t bsp_vcpu) +static void run_vm_bsp(uint32_t bsp_vcpu_id) { + struct kvm_vcpu *vcpus[2]; struct kvm_vm *vm; - bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1; - vm = create_vm(); + vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus); - if (is_bsp_vcpu1) - vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); - - add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1); - add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1); - - run_vcpu(vm, VCPU_ID0); - run_vcpu(vm, VCPU_ID1); + run_vcpu(vcpus[0]); + run_vcpu(vcpus[1]); kvm_vm_free(vm); } static void check_set_bsp_busy(void) { + struct kvm_vcpu *vcpus[2]; struct kvm_vm *vm; - int res; - vm = create_vm(); + vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus); - add_x86_vcpu(vm, VCPU_ID0, true); - add_x86_vcpu(vm, VCPU_ID1, false); + test_set_bsp_busy(vcpus[1], "after adding vcpu"); - res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); - TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu"); + run_vcpu(vcpus[0]); + run_vcpu(vcpus[1]); - run_vcpu(vm, VCPU_ID0); - run_vcpu(vm, VCPU_ID1); - - res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1); - TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu"); + test_set_bsp_busy(vcpus[1], "to a terminated vcpu"); kvm_vm_free(vm); } int main(int argc, char *argv[]) { - if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) { - print_skip("set_boot_cpu_id not available"); - return 0; - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)); - run_vm_bsp(VCPU_ID0); - run_vm_bsp(VCPU_ID1); - run_vm_bsp(VCPU_ID0); + run_vm_bsp(0); + run_vm_bsp(1); + run_vm_bsp(0); check_set_bsp_busy(); } diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index 318be0bf77ab..2bb08bf2125d 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -22,9 +22,7 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 5 - -static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig, +static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig, uint64_t feature_bit) { struct kvm_sregs sregs; @@ -37,44 +35,40 @@ static void test_cr4_feature_bit(struct kvm_vm *vm, struct kvm_sregs *orig, memcpy(&sregs, orig, sizeof(sregs)); sregs.cr4 |= feature_bit; - rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit); /* Sanity check that KVM didn't change anything. */ - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs"); } -static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm) +static uint64_t calc_supported_cr4_feature_bits(void) { - struct kvm_cpuid_entry2 *cpuid_1, *cpuid_7; uint64_t cr4; - cpuid_1 = kvm_get_supported_cpuid_entry(1); - cpuid_7 = kvm_get_supported_cpuid_entry(7); - cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT; - if (cpuid_7->ecx & CPUID_UMIP) + if (kvm_cpu_has(X86_FEATURE_UMIP)) cr4 |= X86_CR4_UMIP; - if (cpuid_7->ecx & CPUID_LA57) + if (kvm_cpu_has(X86_FEATURE_LA57)) cr4 |= X86_CR4_LA57; - if (cpuid_1->ecx & CPUID_VMX) + if (kvm_cpu_has(X86_FEATURE_VMX)) cr4 |= X86_CR4_VMXE; - if (cpuid_1->ecx & CPUID_SMX) + if (kvm_cpu_has(X86_FEATURE_SMX)) cr4 |= X86_CR4_SMXE; - if (cpuid_7->ebx & CPUID_FSGSBASE) + if (kvm_cpu_has(X86_FEATURE_FSGSBASE)) cr4 |= X86_CR4_FSGSBASE; - if (cpuid_1->ecx & CPUID_PCID) + if (kvm_cpu_has(X86_FEATURE_PCID)) cr4 |= X86_CR4_PCIDE; - if (cpuid_1->ecx & CPUID_XSAVE) + if (kvm_cpu_has(X86_FEATURE_XSAVE)) cr4 |= X86_CR4_OSXSAVE; - if (cpuid_7->ebx & CPUID_SMEP) + if (kvm_cpu_has(X86_FEATURE_SMEP)) cr4 |= X86_CR4_SMEP; - if (cpuid_7->ebx & CPUID_SMAP) + if (kvm_cpu_has(X86_FEATURE_SMAP)) cr4 |= X86_CR4_SMAP; - if (cpuid_7->ecx & CPUID_PKU) + if (kvm_cpu_has(X86_FEATURE_PKU)) cr4 |= X86_CR4_PKE; return cr4; @@ -83,6 +77,7 @@ static uint64_t calc_cr4_feature_bits(struct kvm_vm *vm) int main(int argc, char *argv[]) { struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; uint64_t cr4; int rc; @@ -95,44 +90,44 @@ int main(int argc, char *argv[]) * use it to verify all supported CR4 bits can be set prior to defining * the vCPU model, i.e. without doing KVM_SET_CPUID2. */ - vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); + vm = vm_create_barebones(); + vcpu = __vm_vcpu_add(vm, 0); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 |= calc_cr4_feature_bits(vm); + sregs.cr4 |= calc_supported_cr4_feature_bits(); cr4 = sregs.cr4; - rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)", sregs.cr4, cr4); /* Verify all unsupported features are rejected by KVM. */ - test_cr4_feature_bit(vm, &sregs, X86_CR4_UMIP); - test_cr4_feature_bit(vm, &sregs, X86_CR4_LA57); - test_cr4_feature_bit(vm, &sregs, X86_CR4_VMXE); - test_cr4_feature_bit(vm, &sregs, X86_CR4_SMXE); - test_cr4_feature_bit(vm, &sregs, X86_CR4_FSGSBASE); - test_cr4_feature_bit(vm, &sregs, X86_CR4_PCIDE); - test_cr4_feature_bit(vm, &sregs, X86_CR4_OSXSAVE); - test_cr4_feature_bit(vm, &sregs, X86_CR4_SMEP); - test_cr4_feature_bit(vm, &sregs, X86_CR4_SMAP); - test_cr4_feature_bit(vm, &sregs, X86_CR4_PKE); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP); + test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE); kvm_vm_free(vm); /* Create a "real" VM and verify APIC_BASE can be set. */ - vm = vm_create_default(VCPU_ID, 0, NULL); + vm = vm_create_with_one_vcpu(&vcpu, NULL); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs.apic_base = 1 << 10; - rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", sregs.apic_base); sregs.apic_base = 1 << 11; - rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + rc = _vcpu_sregs_set(vcpu, &sregs); TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", sregs.apic_base); diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index d1dc1acf997c..c7ef97561038 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -12,7 +12,6 @@ #include "processor.h" #include "svm_util.h" #include "kselftest.h" -#include "../lib/kvm_util_internal.h" #define SEV_POLICY_ES 0b100 @@ -54,10 +53,10 @@ static struct kvm_vm *sev_vm_create(bool es) struct kvm_sev_launch_start start = { 0 }; int i; - vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + vm = vm_create_barebones(); sev_ioctl(vm->fd, es ? KVM_SEV_ES_INIT : KVM_SEV_INIT, NULL); for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) - vm_vcpu_add(vm, i); + __vm_vcpu_add(vm, i); if (es) start.policy |= SEV_POLICY_ES; sev_ioctl(vm->fd, KVM_SEV_LAUNCH_START, &start); @@ -71,32 +70,27 @@ static struct kvm_vm *aux_vm_create(bool with_vcpus) struct kvm_vm *vm; int i; - vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + vm = vm_create_barebones(); if (!with_vcpus) return vm; for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) - vm_vcpu_add(vm, i); + __vm_vcpu_add(vm, i); return vm; } -static int __sev_migrate_from(int dst_fd, int src_fd) +static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) { - struct kvm_enable_cap cap = { - .cap = KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, - .args = { src_fd } - }; - - return ioctl(dst_fd, KVM_ENABLE_CAP, &cap); + return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd); } -static void sev_migrate_from(int dst_fd, int src_fd) +static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src) { int ret; - ret = __sev_migrate_from(dst_fd, src_fd); + ret = __sev_migrate_from(dst, src); TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d\n", ret, errno); } @@ -111,13 +105,13 @@ static void test_sev_migrate_from(bool es) dst_vms[i] = aux_vm_create(true); /* Initial migration from the src to the first dst. */ - sev_migrate_from(dst_vms[0]->fd, src_vm->fd); + sev_migrate_from(dst_vms[0], src_vm); for (i = 1; i < NR_MIGRATE_TEST_VMS; i++) - sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd); + sev_migrate_from(dst_vms[i], dst_vms[i - 1]); /* Migrate the guest back to the original VM. */ - ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); + ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]); TEST_ASSERT(ret == -1 && errno == EIO, "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, errno); @@ -129,7 +123,7 @@ static void test_sev_migrate_from(bool es) struct locking_thread_input { struct kvm_vm *vm; - int source_fds[NR_LOCK_TESTING_THREADS]; + struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS]; }; static void *locking_test_thread(void *arg) @@ -139,7 +133,7 @@ static void *locking_test_thread(void *arg) for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) { j = i % NR_LOCK_TESTING_THREADS; - __sev_migrate_from(input->vm->fd, input->source_fds[j]); + __sev_migrate_from(input->vm, input->source_vms[j]); } return NULL; @@ -153,11 +147,11 @@ static void test_sev_migrate_locking(void) for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) { input[i].vm = sev_vm_create(/* es= */ false); - input[0].source_fds[i] = input[i].vm->fd; + input[0].source_vms[i] = input[i].vm; } for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i) - memcpy(input[i].source_fds, input[0].source_fds, - sizeof(input[i].source_fds)); + memcpy(input[i].source_vms, input[0].source_vms, + sizeof(input[i].source_vms)); for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) pthread_create(&pt[i], NULL, locking_test_thread, &input[i]); @@ -174,9 +168,9 @@ static void test_sev_migrate_parameters(void) *sev_es_vm_no_vmsa; int ret; - vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + vm_no_vcpu = vm_create_barebones(); vm_no_sev = aux_vm_create(true); - ret = __sev_migrate_from(vm_no_vcpu->fd, vm_no_sev->fd); + ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev); TEST_ASSERT(ret == -1 && errno == EINVAL, "Migrations require SEV enabled. ret %d, errno: %d\n", ret, errno); @@ -186,29 +180,29 @@ static void test_sev_migrate_parameters(void) sev_vm = sev_vm_create(/* es= */ false); sev_es_vm = sev_vm_create(/* es= */ true); - sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + sev_es_vm_no_vmsa = vm_create_barebones(); sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL); - vm_vcpu_add(sev_es_vm_no_vmsa, 1); + __vm_vcpu_add(sev_es_vm_no_vmsa, 1); - ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd); + ret = __sev_migrate_from(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d\n", ret, errno); - ret = __sev_migrate_from(sev_es_vm->fd, sev_vm->fd); + ret = __sev_migrate_from(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d\n", ret, errno); - ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm->fd); + ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d\n", ret, errno); - ret = __sev_migrate_from(vm_no_vcpu->fd, sev_es_vm_no_vmsa->fd); + ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa); TEST_ASSERT( ret == -1 && errno == EINVAL, "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d\n", @@ -222,22 +216,17 @@ out: kvm_vm_free(vm_no_sev); } -static int __sev_mirror_create(int dst_fd, int src_fd) +static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) { - struct kvm_enable_cap cap = { - .cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, - .args = { src_fd } - }; - - return ioctl(dst_fd, KVM_ENABLE_CAP, &cap); + return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd); } -static void sev_mirror_create(int dst_fd, int src_fd) +static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src) { int ret; - ret = __sev_mirror_create(dst_fd, src_fd); + ret = __sev_mirror_create(dst, src); TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); } @@ -285,11 +274,11 @@ static void test_sev_mirror(bool es) src_vm = sev_vm_create(es); dst_vm = aux_vm_create(false); - sev_mirror_create(dst_vm->fd, src_vm->fd); + sev_mirror_create(dst_vm, src_vm); /* Check that we can complete creation of the mirror VM. */ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) - vm_vcpu_add(dst_vm, i); + __vm_vcpu_add(dst_vm, i); if (es) sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); @@ -309,18 +298,18 @@ static void test_sev_mirror_parameters(void) vm_with_vcpu = aux_vm_create(true); vm_no_vcpu = aux_vm_create(false); - ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd); + ret = __sev_mirror_create(sev_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, "Should not be able copy context to self. ret: %d, errno: %d\n", ret, errno); - ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd); + ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu); TEST_ASSERT(ret == -1 && errno == EINVAL, "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, errno); - ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd); + ret = __sev_mirror_create(vm_with_vcpu, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", @@ -330,13 +319,13 @@ static void test_sev_mirror_parameters(void) goto out; sev_es_vm = sev_vm_create(/* es= */ true); - ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd); + ret = __sev_mirror_create(sev_vm, sev_es_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", ret, errno); - ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd); + ret = __sev_mirror_create(sev_es_vm, sev_vm); TEST_ASSERT( ret == -1 && errno == EINVAL, "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n", @@ -364,16 +353,16 @@ static void test_sev_move_copy(void) dst2_mirror_vm = aux_vm_create(false); dst3_mirror_vm = aux_vm_create(false); - sev_mirror_create(mirror_vm->fd, sev_vm->fd); + sev_mirror_create(mirror_vm, sev_vm); - sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd); - sev_migrate_from(dst_vm->fd, sev_vm->fd); + sev_migrate_from(dst_mirror_vm, mirror_vm); + sev_migrate_from(dst_vm, sev_vm); - sev_migrate_from(dst2_vm->fd, dst_vm->fd); - sev_migrate_from(dst2_mirror_vm->fd, dst_mirror_vm->fd); + sev_migrate_from(dst2_vm, dst_vm); + sev_migrate_from(dst2_mirror_vm, dst_mirror_vm); - sev_migrate_from(dst3_mirror_vm->fd, dst2_mirror_vm->fd); - sev_migrate_from(dst3_vm->fd, dst2_vm->fd); + sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm); + sev_migrate_from(dst3_vm, dst2_vm); kvm_vm_free(dst_vm); kvm_vm_free(sev_vm); @@ -393,10 +382,10 @@ static void test_sev_move_copy(void) mirror_vm = aux_vm_create(false); dst_mirror_vm = aux_vm_create(false); - sev_mirror_create(mirror_vm->fd, sev_vm->fd); + sev_mirror_create(mirror_vm, sev_vm); - sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd); - sev_migrate_from(dst_vm->fd, sev_vm->fd); + sev_migrate_from(dst_mirror_vm, mirror_vm); + sev_migrate_from(dst_vm, sev_vm); kvm_vm_free(mirror_vm); kvm_vm_free(dst_mirror_vm); @@ -404,41 +393,25 @@ static void test_sev_move_copy(void) kvm_vm_free(sev_vm); } -#define X86_FEATURE_SEV (1 << 1) -#define X86_FEATURE_SEV_ES (1 << 3) - int main(int argc, char *argv[]) { - struct kvm_cpuid_entry2 *cpuid; + TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)); - if (!kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM) && - !kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { - print_skip("Capabilities not available"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); - cpuid = kvm_get_supported_cpuid_entry(0x80000000); - if (cpuid->eax < 0x8000001f) { - print_skip("AMD memory encryption not available"); - exit(KSFT_SKIP); - } - cpuid = kvm_get_supported_cpuid_entry(0x8000001f); - if (!(cpuid->eax & X86_FEATURE_SEV)) { - print_skip("AMD SEV not available"); - exit(KSFT_SKIP); - } - have_sev_es = !!(cpuid->eax & X86_FEATURE_SEV_ES); + have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES); - if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { + if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { test_sev_migrate_from(/* es= */ false); if (have_sev_es) test_sev_migrate_from(/* es= */ true); test_sev_migrate_locking(); test_sev_migrate_parameters(); - if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) + if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) test_sev_move_copy(); } - if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { + if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { test_sev_mirror(/* es= */ false); if (have_sev_es) test_sev_mirror(/* es= */ true); diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c index b4e0c860769e..1f136a81858e 100644 --- a/tools/testing/selftests/kvm/x86_64/smm_test.c +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c @@ -19,8 +19,6 @@ #include "vmx.h" #include "svm_util.h" -#define VCPU_ID 1 - #define SMRAM_SIZE 65536 #define SMRAM_MEMSLOT ((1 << 16) | 1) #define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE) @@ -85,7 +83,7 @@ static void guest_code(void *arg) sync_with_host(4); if (arg) { - if (cpu_has_svm()) { + if (this_cpu_has(X86_FEATURE_SVM)) { generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); } else { @@ -101,7 +99,7 @@ static void guest_code(void *arg) sync_with_host(7); - if (cpu_has_svm()) { + if (this_cpu_has(X86_FEATURE_SVM)) { run_guest(svm->vmcb, svm->vmcb_gpa); run_guest(svm->vmcb, svm->vmcb_gpa); } else { @@ -116,22 +114,23 @@ static void guest_code(void *arg) sync_with_host(DONE); } -void inject_smi(struct kvm_vm *vm) +void inject_smi(struct kvm_vcpu *vcpu) { struct kvm_vcpu_events events; - vcpu_events_get(vm, VCPU_ID, &events); + vcpu_events_get(vcpu, &events); events.smi.pending = 1; events.flags |= KVM_VCPUEVENT_VALID_SMM; - vcpu_events_set(vm, VCPU_ID, &events); + vcpu_events_set(vcpu, &events); } int main(int argc, char *argv[]) { vm_vaddr_t nested_gva = 0; + struct kvm_vcpu *vcpu; struct kvm_regs regs; struct kvm_vm *vm; struct kvm_run *run; @@ -139,9 +138,9 @@ int main(int argc, char *argv[]) int stage, stage_reported; /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); - run = vcpu_state(vm, VCPU_ID); + run = vcpu->run; vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA, SMRAM_MEMSLOT, SMRAM_PAGES, 0); @@ -152,29 +151,29 @@ int main(int argc, char *argv[]) memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler, sizeof(smi_handler)); - vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA); + vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA); - if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - if (nested_svm_supported()) + if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { + if (kvm_cpu_has(X86_FEATURE_SVM)) vcpu_alloc_svm(vm, &nested_gva); - else if (nested_vmx_supported()) + else if (kvm_cpu_has(X86_FEATURE_VMX)) vcpu_alloc_vmx(vm, &nested_gva); } if (!nested_gva) pr_info("will skip SMM test with VMX enabled\n"); - vcpu_args_set(vm, VCPU_ID, 1, nested_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (stage = 1;; stage++) { - _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", stage, run->exit_reason, exit_reason_str(run->exit_reason)); memset(®s, 0, sizeof(regs)); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); stage_reported = regs.rax & 0xff; @@ -191,7 +190,7 @@ int main(int argc, char *argv[]) * return from it. Do not perform save/restore while in SMM yet. */ if (stage == 8) { - inject_smi(vm); + inject_smi(vcpu); continue; } @@ -200,15 +199,14 @@ int main(int argc, char *argv[]) * during L2 execution. */ if (stage == 10) - inject_smi(vm); + inject_smi(vcpu); - state = vcpu_save_state(vm, VCPU_ID); + state = vcpu_save_state(vcpu); kvm_vm_release(vm); - kvm_vm_restart(vm, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vcpu_load_state(vm, VCPU_ID, state); - run = vcpu_state(vm, VCPU_ID); + + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + run = vcpu->run; kvm_x86_state_cleanup(state); } diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 2e0a92da8ff5..ea578971fb9f 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -20,7 +20,6 @@ #include "vmx.h" #include "svm_util.h" -#define VCPU_ID 5 #define L2_GUEST_STACK_SIZE 256 void svm_l2_guest_code(void) @@ -143,7 +142,7 @@ static void __attribute__((__flatten__)) guest_code(void *arg) GUEST_SYNC(2); if (arg) { - if (cpu_has_svm()) + if (this_cpu_has(X86_FEATURE_SVM)) svm_l1_guest_code(arg); else vmx_l1_guest_code(arg); @@ -157,6 +156,7 @@ int main(int argc, char *argv[]) vm_vaddr_t nested_gva = 0; struct kvm_regs regs1, regs2; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; struct kvm_x86_state *state; @@ -164,34 +164,33 @@ int main(int argc, char *argv[]) int stage; /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; - vcpu_regs_get(vm, VCPU_ID, ®s1); + vcpu_regs_get(vcpu, ®s1); - if (kvm_check_cap(KVM_CAP_NESTED_STATE)) { - if (nested_svm_supported()) + if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { + if (kvm_cpu_has(X86_FEATURE_SVM)) vcpu_alloc_svm(vm, &nested_gva); - else if (nested_vmx_supported()) + else if (kvm_cpu_has(X86_FEATURE_VMX)) vcpu_alloc_vmx(vm, &nested_gva); } if (!nested_gva) pr_info("will skip nested state checks\n"); - vcpu_args_set(vm, VCPU_ID, 1, nested_gva); + vcpu_args_set(vcpu, 1, nested_gva); for (stage = 1;; stage++) { - _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", stage, run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: break; @@ -206,22 +205,20 @@ int main(int argc, char *argv[]) uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", stage, (ulong)uc.args[1]); - state = vcpu_save_state(vm, VCPU_ID); + state = vcpu_save_state(vcpu); memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); + vcpu_regs_get(vcpu, ®s1); kvm_vm_release(vm); /* Restore state in a new VM. */ - kvm_vm_restart(vm, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vcpu_load_state(vm, VCPU_ID, state); - run = vcpu_state(vm, VCPU_ID); + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + run = vcpu->run; kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vm, VCPU_ID, ®s2); + vcpu_regs_get(vcpu, ®s2); TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", (ulong) regs2.rdi, (ulong) regs2.rsi); diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c index 30a81038df46..4a07ba227b99 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c @@ -13,10 +13,6 @@ #include "svm_util.h" #include "apic.h" -#define VCPU_ID 0 - -static struct kvm_vm *vm; - bool vintr_irq_called; bool intr_irq_called; @@ -88,33 +84,36 @@ static void l1_guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; + struct kvm_run *run; vm_vaddr_t svm_gva; + struct kvm_vm *vm; + struct ucall uc; - nested_svm_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler); vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler); vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vm, VCPU_ID, 1, svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); - struct kvm_run *run = vcpu_state(vm, VCPU_ID); - struct ucall uc; + run = vcpu->run; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s", (const char *)uc.args[0]); + REPORT_GUEST_ASSERT(uc); break; /* NOT REACHED */ case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c new file mode 100644 index 000000000000..e637d7736012 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -0,0 +1,211 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 Oracle and/or its affiliates. + * + * Based on: + * svm_int_ctl_test + * + * Copyright (C) 2021, Red Hat, Inc. + * + */ + +#include <stdatomic.h> +#include <stdio.h> +#include <unistd.h> +#include "apic.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" + +#define INT_NR 0x20 + +static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless"); + +static unsigned int bp_fired; +static void guest_bp_handler(struct ex_regs *regs) +{ + bp_fired++; +} + +static unsigned int int_fired; +static void l2_guest_code_int(void); + +static void guest_int_handler(struct ex_regs *regs) +{ + int_fired++; + GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int, + regs->rip, (unsigned long)l2_guest_code_int); +} + +static void l2_guest_code_int(void) +{ + GUEST_ASSERT_1(int_fired == 1, int_fired); + vmmcall(); + ud2(); + + GUEST_ASSERT_1(bp_fired == 1, bp_fired); + hlt(); +} + +static atomic_int nmi_stage; +#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire) +#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel) +static void guest_nmi_handler(struct ex_regs *regs) +{ + nmi_stage_inc(); + + if (nmi_stage_get() == 1) { + vmmcall(); + GUEST_ASSERT(false); + } else { + GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get()); + GUEST_DONE(); + } +} + +static void l2_guest_code_nmi(void) +{ + ud2(); +} + +static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + if (is_nmi) + x2apic_enable(); + + /* Prepare for L2 execution. */ + generic_svm_setup(svm, + is_nmi ? l2_guest_code_nmi : l2_guest_code_int, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR); + vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT); + + if (is_nmi) { + vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; + } else { + vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT; + /* The return address pushed on stack */ + vmcb->control.next_rip = vmcb->save.rip; + } + + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + vmcb->control.exit_code, + vmcb->control.exit_info_1, vmcb->control.exit_info_2); + + if (is_nmi) { + clgi(); + x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI); + + GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get()); + nmi_stage_inc(); + + stgi(); + /* self-NMI happens here */ + while (true) + cpu_relax(); + } + + /* Skip over VMMCALL */ + vmcb->save.rip += 3; + + /* Switch to alternate IDT to cause intervening NPF again */ + vmcb->save.idtr.base = idt_alt; + vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */ + + vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; + /* The return address pushed on stack, skip over UD2 */ + vmcb->control.next_rip = vmcb->save.rip + 2; + + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT, + vmcb->control.exit_code, + vmcb->control.exit_info_1, vmcb->control.exit_info_2); + + GUEST_DONE(); +} + +static void run_test(bool is_nmi) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + vm_vaddr_t svm_gva; + vm_vaddr_t idt_alt_vm; + struct kvm_guest_debug debug; + + pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int"); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); + vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler); + vm_install_exception_handler(vm, INT_NR, guest_int_handler); + + vcpu_alloc_svm(vm, &svm_gva); + + if (!is_nmi) { + void *idt, *idt_alt; + + idt_alt_vm = vm_vaddr_alloc_page(vm); + idt_alt = addr_gva2hva(vm, idt_alt_vm); + idt = addr_gva2hva(vm, vm->idt); + memcpy(idt_alt, idt, getpagesize()); + } else { + idt_alt_vm = 0; + } + vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm); + + memset(&debug, 0, sizeof(debug)); + vcpu_guest_debug_set(vcpu, &debug); + + struct kvm_run *run = vcpu->run; + struct ucall uc; + + alarm(2); + vcpu_run(vcpu); + alarm(0); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx"); + break; + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } +done: + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); + + TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS), + "KVM with nSVM is supposed to unconditionally advertise nRIP Save"); + + atomic_init(&nmi_stage, 0); + + run_test(false); + run_test(true); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c index be2ca157485b..c3ac45df7483 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c @@ -12,10 +12,6 @@ #include "processor.h" #include "svm_util.h" -#define VCPU_ID 5 - -static struct kvm_vm *vm; - static void l2_guest_code(struct svm_test_data *svm) { __asm__ __volatile__("vmcall"); @@ -39,28 +35,30 @@ static void l1_guest_code(struct svm_test_data *svm) int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; vm_vaddr_t svm_gva; + struct kvm_vm *vm; - nested_svm_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); vcpu_alloc_svm(vm, &svm_gva); - vcpu_args_set(vm, VCPU_ID, 1, svm_gva); + vcpu_args_set(vcpu, 1, svm_gva); for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s", (const char *)uc.args[0]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: break; diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index fc03a150278d..9b6db0b0b13e 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -20,8 +20,6 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 5 - #define UCALL_PIO_PORT ((uint16_t)0x1000) struct ucall uc_none = { @@ -84,6 +82,7 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; struct kvm_regs regs; @@ -95,66 +94,59 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { - print_skip("KVM_CAP_SYNC_REGS not supported"); - exit(KSFT_SKIP); - } - if ((cap & INVALID_SYNC_FIELD) != 0) { - print_skip("The \"invalid\" field is not invalid"); - exit(KSFT_SKIP); - } + TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS); + TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD)); - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); - run = vcpu_state(vm, VCPU_ID); + run = vcpu->run; /* Request reading invalid register set from VCPU. */ run->kvm_valid_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + run->kvm_valid_regs = 0; run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + run->kvm_valid_regs = 0; /* Request setting invalid register set into VCPU. */ run->kvm_dirty_regs = INVALID_SYNC_FIELD; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + run->kvm_dirty_regs = 0; run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", rv); - vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + run->kvm_dirty_regs = 0; /* Request and verify all valid register sets. */ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ run->kvm_valid_regs = TEST_SYNC_FIELDS; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); compare_regs(®s, &run->s.regs.regs); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); compare_sregs(&sregs, &run->s.regs.sregs); - vcpu_events_get(vm, VCPU_ID, &events); + vcpu_events_get(vcpu, &events); compare_vcpu_events(&events, &run->s.regs.events); /* Set and verify various register values. */ @@ -164,7 +156,7 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, @@ -176,13 +168,13 @@ int main(int argc, char *argv[]) "apic_base sync regs value incorrect 0x%llx.", run->s.regs.sregs.apic_base); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); compare_regs(®s, &run->s.regs.regs); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); compare_sregs(&sregs, &run->s.regs.sregs); - vcpu_events_get(vm, VCPU_ID, &events); + vcpu_events_get(vcpu, &events); compare_vcpu_events(&events, &run->s.regs.events); /* Clear kvm_dirty_regs bits, verify new s.regs values are @@ -191,7 +183,7 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.regs.rbx = 0xDEADBEEF; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, @@ -208,8 +200,8 @@ int main(int argc, char *argv[]) run->kvm_dirty_regs = 0; run->s.regs.regs.rbx = 0xAAAA; regs.rbx = 0xBAC0; - vcpu_regs_set(vm, VCPU_ID, ®s); - rv = _vcpu_run(vm, VCPU_ID); + vcpu_regs_set(vcpu, ®s); + rv = _vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, @@ -217,7 +209,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA, "rbx sync regs value incorrect 0x%llx.", run->s.regs.regs.rbx); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); TEST_ASSERT(regs.rbx == 0xBAC0 + 1, "rbx guest value incorrect 0x%llx.", regs.rbx); @@ -229,7 +221,7 @@ int main(int argc, char *argv[]) run->kvm_valid_regs = 0; run->kvm_dirty_regs = TEST_SYNC_FIELDS; run->s.regs.regs.rbx = 0xBBBB; - rv = _vcpu_run(vm, VCPU_ID); + rv = _vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, @@ -237,7 +229,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB, "rbx sync regs value incorrect 0x%llx.", run->s.regs.regs.rbx); - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); TEST_ASSERT(regs.rbx == 0xBBBB + 1, "rbx guest value incorrect 0x%llx.", regs.rbx); diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c new file mode 100644 index 000000000000..70b44f0b52fe --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#include "kselftest.h" + +#define ARBITRARY_IO_PORT 0x2000 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + asm volatile("inb %%dx, %%al" + : : [port] "d" (ARBITRARY_IO_PORT) : "rax"); +} + +void l1_guest_code(struct vmx_pages *vmx) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT(vmx->vmcs_gpa); + GUEST_ASSERT(prepare_for_vmx_operation(vmx)); + GUEST_ASSERT(load_vmcs(vmx)); + + prepare_vmcs(vmx, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + GUEST_ASSERT(!vmlaunch()); + /* L2 should triple fault after a triple fault event injected. */ + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT); + GUEST_DONE(); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vcpu_events events; + vm_vaddr_t vmx_pages_gva; + struct ucall uc; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT)); + + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1); + + run = vcpu->run; + vcpu_alloc_vmx(vm, &vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Expected KVM_EXIT_IO, got: %u (%s)\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT, + "Expected IN from port %d from L2, got port %d", + ARBITRARY_IO_PORT, run->io.port); + vcpu_events_get(vcpu, &events); + events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + events.triple_fault.pending = true; + vcpu_events_set(vcpu, &events); + run->immediate_exit = true; + vcpu_run_complete_io(vcpu); + + vcpu_events_get(vcpu, &events); + TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT, + "Triple fault event invalid"); + TEST_ASSERT(events.triple_fault.pending, + "No triple fault pending"); + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } + +} diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index a426078b16a3..22d366c697f7 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -9,14 +9,12 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 0 - #define UNITY (1ull << 30) #define HOST_ADJUST (UNITY * 64) #define GUEST_STEP (UNITY * 4) #define ROUND(x) ((x + UNITY / 2) & -UNITY) #define rounded_rdmsr(x) ROUND(rdmsr(x)) -#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vm, 0, x)) +#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x)) static void guest_code(void) { @@ -66,15 +64,13 @@ static void guest_code(void) GUEST_DONE(); } -static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) +static void run_vcpu(struct kvm_vcpu *vcpu, int stage) { struct ucall uc; - vcpu_args_set(vm, vcpuid, 1, vcpuid); - - vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL); + vcpu_run(vcpu); - switch (get_ucall(vm, vcpuid, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && uc.args[1] == stage + 1, "Stage %d: Unexpected register values vmexit, got %lx", @@ -83,34 +79,33 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid, int stage) case UCALL_DONE: return; case UCALL_ABORT: - TEST_ASSERT(false, "%s at %s:%ld\n" \ - "\tvalues: %#lx, %#lx", (const char *)uc.args[0], - __FILE__, uc.args[1], uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); default: TEST_ASSERT(false, "Unexpected exit: %s", - exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason)); + exit_reason_str(vcpu->run->exit_reason)); } } int main(void) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; uint64_t val; - vm = vm_create_default(VCPU_ID, 0, guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); val = 0; ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ - run_vcpu(vm, VCPU_ID, 1); + run_vcpu(vcpu, 1); val = 1ull * GUEST_STEP; ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ - run_vcpu(vm, VCPU_ID, 2); + run_vcpu(vcpu, 2); val = 2ull * GUEST_STEP; ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); @@ -119,18 +114,18 @@ int main(void) * Host: writes to MSR_IA32_TSC set the host-side offset * and therefore do not change MSR_IA32_TSC_ADJUST. */ - vcpu_set_msr(vm, 0, MSR_IA32_TSC, HOST_ADJUST + val); + vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); - run_vcpu(vm, VCPU_ID, 3); + run_vcpu(vcpu, 3); /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ - vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, UNITY * 123456); + vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(vcpu_get_msr(vm, 0, MSR_IA32_TSC_ADJUST), UNITY * 123456); + ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); /* Restore previous value. */ - vcpu_set_msr(vm, 0, MSR_IA32_TSC_ADJUST, val); + vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); @@ -138,7 +133,7 @@ int main(void) * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the * host-side offset and affect both MSRs. */ - run_vcpu(vm, VCPU_ID, 4); + run_vcpu(vcpu, 4); val = 3ull * GUEST_STEP; ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); @@ -147,7 +142,7 @@ int main(void) * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side * offset is now visible in MSR_IA32_TSC_ADJUST. */ - run_vcpu(vm, VCPU_ID, 5); + run_vcpu(vcpu, 5); val = 4ull * GUEST_STEP; ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c index f0083d8cfe98..47139aab7408 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c @@ -46,38 +46,40 @@ static void guest_code(void) static void *run_vcpu(void *_cpu_nr) { - unsigned long cpu = (unsigned long)_cpu_nr; + unsigned long vcpu_id = (unsigned long)_cpu_nr; unsigned long failures = 0; static bool first_cpu_done; + struct kvm_vcpu *vcpu; - /* The kernel is fine, but vm_vcpu_add_default() needs locking */ + /* The kernel is fine, but vm_vcpu_add() needs locking */ pthread_spin_lock(&create_lock); - vm_vcpu_add_default(vm, cpu, guest_code); + vcpu = vm_vcpu_add(vm, vcpu_id, guest_code); if (!first_cpu_done) { first_cpu_done = true; - vcpu_set_msr(vm, cpu, MSR_IA32_TSC, TEST_TSC_OFFSET); + vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET); } pthread_spin_unlock(&create_lock); for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, cpu); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, cpu); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, cpu, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_DONE: goto out; case UCALL_SYNC: - printf("Guest %ld sync %lx %lx %ld\n", cpu, uc.args[2], uc.args[3], uc.args[2] - uc.args[3]); + printf("Guest %d sync %lx %lx %ld\n", vcpu->id, + uc.args[2], uc.args[3], uc.args[2] - uc.args[3]); failures++; break; @@ -91,12 +93,9 @@ static void *run_vcpu(void *_cpu_nr) int main(int argc, char *argv[]) { - if (!kvm_check_cap(KVM_CAP_VM_TSC_CONTROL)) { - print_skip("KVM_CAP_VM_TSC_CONTROL not available"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL)); - vm = vm_create_default_with_vcpus(0, DEFAULT_STACK_PGS * NR_TEST_VCPUS, 0, guest_code, NULL); + vm = vm_create(NR_TEST_VCPUS); vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ); pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE); diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c new file mode 100644 index 000000000000..a897c7fd8abe --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ucna_injection_test + * + * Copyright (C) 2022, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Test that user space can inject UnCorrectable No Action required (UCNA) + * memory errors to the guest. + * + * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that + * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and + * corresponding per-bank control register (MCI_CTL2) bit enabled. + * The test also checks that the UCNA errors get recorded in the + * Machine Check bank registers no matter the error signal interrupts get + * delivered into the guest or not. + * + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <pthread.h> +#include <inttypes.h> +#include <string.h> +#include <time.h> + +#include "kvm_util_base.h" +#include "kvm_util.h" +#include "mce.h" +#include "processor.h" +#include "test_util.h" +#include "apic.h" + +#define SYNC_FIRST_UCNA 9 +#define SYNC_SECOND_UCNA 10 +#define SYNC_GP 11 +#define FIRST_UCNA_ADDR 0xdeadbeef +#define SECOND_UCNA_ADDR 0xcafeb0ba + +/* + * Vector for the CMCI interrupt. + * Value is arbitrary. Any value in 0x20-0xFF should work: + * https://wiki.osdev.org/Interrupt_Vector_Table + */ +#define CMCI_VECTOR 0xa9 + +#define UCNA_BANK 0x7 // IMC0 bank + +#define MCI_CTL2_RESERVED_BIT BIT_ULL(29) + +static uint64_t supported_mcg_caps; + +/* + * Record states about the injected UCNA. + * The variables started with the 'i_' prefixes are recorded in interrupt + * handler. Variables without the 'i_' prefixes are recorded in guest main + * execution thread. + */ +static volatile uint64_t i_ucna_rcvd; +static volatile uint64_t i_ucna_addr; +static volatile uint64_t ucna_addr; +static volatile uint64_t ucna_addr2; + +struct thread_params { + struct kvm_vcpu *vcpu; + uint64_t *p_i_ucna_rcvd; + uint64_t *p_i_ucna_addr; + uint64_t *p_ucna_addr; + uint64_t *p_ucna_addr2; +}; + +static void verify_apic_base_addr(void) +{ + uint64_t msr = rdmsr(MSR_IA32_APICBASE); + uint64_t base = GET_APIC_BASE(msr); + + GUEST_ASSERT(base == APIC_DEFAULT_GPA); +} + +static void ucna_injection_guest_code(void) +{ + uint64_t ctl2; + verify_apic_base_addr(); + xapic_enable(); + + /* Sets up the interrupt vector and enables per-bank CMCI sigaling. */ + xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED); + ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); + + /* Enables interrupt in guest. */ + asm volatile("sti"); + + /* Let user space inject the first UCNA */ + GUEST_SYNC(SYNC_FIRST_UCNA); + + ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); + + /* Disables the per-bank CMCI signaling. */ + ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN); + + /* Let the user space inject the second UCNA */ + GUEST_SYNC(SYNC_SECOND_UCNA); + + ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); + GUEST_DONE(); +} + +static void cmci_disabled_guest_code(void) +{ + uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN); + + GUEST_DONE(); +} + +static void cmci_enabled_guest_code(void) +{ + uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK)); + wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT); + + GUEST_DONE(); +} + +static void guest_cmci_handler(struct ex_regs *regs) +{ + i_ucna_rcvd++; + i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK)); + xapic_write_reg(APIC_EOI, 0); +} + +static void guest_gp_handler(struct ex_regs *regs) +{ + GUEST_SYNC(SYNC_GP); +} + +static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu) +{ + unsigned int exit_reason; + struct ucall uc; + + vcpu_run(vcpu); + + exit_reason = vcpu->run->exit_reason; + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO", + exit_reason, exit_reason_str(exit_reason)); + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC, + "Expect UCALL_SYNC\n"); + TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected."); + printf("vCPU received GP in guest.\n"); +} + +static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) { + /* + * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in + * the IA32_MCi_STATUS register. + * MSCOD=1 (BIT[16] - MscodDataRdErr). + * MCACOD=0x0090 (Memory controller error format, channel 0) + */ + uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | + MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090; + struct kvm_x86_mce mce = {}; + mce.status = status; + mce.mcg_status = 0; + /* + * MCM_ADDR_PHYS indicates the reported address is a physical address. + * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE + * is at 4KB granularity. + */ + mce.misc = (MCM_ADDR_PHYS << 6) | 0xc; + mce.addr = addr; + mce.bank = UCNA_BANK; + + vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce); +} + +static void *run_ucna_injection(void *arg) +{ + struct thread_params *params = (struct thread_params *)arg; + struct ucall uc; + int old; + int r; + unsigned int exit_reason; + + r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); + TEST_ASSERT(r == 0, + "pthread_setcanceltype failed with errno=%d", + r); + + vcpu_run(params->vcpu); + + exit_reason = params->vcpu->run->exit_reason; + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason %u-%s, expected KVM_EXIT_IO", + exit_reason, exit_reason_str(exit_reason)); + TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, + "Expect UCALL_SYNC\n"); + TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA."); + + printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR); + + inject_ucna(params->vcpu, FIRST_UCNA_ADDR); + vcpu_run(params->vcpu); + + exit_reason = params->vcpu->run->exit_reason; + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason %u-%s, expected KVM_EXIT_IO", + exit_reason, exit_reason_str(exit_reason)); + TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC, + "Expect UCALL_SYNC\n"); + TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA."); + + printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR); + + inject_ucna(params->vcpu, SECOND_UCNA_ADDR); + vcpu_run(params->vcpu); + + exit_reason = params->vcpu->run->exit_reason; + TEST_ASSERT(exit_reason == KVM_EXIT_IO, + "unexpected exit reason %u-%s, expected KVM_EXIT_IO", + exit_reason, exit_reason_str(exit_reason)); + if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) { + TEST_ASSERT(false, "vCPU assertion failure: %s.\n", + (const char *)uc.args[0]); + } + + return NULL; +} + +static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params) +{ + struct kvm_vm *vm = vcpu->vm; + params->vcpu = vcpu; + params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd); + params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr); + params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr); + params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2); + + run_ucna_injection(params); + + TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled."); + TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR, + "Only first UCNA reported addr get recorded via interrupt."); + TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR, + "First injected UCNAs should get exposed via registers."); + TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR, + "Second injected UCNAs should get exposed via registers."); + + printf("Test successful.\n" + "UCNA CMCI interrupts received: %ld\n" + "Last UCNA address received via CMCI: %lx\n" + "First UCNA address in vCPU thread: %lx\n" + "Second UCNA address in vCPU thread: %lx\n", + *params->p_i_ucna_rcvd, *params->p_i_ucna_addr, + *params->p_ucna_addr, *params->p_ucna_addr2); +} + +static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p) +{ + uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS; + if (enable_cmci_p) + mcg_caps |= MCG_CMCI_P; + + mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK; + vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps); +} + +static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid, + bool enable_cmci_p, void *guest_code) +{ + struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code); + setup_mce_cap(vcpu, enable_cmci_p); + return vcpu; +} + +int main(int argc, char *argv[]) +{ + struct thread_params params; + struct kvm_vm *vm; + struct kvm_vcpu *ucna_vcpu; + struct kvm_vcpu *cmcidis_vcpu; + struct kvm_vcpu *cmci_vcpu; + + kvm_check_cap(KVM_CAP_MCE); + + vm = __vm_create(VM_MODE_DEFAULT, 3, 0); + + kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED, + &supported_mcg_caps); + + if (!(supported_mcg_caps & MCG_CMCI_P)) { + print_skip("MCG_CMCI_P is not supported"); + exit(KSFT_SKIP); + } + + ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code); + cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code); + cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(ucna_vcpu); + vcpu_init_descriptor_tables(cmcidis_vcpu); + vcpu_init_descriptor_tables(cmci_vcpu); + vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + + virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); + + test_ucna_injection(ucna_vcpu, ¶ms); + run_vcpu_expect_gp(cmcidis_vcpu); + run_vcpu_expect_gp(cmci_vcpu); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index e4bef2e05686..7316521428f8 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -10,8 +10,6 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 1 - static void guest_ins_port80(uint8_t *buffer, unsigned int count) { unsigned long end; @@ -52,31 +50,29 @@ static void guest_code(void) int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; struct kvm_regs regs; struct kvm_run *run; struct kvm_vm *vm; struct ucall uc; - int rc; /* Tell stdout not to buffer its content */ setbuf(stdout, NULL); - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; memset(®s, 0, sizeof(regs)); while (1) { - rc = _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - if (get_ucall(vm, VCPU_ID, &uc)) + if (get_ucall(vcpu, &uc)) break; TEST_ASSERT(run->io.port == 0x80, @@ -89,22 +85,20 @@ int main(int argc, char *argv[]) * scope from a testing perspective as it's not ABI in any way, * i.e. it really is abusing internal KVM knowledge. */ - vcpu_regs_get(vm, VCPU_ID, ®s); + vcpu_regs_get(vcpu, ®s); if (regs.rcx == 2) regs.rcx = 1; if (regs.rcx == 3) regs.rcx = 8192; memset((void *)run + run->io.data_offset, 0xaa, 4096); - vcpu_regs_set(vm, VCPU_ID, ®s); + vcpu_regs_set(vcpu, ®s); } switch (uc.cmd) { case UCALL_DONE: break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld : argN+1 = 0x%lx, argN+2 = 0x%lx", - (const char *)uc.args[0], __FILE__, uc.args[1], - uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx"); default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c index e3e20e8848d0..a4f06370a245 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c @@ -17,7 +17,6 @@ #define KVM_FEP_LENGTH 5 static int fep_available = 1; -#define VCPU_ID 1 #define MSR_NON_EXISTENT 0x474f4f00 static u64 deny_bits = 0; @@ -395,31 +394,21 @@ static void guest_ud_handler(struct ex_regs *regs) regs->rip += KVM_FEP_LENGTH; } -static void run_guest(struct kvm_vm *vm) +static void check_for_guest_assert(struct kvm_vcpu *vcpu) { - int rc; - - rc = _vcpu_run(vm, VCPU_ID); - TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); -} - -static void check_for_guest_assert(struct kvm_vm *vm) -{ - struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; - if (run->exit_reason == KVM_EXIT_IO && - get_ucall(vm, VCPU_ID, &uc) == UCALL_ABORT) { - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + if (vcpu->run->exit_reason == KVM_EXIT_IO && + get_ucall(vcpu, &uc) == UCALL_ABORT) { + REPORT_GUEST_ASSERT(uc); } } -static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index) +static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; - check_for_guest_assert(vm); + check_for_guest_assert(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR, "Unexpected exit reason: %u (%s),\n", @@ -450,11 +439,11 @@ static void process_rdmsr(struct kvm_vm *vm, uint32_t msr_index) } } -static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) +static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; - check_for_guest_assert(vm); + check_for_guest_assert(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR, "Unexpected exit reason: %u (%s),\n", @@ -481,43 +470,43 @@ static void process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) } } -static void process_ucall_done(struct kvm_vm *vm) +static void process_ucall_done(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; struct ucall uc; - check_for_guest_assert(vm); + check_for_guest_assert(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s)", run->exit_reason, exit_reason_str(run->exit_reason)); - TEST_ASSERT(get_ucall(vm, VCPU_ID, &uc) == UCALL_DONE, + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE, "Unexpected ucall command: %lu, expected UCALL_DONE (%d)", uc.cmd, UCALL_DONE); } -static uint64_t process_ucall(struct kvm_vm *vm) +static uint64_t process_ucall(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; struct ucall uc = {}; - check_for_guest_assert(vm); + check_for_guest_assert(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s)", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_SYNC: break; case UCALL_ABORT: - check_for_guest_assert(vm); + check_for_guest_assert(vcpu); break; case UCALL_DONE: - process_ucall_done(vm); + process_ucall_done(vcpu); break; default: TEST_ASSERT(false, "Unexpected ucall"); @@ -526,45 +515,43 @@ static uint64_t process_ucall(struct kvm_vm *vm) return uc.cmd; } -static void run_guest_then_process_rdmsr(struct kvm_vm *vm, uint32_t msr_index) +static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu, + uint32_t msr_index) { - run_guest(vm); - process_rdmsr(vm, msr_index); + vcpu_run(vcpu); + process_rdmsr(vcpu, msr_index); } -static void run_guest_then_process_wrmsr(struct kvm_vm *vm, uint32_t msr_index) +static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu, + uint32_t msr_index) { - run_guest(vm); - process_wrmsr(vm, msr_index); + vcpu_run(vcpu); + process_wrmsr(vcpu, msr_index); } -static uint64_t run_guest_then_process_ucall(struct kvm_vm *vm) +static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu) { - run_guest(vm); - return process_ucall(vm); + vcpu_run(vcpu); + return process_ucall(vcpu); } -static void run_guest_then_process_ucall_done(struct kvm_vm *vm) +static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu) { - run_guest(vm); - process_ucall_done(vm); + vcpu_run(vcpu); + process_ucall_done(vcpu); } -static void test_msr_filter_allow(void) { - struct kvm_enable_cap cap = { - .cap = KVM_CAP_X86_USER_SPACE_MSR, - .args[0] = KVM_MSR_EXIT_REASON_FILTER, - }; +static void test_msr_filter_allow(void) +{ + struct kvm_vcpu *vcpu; struct kvm_vm *vm; int rc; - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code_filter_allow); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_allow); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); - vm_enable_cap(vm, &cap); + vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); @@ -572,43 +559,43 @@ static void test_msr_filter_allow(void) { vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); /* Process guest code userspace exits. */ - run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); - run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); - run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); + run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT); vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); - run_guest(vm); + vcpu_run(vcpu); vm_install_exception_handler(vm, UD_VECTOR, NULL); - if (process_ucall(vm) != UCALL_DONE) { + if (process_ucall(vcpu) != UCALL_DONE) { vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler); /* Process emulated rdmsr and wrmsr instructions. */ - run_guest_then_process_rdmsr(vm, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); - run_guest_then_process_wrmsr(vm, MSR_IA32_XSS); + run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS); - run_guest_then_process_rdmsr(vm, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vm, MSR_IA32_FLUSH_CMD); + run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); + run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD); - run_guest_then_process_wrmsr(vm, MSR_NON_EXISTENT); - run_guest_then_process_rdmsr(vm, MSR_NON_EXISTENT); + run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT); + run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT); /* Confirm the guest completed without issues. */ - run_guest_then_process_ucall_done(vm); + run_guest_then_process_ucall_done(vcpu); } else { printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n"); } @@ -616,16 +603,16 @@ static void test_msr_filter_allow(void) { kvm_vm_free(vm); } -static int handle_ucall(struct kvm_vm *vm) +static int handle_ucall(struct kvm_vcpu *vcpu) { struct ucall uc; - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("Guest assertion not met"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_SYNC: - vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny); + vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny); break; case UCALL_DONE: return 1; @@ -673,25 +660,21 @@ static void handle_wrmsr(struct kvm_run *run) } } -static void test_msr_filter_deny(void) { - struct kvm_enable_cap cap = { - .cap = KVM_CAP_X86_USER_SPACE_MSR, - .args[0] = KVM_MSR_EXIT_REASON_INVAL | - KVM_MSR_EXIT_REASON_UNKNOWN | - KVM_MSR_EXIT_REASON_FILTER, - }; +static void test_msr_filter_deny(void) +{ + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; int rc; - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code_filter_deny); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code_filter_deny); + run = vcpu->run; rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); - vm_enable_cap(vm, &cap); + vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL | + KVM_MSR_EXIT_REASON_UNKNOWN | + KVM_MSR_EXIT_REASON_FILTER); rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); @@ -700,9 +683,7 @@ static void test_msr_filter_deny(void) { vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny); while (1) { - rc = _vcpu_run(vm, VCPU_ID); - - TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc); + vcpu_run(vcpu); switch (run->exit_reason) { case KVM_EXIT_X86_RDMSR: @@ -712,7 +693,7 @@ static void test_msr_filter_deny(void) { handle_wrmsr(run); break; case KVM_EXIT_IO: - if (handle_ucall(vm)) + if (handle_ucall(vcpu)) goto done; break; } @@ -726,31 +707,28 @@ done: kvm_vm_free(vm); } -static void test_msr_permission_bitmap(void) { - struct kvm_enable_cap cap = { - .cap = KVM_CAP_X86_USER_SPACE_MSR, - .args[0] = KVM_MSR_EXIT_REASON_FILTER, - }; +static void test_msr_permission_bitmap(void) +{ + struct kvm_vcpu *vcpu; struct kvm_vm *vm; int rc; - /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code_permission_bitmap); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vm = vm_create_with_one_vcpu(&vcpu, guest_code_permission_bitmap); rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR); TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available"); - vm_enable_cap(vm, &cap); + vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER); rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER); TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available"); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs); - run_guest_then_process_rdmsr(vm, MSR_FS_BASE); - TEST_ASSERT(run_guest_then_process_ucall(vm) == UCALL_SYNC, "Expected ucall state to be UCALL_SYNC."); + run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE); + TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC, + "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); - run_guest_then_process_rdmsr(vm, MSR_GS_BASE); - run_guest_then_process_ucall_done(vm); + run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + run_guest_then_process_ucall_done(vcpu); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c index d438c4d3228a..5abecf06329e 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -28,11 +28,6 @@ #include "kselftest.h" -#define VCPU_ID 0 - -/* The virtual machine object. */ -static struct kvm_vm *vm; - static void l2_guest_code(void) { /* Exit to L1 */ @@ -77,33 +72,29 @@ static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) int main(int argc, char *argv[]) { unsigned long apic_access_addr = ~0ul; - unsigned int paddr_width; - unsigned int vaddr_width; vm_vaddr_t vmx_pages_gva; unsigned long high_gpa; struct vmx_pages *vmx; bool done = false; - nested_vmx_check_supported(); + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - kvm_get_cpu_address_width(&paddr_width, &vaddr_width); - high_gpa = (1ul << paddr_width) - getpagesize(); - if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) { - print_skip("No unbacked physical page available"); - exit(KSFT_SKIP); - } + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); + + high_gpa = (vm->max_gfn - 1) << vm->page_shift; vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); prepare_virtualize_apic_accesses(vmx, vm); - vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa); + vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa); while (!done) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); if (apic_access_addr == high_gpa) { TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, @@ -121,10 +112,9 @@ int main(int argc, char *argv[]) run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: apic_access_addr = uc.args[1]; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c index edac8839e717..d79651b02740 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c @@ -18,15 +18,10 @@ #include "kselftest.h" -#define VCPU_ID 5 - enum { PORT_L0_EXIT = 0x2000, }; -/* The virtual machine object. */ -static struct kvm_vm *vm; - static void l2_guest_code(void) { /* Exit to L0 */ @@ -53,20 +48,22 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; - nested_vmx_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); /* Allocate VMX pages and shared descriptors (vmx_pages). */ vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, @@ -75,9 +72,9 @@ int main(int argc, char *argv[]) if (run->io.port == PORT_L0_EXIT) break; - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s", (const char *)uc.args[0]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ default: TEST_FAIL("Unknown ucall %lu", uc.cmd); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c index 68f26a8b4f42..2d8c23d639f7 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c @@ -17,8 +17,6 @@ #include "processor.h" #include "vmx.h" -#define VCPU_ID 1 - /* The memory slot index to track dirty pages */ #define TEST_MEM_SLOT_INDEX 1 #define TEST_MEM_PAGES 3 @@ -73,18 +71,19 @@ int main(int argc, char *argv[]) unsigned long *bmap; uint64_t *host_test_mem; + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; struct ucall uc; bool done = false; - nested_vmx_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); - run = vcpu_state(vm, VCPU_ID); + vcpu_args_set(vcpu, 1, vmx_pages_gva); + run = vcpu->run; /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -116,16 +115,15 @@ int main(int argc, char *argv[]) while (!done) { memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096); - _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: /* diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index 27a850f3d7ce..2641b286b4ed 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -10,10 +10,6 @@ #include "kselftest.h" -#define VCPU_ID 0 - -static struct kvm_vm *vm; - static void guest_ud_handler(struct ex_regs *regs) { /* Loop on the ud2 until guest state is made invalid. */ @@ -24,11 +20,11 @@ static void guest_code(void) asm volatile("ud2"); } -static void __run_vcpu_with_invalid_state(void) +static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) { - struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_run *run = vcpu->run; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR, "Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n", @@ -38,15 +34,15 @@ static void __run_vcpu_with_invalid_state(void) run->emulation_failure.suberror); } -static void run_vcpu_with_invalid_state(void) +static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu) { /* * Always run twice to verify KVM handles the case where _KVM_ queues * an exception with invalid state and then exits to userspace, i.e. * that KVM doesn't explode if userspace ignores the initial error. */ - __run_vcpu_with_invalid_state(); - __run_vcpu_with_invalid_state(); + __run_vcpu_with_invalid_state(vcpu); + __run_vcpu_with_invalid_state(vcpu); } static void set_timer(void) @@ -59,33 +55,43 @@ static void set_timer(void) ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); } -static void set_or_clear_invalid_guest_state(bool set) +static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set) { static struct kvm_sregs sregs; if (!sregs.cr0) - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs.tr.unusable = !!set; - vcpu_sregs_set(vm, VCPU_ID, &sregs); + vcpu_sregs_set(vcpu, &sregs); +} + +static void set_invalid_guest_state(struct kvm_vcpu *vcpu) +{ + set_or_clear_invalid_guest_state(vcpu, true); } -static void set_invalid_guest_state(void) +static void clear_invalid_guest_state(struct kvm_vcpu *vcpu) { - set_or_clear_invalid_guest_state(true); + set_or_clear_invalid_guest_state(vcpu, false); } -static void clear_invalid_guest_state(void) +static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu) { - set_or_clear_invalid_guest_state(false); + static struct kvm_vcpu *vcpu = NULL; + + if (__vcpu) + vcpu = __vcpu; + return vcpu; } static void sigalrm_handler(int sig) { + struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL); struct kvm_vcpu_events events; TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig); - vcpu_events_get(vm, VCPU_ID, &events); + vcpu_events_get(vcpu, &events); /* * If an exception is pending, attempt KVM_RUN with invalid guest, @@ -93,8 +99,8 @@ static void sigalrm_handler(int sig) * between KVM queueing an exception and re-entering the guest. */ if (events.exception.pending) { - set_invalid_guest_state(); - run_vcpu_with_invalid_state(); + set_invalid_guest_state(vcpu); + run_vcpu_with_invalid_state(vcpu); } else { set_timer(); } @@ -102,15 +108,17 @@ static void sigalrm_handler(int sig) int main(int argc, char *argv[]) { - if (!is_intel_cpu() || vm_is_unrestricted_guest(NULL)) { - print_skip("Must be run with kvm_intel.unrestricted_guest=0"); - exit(KSFT_SKIP); - } + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(is_intel_cpu()); + TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); - vm = vm_create_default(VCPU_ID, 0, (void *)guest_code); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + get_set_sigalrm_vcpu(vcpu); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); @@ -119,8 +127,8 @@ int main(int argc, char *argv[]) * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support * emulating invalid guest state for L2. */ - set_invalid_guest_state(); - run_vcpu_with_invalid_state(); + set_invalid_guest_state(vcpu); + run_vcpu_with_invalid_state(vcpu); /* * Verify KVM also handles the case where userspace gains control while @@ -129,11 +137,11 @@ int main(int argc, char *argv[]) * guest with invalid state when the handler interrupts KVM with an * exception pending. */ - clear_invalid_guest_state(); + clear_invalid_guest_state(vcpu); TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR, "Failed to register SIGALRM handler, errno = %d (%s)", errno, strerror(errno)); set_timer(); - run_vcpu_with_invalid_state(); + run_vcpu_with_invalid_state(vcpu); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c index 489fbed4ca6f..6bfb4bb471ca 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c @@ -9,7 +9,6 @@ #include "kselftest.h" -#define VCPU_ID 0 #define ARBITRARY_IO_PORT 0x2000 static struct kvm_vm *vm; @@ -55,20 +54,21 @@ int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; struct kvm_sregs sregs; + struct kvm_vcpu *vcpu; struct kvm_run *run; struct ucall uc; - nested_vmx_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); /* Allocate VMX pages and shared descriptors (vmx_pages). */ vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - run = vcpu_state(vm, VCPU_ID); + run = vcpu->run; /* * The first exit to L0 userspace should be an I/O access from L2. @@ -88,17 +88,17 @@ int main(int argc, char *argv[]) * emulating invalid guest state for L2. */ memset(&sregs, 0, sizeof(sregs)); - vcpu_sregs_get(vm, VCPU_ID, &sregs); + vcpu_sregs_get(vcpu, &sregs); sregs.tr.unusable = 1; - vcpu_sregs_set(vm, VCPU_ID, &sregs); + vcpu_sregs_set(vcpu, &sregs); - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_DONE: break; case UCALL_ABORT: - TEST_FAIL("%s", (const char *)uc.args[0]); + REPORT_GUEST_ASSERT(uc); default: TEST_FAIL("Unexpected ucall: %lu", uc.cmd); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c new file mode 100644 index 000000000000..322d561b4260 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VMX control MSR test + * + * Copyright (C) 2022 Google LLC. + * + * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks + * that KVM will set owned bits where appropriate, and will not if + * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled. + */ +#include <linux/bitmap.h> +#include "kvm_util.h" +#include "vmx.h" + +static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, + uint64_t mask) +{ + uint64_t val = vcpu_get_msr(vcpu, msr_index); + uint64_t bit; + + mask &= val; + + for_each_set_bit(bit, &mask, 64) { + vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit)); + vcpu_set_msr(vcpu, msr_index, val); + } +} + +static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index, + uint64_t mask) +{ + uint64_t val = vcpu_get_msr(vcpu, msr_index); + uint64_t bit; + + mask = ~mask | val; + + for_each_clear_bit(bit, &mask, 64) { + vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit)); + vcpu_set_msr(vcpu, msr_index, val); + } +} + +static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index) +{ + vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0)); + vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32)); +} + +static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu) +{ + vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0); + vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull); + + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC, + BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55)); + + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC, + BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) | + BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30)); + + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2); + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS); + vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS); + vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull); +} + +int main(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + + /* No need to actually do KVM_RUN, thus no guest code. */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + vmx_save_restore_msrs_test(vcpu); + + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c index 280c01fd2412..465a9434d61c 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -15,9 +15,6 @@ #include "vmx.h" #include "kselftest.h" - -#define VCPU_ID 0 - /* L2 is scaled up (from L1's perspective) by this factor */ #define L2_SCALE_FACTOR 4ULL @@ -119,14 +116,6 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -static void tsc_scaling_check_supported(void) -{ - if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) { - print_skip("TSC scaling not supported by the HW"); - exit(KSFT_SKIP); - } -} - static void stable_tsc_check_supported(void) { FILE *fp; @@ -150,6 +139,7 @@ skip_test: int main(int argc, char *argv[]) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; vm_vaddr_t vmx_pages_gva; @@ -160,8 +150,8 @@ int main(int argc, char *argv[]) uint64_t l1_tsc_freq = 0; uint64_t l2_tsc_freq = 0; - nested_vmx_check_supported(); - tsc_scaling_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); stable_tsc_check_supported(); /* @@ -182,30 +172,29 @@ int main(int argc, char *argv[]) l0_tsc_freq = tsc_end - tsc_start; printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code); vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); - tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL); + tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL); TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); /* scale down L1's TSC frequency */ - vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ, - (void *) (tsc_khz / l1_scale_factor)); + vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor)); for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s", (const char *) uc.args[0]); + REPORT_GUEST_ASSERT(uc); case UCALL_SYNC: switch (uc.args[0]) { case USLEEP: diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 97b7fd4a9a3d..6ec901dab61e 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -17,9 +17,6 @@ #include "kvm_util.h" #include "vmx.h" -#define VCPU_ID 0 - -#define X86_FEATURE_PDCM (1<<15) #define PMU_CAP_FW_WRITES (1ULL << 13) #define PMU_CAP_LBR_FMT 0x3f @@ -56,11 +53,9 @@ static void guest_code(void) int main(int argc, char *argv[]) { - struct kvm_cpuid2 *cpuid; - struct kvm_cpuid_entry2 *entry_1_0; - struct kvm_cpuid_entry2 *entry_a_0; - bool pdcm_supported = false; + const struct kvm_cpuid_entry2 *entry_a_0; struct kvm_vm *vm; + struct kvm_vcpu *vcpu; int ret; union cpuid10_eax eax; union perf_capabilities host_cap; @@ -69,46 +64,37 @@ int main(int argc, char *argv[]) host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT); /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - cpuid = kvm_get_supported_cpuid(); - - if (kvm_get_cpuid_max_basic() >= 0xa) { - entry_1_0 = kvm_get_supported_cpuid_index(1, 0); - entry_a_0 = kvm_get_supported_cpuid_index(0xa, 0); - pdcm_supported = entry_1_0 && !!(entry_1_0->ecx & X86_FEATURE_PDCM); - eax.full = entry_a_0->eax; - } - if (!pdcm_supported) { - print_skip("MSR_IA32_PERF_CAPABILITIES is not supported by the vCPU"); - exit(KSFT_SKIP); - } - if (!eax.split.version_id) { - print_skip("PMU is not supported by the vCPU"); - exit(KSFT_SKIP); - } + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); + + TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa); + entry_a_0 = kvm_get_supported_cpuid_entry(0xa); + + eax.full = entry_a_0->eax; + __TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU"); /* testcase 1, set capabilities when we have PDCM bit */ - vcpu_set_cpuid(vm, VCPU_ID, cpuid); - vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); /* check capabilities can be retrieved with KVM_GET_MSR */ - ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); + ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); /* check whatever we write with KVM_SET_MSR is _not_ modified */ - vcpu_run(vm, VCPU_ID); - ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); + vcpu_run(vcpu); + ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); /* testcase 2, check valid LBR formats are accepted */ - vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0); - ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), 0); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0); - vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format); - ASSERT_EQ(vcpu_get_msr(vm, VCPU_ID, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format); + ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format); /* testcase 3, check invalid LBR format is rejected */ /* Note, on Arch LBR capable platforms, LBR_FMT in perf capability msr is 0x3f, * to avoid the failure, use a true invalid format 0x30 for the test. */ - ret = _vcpu_set_msr(vm, 0, MSR_IA32_PERF_CAPABILITIES, 0x30); + ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0x30); TEST_ASSERT(ret == 0, "Bad PERF_CAPABILITIES didn't fail."); printf("Completed perf capability tests.\n"); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c index ff92e25b6f1e..0efdc05969a5 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c @@ -22,7 +22,6 @@ #include "processor.h" #include "vmx.h" -#define VCPU_ID 5 #define PREEMPTION_TIMER_VALUE 100000000ull #define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull @@ -159,6 +158,7 @@ int main(int argc, char *argv[]) struct kvm_regs regs1, regs2; struct kvm_vm *vm; struct kvm_run *run; + struct kvm_vcpu *vcpu; struct kvm_x86_state *state; struct ucall uc; int stage; @@ -167,33 +167,29 @@ int main(int argc, char *argv[]) * AMD currently does not implement any VMX features, so for now we * just early out. */ - nested_vmx_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) { - print_skip("KVM_CAP_NESTED_STATE not supported"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, guest_code); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; - vcpu_regs_get(vm, VCPU_ID, ®s1); + vcpu_regs_get(vcpu, ®s1); vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); for (stage = 1;; stage++) { - _vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Stage %d: unexpected exit reason: %u (%s),\n", stage, run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], - __FILE__, uc.args[1]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: break; @@ -232,22 +228,20 @@ int main(int argc, char *argv[]) stage, uc.args[4], uc.args[5]); } - state = vcpu_save_state(vm, VCPU_ID); + state = vcpu_save_state(vcpu); memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); + vcpu_regs_get(vcpu, ®s1); kvm_vm_release(vm); /* Restore state in a new VM. */ - kvm_vm_restart(vm, O_RDWR); - vm_vcpu_add(vm, VCPU_ID); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); - vcpu_load_state(vm, VCPU_ID, state); - run = vcpu_state(vm, VCPU_ID); + vcpu = vm_recreate_with_one_vcpu(vm); + vcpu_load_state(vcpu, state); + run = vcpu->run; kvm_x86_state_cleanup(state); memset(®s2, 0, sizeof(regs2)); - vcpu_regs_get(vm, VCPU_ID, ®s2); + vcpu_regs_get(vcpu, ®s2); TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", (ulong) regs2.rdi, (ulong) regs2.rsi); diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c index 5827b9bae468..41ea7028a1f8 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c @@ -23,38 +23,37 @@ * changes this should be updated. */ #define VMCS12_REVISION 0x11e57ed0 -#define VCPU_ID 5 bool have_evmcs; -void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state) +void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state) { - vcpu_nested_state_set(vm, VCPU_ID, state, false); + vcpu_nested_state_set(vcpu, state); } -void test_nested_state_expect_errno(struct kvm_vm *vm, +void test_nested_state_expect_errno(struct kvm_vcpu *vcpu, struct kvm_nested_state *state, int expected_errno) { int rv; - rv = vcpu_nested_state_set(vm, VCPU_ID, state, true); + rv = __vcpu_nested_state_set(vcpu, state); TEST_ASSERT(rv == -1 && errno == expected_errno, "Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)", strerror(expected_errno), expected_errno, rv, strerror(errno), errno); } -void test_nested_state_expect_einval(struct kvm_vm *vm, +void test_nested_state_expect_einval(struct kvm_vcpu *vcpu, struct kvm_nested_state *state) { - test_nested_state_expect_errno(vm, state, EINVAL); + test_nested_state_expect_errno(vcpu, state, EINVAL); } -void test_nested_state_expect_efault(struct kvm_vm *vm, +void test_nested_state_expect_efault(struct kvm_vcpu *vcpu, struct kvm_nested_state *state) { - test_nested_state_expect_errno(vm, state, EFAULT); + test_nested_state_expect_errno(vcpu, state, EFAULT); } void set_revision_id_for_vmcs12(struct kvm_nested_state *state, @@ -86,7 +85,7 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size) set_revision_id_for_vmcs12(state, VMCS12_REVISION); } -void test_vmx_nested_state(struct kvm_vm *vm) +void test_vmx_nested_state(struct kvm_vcpu *vcpu) { /* Add a page for VMCS12. */ const int state_sz = sizeof(struct kvm_nested_state) + getpagesize(); @@ -96,14 +95,14 @@ void test_vmx_nested_state(struct kvm_vm *vm) /* The format must be set to 0. 0 for VMX, 1 for SVM. */ set_default_vmx_state(state, state_sz); state->format = 1; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* * We cannot virtualize anything if the guest does not have VMX * enabled. */ set_default_vmx_state(state, state_sz); - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* * We cannot virtualize anything if the guest does not have VMX @@ -112,17 +111,17 @@ void test_vmx_nested_state(struct kvm_vm *vm) */ set_default_vmx_state(state, state_sz); state->hdr.vmx.vmxon_pa = -1ull; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); state->hdr.vmx.vmcs12_pa = -1ull; state->flags = KVM_STATE_NESTED_EVMCS; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); state->flags = 0; - test_nested_state(vm, state); + test_nested_state(vcpu, state); /* Enable VMX in the guest CPUID. */ - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX); /* * Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without @@ -133,34 +132,34 @@ void test_vmx_nested_state(struct kvm_vm *vm) set_default_vmx_state(state, state_sz); state->hdr.vmx.vmxon_pa = -1ull; state->hdr.vmx.vmcs12_pa = -1ull; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); state->flags &= KVM_STATE_NESTED_EVMCS; if (have_evmcs) { - test_nested_state_expect_einval(vm, state); - vcpu_enable_evmcs(vm, VCPU_ID); + test_nested_state_expect_einval(vcpu, state); + vcpu_enable_evmcs(vcpu); } - test_nested_state(vm, state); + test_nested_state(vcpu, state); /* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */ state->hdr.vmx.smm.flags = 1; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* Invalid flags are rejected. */ set_default_vmx_state(state, state_sz); state->hdr.vmx.flags = ~0; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */ set_default_vmx_state(state, state_sz); state->hdr.vmx.vmxon_pa = -1ull; state->flags = 0; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* It is invalid to have vmxon_pa set to a non-page aligned address. */ set_default_vmx_state(state, state_sz); state->hdr.vmx.vmxon_pa = 1; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* * It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and @@ -170,7 +169,7 @@ void test_vmx_nested_state(struct kvm_vm *vm) state->flags = KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING; state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* * It is invalid to have any of the SMM flags set besides: @@ -180,13 +179,13 @@ void test_vmx_nested_state(struct kvm_vm *vm) set_default_vmx_state(state, state_sz); state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON); - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* Outside SMM, SMM flags must be zero. */ set_default_vmx_state(state, state_sz); state->flags = 0; state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* * Size must be large enough to fit kvm_nested_state and vmcs12 @@ -195,13 +194,13 @@ void test_vmx_nested_state(struct kvm_vm *vm) set_default_vmx_state(state, state_sz); state->size = sizeof(*state); state->flags = 0; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); set_default_vmx_state(state, state_sz); state->size = sizeof(*state); state->flags = 0; state->hdr.vmx.vmcs12_pa = -1; - test_nested_state(vm, state); + test_nested_state(vcpu, state); /* * KVM_SET_NESTED_STATE succeeds with invalid VMCS @@ -209,7 +208,7 @@ void test_vmx_nested_state(struct kvm_vm *vm) */ set_default_vmx_state(state, state_sz); state->flags = 0; - test_nested_state(vm, state); + test_nested_state(vcpu, state); /* Invalid flags are rejected, even if no VMCS loaded. */ set_default_vmx_state(state, state_sz); @@ -217,13 +216,13 @@ void test_vmx_nested_state(struct kvm_vm *vm) state->flags = 0; state->hdr.vmx.vmcs12_pa = -1; state->hdr.vmx.flags = ~0; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* vmxon_pa cannot be the same address as vmcs_pa. */ set_default_vmx_state(state, state_sz); state->hdr.vmx.vmxon_pa = 0; state->hdr.vmx.vmcs12_pa = 0; - test_nested_state_expect_einval(vm, state); + test_nested_state_expect_einval(vcpu, state); /* * Test that if we leave nesting the state reflects that when we get @@ -233,8 +232,8 @@ void test_vmx_nested_state(struct kvm_vm *vm) state->hdr.vmx.vmxon_pa = -1ull; state->hdr.vmx.vmcs12_pa = -1ull; state->flags = 0; - test_nested_state(vm, state); - vcpu_nested_state_get(vm, VCPU_ID, state); + test_nested_state(vcpu, state); + vcpu_nested_state_get(vcpu, state); TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz, "Size must be between %ld and %d. The size returned was %d.", sizeof(*state), state_sz, state->size); @@ -244,54 +243,36 @@ void test_vmx_nested_state(struct kvm_vm *vm) free(state); } -void disable_vmx(struct kvm_vm *vm) -{ - struct kvm_cpuid2 *cpuid = kvm_get_supported_cpuid(); - int i; - - for (i = 0; i < cpuid->nent; ++i) - if (cpuid->entries[i].function == 1 && - cpuid->entries[i].index == 0) - break; - TEST_ASSERT(i != cpuid->nent, "CPUID function 1 not found"); - - cpuid->entries[i].ecx &= ~CPUID_VMX; - vcpu_set_cpuid(vm, VCPU_ID, cpuid); - cpuid->entries[i].ecx |= CPUID_VMX; -} - int main(int argc, char *argv[]) { struct kvm_vm *vm; struct kvm_nested_state state; + struct kvm_vcpu *vcpu; have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS); - if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) { - print_skip("KVM_CAP_NESTED_STATE not available"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); /* * AMD currently does not implement set_nested_state, so for now we * just early out. */ - nested_vmx_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - vm = vm_create_default(VCPU_ID, 0, 0); + vm = vm_create_with_one_vcpu(&vcpu, NULL); /* * First run tests with VMX disabled to check error handling. */ - disable_vmx(vm); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX); /* Passing a NULL kvm_nested_state causes a EFAULT. */ - test_nested_state_expect_efault(vm, NULL); + test_nested_state_expect_efault(vcpu, NULL); /* 'size' cannot be smaller than sizeof(kvm_nested_state). */ set_default_state(&state); state.size = 0; - test_nested_state_expect_einval(vm, &state); + test_nested_state_expect_einval(vcpu, &state); /* * Setting the flags 0xf fails the flags check. The only flags that @@ -302,7 +283,7 @@ int main(int argc, char *argv[]) */ set_default_state(&state); state.flags = 0xf; - test_nested_state_expect_einval(vm, &state); + test_nested_state_expect_einval(vcpu, &state); /* * If KVM_STATE_NESTED_RUN_PENDING is set then @@ -310,9 +291,9 @@ int main(int argc, char *argv[]) */ set_default_state(&state); state.flags = KVM_STATE_NESTED_RUN_PENDING; - test_nested_state_expect_einval(vm, &state); + test_nested_state_expect_einval(vcpu, &state); - test_vmx_nested_state(vm); + test_vmx_nested_state(vcpu); kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c index 19b35c607dc6..5943187e8594 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c @@ -32,8 +32,6 @@ #define MSR_IA32_TSC_ADJUST 0x3b #endif -#define VCPU_ID 5 - #define TSC_ADJUST_VALUE (1ll << 32) #define TSC_OFFSET_VALUE -(1ll << 48) @@ -127,28 +125,29 @@ static void report(int64_t val) int main(int argc, char *argv[]) { vm_vaddr_t vmx_pages_gva; + struct kvm_vcpu *vcpu; - nested_vmx_check_supported(); + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); - vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code); /* Allocate VMX pages and shared descriptors (vmx_pages). */ vcpu_alloc_vmx(vm, &vmx_pages_gva); - vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); + vcpu_args_set(vcpu, 1, vmx_pages_gva); for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s", (const char *)uc.args[0]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: report(uc.args[1]); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c index afbbc40df884..3d272d7f961e 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c @@ -39,9 +39,6 @@ /* Default delay between migrate_pages calls (microseconds) */ #define DEFAULT_DELAY_USECS 500000 -#define HALTER_VCPU_ID 0 -#define SENDER_VCPU_ID 1 - /* * Vector for IPI from sender vCPU to halting vCPU. * Value is arbitrary and was chosen for the alternating bit pattern. Any @@ -79,8 +76,7 @@ struct test_data_page { struct thread_params { struct test_data_page *data; - struct kvm_vm *vm; - uint32_t vcpu_id; + struct kvm_vcpu *vcpu; uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */ }; @@ -198,6 +194,7 @@ static void sender_guest_code(struct test_data_page *data) static void *vcpu_thread(void *arg) { struct thread_params *params = (struct thread_params *)arg; + struct kvm_vcpu *vcpu = params->vcpu; struct ucall uc; int old; int r; @@ -206,17 +203,17 @@ static void *vcpu_thread(void *arg) r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old); TEST_ASSERT(r == 0, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d", - params->vcpu_id, r); + vcpu->id, r); - fprintf(stderr, "vCPU thread running vCPU %u\n", params->vcpu_id); - vcpu_run(params->vm, params->vcpu_id); - exit_reason = vcpu_state(params->vm, params->vcpu_id)->exit_reason; + fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id); + vcpu_run(vcpu); + exit_reason = vcpu->run->exit_reason; TEST_ASSERT(exit_reason == KVM_EXIT_IO, "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO", - params->vcpu_id, exit_reason, exit_reason_str(exit_reason)); + vcpu->id, exit_reason, exit_reason_str(exit_reason)); - if (get_ucall(params->vm, params->vcpu_id, &uc) == UCALL_ABORT) { + if (get_ucall(vcpu, &uc) == UCALL_ABORT) { TEST_ASSERT(false, "vCPU %u exited with error: %s.\n" "Sending vCPU sent %lu IPIs to halting vCPU\n" @@ -224,7 +221,7 @@ static void *vcpu_thread(void *arg) "Halter TPR=%#x PPR=%#x LVR=%#x\n" "Migrations attempted: %lu\n" "Migrations completed: %lu\n", - params->vcpu_id, (const char *)uc.args[0], + vcpu->id, (const char *)uc.args[0], params->data->ipis_sent, params->data->hlt_count, params->data->wake_count, *params->pipis_rcvd, params->data->halter_tpr, @@ -236,7 +233,7 @@ static void *vcpu_thread(void *arg) return NULL; } -static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id) +static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu) { void *retval; int r; @@ -244,12 +241,12 @@ static void cancel_join_vcpu_thread(pthread_t thread, uint32_t vcpu_id) r = pthread_cancel(thread); TEST_ASSERT(r == 0, "pthread_cancel on vcpu_id=%d failed with errno=%d", - vcpu_id, r); + vcpu->id, r); r = pthread_join(thread, &retval); TEST_ASSERT(r == 0, "pthread_join on vcpu_id=%d failed with errno=%d", - vcpu_id, r); + vcpu->id, r); TEST_ASSERT(retval == PTHREAD_CANCELED, "expected retval=%p, got %p", PTHREAD_CANCELED, retval); @@ -415,34 +412,30 @@ int main(int argc, char *argv[]) if (delay_usecs <= 0) delay_usecs = DEFAULT_DELAY_USECS; - vm = vm_create_default(HALTER_VCPU_ID, 0, halter_guest_code); - params[0].vm = vm; - params[1].vm = vm; + vm = vm_create_with_one_vcpu(¶ms[0].vcpu, halter_guest_code); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, HALTER_VCPU_ID); + vcpu_init_descriptor_tables(params[0].vcpu); vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - vm_vcpu_add_default(vm, SENDER_VCPU_ID, sender_guest_code); + params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code); test_data_page_vaddr = vm_vaddr_alloc_page(vm); - data = - (struct test_data_page *)addr_gva2hva(vm, test_data_page_vaddr); + data = addr_gva2hva(vm, test_data_page_vaddr); memset(data, 0, sizeof(*data)); params[0].data = data; params[1].data = data; - vcpu_args_set(vm, HALTER_VCPU_ID, 1, test_data_page_vaddr); - vcpu_args_set(vm, SENDER_VCPU_ID, 1, test_data_page_vaddr); + vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr); + vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr); pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd); params[0].pipis_rcvd = pipis_rcvd; params[1].pipis_rcvd = pipis_rcvd; /* Start halter vCPU thread and wait for it to execute first HLT. */ - params[0].vcpu_id = HALTER_VCPU_ID; r = pthread_create(&threads[0], NULL, vcpu_thread, ¶ms[0]); TEST_ASSERT(r == 0, "pthread_create halter failed errno=%d", errno); @@ -462,7 +455,6 @@ int main(int argc, char *argv[]) "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n", data->halter_apic_id, wait_secs); - params[1].vcpu_id = SENDER_VCPU_ID; r = pthread_create(&threads[1], NULL, vcpu_thread, ¶ms[1]); TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno); @@ -478,8 +470,8 @@ int main(int argc, char *argv[]) /* * Cancel threads and wait for them to stop. */ - cancel_join_vcpu_thread(threads[0], HALTER_VCPU_ID); - cancel_join_vcpu_thread(threads[1], SENDER_VCPU_ID); + cancel_join_vcpu_thread(threads[0], params[0].vcpu); + cancel_join_vcpu_thread(threads[1], params[1].vcpu); fprintf(stderr, "Test successful after running for %d seconds.\n" diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 0792334ba243..6f7a5ef66718 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -11,8 +11,8 @@ #include "processor.h" #include "test_util.h" -struct kvm_vcpu { - uint32_t id; +struct xapic_vcpu { + struct kvm_vcpu *vcpu; bool is_x2apic; }; @@ -47,8 +47,9 @@ static void x2apic_guest_code(void) } while (1); } -static void ____test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val) +static void ____test_icr(struct xapic_vcpu *x, uint64_t val) { + struct kvm_vcpu *vcpu = x->vcpu; struct kvm_lapic_state xapic; struct ucall uc; uint64_t icr; @@ -58,40 +59,55 @@ static void ____test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val) * all bits are valid and should not be modified by KVM (ignoring the * fact that vectors 0-15 are technically illegal). */ - vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic); + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); *((u32 *)&xapic.regs[APIC_IRR]) = val; *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32; - vcpu_ioctl(vm, vcpu->id, KVM_SET_LAPIC, &xapic); + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); - vcpu_run(vm, vcpu->id); - ASSERT_EQ(get_ucall(vm, vcpu->id, &uc), UCALL_SYNC); + vcpu_run(vcpu); + ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); ASSERT_EQ(uc.args[1], val); - vcpu_ioctl(vm, vcpu->id, KVM_GET_LAPIC, &xapic); + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; - if (!vcpu->is_x2apic) + if (!x->is_x2apic) { val &= (-1u | (0xffull << (32 + 24))); - ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); + ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); + } else { + ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + } } -static void __test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu, uint64_t val) +#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ + GENMASK_ULL(17,16) | \ + GENMASK_ULL(13,13)) + +static void __test_icr(struct xapic_vcpu *x, uint64_t val) { - ____test_icr(vm, vcpu, val | APIC_ICR_BUSY); - ____test_icr(vm, vcpu, val & ~(u64)APIC_ICR_BUSY); + if (x->is_x2apic) { + /* Hardware writing vICR register requires reserved bits 31:20, + * 17:16 and 13 kept as zero to avoid #GP exception. Data value + * written to vICR should mask out those bits above. + */ + val &= ~X2APIC_RSVED_BITS_MASK; + } + ____test_icr(x, val | APIC_ICR_BUSY); + ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); } -static void test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +static void test_icr(struct xapic_vcpu *x) { + struct kvm_vcpu *vcpu = x->vcpu; uint64_t icr, i, j; icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED; for (i = 0; i <= 0xff; i++) - __test_icr(vm, vcpu, icr | i); + __test_icr(x, icr | i); icr = APIC_INT_ASSERT | APIC_DM_FIXED; for (i = 0; i <= 0xff; i++) - __test_icr(vm, vcpu, icr | i); + __test_icr(x, icr | i); /* * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of @@ -100,32 +116,30 @@ static void test_icr(struct kvm_vm *vm, struct kvm_vcpu *vcpu) icr = APIC_INT_ASSERT | 0xff; for (i = vcpu->id + 1; i < 0xff; i++) { for (j = 0; j < 8; j++) - __test_icr(vm, vcpu, i << (32 + 24) | APIC_INT_ASSERT | (j << 8)); + __test_icr(x, i << (32 + 24) | icr | (j << 8)); } /* And again with a shorthand destination for all types of IPIs. */ icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT; for (i = 0; i < 8; i++) - __test_icr(vm, vcpu, icr | (i << 8)); + __test_icr(x, icr | (i << 8)); /* And a few garbage value, just make sure it's an IRQ (blocked). */ - __test_icr(vm, vcpu, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK); - __test_icr(vm, vcpu, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK); - __test_icr(vm, vcpu, -1ull & ~APIC_DM_FIXED_MASK); + __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK); + __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK); + __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK); } int main(int argc, char *argv[]) { - struct kvm_vcpu vcpu = { - .id = 0, + struct xapic_vcpu x = { + .vcpu = NULL, .is_x2apic = true, }; - struct kvm_cpuid2 *cpuid; struct kvm_vm *vm; - int i; - vm = vm_create_default(vcpu.id, 0, x2apic_guest_code); - test_icr(vm, &vcpu); + vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code); + test_icr(&x); kvm_vm_free(vm); /* @@ -133,18 +147,12 @@ int main(int argc, char *argv[]) * the guest in order to test AVIC. KVM disallows changing CPUID after * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC. */ - vm = vm_create_default(vcpu.id, 0, xapic_guest_code); - vcpu.is_x2apic = false; + vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); + x.is_x2apic = false; - cpuid = vcpu_get_cpuid(vm, vcpu.id); - for (i = 0; i < cpuid->nent; i++) { - if (cpuid->entries[i].function == 1) - break; - } - cpuid->entries[i].ecx &= ~BIT(21); - vcpu_set_cpuid(vm, vcpu.id, cpuid); + vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); - test_icr(vm, &vcpu); + test_icr(&x); kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 7a51bb648fbb..8a5cb800f50e 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -18,8 +18,6 @@ #include <sys/eventfd.h> -#define VCPU_ID 5 - #define SHINFO_REGION_GVA 0xc0000000ULL #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 @@ -42,8 +40,6 @@ #define EVTCHN_TEST2 66 #define EVTCHN_TIMER 13 -static struct kvm_vm *vm; - #define XEN_HYPERCALL_MSR 0x40000000 #define MIN_STEAL_TIME 50000 @@ -344,29 +340,29 @@ static int cmp_timespec(struct timespec *a, struct timespec *b) else return 0; } -struct vcpu_info *vinfo; + +static struct vcpu_info *vinfo; +static struct kvm_vcpu *vcpu; static void handle_alrm(int sig) { if (vinfo) printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending); - vcpu_dump(stdout, vm, VCPU_ID, 0); + vcpu_dump(stdout, vcpu, 0); TEST_FAIL("IRQ delivery timed out"); } int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; + struct kvm_vm *vm; bool verbose; verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) || !strncmp(argv[1], "--verbose", 10)); int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); - if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { - print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO); bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL); @@ -374,8 +370,7 @@ int main(int argc, char *argv[]) clock_gettime(CLOCK_REALTIME, &min_ts); - vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); - vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, @@ -425,13 +420,13 @@ int main(int argc, char *argv[]) .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, .u.gpa = VCPU_INFO_ADDR, }; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &vi); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi); struct kvm_xen_vcpu_attr pvclock = { .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, .u.gpa = PVTIME_ADDR, }; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock); struct kvm_xen_hvm_attr vec = { .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR, @@ -440,7 +435,7 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec); vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); + vcpu_init_descriptor_tables(vcpu); vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler); if (do_runstate_tests) { @@ -448,7 +443,7 @@ int main(int argc, char *argv[]) .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, .u.gpa = RUNSTATE_ADDR, }; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st); } int irq_fd[2] = { -1, -1 }; @@ -468,16 +463,16 @@ int main(int argc, char *argv[]) irq_routes.entries[0].gsi = 32; irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN; irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1; - irq_routes.entries[0].u.xen_evtchn.vcpu = VCPU_ID; + irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id; irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; irq_routes.entries[1].gsi = 33; irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN; irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2; - irq_routes.entries[1].u.xen_evtchn.vcpu = VCPU_ID; + irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id; irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; - vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes); + vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); struct kvm_irqfd ifd = { }; @@ -508,14 +503,14 @@ int main(int argc, char *argv[]) .u.evtchn.type = EVTCHNSTAT_interdomain, .u.evtchn.flags = 0, .u.evtchn.deliver.port.port = EVTCHN_TEST1, - .u.evtchn.deliver.port.vcpu = VCPU_ID + 1, + .u.evtchn.deliver.port.vcpu = vcpu->id + 1, .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, }; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); /* Test migration to a different vCPU */ inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE; - inj.u.evtchn.deliver.port.vcpu = VCPU_ID; + inj.u.evtchn.deliver.port.vcpu = vcpu->id; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); inj.u.evtchn.send_port = 197; @@ -524,7 +519,7 @@ int main(int argc, char *argv[]) inj.u.evtchn.flags = 0; vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj); - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); } vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR); vinfo->evtchn_upcall_pending = 0; @@ -535,19 +530,19 @@ int main(int argc, char *argv[]) bool evtchn_irq_expected = false; for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s", (const char *)uc.args[0]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: { struct kvm_xen_vcpu_attr rst; @@ -574,7 +569,7 @@ int main(int argc, char *argv[]) printf("Testing runstate %s\n", runstate_names[uc.args[1]]); rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; rst.u.runstate.state = uc.args[1]; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); break; case 4: @@ -589,7 +584,7 @@ int main(int argc, char *argv[]) 0x6b6b - rs->time[RUNSTATE_offline]; rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - rst.u.runstate.time_offline; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); break; case 5: @@ -601,7 +596,7 @@ int main(int argc, char *argv[]) rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; rst.u.runstate.time_blocked = 0x6b6b; rst.u.runstate.time_offline = 0x5a; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst); break; case 6: @@ -660,7 +655,7 @@ int main(int argc, char *argv[]) struct kvm_irq_routing_xen_evtchn e; e.port = EVTCHN_TEST2; - e.vcpu = VCPU_ID; + e.vcpu = vcpu->id; e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e); @@ -702,7 +697,7 @@ int main(int argc, char *argv[]) case 14: memset(&tmr, 0, sizeof(tmr)); tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER, "Timer port not returned"); TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, @@ -721,8 +716,8 @@ int main(int argc, char *argv[]) if (verbose) printf("Testing restored oneshot timer\n"); - tmr.u.timer.expires_ns = rs->state_entry_time + 100000000, - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); evtchn_irq_expected = true; alarm(1); break; @@ -748,8 +743,8 @@ int main(int argc, char *argv[]) if (verbose) printf("Testing SCHEDOP_poll wake on masked event\n"); - tmr.u.timer.expires_ns = rs->state_entry_time + 100000000, - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); alarm(1); break; @@ -760,11 +755,11 @@ int main(int argc, char *argv[]) evtchn_irq_expected = true; tmr.u.timer.expires_ns = rs->state_entry_time + 100000000; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); /* Read it back and check the pending time is reported correctly */ tmr.u.timer.expires_ns = 0; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000, "Timer not reported pending"); alarm(1); @@ -774,7 +769,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(!evtchn_irq_expected, "Expected event channel IRQ but it didn't happen"); /* Read timer and check it is no longer pending */ - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &tmr); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr); TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending"); shinfo->evtchn_pending[0] = 0; @@ -783,7 +778,7 @@ int main(int argc, char *argv[]) evtchn_irq_expected = true; tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &tmr); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr); alarm(1); break; @@ -853,7 +848,7 @@ int main(int argc, char *argv[]) struct kvm_xen_vcpu_attr rst = { .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, }; - vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); + vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst); if (verbose) { printf("Runstate: %s(%d), entry %" PRIu64 " ns\n", diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index b30fe9de1d4f..88914d48c65e 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -11,13 +11,9 @@ #include "kvm_util.h" #include "processor.h" -#define VCPU_ID 5 - #define HCALL_REGION_GPA 0xc0000000ULL #define HCALL_REGION_SLOT 10 -static struct kvm_vm *vm; - #define INPUTVALUE 17 #define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x) #define RETVALUE 0xcafef00dfbfbffffUL @@ -84,14 +80,15 @@ static void guest_code(void) int main(int argc, char *argv[]) { - if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & - KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) ) { - print_skip("KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL not available"); - exit(KSFT_SKIP); - } + unsigned int xen_caps; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL); - vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); - vcpu_set_hv_cpuid(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_set_hv_cpuid(vcpu); struct kvm_xen_hvm_config hvmc = { .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, @@ -105,10 +102,10 @@ int main(int argc, char *argv[]) virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2); for (;;) { - volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + volatile struct kvm_run *run = vcpu->run; struct ucall uc; - vcpu_run(vm, VCPU_ID); + vcpu_run(vcpu); if (run->exit_reason == KVM_EXIT_XEN) { ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); @@ -130,9 +127,9 @@ int main(int argc, char *argv[]) run->exit_reason, exit_reason_str(run->exit_reason)); - switch (get_ucall(vm, VCPU_ID, &uc)) { + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - TEST_FAIL("%s", (const char *)uc.args[0]); + REPORT_GUEST_ASSERT(uc); /* NOT REACHED */ case UCALL_SYNC: break; diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c index 3529376747c2..e0ddf47362e7 100644 --- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c +++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c @@ -12,64 +12,44 @@ #include "kvm_util.h" #include "vmx.h" -#define VCPU_ID 1 #define MSR_BITS 64 -#define X86_FEATURE_XSAVES (1<<3) - -bool is_supported_msr(u32 msr_index) -{ - struct kvm_msr_list *list; - bool found = false; - int i; - - list = kvm_get_msr_index_list(); - for (i = 0; i < list->nmsrs; ++i) { - if (list->indices[i] == msr_index) { - found = true; - break; - } - } - - free(list); - return found; -} - int main(int argc, char *argv[]) { - struct kvm_cpuid_entry2 *entry; - bool xss_supported = false; + bool xss_in_msr_list; struct kvm_vm *vm; + struct kvm_vcpu *vcpu; uint64_t xss_val; int i, r; /* Create VM */ - vm = vm_create_default(VCPU_ID, 0, 0); + vm = vm_create_with_one_vcpu(&vcpu, NULL); - if (kvm_get_cpuid_max_basic() >= 0xd) { - entry = kvm_get_supported_cpuid_index(0xd, 1); - xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES); - } - if (!xss_supported) { - print_skip("IA32_XSS is not supported by the vCPU"); - exit(KSFT_SKIP); - } + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES)); - xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS); + xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS); TEST_ASSERT(xss_val == 0, "MSR_IA32_XSS should be initialized to zero\n"); - vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val); + vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val); + /* * At present, KVM only supports a guest IA32_XSS value of 0. Verify * that trying to set the guest IA32_XSS to an unsupported value fails. * Also, in the future when a non-zero value succeeds check that - * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST. + * IA32_XSS is in the list of MSRs to save/restore. */ + xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS); for (i = 0; i < MSR_BITS; ++i) { - r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i); - TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS), - "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n"); + r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i); + + /* + * Setting a list of MSRs returns the entry that "faulted", or + * the last entry +1 if all MSRs were successfully written. + */ + TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); + TEST_ASSERT(r != 1 || xss_in_msr_list, + "IA32_XSS was able to be set, but was not in save/restore list"); } kvm_vm_free(vm); diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index 0b0049e133bb..a6959df28eb0 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -8,17 +8,11 @@ TEST_GEN_PROGS := $(src_test:.c=) TEST_GEN_PROGS_EXTENDED := true -KSFT_KHDR_INSTALL := 1 OVERRIDE_TARGETS := 1 include ../lib.mk -khdr_dir = $(top_srcdir)/usr/include - -$(khdr_dir)/linux/landlock.h: khdr - @: - $(OUTPUT)/true: true.c $(LINK.c) $< $(LDLIBS) -o $@ -static -$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h - $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir) +$(OUTPUT)/%_test: %_test.c ../kselftest_harness.h common.h + $(LINK.c) $< $(LDLIBS) -o $@ -lcap diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 2a2d240cdc1b..947fc72413e9 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -7,10 +7,31 @@ else ifneq ($(filter -%,$(LLVM)),) LLVM_SUFFIX := $(LLVM) endif -CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) +CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi +CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu +CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl +CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu +CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu +CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu +CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu +CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu +CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu +CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH)) + +ifeq ($(CROSS_COMPILE),) +ifeq ($(CLANG_TARGET_FLAGS),) +$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk +else +CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) +endif # CLANG_TARGET_FLAGS +else +CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif # CROSS_COMPILE + +CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as else CC := $(CROSS_COMPILE)gcc -endif +endif # LLVM ifeq (0,$(MAKELEVEL)) ifeq ($(OUTPUT),) @@ -30,45 +51,7 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -ifdef KSFT_KHDR_INSTALL -top_srcdir ?= ../../../.. -include $(top_srcdir)/scripts/subarch.include -ARCH ?= $(SUBARCH) - -# set default goal to all, so make without a target runs all, even when -# all isn't the first target in the file. -.DEFAULT_GOAL := all - -# Invoke headers install with --no-builtin-rules to avoid circular -# dependency in "make kselftest" case. In this case, second level -# make inherits builtin-rules which will use the rule generate -# Makefile.o and runs into -# "Circular Makefile.o <- prepare dependency dropped." -# and headers_install fails and test compile fails. -# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile -# invokes them as sub-makes and --no-builtin-rules is not necessary, -# but doesn't cause any failures. Keep it simple and use the same -# flags in both cases. -# Note that the support to install headers from lib.mk is necessary -# when test Makefile is run directly with "make -C". -# When local build is done, headers are installed in the default -# INSTALL_HDR_PATH usr/include. -.PHONY: khdr -.NOTPARALLEL: -khdr: -ifndef KSFT_KHDR_INSTALL_DONE -ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install -else - $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \ - ARCH=$(ARCH) -C $(top_srcdir) headers_install -endif -endif - -all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) -else all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) -endif define RUN_TESTS BASE_DIR="$(selfdir)"; \ diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index a29f79618934..892306bdb47d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -36,4 +36,6 @@ test_unix_oob gro ioam6_parser toeplitz +tun cmsg_sender +unix_connect
\ No newline at end of file diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7ea54af55490..e2dfef8b78a7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -11,7 +11,7 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh +TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh @@ -35,9 +35,12 @@ TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh +TEST_PROGS += srv6_hencap_red_l3vpn_test.sh +TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS += ndisc_unsolicited_na_test.sh +TEST_PROGS += arp_ndisc_untracked_subnets.sh TEST_PROGS += stress_reuseport_listen.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh @@ -54,15 +57,15 @@ TEST_GEN_FILES += ipsec TEST_GEN_FILES += ioam6_parser TEST_GEN_FILES += gro TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun TEST_GEN_FILES += toeplitz TEST_GEN_FILES += cmsg_sender TEST_GEN_FILES += stress_reuseport_listen TEST_PROGS += test_vxlan_vnifiltering.sh +TEST_GEN_FILES += io_uring_zerocopy_tx TEST_FILES := settings -KSFT_KHDR_INSTALL := 1 include ../lib.mk include bpf/Makefile diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index df341648f818..969620ae9928 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,2 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob +TEST_GEN_PROGS := test_unix_oob unix_connect + include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c new file mode 100644 index 000000000000..d799fd8f5c7c --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connect.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <sched.h> + +#include <stddef.h> +#include <stdio.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(unix_connect) +{ + int server, client; + int family; +}; + +FIXTURE_VARIANT(unix_connect) +{ + int type; + char sun_path[8]; + int len; + int flags; + int err; +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns) +{ + .type = SOCK_STREAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns) +{ + .type = SOCK_STREAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = 0, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "test", + .len = 4 + 1, + .flags = CLONE_NEWNET, + .err = 0, +}; + +FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns) +{ + .type = SOCK_DGRAM, + .sun_path = "\0test", + .len = 5, + .flags = CLONE_NEWNET, + .err = ECONNREFUSED, +}; + +FIXTURE_SETUP(unix_connect) +{ + self->family = AF_UNIX; +} + +FIXTURE_TEARDOWN(unix_connect) +{ + close(self->server); + close(self->client); + + if (variant->sun_path[0]) + remove("test"); +} + +TEST_F(unix_connect, test) +{ + socklen_t addrlen; + struct sockaddr_un addr = { + .sun_family = self->family, + }; + int err; + + self->server = socket(self->family, variant->type, 0); + ASSERT_NE(-1, self->server); + + addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len; + memcpy(&addr.sun_path, variant->sun_path, variant->len); + + err = bind(self->server, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(0, err); + + if (variant->type == SOCK_STREAM) { + err = listen(self->server, 32); + ASSERT_EQ(0, err); + } + + err = unshare(variant->flags); + ASSERT_EQ(0, err); + + self->client = socket(self->family, variant->type, 0); + ASSERT_LT(0, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, addrlen); + ASSERT_EQ(variant->err, err == -1 ? errno : 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh new file mode 100755 index 000000000000..c899b446acb6 --- /dev/null +++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh @@ -0,0 +1,308 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# 2 namespaces: one host and one router. Use arping from the host to send a +# garp to the router. Router accepts or ignores based on its arp_accept +# or accept_untracked_na configuration. + +TESTS="arp ndisc" + +ROUTER_NS="ns-router" +ROUTER_NS_V6="ns-router-v6" +ROUTER_INTF="veth-router" +ROUTER_ADDR="10.0.10.1" +ROUTER_ADDR_V6="2001:db8:abcd:0012::1" + +HOST_NS="ns-host" +HOST_NS_V6="ns-host-v6" +HOST_INTF="veth-host" +HOST_ADDR="10.0.10.2" +HOST_ADDR_V6="2001:db8:abcd:0012::2" + +SUBNET_WIDTH=24 +PREFIX_WIDTH_V6=64 + +cleanup() { + ip netns del ${HOST_NS} + ip netns del ${ROUTER_NS} +} + +cleanup_v6() { + ip netns del ${HOST_NS_V6} + ip netns del ${ROUTER_NS_V6} +} + +setup() { + set -e + local arp_accept=$1 + + # Set up two namespaces + ip netns add ${ROUTER_NS} + ip netns add ${HOST_NS} + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host. + # first, set up the inteface's link to the namespace + # then, set the interface "up" + ip netns exec ${ROUTER_NS} ip link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip netns exec ${ROUTER_NS} ip link set dev ${ROUTER_INTF} up + ip netns exec ${ROUTER_NS} ip link set dev ${HOST_INTF} netns ${HOST_NS} + + ip netns exec ${HOST_NS} ip link set dev ${HOST_INTF} up + ip netns exec ${ROUTER_NS} ip addr add ${ROUTER_ADDR}/${SUBNET_WIDTH} \ + dev ${ROUTER_INTF} + + ip netns exec ${HOST_NS} ip addr add ${HOST_ADDR}/${SUBNET_WIDTH} \ + dev ${HOST_INTF} + ip netns exec ${HOST_NS} ip route add default via ${HOST_ADDR} \ + dev ${HOST_INTF} + ip netns exec ${ROUTER_NS} ip route add default via ${ROUTER_ADDR} \ + dev ${ROUTER_INTF} + + ROUTER_CONF=net.ipv4.conf.${ROUTER_INTF} + ip netns exec ${ROUTER_NS} sysctl -w \ + ${ROUTER_CONF}.arp_accept=${arp_accept} >/dev/null 2>&1 + set +e +} + +setup_v6() { + set -e + local accept_untracked_na=$1 + + # Set up two namespaces + ip netns add ${ROUTER_NS_V6} + ip netns add ${HOST_NS_V6} + + # Set up interfaces veth0 and veth1, which are pairs in separate + # namespaces. veth0 is veth-router, veth1 is veth-host. + # first, set up the inteface's link to the namespace + # then, set the interface "up" + ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \ + type veth peer name ${HOST_INTF} + + ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up + ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \ + ${HOST_NS_V6} + + ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up + ip -6 -netns ${ROUTER_NS_V6} addr add \ + ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad + + HOST_CONF=net.ipv6.conf.${HOST_INTF} + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1 + ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0 + ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \ + dev ${HOST_INTF} + + ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF} + + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.drop_unsolicited_na=0 >/dev/null 2>&1 + ip netns exec ${ROUTER_NS_V6} sysctl -w \ + ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \ + >/dev/null 2>&1 + set +e +} + +verify_arp() { + local arp_accept=$1 + local same_subnet=$2 + + neigh_show_output=$(ip netns exec ${ROUTER_NS} ip neigh get \ + ${HOST_ADDR} dev ${ROUTER_INTF} 2>/dev/null) + + if [ ${arp_accept} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + elif [ ${arp_accept} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + # Neighbor entries expected + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + [[ -z "${neigh_show_output}" ]] + fi + } + +arp_test_gratuitous() { + set -e + local arp_accept=$1 + local same_subnet=$2 + + if [ ${arp_accept} -eq 2 ]; then + test_msg=("test_arp: " + "accept_arp=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + HOST_ADDR=10.0.11.3 + else + HOST_ADDR=10.0.10.3 + fi + else + test_msg=("test_arp: " + "accept_arp=$1") + fi + # Supply arp_accept option to set up which sets it in sysctl + setup ${arp_accept} + ip netns exec ${HOST_NS} arping -A -U ${HOST_ADDR} -c1 2>&1 >/dev/null + + if verify_arp $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + cleanup + set +e +} + +arp_test_gratuitous_combinations() { + arp_test_gratuitous 0 + arp_test_gratuitous 1 + arp_test_gratuitous 2 0 # Second entry indicates subnet or not + arp_test_gratuitous 2 1 +} + +cleanup_tcpdump() { + set -e + [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout} + [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr} + tcpdump_stdout= + tcpdump_stderr= + set +e +} + +start_tcpdump() { + set -e + tcpdump_stdout=`mktemp` + tcpdump_stderr=`mktemp` + ip netns exec ${ROUTER_NS_V6} timeout 15s \ + tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \ + "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \ + > ${tcpdump_stdout} 2> /dev/null + set +e +} + +verify_ndisc() { + local accept_untracked_na=$1 + local same_subnet=$2 + + neigh_show_output=$(ip -6 -netns ${ROUTER_NS_V6} neigh show \ + to ${HOST_ADDR_V6} dev ${ROUTER_INTF} nud stale) + + if [ ${accept_untracked_na} -eq 1 ]; then + # Neighbour entry expected to be present + [[ ${neigh_show_output} ]] + elif [ ${accept_untracked_na} -eq 2 ]; then + if [ ${same_subnet} -eq 1 ]; then + [[ ${neigh_show_output} ]] + else + [[ -z "${neigh_show_output}" ]] + fi + else + # Neighbour entry expected to be absent for all other cases + [[ -z "${neigh_show_output}" ]] + fi +} + +ndisc_test_untracked_advertisements() { + set -e + test_msg=("test_ndisc: " + "accept_untracked_na=$1") + + local accept_untracked_na=$1 + local same_subnet=$2 + if [ ${accept_untracked_na} -eq 2 ]; then + test_msg=("test_ndisc: " + "accept_untracked_na=$1 " + "same_subnet=$2") + if [ ${same_subnet} -eq 0 ]; then + # Not same subnet + HOST_ADDR_V6=2000:db8:abcd:0013::4 + else + HOST_ADDR_V6=2001:db8:abcd:0012::3 + fi + fi + setup_v6 $1 $2 + start_tcpdump + + if verify_ndisc $1 $2; then + printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}" + else + printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}" + fi + + cleanup_tcpdump + cleanup_v6 + set +e +} + +ndisc_test_untracked_combinations() { + ndisc_test_untracked_advertisements 0 + ndisc_test_untracked_advertisements 1 + ndisc_test_untracked_advertisements 2 0 + ndisc_test_untracked_advertisements 2 1 +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v arping)" ]; then + echo "SKIP: Could not run test without arping tool" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null +cleanup_v6 &> /dev/null + +for t in $TESTS +do + case $t in + arp_test_gratuitous_combinations|arp) arp_test_gratuitous_combinations;; + ndisc_test_untracked_combinations|ndisc) \ + ndisc_test_untracked_combinations;; + help) echo "Test names: $TESTS"; exit 0;; +esac +done diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile index 8a69c91fcca0..8ccaf8732eb2 100644 --- a/tools/testing/selftests/net/bpf/Makefile +++ b/tools/testing/selftests/net/bpf/Makefile @@ -2,7 +2,7 @@ CLANG ?= clang CCINCLUDE += -I../../bpf -CCINCLUDE += -I../../../lib +CCINCLUDE += -I../../../../lib CCINCLUDE += -I../../../../../usr/include/ TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index bc2162909a1a..75dd83e39207 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -456,7 +456,7 @@ int main(int argc, char *argv[]) buf[1] = 0; } else if (opt.sock.type == SOCK_RAW) { struct udphdr hdr = { 1, 2, htons(opt.size), 0 }; - struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;; + struct sockaddr_in6 *sin6 = (void *)ai->ai_addr; memcpy(buf, &hdr, sizeof(hdr)); sin6->sin6_port = htons(opt.sock.proto); diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 54701c8b0cd7..03b586760164 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -70,6 +70,10 @@ NSB_LO_IP6=2001:db8:2::2 NL_IP=172.17.1.1 NL_IP6=2001:db8:4::1 +# multicast and broadcast addresses +MCAST_IP=224.0.0.1 +BCAST_IP=255.255.255.255 + MD5_PW=abc123 MD5_WRONG_PW=abc1234 @@ -308,6 +312,9 @@ addr2str() 127.0.0.1) echo "loopback";; ::1) echo "IPv6 loopback";; + ${BCAST_IP}) echo "broadcast";; + ${MCAST_IP}) echo "multicast";; + ${NSA_IP}) echo "ns-A IP";; ${NSA_IP6}) echo "ns-A IPv6";; ${NSA_LO_IP}) echo "ns-A loopback IP";; @@ -1793,12 +1800,33 @@ ipv4_addr_bind_novrf() done # - # raw socket with nonlocal bind + # tests for nonlocal bind # a=${NL_IP} log_start - run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b - log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind" + run_cmd nettest -s -R -f -l ${a} -b + log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address" + + log_start + run_cmd nettest -s -f -l ${a} -b + log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address" + + log_start + run_cmd nettest -s -D -P icmp -f -l ${a} -b + log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address" + + # + # check that ICMP sockets cannot bind to broadcast and multicast addresses + # + a=${BCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -b + log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address" + + a=${MCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -b + log_test_addr ${a} $? 1 "ICMP socket bind to multicast address" # # tcp sockets @@ -1850,13 +1878,34 @@ ipv4_addr_bind_vrf() log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind" # - # raw socket with nonlocal bind + # tests for nonlocal bind # a=${NL_IP} log_start - run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b + run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind" + log_start + run_cmd nettest -s -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind" + + log_start + run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind" + + # + # check that ICMP sockets cannot bind to broadcast and multicast addresses + # + a=${BCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind" + + a=${MCAST_IP} + log_start + run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b + log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind" + # # tcp sockets # @@ -1889,10 +1938,12 @@ ipv4_addr_bind() log_subsection "No VRF" setup + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null ipv4_addr_bind_novrf log_subsection "With VRF" setup "yes" + set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null ipv4_addr_bind_vrf } diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh new file mode 100755 index 000000000000..b7b928b38ce4 --- /dev/null +++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh @@ -0,0 +1,119 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# ns: h1 | ns: h2 +# 192.168.0.1/24 | +# eth0 | +# | 192.168.1.1/32 +# veth0 <---|---> veth1 +# Validate source address selection for route without gateway + +PAUSE_ON_FAIL=no +VERBOSE=0 +ret=0 + +################################################################################ +# helpers + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# config +setup() +{ + ip netns add h1 + ip -n h1 link set lo up + ip netns add h2 + ip -n h2 link set lo up + + # Add a fake eth0 to support an ip address + ip -n h1 link add name eth0 type dummy + ip -n h1 link set eth0 up + ip -n h1 address add 192.168.0.1/24 dev eth0 + + # Configure veths (same @mac, arp off) + ip -n h1 link add name veth0 type veth peer name veth1 netns h2 + ip -n h1 link set veth0 up + + ip -n h2 link set veth1 up + + # Configure @IP in the peer netns + ip -n h2 address add 192.168.1.1/32 dev veth1 + ip -n h2 route add default dev veth1 + + # Add a nexthop without @gw and use it in a route + ip -n h1 nexthop add id 1 dev veth0 + ip -n h1 route add 192.168.1.1 nhid 1 +} + +cleanup() +{ + ip netns del h1 2>/dev/null + ip netns del h2 2>/dev/null +} + +trap cleanup EXIT + +################################################################################ +# main + +while getopts :pv o +do + case $o in + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + esac +done + +cleanup +setup + +run_cmd ip -netns h1 route get 192.168.1.1 +log_test $? 0 "nexthop: get route with nexthop without gw" +run_cmd ip netns exec h1 ping -c1 192.168.1.1 +log_test $? 0 "nexthop: ping through nexthop without gw" + +exit $ret diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index bbe3b379927a..c245476fa29d 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -303,6 +303,29 @@ run_fibrule_tests() log_section "IPv6 fib rule" fib_rule6_test } +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 8f481218a492..a9c5c1be5088 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -3,6 +3,7 @@ TEST_PROGS = bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ + bridge_mdb_port_down.sh \ bridge_mld.sh \ bridge_port_isolation.sh \ bridge_sticky_fdb.sh \ @@ -37,6 +38,7 @@ TEST_PROGS = bridge_igmp.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ ipip_hier_gre.sh \ + local_termination.sh \ loopback.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ @@ -52,6 +54,7 @@ TEST_PROGS = bridge_igmp.sh \ mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ mirror_vlan.sh \ + no_forwarding.sh \ pedit_dsfield.sh \ pedit_ip.sh \ pedit_l4port.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh new file mode 100755 index 000000000000..1a0480e71d83 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh @@ -0,0 +1,118 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Verify that permanent mdb entries can be added to and deleted from bridge +# interfaces that are down, and works correctly when done so. + +ALL_TESTS="add_del_to_port_down" +NUM_NETIFS=4 + +TEST_GROUP="239.10.10.10" +TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +source lib.sh + + +add_del_to_port_down() { + RET=0 + + ip link set dev $swp2 down + bridge mdb add dev br0 port "$swp2" grp $TEST_GROUP permanent 2>/dev/null + check_err $? "Failed adding mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" + + ip link set dev $swp2 down + bridge mdb show dev br0 | grep -q "$TEST_GROUP permanent" 2>/dev/null + check_err $? "MDB entry did not persist after link up/down" + + bridge mdb del dev br0 port "$swp2" grp $TEST_GROUP 2>/dev/null + check_err $? "Failed deleting mdb entry" + + ip link set dev $swp2 up + setup_wait_dev $swp2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 + check_err $? "Traffic to $TEST_GROUP was forwarded after entry removed" + + log_test "MDB add/del entry to port with state down " +} + +h1_create() +{ + simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64 +} + +switch_create() +{ + # Enable multicast filtering + ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + + bridge link set dev $swp2 mcast_flood off + # Bridge currently has a "grace time" at creation time before it + # forwards multicast according to the mdb. Since we disable the + # mcast_flood setting per port + sleep 10 +} + +switch_destroy() +{ + ip link set dev $swp1 down + ip link set dev $swp2 down + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + h2_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +tests_run +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh index 4b42dfd4efd1..072faa77f53b 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh @@ -11,6 +11,8 @@ NUM_NETIFS=2 source lib.sh source ethtool_lib.sh +TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + setup_prepare() { swp1=${NETIFS[p1]} @@ -18,7 +20,7 @@ setup_prepare() swp3=$NETIF_NO_CABLE } -ethtool_extended_state_check() +ethtool_ext_state() { local dev=$1; shift local expected_ext_state=$1; shift @@ -30,21 +32,27 @@ ethtool_extended_state_check() | sed -e 's/^[[:space:]]*//') ext_state=$(echo $ext_state | cut -d "," -f1) - [[ $ext_state == $expected_ext_state ]] - check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\"" - - [[ $ext_substate == $expected_ext_substate ]] - check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + if [[ $ext_state != $expected_ext_state ]]; then + echo "Expected \"$expected_ext_state\", got \"$ext_state\"" + return 1 + fi + if [[ $ext_substate != $expected_ext_substate ]]; then + echo "Expected \"$expected_ext_substate\", got \"$ext_substate\"" + return 1 + fi } autoneg() { + local msg + RET=0 ip link set dev $swp1 up - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" "No partner detected" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected") + check_err $? "$msg" log_test "Autoneg, No partner detected" @@ -53,6 +61,8 @@ autoneg() autoneg_force_mode() { + local msg + RET=0 ip link set dev $swp1 up @@ -65,12 +75,13 @@ autoneg_force_mode() ethtool_set $swp1 speed $speed1 autoneg off ethtool_set $swp2 speed $speed2 autoneg off - sleep 4 - ethtool_extended_state_check $swp1 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" - ethtool_extended_state_check $swp2 "Autoneg" \ - "No partner detected during force mode" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \ + "Autoneg" "No partner detected during force mode") + check_err $? "$msg" log_test "Autoneg, No partner detected during force mode" @@ -83,12 +94,14 @@ autoneg_force_mode() no_cable() { + local msg + RET=0 ip link set dev $swp3 up - sleep 1 - ethtool_extended_state_check $swp3 "No cable" + msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable") + check_err $? "$msg" log_test "No cable" diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 37ae49d47853..3ffb9d6c0950 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1240,6 +1240,7 @@ learning_test() # FDB entry was installed. bridge link set dev $br_port1 flood off + ip link set $host1_if promisc on tc qdisc add dev $host1_if ingress tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1250,7 +1251,7 @@ learning_test() tc -j -s filter show dev $host1_if ingress \ | jq -e ".[] | select(.options.handle == 101) \ | select(.options.actions[0].stats.packets == 1)" &> /dev/null - check_fail $? "Packet reached second host when should not" + check_fail $? "Packet reached first host when should not" $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q sleep 1 @@ -1289,6 +1290,7 @@ learning_test() tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host1_if ingress + ip link set $host1_if promisc off bridge link set dev $br_port1 flood on @@ -1306,6 +1308,7 @@ flood_test_do() # Add an ACL on `host2_if` which will tell us whether the packet # was flooded to it or not. + ip link set $host2_if promisc on tc qdisc add dev $host2_if ingress tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ flower dst_mac $mac action drop @@ -1323,6 +1326,7 @@ flood_test_do() tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower tc qdisc del dev $host2_if ingress + ip link set $host2_if promisc off return $err } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index 28d568c48a73..91e431cd919e 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -141,12 +141,13 @@ switch_create() ip link set dev $swp4 up ip link add name br1 type bridge vlan_filtering 1 - ip link set dev br1 up - __addr_add_del br1 add 192.0.2.129/32 - ip -4 route add 192.0.2.130/32 dev br1 team_create lag loadbalance $swp3 $swp4 ip link set dev lag master br1 + + ip link set dev br1 up + __addr_add_del br1 add 192.0.2.129/32 + ip -4 route add 192.0.2.130/32 dev br1 } switch_destroy() diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh index 0727e2012b68..43469c7de118 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh @@ -525,7 +525,7 @@ arp_suppression() log_test "neigh_suppress: on / neigh exists: yes" - # Delete the neighbour from the the SVI. A single ARP request should be + # Delete the neighbour from the SVI. A single ARP request should be # received by the remote VTEP RET=0 diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c new file mode 100644 index 000000000000..9d64c560a2d6 --- /dev/null +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -0,0 +1,605 @@ +/* SPDX-License-Identifier: MIT */ +/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */ +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <arpa/inet.h> +#include <linux/errqueue.h> +#include <linux/if_packet.h> +#include <linux/io_uring.h> +#include <linux/ipv6.h> +#include <linux/socket.h> +#include <linux/sockios.h> +#include <net/ethernet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/tcp.h> +#include <netinet/udp.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/un.h> +#include <sys/wait.h> + +#define NOTIF_TAG 0xfffffffULL +#define NONZC_TAG 0 +#define ZC_TAG 1 + +enum { + MODE_NONZC = 0, + MODE_ZC = 1, + MODE_ZC_FIXED = 2, + MODE_MIXED = 3, +}; + +static bool cfg_flush = false; +static bool cfg_cork = false; +static int cfg_mode = MODE_ZC_FIXED; +static int cfg_nr_reqs = 8; +static int cfg_family = PF_UNSPEC; +static int cfg_payload_len; +static int cfg_port = 8000; +static int cfg_runtime_ms = 4200; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_dst_addr; + +static char payload[IP_MAXPACKET] __attribute__((aligned(4096))); + +struct io_sq_ring { + unsigned *head; + unsigned *tail; + unsigned *ring_mask; + unsigned *ring_entries; + unsigned *flags; + unsigned *array; +}; + +struct io_cq_ring { + unsigned *head; + unsigned *tail; + unsigned *ring_mask; + unsigned *ring_entries; + struct io_uring_cqe *cqes; +}; + +struct io_uring_sq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *kflags; + unsigned *kdropped; + unsigned *array; + struct io_uring_sqe *sqes; + + unsigned sqe_head; + unsigned sqe_tail; + + size_t ring_sz; +}; + +struct io_uring_cq { + unsigned *khead; + unsigned *ktail; + unsigned *kring_mask; + unsigned *kring_entries; + unsigned *koverflow; + struct io_uring_cqe *cqes; + + size_t ring_sz; +}; + +struct io_uring { + struct io_uring_sq sq; + struct io_uring_cq cq; + int ring_fd; +}; + +#ifdef __alpha__ +# ifndef __NR_io_uring_setup +# define __NR_io_uring_setup 535 +# endif +# ifndef __NR_io_uring_enter +# define __NR_io_uring_enter 536 +# endif +# ifndef __NR_io_uring_register +# define __NR_io_uring_register 537 +# endif +#else /* !__alpha__ */ +# ifndef __NR_io_uring_setup +# define __NR_io_uring_setup 425 +# endif +# ifndef __NR_io_uring_enter +# define __NR_io_uring_enter 426 +# endif +# ifndef __NR_io_uring_register +# define __NR_io_uring_register 427 +# endif +#endif + +#if defined(__x86_64) || defined(__i386__) +#define read_barrier() __asm__ __volatile__("":::"memory") +#define write_barrier() __asm__ __volatile__("":::"memory") +#else + +#define read_barrier() __sync_synchronize() +#define write_barrier() __sync_synchronize() +#endif + +static int io_uring_setup(unsigned int entries, struct io_uring_params *p) +{ + return syscall(__NR_io_uring_setup, entries, p); +} + +static int io_uring_enter(int fd, unsigned int to_submit, + unsigned int min_complete, + unsigned int flags, sigset_t *sig) +{ + return syscall(__NR_io_uring_enter, fd, to_submit, min_complete, + flags, sig, _NSIG / 8); +} + +static int io_uring_register_buffers(struct io_uring *ring, + const struct iovec *iovecs, + unsigned nr_iovecs) +{ + int ret; + + ret = syscall(__NR_io_uring_register, ring->ring_fd, + IORING_REGISTER_BUFFERS, iovecs, nr_iovecs); + return (ret < 0) ? -errno : ret; +} + +static int io_uring_register_notifications(struct io_uring *ring, + unsigned nr, + struct io_uring_notification_slot *slots) +{ + int ret; + struct io_uring_notification_register r = { + .nr_slots = nr, + .data = (unsigned long)slots, + }; + + ret = syscall(__NR_io_uring_register, ring->ring_fd, + IORING_REGISTER_NOTIFIERS, &r, sizeof(r)); + return (ret < 0) ? -errno : ret; +} + +static int io_uring_mmap(int fd, struct io_uring_params *p, + struct io_uring_sq *sq, struct io_uring_cq *cq) +{ + size_t size; + void *ptr; + int ret; + + sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned); + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); + if (ptr == MAP_FAILED) + return -errno; + sq->khead = ptr + p->sq_off.head; + sq->ktail = ptr + p->sq_off.tail; + sq->kring_mask = ptr + p->sq_off.ring_mask; + sq->kring_entries = ptr + p->sq_off.ring_entries; + sq->kflags = ptr + p->sq_off.flags; + sq->kdropped = ptr + p->sq_off.dropped; + sq->array = ptr + p->sq_off.array; + + size = p->sq_entries * sizeof(struct io_uring_sqe); + sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES); + if (sq->sqes == MAP_FAILED) { + ret = -errno; +err: + munmap(sq->khead, sq->ring_sz); + return ret; + } + + cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe); + ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING); + if (ptr == MAP_FAILED) { + ret = -errno; + munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe)); + goto err; + } + cq->khead = ptr + p->cq_off.head; + cq->ktail = ptr + p->cq_off.tail; + cq->kring_mask = ptr + p->cq_off.ring_mask; + cq->kring_entries = ptr + p->cq_off.ring_entries; + cq->koverflow = ptr + p->cq_off.overflow; + cq->cqes = ptr + p->cq_off.cqes; + return 0; +} + +static int io_uring_queue_init(unsigned entries, struct io_uring *ring, + unsigned flags) +{ + struct io_uring_params p; + int fd, ret; + + memset(ring, 0, sizeof(*ring)); + memset(&p, 0, sizeof(p)); + p.flags = flags; + + fd = io_uring_setup(entries, &p); + if (fd < 0) + return fd; + ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); + if (!ret) + ring->ring_fd = fd; + else + close(fd); + return ret; +} + +static int io_uring_submit(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + const unsigned mask = *sq->kring_mask; + unsigned ktail, submitted, to_submit; + int ret; + + read_barrier(); + if (*sq->khead != *sq->ktail) { + submitted = *sq->kring_entries; + goto submit; + } + if (sq->sqe_head == sq->sqe_tail) + return 0; + + ktail = *sq->ktail; + to_submit = sq->sqe_tail - sq->sqe_head; + for (submitted = 0; submitted < to_submit; submitted++) { + read_barrier(); + sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + } + if (!submitted) + return 0; + + if (*sq->ktail != ktail) { + write_barrier(); + *sq->ktail = ktail; + write_barrier(); + } +submit: + ret = io_uring_enter(ring->ring_fd, submitted, 0, + IORING_ENTER_GETEVENTS, NULL); + return ret < 0 ? -errno : ret; +} + +static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags) +{ + memset(sqe, 0, sizeof(*sqe)); + sqe->opcode = (__u8) IORING_OP_SEND; + sqe->fd = sockfd; + sqe->addr = (unsigned long) buf; + sqe->len = len; + sqe->msg_flags = (__u32) flags; +} + +static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd, + const void *buf, size_t len, int flags, + unsigned slot_idx, unsigned zc_flags) +{ + io_uring_prep_send(sqe, sockfd, buf, len, flags); + sqe->opcode = (__u8) IORING_OP_SENDZC_NOTIF; + sqe->notification_idx = slot_idx; + sqe->ioprio = zc_flags; +} + +static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) +{ + struct io_uring_sq *sq = &ring->sq; + + if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries) + return NULL; + return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask]; +} + +static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr) +{ + struct io_uring_cq *cq = &ring->cq; + const unsigned mask = *cq->kring_mask; + unsigned head = *cq->khead; + int ret; + + *cqe_ptr = NULL; + do { + read_barrier(); + if (head != *cq->ktail) { + *cqe_ptr = &cq->cqes[head & mask]; + break; + } + ret = io_uring_enter(ring->ring_fd, 0, 1, + IORING_ENTER_GETEVENTS, NULL); + if (ret < 0) + return -errno; + } while (1); + + return 0; +} + +static inline void io_uring_cqe_seen(struct io_uring *ring) +{ + *(&ring->cq)->khead += 1; + write_barrier(); +} + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static void do_setsockopt(int fd, int level, int optname, int val) +{ + if (setsockopt(fd, level, optname, &val, sizeof(val))) + error(1, errno, "setsockopt %d.%d: %d", level, optname, val); +} + +static int do_setup_tx(int domain, int type, int protocol) +{ + int fd; + + fd = socket(domain, type, protocol); + if (fd == -1) + error(1, errno, "socket t"); + + do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21); + + if (connect(fd, (void *) &cfg_dst_addr, cfg_alen)) + error(1, errno, "connect"); + return fd; +} + +static void do_tx(int domain, int type, int protocol) +{ + struct io_uring_notification_slot b[1] = {{.tag = NOTIF_TAG}}; + struct io_uring_sqe *sqe; + struct io_uring_cqe *cqe; + unsigned long packets = 0, bytes = 0; + struct io_uring ring; + struct iovec iov; + uint64_t tstop; + int i, fd, ret; + int compl_cqes = 0; + + fd = do_setup_tx(domain, type, protocol); + + ret = io_uring_queue_init(512, &ring, 0); + if (ret) + error(1, ret, "io_uring: queue init"); + + ret = io_uring_register_notifications(&ring, 1, b); + if (ret) + error(1, ret, "io_uring: tx ctx registration"); + + iov.iov_base = payload; + iov.iov_len = cfg_payload_len; + + ret = io_uring_register_buffers(&ring, &iov, 1); + if (ret) + error(1, ret, "io_uring: buffer registration"); + + tstop = gettimeofday_ms() + cfg_runtime_ms; + do { + if (cfg_cork) + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1); + + for (i = 0; i < cfg_nr_reqs; i++) { + unsigned zc_flags = 0; + unsigned buf_idx = 0; + unsigned slot_idx = 0; + unsigned mode = cfg_mode; + unsigned msg_flags = 0; + + if (cfg_mode == MODE_MIXED) + mode = rand() % 3; + + sqe = io_uring_get_sqe(&ring); + + if (mode == MODE_NONZC) { + io_uring_prep_send(sqe, fd, payload, + cfg_payload_len, msg_flags); + sqe->user_data = NONZC_TAG; + } else { + if (cfg_flush) { + zc_flags |= IORING_RECVSEND_NOTIF_FLUSH; + compl_cqes++; + } + io_uring_prep_sendzc(sqe, fd, payload, + cfg_payload_len, + msg_flags, slot_idx, zc_flags); + if (mode == MODE_ZC_FIXED) { + sqe->ioprio |= IORING_RECVSEND_FIXED_BUF; + sqe->buf_index = buf_idx; + } + sqe->user_data = ZC_TAG; + } + } + + ret = io_uring_submit(&ring); + if (ret != cfg_nr_reqs) + error(1, ret, "submit"); + + for (i = 0; i < cfg_nr_reqs; i++) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) + error(1, ret, "wait cqe"); + + if (cqe->user_data == NOTIF_TAG) { + compl_cqes--; + i--; + } else if (cqe->user_data != NONZC_TAG && + cqe->user_data != ZC_TAG) { + error(1, cqe->res, "invalid user_data"); + } else if (cqe->res <= 0 && cqe->res != -EAGAIN) { + error(1, cqe->res, "send failed"); + } else { + if (cqe->res > 0) { + packets++; + bytes += cqe->res; + } + /* failed requests don't flush */ + if (cfg_flush && + cqe->res <= 0 && + cqe->user_data == ZC_TAG) + compl_cqes--; + } + io_uring_cqe_seen(&ring); + } + if (cfg_cork) + do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); + } while (gettimeofday_ms() < tstop); + + if (close(fd)) + error(1, errno, "close"); + + fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n", + packets, bytes >> 20, + packets / (cfg_runtime_ms / 1000), + (bytes >> 20) / (cfg_runtime_ms / 1000)); + + while (compl_cqes) { + ret = io_uring_wait_cqe(&ring, &cqe); + if (ret) + error(1, ret, "wait cqe"); + io_uring_cqe_seen(&ring); + compl_cqes--; + } +} + +static void do_test(int domain, int type, int protocol) +{ + int i; + + for (i = 0; i < IP_MAXPACKET; i++) + payload[i] = 'a' + (i % 26); + do_tx(domain, type, protocol); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s [-f] [-n<N>] [-z0] [-s<payload size>] " + "(-4|-6) [-t<time s>] -D<dst_ip> udp", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + const int max_payload_len = sizeof(payload) - + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) - + 40 /* max tcp options */; + struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr; + struct sockaddr_in *addr4 = (void *) &cfg_dst_addr; + char *daddr = NULL; + int c; + + if (argc <= 1) + usage(argv[0]); + cfg_payload_len = max_payload_len; + + while ((c = getopt(argc, argv, "46D:p:s:t:n:fc:m:")) != -1) { + switch (c) { + case '4': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET; + cfg_alen = sizeof(struct sockaddr_in); + break; + case '6': + if (cfg_family != PF_UNSPEC) + error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 'D': + daddr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 's': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 't': + cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000; + break; + case 'n': + cfg_nr_reqs = strtoul(optarg, NULL, 0); + break; + case 'f': + cfg_flush = 1; + break; + case 'c': + cfg_cork = strtol(optarg, NULL, 0); + break; + case 'm': + cfg_mode = strtol(optarg, NULL, 0); + break; + } + } + + switch (cfg_family) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + if (daddr && + inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", daddr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + if (daddr && + inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", daddr); + break; + default: + error(1, 0, "illegal domain"); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-s: payload exceeds max (%d)", max_payload_len); + if (cfg_mode == MODE_NONZC && cfg_flush) + error(1, 0, "-f: only zerocopy modes support notifications"); + if (optind != argc - 1) + usage(argv[0]); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + + parse_opts(argc, argv); + + if (!strcmp(cfg_test, "tcp")) + do_test(cfg_family, SOCK_STREAM, 0); + else if (!strcmp(cfg_test, "udp")) + do_test(cfg_family, SOCK_DGRAM, 0); + else + error(1, 0, "unknown cfg_test %s", cfg_test); + return 0; +} diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh new file mode 100755 index 000000000000..6a65e4437640 --- /dev/null +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# +# Send data between two processes across namespaces +# Run twice: once without and once with zerocopy + +set -e + +readonly DEV="veth0" +readonly DEV_MTU=65535 +readonly BIN_TX="./io_uring_zerocopy_tx" +readonly BIN_RX="./msg_zerocopy" + +readonly RAND="$(mktemp -u XXXXXX)" +readonly NSPREFIX="ns-${RAND}" +readonly NS1="${NSPREFIX}1" +readonly NS2="${NSPREFIX}2" + +readonly SADDR4='192.168.1.1' +readonly DADDR4='192.168.1.2' +readonly SADDR6='fd::1' +readonly DADDR6='fd::2' + +readonly path_sysctl_mem="net.core.optmem_max" + +# No arguments: automated test +if [[ "$#" -eq "0" ]]; then + IPs=( "4" "6" ) + protocols=( "tcp" "udp" ) + + for IP in "${IPs[@]}"; do + for proto in "${protocols[@]}"; do + for mode in $(seq 1 3); do + $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 + $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -f + $0 "$IP" "$proto" -m "$mode" -t 1 -n 32 -c -f + done + done + done + + echo "OK. All tests passed" + exit 0 +fi + +# Argument parsing +if [[ "$#" -lt "2" ]]; then + echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>" + exit 1 +fi + +readonly IP="$1" +shift +readonly TXMODE="$1" +shift +readonly EXTRA_ARGS="$@" + +# Argument parsing: configure addresses +if [[ "${IP}" == "4" ]]; then + readonly SADDR="${SADDR4}" + readonly DADDR="${DADDR4}" +elif [[ "${IP}" == "6" ]]; then + readonly SADDR="${SADDR6}" + readonly DADDR="${DADDR6}" +else + echo "Invalid IP version ${IP}" + exit 1 +fi + +# Argument parsing: select receive mode +# +# This differs from send mode for +# - packet: use raw recv, because packet receives skb clones +# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option +case "${TXMODE}" in +'packet' | 'packet_dgram' | 'raw_hdrincl') + RXMODE='raw' + ;; +*) + RXMODE="${TXMODE}" + ;; +esac + +# Start of state changes: install cleanup handler +save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})" + +cleanup() { + ip netns del "${NS2}" + ip netns del "${NS1}" + sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}" +} + +trap cleanup EXIT + +# Configure system settings +sysctl -w -q "${path_sysctl_mem}=1000000" + +# Create virtual ethernet pair between network namespaces +ip netns add "${NS1}" +ip netns add "${NS2}" + +ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ + peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" + +# Bring the devices up +ip -netns "${NS1}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DEV}" up + +# Set fixed MAC addresses on the devices +ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 +ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 + +# Add fixed IP addresses to the devices +ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" +ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" +ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad +ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad + +# Optionally disable sg or csum offload to test edge cases +# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off + +do_test() { + local readonly ARGS="$1" + + echo "ipv${IP} ${TXMODE} ${ARGS}" + ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" & + sleep 0.2 + ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}" + wait +} + +do_test "${EXTRA_ARGS}" +echo ok diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index a2b9fad5a9a6..4ceb401da1bf 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -117,6 +117,8 @@ # | Schema Data | | # +-----------------------------------------------------------+ +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 ################################################################################ # # @@ -211,7 +213,7 @@ check_kernel_compatibility() echo "SKIP: kernel version probably too old, missing ioam support" ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true - exit 1 + exit $ksft_skip fi ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \ @@ -227,7 +229,7 @@ check_kernel_compatibility() "without CONFIG_IPV6_IOAM6_LWTUNNEL?" ip link del veth0 2>/dev/null || true ip netns del ioam-tmp-node || true - exit 1 + exit $ksft_skip fi ip link del veth0 2>/dev/null || true @@ -752,20 +754,20 @@ nfailed=0 if [ "$(id -u)" -ne 0 ] then echo "SKIP: Need root privileges" - exit 1 + exit $ksft_skip fi if [ ! -x "$(command -v ip)" ] then echo "SKIP: Could not run test without ip tool" - exit 1 + exit $ksft_skip fi ip ioam &>/dev/null if [ $? = 1 ] then echo "SKIP: iproute2 too old, missing ioam command" - exit 1 + exit $ksft_skip fi check_kernel_compatibility diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c index a7c41375374f..708a9822259d 100644 --- a/tools/testing/selftests/net/ipv6_flowlabel.c +++ b/tools/testing/selftests/net/ipv6_flowlabel.c @@ -9,6 +9,7 @@ #include <errno.h> #include <fcntl.h> #include <limits.h> +#include <linux/icmpv6.h> #include <linux/in6.h> #include <stdbool.h> #include <stdio.h> @@ -29,26 +30,48 @@ #ifndef IPV6_FLOWLABEL_MGR #define IPV6_FLOWLABEL_MGR 32 #endif +#ifndef IPV6_FLOWINFO_SEND +#define IPV6_FLOWINFO_SEND 33 +#endif #define FLOWLABEL_WILDCARD ((uint32_t) -1) static const char cfg_data[] = "a"; static uint32_t cfg_label = 1; +static bool use_ping; +static bool use_flowinfo_send; + +static struct icmp6hdr icmp6 = { + .icmp6_type = ICMPV6_ECHO_REQUEST +}; + +static struct sockaddr_in6 addr = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_LOOPBACK_INIT, +}; static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel) { char control[CMSG_SPACE(sizeof(flowlabel))] = {0}; struct msghdr msg = {0}; - struct iovec iov = {0}; + struct iovec iov = { + .iov_base = (char *)cfg_data, + .iov_len = sizeof(cfg_data) + }; int ret; - iov.iov_base = (char *)cfg_data; - iov.iov_len = sizeof(cfg_data); + if (use_ping) { + iov.iov_base = &icmp6; + iov.iov_len = sizeof(icmp6); + } msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (with_flowlabel) { + if (use_flowinfo_send) { + msg.msg_name = &addr; + msg.msg_namelen = sizeof(addr); + } else if (with_flowlabel) { struct cmsghdr *cm; cm = (void *)control; @@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) ret = recvmsg(fd, &msg, 0); if (ret == -1) error(1, errno, "recv"); + if (use_ping) + goto parse_cmsg; if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) error(1, 0, "recv: truncated"); if (ret != sizeof(cfg_data)) @@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) if (memcmp(data, cfg_data, sizeof(data))) error(1, 0, "recv: data mismatch"); +parse_cmsg: cm = CMSG_FIRSTHDR(&msg); if (with_flowlabel) { if (!cm) @@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect) flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm)); fprintf(stderr, "recv with label %u\n", flowlabel); - if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) + if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) { fprintf(stderr, "recv: incorrect flowlabel %u != %u\n", flowlabel, expect); + error(1, 0, "recv: flowlabel is wrong"); + } } else { fprintf(stderr, "recv without label\n"); @@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "l:")) != -1) { + while ((c = getopt(argc, argv, "l:ps")) != -1) { switch (c) { case 'l': cfg_label = strtoul(optarg, NULL, 0); break; + case 'p': + use_ping = true; + break; + case 's': + use_flowinfo_send = true; + break; default: error(1, 0, "%s: parse error", argv[0]); } @@ -178,27 +212,30 @@ static void parse_opts(int argc, char **argv) int main(int argc, char **argv) { - struct sockaddr_in6 addr = { - .sin6_family = AF_INET6, - .sin6_port = htons(8000), - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - }; const int one = 1; int fdt, fdr; + int prot = 0; + + addr.sin6_port = htons(8000); parse_opts(argc, argv); - fdt = socket(PF_INET6, SOCK_DGRAM, 0); + if (use_ping) { + fprintf(stderr, "attempting to use ping sockets\n"); + prot = IPPROTO_ICMPV6; + } + + fdt = socket(PF_INET6, SOCK_DGRAM, prot); if (fdt == -1) error(1, errno, "socket t"); - fdr = socket(PF_INET6, SOCK_DGRAM, 0); + fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0); if (fdr == -1) error(1, errno, "socket r"); if (connect(fdt, (void *)&addr, sizeof(addr))) error(1, errno, "connect"); - if (bind(fdr, (void *)&addr, sizeof(addr))) + if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr))) error(1, errno, "bind"); flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE); @@ -216,13 +253,21 @@ int main(int argc, char **argv) do_recv(fdr, false, 0); } + if (use_flowinfo_send) { + fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n"); + addr.sin6_flowinfo = htonl(cfg_label); + if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one, + sizeof(one)) == -1) + error(1, errno, "setsockopt flowinfo_send"); + } + fprintf(stderr, "send label\n"); do_send(fdt, true, cfg_label); do_recv(fdr, true, cfg_label); if (close(fdr)) error(1, errno, "close r"); - if (close(fdt)) + if (!use_ping && close(fdt)) error(1, errno, "close t"); return 0; diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh index d3bc6442704e..cee95e252bee 100755 --- a/tools/testing/selftests/net/ipv6_flowlabel.sh +++ b/tools/testing/selftests/net/ipv6_flowlabel.sh @@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)" ./in_netns.sh \ sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1' +echo "TEST datapath (with ping-sockets)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p' + +echo "TEST datapath (with flowinfo-send)" +./in_netns.sh \ + sh -c './ipv6_flowlabel -l 1 -s' + +echo "TEST datapath (with ping-sockets flowinfo-send)" +./in_netns.sh \ + sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \ + sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \ + ./ipv6_flowlabel -l 1 -p -s' + echo OK. All tests passed diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index f905d5358e68..43a723626126 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -1,12 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 top_srcdir = ../../../../.. -KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh mptcp_sockopt.sh + simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 9dd43d7d957b..515859a5168b 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -61,6 +61,39 @@ chk_msk_nr() __chk_nr "grep -c token:" $* } +wait_msk_nr() +{ + local condition="grep -c token:" + local expected=$1 + local timeout=20 + local msg nr + local max=0 + local i=0 + + shift 1 + msg=$* + + while [ $i -lt $timeout ]; do + nr=$(ss -inmHMN $ns | $condition) + [ $nr == $expected ] && break; + [ $nr -gt $max ] && max=$nr + i=$((i + 1)) + sleep 1 + done + + printf "%-50s" "$msg" + if [ $i -ge $timeout ]; then + echo "[ fail ] timeout while expecting $expected max $max last $nr" + ret=$test_cnt + elif [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi + test_cnt=$((test_cnt+1)) +} + chk_msk_fallback_nr() { __chk_nr "grep -c fallback" $* @@ -146,7 +179,7 @@ ip -n $ns link set dev lo up echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -l -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10000 chk_msk_nr 0 "no msk on netns creation" @@ -155,7 +188,7 @@ chk_msk_listen 10000 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 chk_msk_nr 2 "after MPC handshake " @@ -167,13 +200,13 @@ flush_pids echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \ 0.0.0.0 >/dev/null & wait_local_port_listen $ns 10001 echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \ + ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10001 chk_msk_fallback_nr 1 "check fallback" @@ -184,7 +217,7 @@ for I in `seq 1 $NR_CLIENTS`; do echo "a" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -l -w 10 \ + ./mptcp_connect -p $((I+10001)) -l -w 20 \ -t ${timeout_poll} 0.0.0.0 >/dev/null & done wait_local_port_listen $ns $((NR_CLIENTS + 10001)) @@ -193,12 +226,11 @@ for I in `seq 1 $NR_CLIENTS`; do echo "b" | \ timeout ${timeout_test} \ ip netns exec $ns \ - ./mptcp_connect -p $((I+10001)) -w 10 \ + ./mptcp_connect -p $((I+10001)) -w 20 \ -t ${timeout_poll} 127.0.0.1 >/dev/null & done -sleep 1.5 -chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present" flush_pids exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 8628aa61b763..e2ea6c126c99 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -265,7 +265,7 @@ static void sock_test_tcpulp(int sock, int proto, unsigned int line) static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 29f75e2a1116..8672d898f8cd 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -88,7 +88,7 @@ static void xgetaddrinfo(const char *node, const char *service, static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index a4406b7a8064..ff83ef426df5 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -455,6 +455,12 @@ wait_mpj() done } +kill_wait() +{ + kill $1 > /dev/null 2>&1 + wait $1 2>/dev/null +} + pm_nl_set_limits() { local ns=$1 @@ -654,6 +660,11 @@ do_transfer() local port=$((10000 + TEST_COUNT - 1)) local cappid + local userspace_pm=0 + local evts_ns1 + local evts_ns1_pid + local evts_ns2 + local evts_ns2_pid :> "$cout" :> "$sout" @@ -690,10 +701,29 @@ do_transfer() extra_args="-r ${speed:6}" fi + if [[ "${addr_nr_ns1}" = "userspace_"* ]]; then + userspace_pm=1 + addr_nr_ns1=${addr_nr_ns1:10} + fi + if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then # disconnect extra_args="$extra_args -I ${addr_nr_ns2:10}" addr_nr_ns2=0 + elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then + userspace_pm=1 + addr_nr_ns2=${addr_nr_ns2:10} + fi + + if [ $userspace_pm -eq 1 ]; then + evts_ns1=$(mktemp) + evts_ns2=$(mktemp) + :> "$evts_ns1" + :> "$evts_ns2" + ip netns exec ${listener_ns} ./pm_nl_ctl events >> "$evts_ns1" 2>&1 & + evts_ns1_pid=$! + ip netns exec ${connector_ns} ./pm_nl_ctl events >> "$evts_ns2" 2>&1 & + evts_ns2_pid=$! fi local local_addr @@ -748,6 +778,8 @@ do_transfer() if [ $addr_nr_ns1 -gt 0 ]; then local counter=2 local add_nr_ns1=${addr_nr_ns1} + local id=10 + local tk while [ $add_nr_ns1 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -755,9 +787,18 @@ do_transfer() else addr="10.0.$counter.1" fi - pm_nl_add_endpoint $ns1 $addr flags signal + if [ $userspace_pm -eq 0 ]; then + pm_nl_add_endpoint $ns1 $addr flags signal + else + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns1") + ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id + sleep 1 + ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id + fi + counter=$((counter + 1)) add_nr_ns1=$((add_nr_ns1 - 1)) + id=$((id + 1)) done elif [ $addr_nr_ns1 -lt 0 ]; then local rm_nr_ns1=$((-addr_nr_ns1)) @@ -804,6 +845,8 @@ do_transfer() if [ $addr_nr_ns2 -gt 0 ]; then local add_nr_ns2=${addr_nr_ns2} local counter=3 + local id=20 + local tk da dp sp while [ $add_nr_ns2 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -811,9 +854,23 @@ do_transfer() else addr="10.0.$counter.2" fi - pm_nl_add_endpoint $ns2 $addr flags $flags + if [ $userspace_pm -eq 0 ]; then + pm_nl_add_endpoint $ns2 $addr flags $flags + else + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") + dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + ip netns exec ${connector_ns} ./pm_nl_ctl csf lip $addr lid $id \ + rip $da rport $dp token $tk + sleep 1 + sp=$(grep "type:10" "$evts_ns2" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \ + rip $da rport $dp token $tk + fi counter=$((counter + 1)) add_nr_ns2=$((add_nr_ns2 - 1)) + id=$((id + 1)) done elif [ $addr_nr_ns2 -lt 0 ]; then local rm_nr_ns2=$((-addr_nr_ns2)) @@ -890,6 +947,12 @@ do_transfer() kill $cappid fi + if [ $userspace_pm -eq 1 ]; then + kill_wait $evts_ns1_pid + kill_wait $evts_ns2_pid + rm -rf $evts_ns1 $evts_ns2 + fi + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ @@ -2365,6 +2428,36 @@ backup_tests() chk_add_nr 1 1 chk_prio_nr 1 1 fi + + if reset "mpc backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi + + if reset "mpc switch to backup"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 0 1 + fi + + if reset "mpc switch to backup both sides"; then + pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + chk_join_nr 0 0 0 + chk_prio_nr 1 1 + fi } add_addr_ports_tests() @@ -2810,6 +2903,25 @@ userspace_tests() chk_join_nr 0 0 0 chk_rm_nr 0 0 fi + + # userspace pm add & remove address + if reset "userspace pm add & remove address"; then + set_userspace_pm $ns1 + pm_nl_set_limits $ns2 1 1 + run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_rm_nr 1 1 invert + fi + + # userspace pm create destroy subflow + if reset "userspace pm create destroy subflow"; then + set_userspace_pm $ns2 + pm_nl_set_limits $ns1 0 1 + run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow + chk_join_nr 1 1 1 + chk_rm_nr 0 1 + fi } endpoint_tests() diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index ac9a4d9c1764..ae61f39556ca 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -136,7 +136,7 @@ static void xgetaddrinfo(const char *node, const char *service, static int sock_listen_mptcp(const char * const listenaddr, const char * const port) { - int sock; + int sock = -1; struct addrinfo hints = { .ai_protocol = IPPROTO_TCP, .ai_socktype = SOCK_STREAM, diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 6a2f4b981e1d..abddf4c63e79 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -31,7 +31,7 @@ static void syntax(char *argv[]) { - fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]); + fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]); fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n"); fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n"); fprintf(stderr, "\trem id <local-id> token <token>\n"); @@ -39,7 +39,7 @@ static void syntax(char *argv[]) fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n"); fprintf(stderr, "\tdel <id> [<ip>]\n"); fprintf(stderr, "\tget <id>\n"); - fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n"); + fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n"); fprintf(stderr, "\tflush\n"); fprintf(stderr, "\tdump\n"); fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); @@ -1279,7 +1279,10 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) struct rtattr *rta, *nest; struct nlmsghdr *nh; u_int32_t flags = 0; + u_int32_t token = 0; + u_int16_t rport = 0; u_int16_t family; + void *rip = NULL; int nest_start; int use_id = 0; u_int8_t id; @@ -1339,7 +1342,13 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) error(1, 0, " missing flags keyword"); for (; arg < argc; arg++) { - if (!strcmp(argv[arg], "flags")) { + if (!strcmp(argv[arg], "token")) { + if (++arg >= argc) + error(1, 0, " missing token value"); + + /* token */ + token = atoi(argv[arg]); + } else if (!strcmp(argv[arg], "flags")) { char *tok, *str; /* flags */ @@ -1378,12 +1387,72 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) rta->rta_len = RTA_LENGTH(2); memcpy(RTA_DATA(rta), &port, 2); off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "rport")) { + if (++arg >= argc) + error(1, 0, " missing remote port"); + + rport = atoi(argv[arg]); + } else if (!strcmp(argv[arg], "rip")) { + if (++arg >= argc) + error(1, 0, " missing remote ip"); + + rip = argv[arg]; } else { error(1, 0, "unknown keyword %s", argv[arg]); } } nest->rta_len = off - nest_start; + /* token */ + if (token) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_TOKEN; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &token, 4); + off += NLMSG_ALIGN(rta->rta_len); + } + + /* remote addr/port */ + if (rip) { + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, rip, RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else { + error(1, errno, "can't parse ip %s", (char *)rip); + } + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + if (rport) { + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &rport, 2); + off += NLMSG_ALIGN(rta->rta_len); + } + + nest->rta_len = off - nest_start; + } + do_nl_req(fd, nh, off, 0); return 0; } diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f441ff7904fc..ffa13a957a36 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -12,6 +12,7 @@ timeout_test=$((timeout_poll * 2 + 1)) test_cnt=1 ret=0 bail=0 +slack=50 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ]" @@ -52,6 +53,7 @@ setup() cout=$(mktemp) capout=$(mktemp) size=$((2 * 2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 @@ -104,6 +106,16 @@ setup() ip -net "$ns3" route add default via dead:beef:3::2 ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 + + # debug build can slow down measurably the test program + # we use quite tight time limit on the run-time, to ensure + # maximum B/W usage. + # Use kmemleak/lockdep/kasan/prove_locking presence as a rough + # estimate for this being a debug kernel and increase the + # maximum run-time accordingly. Observed run times for CI builds + # running selftests, including kbuild, were used to determine the + # amount of time to add. + grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550)) } # $1: ns, $2: port @@ -241,7 +253,7 @@ run_test() # mptcp_connect will do some sleeps to allow the mp_join handshake # completion (see mptcp_connect): 200ms on each side, add some slack - time=$((time + 450)) + time=$((time + 400 + slack)) printf "%-60s" "$msg" do_transfer $small $large $time diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 78d0bb640b11..3229725b64b0 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -37,6 +37,12 @@ rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" +kill_wait() +{ + kill $1 > /dev/null 2>&1 + wait $1 2>/dev/null +} + cleanup() { echo "cleanup" @@ -48,16 +54,16 @@ cleanup() kill -SIGUSR1 $client4_pid > /dev/null 2>&1 fi if [ $server4_pid -ne 0 ]; then - kill $server4_pid > /dev/null 2>&1 + kill_wait $server4_pid fi if [ $client6_pid -ne 0 ]; then kill -SIGUSR1 $client6_pid > /dev/null 2>&1 fi if [ $server6_pid -ne 0 ]; then - kill $server6_pid > /dev/null 2>&1 + kill_wait $server6_pid fi if [ $evts_pid -ne 0 ]; then - kill $evts_pid > /dev/null 2>&1 + kill_wait $evts_pid fi local netns for netns in "$ns1" "$ns2" ;do @@ -153,7 +159,7 @@ make_connection() sleep 1 # Capture client/server attributes from MPTCP connection netlink events - kill $client_evts_pid + kill_wait $client_evts_pid local client_token local client_port @@ -165,7 +171,7 @@ make_connection() client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts") client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$client_evts") - kill $server_evts_pid + kill_wait $server_evts_pid server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\ "$server_evts") @@ -286,7 +292,7 @@ test_announce() verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -321,7 +327,7 @@ test_announce() verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } @@ -416,7 +422,7 @@ test_remove() sleep 0.5 verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id" - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -449,7 +455,7 @@ test_remove() sleep 0.5 verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id" - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } @@ -553,7 +559,7 @@ test_subflows() "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid local sport sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -592,7 +598,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -631,7 +637,7 @@ test_subflows() "$client_addr_id" "ns1" "ns2" # Delete the listener from the client ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -647,7 +653,7 @@ test_subflows() ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ "$client4_token" > /dev/null 2>&1 - kill $evts_pid + kill_wait $evts_pid # Capture events on the network namespace running the client :>"$evts" @@ -674,7 +680,7 @@ test_subflows() "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid> /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -713,7 +719,7 @@ test_subflows() "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -750,7 +756,7 @@ test_subflows() "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # Delete the listener from the server ns, if one was created - kill $listener_pid > /dev/null 2>&1 + kill_wait $listener_pid sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts") @@ -766,14 +772,46 @@ test_subflows() ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ "$server4_token" > /dev/null 2>&1 - kill $evts_pid + kill_wait $evts_pid rm -f "$evts" } +test_prio() +{ + local count + + # Send MP_PRIO signal from client to server machine + ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port" + sleep 0.5 + + # Check TX + stdbuf -o0 -e0 printf "MP_PRIO TX \t" + count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ $count != 1 ]; then + stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 + else + stdbuf -o0 -e0 printf "[OK]\n" + fi + + # Check RX + stdbuf -o0 -e0 printf "MP_PRIO RX \t" + count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') + [ -z "$count" ] && count=0 + if [ $count != 1 ]; then + stdbuf -o0 -e0 printf "[FAIL]\n" + exit 1 + else + stdbuf -o0 -e0 printf "[OK]\n" + fi +} + make_connection make_connection "v6" test_announce test_remove test_subflows +test_prio exit 0 diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh new file mode 100755 index 000000000000..28a775654b92 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -0,0 +1,879 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the SRv6 H.Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers advanced +# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each +# other. +# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN +# service, while hs-3 and hs-4 are connected using an IPv6 only VPN. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received +# by connected hosts, initiating the VPN tunnel. Such a behavior is an +# optimization of the SRv6 H.Encap aiming to reduce the length of the SID +# List carried in the pushed SRH. Specifically, the H.Encaps.Red removes +# the first SID contained in the SID List (i.e. SRv6 Policy) by storing it +# into the IPv6 Destination Address. When a SRv6 Policy is made of only one +# SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that +# SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List carried by +# the SRH; +# +# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +--- +---+ +# | | | | +# | hs-4 | | hs-3 | +# | | | | +# +--------+ +--------+ +# cafe::4 cafe::3 +# 10.0.0.4 10.0.0.3 +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d46 is associated with the SRv6 End.DT46 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. +# +# # SRv6 Policies +# =============== +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# IPv4/IPv6 VPN between hs-1 and hs-2 +# ----------------------------------- +# +# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46 +# ii.a) IPv4 traffic, SID List=fcff:2::d46 +# +# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers +# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through +# rt-2. +# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the +# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4 +# traffic, the H.Encaps.Red omits the presence of SRH at all, since the SID +# List (ii.a) consists of only one SID that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) IPv6 traffic, SID List=fcff:1::d46 +# ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46 +# +# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1. +# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers +# rt-4,rt-3,rt-1. +# The H.Encaps.Red omits the SRH at all in case of (i.b) by pushing the single +# SID (fcff::1::d46) inside the IPv6 DA. +# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first +# SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a) +# hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...| (ii.a) +# +# hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...| (i.b) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b) +# +# +# IPv6 VPN between hs-3 and hs-4 +# ------------------------------ +# +# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN. +# Specifically, packets generated from hs-3 and directed towards hs-4 are +# handled by rt-3 which applies the following SRv6 Policy: +# +# i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46 +# +# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4. +# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the +# first SID (fcff:2::e) in the IPv6 DA. +# +# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the +# following SRv6 Policy: +# +# i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46. +# +# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3. +# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the +# first SID (fcff:1::e) in the IPv6 DA. +# +# In summary: +# hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c) +# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) +# + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly END_FUNC=000e +readonly DT46_FUNC=0d46 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that "dev" is dummy and the VRF is chosen + # for the sake of simplicity). + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${VRF_DEVNAME}" + + # Local End.DT46 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DT46 vrftable "${VRF_TID}" \ + dev "${VRF_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behavior instaces are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule \ + add to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # set default routes to unreachable for both ipv4 and ipv6 + ip -netns "${nsname}" -6 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + + ip -netns "${nsname}" -4 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + + ip -netns "${nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" + else + # "dev" must be different from the one where the packet is + # received, otherwise the proxy arp does not work. + ip -netns "${nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0 + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + # configure the VRF on the router which is directly connected to the + # source host. + ip -netns "${rtname}" link \ + add "${VRF_DEVNAME}" type vrf table "${VRF_TID}" + ip -netns "${rtname}" link set "${VRF_DEVNAME}" up + + # enslave the veth interface connecting the router with the host to the + # VRF in the access router + ip -netns "${rtname}" link \ + set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}" + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2 3 4"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + setup_hs 3 3 + setup_hs 4 4 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-3,rt-4 (SRv6 End behaviors) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red + setup_rt_policy_ipv6 1 2 "" 1 encap.red + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.Encaps.Red) + # - rt-2 (SRv6 End.DT46 behavior) + # + # Direction hs-2 -> hs-1 (H.Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv4 2 1 "" 2 encap.red + setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red + + # create an IPv6 VPN between hosts hs-3 and hs-4 + # the network path between hs-3 and hs-4 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-3 -> hs-4 (H.Encaps.Red) + # - rt-2 (SRv6 End Behavior) + # - rt-4 (SRv6 End.DT46 behavior) + # + # Direction hs-4 -> hs-3 (H.Encaps.Red) + # - rt-1 (SRv6 End behavior) + # - rt-3 (SRv6 End.DT46 behavior) + setup_rt_policy_ipv6 4 3 "2" 4 encap.red + setup_rt_policy_ipv6 3 4 "1" 3 encap.red + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +check_and_log_hs_ipv6_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_ipv4_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + # in this case, the connectivity test must fail + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}" +} + +check_and_log_hs_isolation() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}" + check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)" + + check_and_log_hs_ipv6_connectivity 3 4 + check_and_log_hs_ipv6_connectivity 4 3 +} + +host_vpn_isolation_tests() +{ + local l1="1 2" + local l2="3 4" + local tmp + local i + local j + local k + + log_section "SRv6 VPN isolation test among hosts" + + for k in 0 1; do + for i in ${l1}; do + for j in ${l2}; do + check_and_log_hs_isolation "${i}" "${j}" + done + done + + # let us test the reverse path + tmp="${l1}"; l1="${l2}"; l2="${tmp}" + done + + log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)" + + check_and_log_hs_ipv4_isolation 2 4 + check_and_log_hs_ipv4_isolation 4 2 +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "encap.red"; then + echo "SKIP: Missing SRv6 encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +test_vrf_or_ksft_skip() +{ + modprobe vrf &>/dev/null || true + if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_vrf_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests +host_vpn_isolation_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh new file mode 100755 index 000000000000..cb4177d41b21 --- /dev/null +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -0,0 +1,821 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# +# This script is designed for testing the SRv6 H.L2Encaps.Red behavior. +# +# Below is depicted the IPv6 network of an operator which offers L2 VPN +# services to hosts, enabling them to communicate with each other. +# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service. +# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange +# full L2 frames as long as they carry IPv4/IPv6. +# +# Routers rt-1,rt-2,rt-3 and rt-4 implement L2 VPN services +# leveraging the SRv6 architecture. The key components for such VPNs are: +# +# i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic +# received by connected hosts, initiating the VPN tunnel. Such a behavior +# is an optimization of the SRv6 H.L2Encap aiming to reduce the +# length of the SID List carried in the pushed SRH. Specifically, the +# H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6 +# Policy) by storing it into the IPv6 Destination Address. When a SRv6 +# Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits +# the SRH at all and pushes that SID directly into the IPv6 DA; +# +# ii) The SRv6 End behavior advances the active SID in the SID List +# carried by the SRH; +# +# iii) The SRv6 End.DX2 behavior is used for removing the SRv6 Policy +# and, thus, it terminates the VPN tunnel. The decapsulated L2 frame is +# sent over the interface connected with the destination host. +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +--- +---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y +# in the IPv6 operator network. +# +# Local SID table +# =============== +# +# Each SRv6 router is configured with a Local SID table in which SIDs are +# stored. Considering the given SRv6 router rt-x, at least two SIDs are +# configured in the Local SID table: +# +# Local SID table for SRv6 router rt-x +# +----------------------------------------------------------+ +# |fcff:x::e is associated with the SRv6 End behavior | +# |fcff:x::d2 is associated with the SRv6 End.DX2 behavior | +# +----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN +# services. Reachability of SIDs is ensured by proper configuration of the IPv6 +# operator's network and SRv6 routers. +# +# SRv6 Policies +# ============= +# +# An SRv6 ingress router applies SRv6 policies to the traffic received from a +# connected host. SRv6 policy enforcement consists of encapsulating the +# received traffic into a new IPv6 packet with a given SID List contained in +# the SRH. +# +# L2 VPN between hs-1 and hs-2 +# ---------------------------- +# +# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN. +# Specifically, packets generated from hs-1 and directed towards hs-2 are +# handled by rt-1 which applies the following SRv6 Policies: +# +# i.a) L2 traffic, SID List=fcff:2::d2 +# +# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2. +# The H.L2Encaps.Red omits the presence of SRH at all, since the SID List +# consists of only one SID (fcff:2::d2) that can be stored directly in the IPv6 +# DA. +# +# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following +# policies: +# +# i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2 +# +# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers +# rt-4,rt-3,rt2. The H.L2Encaps.Red reduces the SID List in the SRH by removing +# the first SID (fcff:4::e) and pushing it into the IPv6 DA. +# +# In summary: +# hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...| (i.a) +# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) +# + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly RT2HS_DEVNAME="veth-hs" +readonly HS_VETH_NAME="veth0" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly MAC_PREFIX=00:00:00:c0:01 +readonly END_FUNC=000e +readonly DX2_FUNC=00d2 + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done + + # Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy + # interface) + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End dev "${DUMMY_DEVNAME}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behaviors instaces are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule add \ + to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 +} + +# build and install the SRv6 policy into the ingress SRv6 router. +# args: +# $1 - destination host (i.e. cafe::x host) +# $2 - SRv6 router configured for enforcing the SRv6 Policy +# $3 - SRv6 routers configured for steering traffic (End behaviors) +# $4 - SRv6 router configured for removing the SRv6 Policy (router connected +# to the destination host) +# $5 - encap mode (full or red) +# $6 - traffic type (IPv6 or IPv4) +__setup_rt_policy() +{ + local dst="$1" + local encap_rt="$2" + local end_rts="$3" + local dec_rt="$4" + local mode="$5" + local traffic="$6" + local nsname + local policy='' + local n + + nsname="$(get_rtname "${encap_rt}")" + + for n in ${end_rts}; do + policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," + done + + policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}" + + # add SRv6 policy to incoming traffic sent by connected hosts + if [ "${traffic}" -eq 6 ]; then + ip -netns "${nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + else + ip -netns "${nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" \ + encap seg6 mode "${mode}" segs "${policy}" \ + dev dum0 + fi +} + +# see __setup_rt_policy +setup_rt_policy_ipv6() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6 +} + +#see __setup_rt_policy +setup_rt_policy_ipv4() +{ + __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4 +} + +setup_decap() +{ + local rt="$1" + local nsname + + nsname="$(get_rtname "${rt}")" + + # Local End.DX2 behavior + ip -netns "${nsname}" -6 route \ + add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \ + dev "${RT2HS_DEVNAME}" +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \ + dev "${HS_VETH_NAME}" nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \ + dev "${HS_VETH_NAME}" + + ip -netns "${hsname}" link set "${HS_VETH_NAME}" up + ip -netns "${hsname}" link set lo up + + ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \ + dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 +} + +# set an auto-generated mac address +# args: +# $1 - name of the node (e.g.: hs-1, rt-3, etc) +# $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc) +# $3 - host part of the IPv6 network address +# $4 - name of the network interface to which the generated mac address must +# be set. +set_mac_address() +{ + local nodename="$1" + local nodeid="$2" + local host="$3" + local ifname="$4" + local nsname + + nsname=$(get_nodename "${nodename}") + + ip -netns "${nsname}" link set dev "${ifname}" down + + ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \ + dev "${ifname}" + + # the IPv6 address must be set once again after the MAC address has + # been changed. + ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \ + dev "${ifname}" nodad + + ip -netns "${nsname}" link set dev "${ifname}" up +} + +set_host_l2peer() +{ + local hssrc="$1" + local hsdst="$2" + local ipprefix="$3" + local proto="$4" + local hssrc_name + local ipaddr + + hssrc_name="$(get_hsname "${hssrc}")" + + if [ "${proto}" -eq 6 ]; then + ipaddr="${ipprefix}::${hsdst}" + else + ipaddr="${ipprefix}.${hsdst}" + fi + + ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}" + + ip -netns "${hssrc_name}" neigh \ + add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \ + dev "${HS_VETH_NAME}" +} + +# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6 +# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6). +# args: +# $1 - source host +# $2 - SRv6 routers configured for steering tunneled traffic +# $3 - destination host +setup_l2vpn() +{ + local hssrc="$1" + local end_rts="$2" + local hsdst="$3" + local rtsrc="${hssrc}" + local rtdst="${hsdst}" + + # set fixed mac for source node and the neigh MAC address + set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 + set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 + + # we have to set the mac address of the veth-host (on ingress router) + # to the mac address of the remote peer (L2 VPN destination host). + # Otherwise, traffic coming from the source host is dropped at the + # ingress router. + set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + + # set the SRv6 Policies at the ingress router + setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 6 + setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ + l2encap.red 4 + + # set the decap behavior + setup_decap "${rtsrc}" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DX2) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # create a L2 VPN between hs-1 and hs-2. + # NB: currently, H.L2Encap* enables tunneling of L2 frames whose + # layer-3 is IPv4/IPv6. + # + # the network path between hs-1 and hs-2 traverses several routers + # depending on the direction of traffic. + # + # Direction hs-1 -> hs-2 (H.L2Encaps.Red) + # - rt-2 (SRv6 End.DX2 behavior) + # + # Direction hs-2 -> hs-1 (H.L2Encaps.Red) + # - rt-4,rt-3 (SRv6 End behaviors) + # - rt-1 (SRv6 End.DX2 behavior) + setup_l2vpn 1 "" 2 + setup_l2vpn 2 "4 3" 1 + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)" + + check_and_log_hs_connectivity 1 2 + check_and_log_hs_connectivity 2 1 +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "l2encap.red"; then + echo "SKIP: Missing SRv6 l2encap.red support in iproute2" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 5d70b04c482c..2cbb12736596 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -235,6 +235,7 @@ FIXTURE_VARIANT(tls) { uint16_t tls_version; uint16_t cipher_type; + bool nopad; }; FIXTURE_VARIANT_ADD(tls, 12_aes_gcm) @@ -297,9 +298,17 @@ FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256) .cipher_type = TLS_CIPHER_AES_GCM_256, }; +FIXTURE_VARIANT_ADD(tls, 13_nopad) +{ + .tls_version = TLS_1_3_VERSION, + .cipher_type = TLS_CIPHER_AES_GCM_128, + .nopad = true, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; + int one = 1; int ret; tls_crypto_info_init(variant->tls_version, variant->cipher_type, @@ -315,6 +324,12 @@ FIXTURE_SETUP(tls) ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ASSERT_EQ(ret, 0); + + if (variant->nopad) { + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&one, sizeof(one)); + ASSERT_EQ(ret, 0); + } } FIXTURE_TEARDOWN(tls) @@ -629,12 +644,14 @@ TEST_F(tls, splice_from_pipe2) int p2[2]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); ASSERT_GE(pipe(p2), 0); - EXPECT_GE(write(p[1], mem_send, 8000), 0); - EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0); - EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0); - EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0); + EXPECT_EQ(write(p[1], mem_send, 8000), 8000); + EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000); + EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000); + EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000); EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -668,10 +685,12 @@ TEST_F(tls, splice_to_pipe) char mem_recv[TLS_PAYLOAD_MAX_LEN]; int p[2]; + memrnd(mem_send, sizeof(mem_send)); + ASSERT_GE(pipe(p), 0); - EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0); - EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0); - EXPECT_GE(read(p[0], mem_recv, send_len), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } @@ -860,6 +879,8 @@ TEST_F(tls, multiple_send_single_recv) char recv_mem[2 * 10]; char send_mem[10]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0); memset(recv_mem, 0, total_len); @@ -876,6 +897,8 @@ TEST_F(tls, single_send_multiple_recv_non_align) char recv_mem[recv_len * 2]; char send_mem[total_len]; + memrnd(send_mem, sizeof(send_mem)); + EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0); memset(recv_mem, 0, total_len); @@ -921,10 +944,10 @@ TEST_F(tls, recv_peek) char buf[15]; EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); - EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); memset(buf, 0, sizeof(buf)); - EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); EXPECT_EQ(memcmp(test_str, buf, send_len), 0); } @@ -1582,6 +1605,38 @@ TEST_F(tls_err, bad_cmsg) EXPECT_EQ(errno, EBADMSG); } +TEST_F(tls_err, timeo) +{ + struct timeval tv = { .tv_usec = 10000, }; + char buf[128]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + + wait(&ret); + } else { + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EAGAIN); + exit(0); + } +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -1659,6 +1714,57 @@ TEST(keysizes) { close(cfd); } +TEST(no_pad) { + struct tls12_crypto_info_aes_gcm_256 tls12; + int ret, fd, cfd, val; + socklen_t len; + bool notls; + + memset(&tls12, 0, sizeof(tls12)); + tls12.info.version = TLS_1_3_VERSION; + tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; + + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); + + if (notls) + exit(KSFT_SKIP); + + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12)); + EXPECT_EQ(ret, 0); + + val = 1; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 1); + EXPECT_EQ(len, 4); + + val = 0; + ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, sizeof(val)); + EXPECT_EQ(ret, 0); + + len = sizeof(val); + val = 2; + ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD, + (void *)&val, &len); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, 0); + EXPECT_EQ(len, 4); + + close(fd); + close(cfd); +} + TEST(tls_v6ops) { struct tls_crypto_info_keys tls12; struct sockaddr_in6 addr, addr2; diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c new file mode 100644 index 000000000000..fa83918b62d1 --- /dev/null +++ b/tools/testing/selftests/net/tun.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/if.h> +#include <linux/if_tun.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <sys/ioctl.h> +#include <sys/socket.h> + +#include "../kselftest_harness.h" + +static int tun_attach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_ATTACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_detach(int fd, char *dev) +{ + struct ifreq ifr; + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_DETACH_QUEUE; + + return ioctl(fd, TUNSETQUEUE, (void *) &ifr); +} + +static int tun_alloc(char *dev) +{ + struct ifreq ifr; + int fd, err; + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) { + fprintf(stderr, "can't open tun: %s\n", strerror(errno)); + return fd; + } + + memset(&ifr, 0, sizeof(ifr)); + strcpy(ifr.ifr_name, dev); + ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE; + + err = ioctl(fd, TUNSETIFF, (void *) &ifr); + if (err < 0) { + fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno)); + close(fd); + return err; + } + strcpy(dev, ifr.ifr_name); + return fd; +} + +static int tun_delete(char *dev) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg ifm; + unsigned char data[64]; + } req; + struct rtattr *rta; + int ret, rtnl; + + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); + if (rtnl < 0) { + fprintf(stderr, "can't open rtnl: %s\n", strerror(errno)); + return 1; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm))); + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_type = RTM_DELLINK; + + req.ifm.ifi_family = AF_UNSPEC; + + rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); + rta->rta_type = IFLA_IFNAME; + rta->rta_len = RTA_LENGTH(IFNAMSIZ); + req.nh.nlmsg_len += rta->rta_len; + memcpy(RTA_DATA(rta), dev, IFNAMSIZ); + + ret = send(rtnl, &req, req.nh.nlmsg_len, 0); + if (ret < 0) + fprintf(stderr, "can't send: %s\n", strerror(errno)); + ret = (unsigned int)ret != req.nh.nlmsg_len; + + close(rtnl); + return ret; +} + +FIXTURE(tun) +{ + char ifname[IFNAMSIZ]; + int fd, fd2; +}; + +FIXTURE_SETUP(tun) +{ + memset(self->ifname, 0, sizeof(self->ifname)); + + self->fd = tun_alloc(self->ifname); + ASSERT_GE(self->fd, 0); + + self->fd2 = tun_alloc(self->ifname); + ASSERT_GE(self->fd2, 0); +} + +FIXTURE_TEARDOWN(tun) +{ + if (self->fd >= 0) + close(self->fd); + if (self->fd2 >= 0) + close(self->fd2); +} + +TEST_F(tun, delete_detach_close) { + EXPECT_EQ(tun_delete(self->ifname), 0); + EXPECT_EQ(tun_detach(self->fd, self->ifname), -1); + EXPECT_EQ(errno, 22); +} + +TEST_F(tun, detach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, detach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_delete_close) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_F(tun, reattach_close_delete) { + EXPECT_EQ(tun_detach(self->fd, self->ifname), 0); + EXPECT_EQ(tun_attach(self->fd, self->ifname), 0); + close(self->fd); + self->fd = -1; + EXPECT_EQ(tun_delete(self->ifname), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index f8a19f548ae9..ebbd0b282432 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -34,7 +34,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp } run_one() { diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index 820bc50f6b68..fad2d1a71cac 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -34,7 +34,7 @@ run_one() { ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r & ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r & diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 807b74c8fd80..832c738cc3c2 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -36,7 +36,7 @@ run_one() { ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on - ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy + ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp tc -n "${PEER_NS}" qdisc add dev veth1 clsact tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 6f05e06f6761..1bcd82e1f662 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -46,7 +46,7 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null + ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null } create_vxlan_endpoint() { diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 80b5d352702e..dc932fd65363 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -120,7 +120,7 @@ run_all() { run_udp "${ipv4_args}" echo "ipv6" - run_tcp "${ipv4_args}" + run_tcp "${ipv6_args}" run_udp "${ipv6_args}" } diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 19eac3e44c06..430895d1a2b6 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -289,14 +289,14 @@ if [ $CPUS -gt 1 ]; then ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null printf "%-60s" "bad setting: XDP with RX nr less than TX" ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ - section xdp_dummy 2>/dev/null &&\ + section xdp 2>/dev/null &&\ echo "fail - set operation successful ?!?" || echo " ok " # the following tests will run with multiple channels active ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 ip netns exec $NS_DST ethtool -L veth$DST rx 2 ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \ - section xdp_dummy 2>/dev/null + section xdp 2>/dev/null printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set" ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\ echo "fail - set operation successful ?!?" || echo " ok " @@ -311,7 +311,7 @@ if [ $CPUS -gt 2 ]; then chk_channels "setting invalid channels nr" $DST 2 2 fi -ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null +ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null chk_gro_flag "with xdp attached - gro flag" $DST on chk_gro_flag " - peer gro flag" $SRC off chk_tso_flag " - tso flag" $SRC off diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index b35010cc7f6a..a6991877e50c 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -31,7 +31,7 @@ BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" - ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh + ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh pktgen/pktgen_bench_xmit_mode_netif_receive.sh" # Definition of set types: diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h index 886dc026fe7a..26cde8ea1f49 100644 --- a/tools/testing/selftests/powerpc/include/basic_asm.h +++ b/tools/testing/selftests/powerpc/include/basic_asm.h @@ -5,6 +5,16 @@ #include <ppc-asm.h> #include <asm/unistd.h> +#ifdef __powerpc64__ +#define PPC_LL ld +#define PPC_STL std +#define PPC_STLU stdu +#else +#define PPC_LL lwz +#define PPC_STL stw +#define PPC_STLU stwu +#endif + #define LOAD_REG_IMMEDIATE(reg, expr) \ lis reg, (expr)@highest; \ ori reg, reg, (expr)@higher; \ @@ -14,16 +24,20 @@ /* * Note: These macros assume that variables being stored on the stack are - * doublewords, while this is usually the case it may not always be the + * sizeof(long), while this is usually the case it may not always be the * case for each use case. */ +#ifdef __powerpc64__ + +// ABIv2 #if defined(_CALL_ELF) && _CALL_ELF == 2 #define STACK_FRAME_MIN_SIZE 32 #define STACK_FRAME_TOC_POS 24 #define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8)) #define __STACK_FRAME_LOCAL(_num_params, _var_num) \ ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8)) -#else + +#else // ABIv1 below #define STACK_FRAME_MIN_SIZE 112 #define STACK_FRAME_TOC_POS 40 #define __STACK_FRAME_PARAM(i) (48 + ((i)*8)) @@ -34,7 +48,24 @@ */ #define __STACK_FRAME_LOCAL(_num_params, _var_num) \ (112 + ((_var_num)*8)) -#endif + + +#endif // ABIv2 + +// Common 64-bit +#define STACK_FRAME_LR_POS 16 +#define STACK_FRAME_CR_POS 8 + +#else // 32-bit below + +#define STACK_FRAME_MIN_SIZE 16 +#define STACK_FRAME_LR_POS 4 + +#define __STACK_FRAME_PARAM(_param) (STACK_FRAME_MIN_SIZE + ((_param)*4)) +#define __STACK_FRAME_LOCAL(_num_params, _var_num) \ + ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*4)) + +#endif // __powerpc64__ /* Parameter x saved to the stack */ #define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var) @@ -42,8 +73,6 @@ /* Local variable x saved to the stack after x parameters */ #define STACK_FRAME_LOCAL(num_params, var) \ __STACK_FRAME_LOCAL(num_params, var) -#define STACK_FRAME_LR_POS 16 -#define STACK_FRAME_CR_POS 8 /* * It is very important to note here that _extra is the extra amount of @@ -56,19 +85,21 @@ * preprocessed incorrectly, hence r0. */ #define PUSH_BASIC_STACK(_extra) \ - mflr r0; \ - std r0, STACK_FRAME_LR_POS(%r1); \ - stdu %r1, -(_extra + STACK_FRAME_MIN_SIZE)(%r1); \ - mfcr r0; \ - stw r0, STACK_FRAME_CR_POS(%r1); \ - std %r2, STACK_FRAME_TOC_POS(%r1); + mflr r0; \ + PPC_STL r0, STACK_FRAME_LR_POS(%r1); \ + PPC_STLU %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1); #define POP_BASIC_STACK(_extra) \ - ld %r2, STACK_FRAME_TOC_POS(%r1); \ - lwz r0, STACK_FRAME_CR_POS(%r1); \ - mtcr r0; \ - addi %r1, %r1, (_extra + STACK_FRAME_MIN_SIZE); \ - ld r0, STACK_FRAME_LR_POS(%r1); \ + addi %r1, %r1, (((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE); \ + PPC_LL r0, STACK_FRAME_LR_POS(%r1); \ mtlr r0; +.macro OP_REGS op, reg_width, start_reg, end_reg, base_reg, base_reg_offset=0, skip=0 + .set i, \start_reg + .rept (\end_reg - \start_reg + 1) + \op i, (\reg_width * (i - \skip) + \base_reg_offset)(\base_reg) + .set i, i + 1 + .endr +.endm + #endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */ diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index c422be8a42b2..d5a547f72669 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -55,6 +55,10 @@ #define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) #define SPRN_PVR 0x11F +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + #define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */ #define SPRN_DSCR 0x03 /* Data Stream Control Register */ #define SPRN_PPR 896 /* Program Priority Register */ @@ -123,45 +127,44 @@ "li 30, %[" #_asm_symbol_name_immed "];" \ "li 31, %[" #_asm_symbol_name_immed "];" -#define ASM_LOAD_FPR_SINGLE_PRECISION(_asm_symbol_name_addr) \ - "lfs 0, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 1, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 2, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 3, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 4, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 5, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 6, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 7, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 8, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 9, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 10, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 11, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 12, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 13, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 14, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 15, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 16, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 17, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 18, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 19, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 20, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 21, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 22, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 23, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 24, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 25, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 26, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 27, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 28, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 29, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 30, 0(%[" #_asm_symbol_name_addr "]);" \ - "lfs 31, 0(%[" #_asm_symbol_name_addr "]);" +#define ASM_LOAD_FPR(_asm_symbol_name_addr) \ + "lfd 0, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 1, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 2, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 3, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 4, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 5, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 6, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 7, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 8, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 9, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 10, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 11, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 12, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 13, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 14, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 15, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 16, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 17, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 18, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 19, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 20, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 21, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 22, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 23, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 24, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 25, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 26, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 27, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 28, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 29, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 30, 0(%[" #_asm_symbol_name_addr "]);" \ + "lfd 31, 0(%[" #_asm_symbol_name_addr "]);" #ifndef __ASSEMBLER__ void store_gpr(unsigned long *addr); void load_gpr(unsigned long *addr); -void load_fpr_single_precision(float *addr); -void store_fpr_single_precision(float *addr); +void store_fpr(double *addr); #endif /* end of __ASSEMBLER__ */ #endif /* _SELFTESTS_POWERPC_REG_H */ diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index b9fa9cd709df..e222a5858450 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -74,6 +74,16 @@ static inline bool have_hwcap2(unsigned long ftr2) } #endif +static inline char *auxv_base_platform(void) +{ + return ((char *)get_auxv_entry(AT_BASE_PLATFORM)); +} + +static inline char *auxv_platform(void) +{ + return ((char *)get_auxv_entry(AT_PLATFORM)); +} + bool is_ppc64le(void); int using_hash_mmu(bool *using_hash); diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S index 9304ea7d59b9..6d1af4a9a6b4 100644 --- a/tools/testing/selftests/powerpc/lib/reg.S +++ b/tools/testing/selftests/powerpc/lib/reg.S @@ -53,79 +53,42 @@ FUNC_START(store_gpr) blr FUNC_END(store_gpr) -/* Single Precision Float - float buf[32] */ -FUNC_START(load_fpr_single_precision) - lfs 0, 0*4(3) - lfs 1, 1*4(3) - lfs 2, 2*4(3) - lfs 3, 3*4(3) - lfs 4, 4*4(3) - lfs 5, 5*4(3) - lfs 6, 6*4(3) - lfs 7, 7*4(3) - lfs 8, 8*4(3) - lfs 9, 9*4(3) - lfs 10, 10*4(3) - lfs 11, 11*4(3) - lfs 12, 12*4(3) - lfs 13, 13*4(3) - lfs 14, 14*4(3) - lfs 15, 15*4(3) - lfs 16, 16*4(3) - lfs 17, 17*4(3) - lfs 18, 18*4(3) - lfs 19, 19*4(3) - lfs 20, 20*4(3) - lfs 21, 21*4(3) - lfs 22, 22*4(3) - lfs 23, 23*4(3) - lfs 24, 24*4(3) - lfs 25, 25*4(3) - lfs 26, 26*4(3) - lfs 27, 27*4(3) - lfs 28, 28*4(3) - lfs 29, 29*4(3) - lfs 30, 30*4(3) - lfs 31, 31*4(3) +/* Double Precision Float - double buf[32] */ +FUNC_START(store_fpr) + stfd 0, 0*8(3) + stfd 1, 1*8(3) + stfd 2, 2*8(3) + stfd 3, 3*8(3) + stfd 4, 4*8(3) + stfd 5, 5*8(3) + stfd 6, 6*8(3) + stfd 7, 7*8(3) + stfd 8, 8*8(3) + stfd 9, 9*8(3) + stfd 10, 10*8(3) + stfd 11, 11*8(3) + stfd 12, 12*8(3) + stfd 13, 13*8(3) + stfd 14, 14*8(3) + stfd 15, 15*8(3) + stfd 16, 16*8(3) + stfd 17, 17*8(3) + stfd 18, 18*8(3) + stfd 19, 19*8(3) + stfd 20, 20*8(3) + stfd 21, 21*8(3) + stfd 22, 22*8(3) + stfd 23, 23*8(3) + stfd 24, 24*8(3) + stfd 25, 25*8(3) + stfd 26, 26*8(3) + stfd 27, 27*8(3) + stfd 28, 28*8(3) + stfd 29, 29*8(3) + stfd 30, 30*8(3) + stfd 31, 31*8(3) blr -FUNC_END(load_fpr_single_precision) - -/* Single Precision Float - float buf[32] */ -FUNC_START(store_fpr_single_precision) - stfs 0, 0*4(3) - stfs 1, 1*4(3) - stfs 2, 2*4(3) - stfs 3, 3*4(3) - stfs 4, 4*4(3) - stfs 5, 5*4(3) - stfs 6, 6*4(3) - stfs 7, 7*4(3) - stfs 8, 8*4(3) - stfs 9, 9*4(3) - stfs 10, 10*4(3) - stfs 11, 11*4(3) - stfs 12, 12*4(3) - stfs 13, 13*4(3) - stfs 14, 14*4(3) - stfs 15, 15*4(3) - stfs 16, 16*4(3) - stfs 17, 17*4(3) - stfs 18, 18*4(3) - stfs 19, 19*4(3) - stfs 20, 20*4(3) - stfs 21, 21*4(3) - stfs 22, 22*4(3) - stfs 23, 23*4(3) - stfs 24, 24*4(3) - stfs 25, 25*4(3) - stfs 26, 26*4(3) - stfs 27, 27*4(3) - stfs 28, 28*4(3) - stfs 29, 29*4(3) - stfs 30, 30*4(3) - stfs 31, 31*4(3) - blr -FUNC_END(store_fpr_single_precision) +FUNC_END(store_fpr) /* VMX/VSX registers - unsigned long buf[128] */ FUNC_START(loadvsx) diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore index d0c23b2e4b60..07b4893ef7af 100644 --- a/tools/testing/selftests/powerpc/math/.gitignore +++ b/tools/testing/selftests/powerpc/math/.gitignore @@ -7,3 +7,4 @@ fpu_signal vmx_signal vsx_preempt fpu_denormal +mma diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S index 8528c9849565..61cc88b1b26b 100644 --- a/tools/testing/selftests/powerpc/math/mma.S +++ b/tools/testing/selftests/powerpc/math/mma.S @@ -20,6 +20,9 @@ test_mma: /* xvi16ger2s */ .long 0xec042958 + /* Deprime the accumulator - xxmfacc 0 */ + .long 0x7c000162 + /* Store result in image passed in r5 */ stxvw4x 0,0,5 addi 5,5,16 diff --git a/tools/testing/selftests/powerpc/mce/.gitignore b/tools/testing/selftests/powerpc/mce/.gitignore new file mode 100644 index 000000000000..f5921462a495 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/.gitignore @@ -0,0 +1 @@ +inject-ra-err diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c index bab0dc06e90b..9b655be641c9 100644 --- a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -7,6 +7,7 @@ * Copyright 2022, Pratik Rajesh Sampat, IBM Corp. */ +#include <errno.h> #include <stdio.h> #include <string.h> #include <dirent.h> @@ -32,7 +33,7 @@ enum type { NUM_VAL }; -int value_type(int id) +static int value_type(int id) { int val_type; @@ -54,15 +55,21 @@ int value_type(int id) return val_type; } -int verify_energy_info(void) +static int verify_energy_info(void) { const char *path = "/sys/firmware/papr/energy_scale_info"; struct dirent *entry; struct stat s; DIR *dirp; - if (stat(path, &s) || !S_ISDIR(s.st_mode)) - return -1; + errno = 0; + if (stat(path, &s)) { + SKIP_IF(errno == ENOENT); + FAIL_IF(errno); + } + + FAIL_IF(!S_ISDIR(s.st_mode)); + dirp = opendir(path); while ((entry = readdir(dirp)) != NULL) { @@ -76,25 +83,24 @@ int verify_energy_info(void) id = atoi(entry->d_name); attr_type = value_type(id); - if (attr_type == INVALID) - return -1; + FAIL_IF(attr_type == INVALID); /* Check if the files exist and have data in them */ sprintf(file_name, "%s/%d/desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); sprintf(file_name, "%s/%d/value", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); if (attr_type == STR_VAL) { sprintf(file_name, "%s/%d/value_desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); } } diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index edbd96d3b2ab..30803353bd7c 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c top_srcdir = ../../../../.. include ../../lib.mk -all: $(TEST_GEN_PROGS) ebb sampling_tests +all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests $(TEST_GEN_PROGS): $(EXTRA_SOURCES) @@ -27,6 +27,7 @@ override define RUN_TESTS $(DEFAULT_RUN_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef DEFAULT_EMIT_TESTS := $(EMIT_TESTS) @@ -34,6 +35,7 @@ override define EMIT_TESTS $(DEFAULT_EMIT_TESTS) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) @@ -41,12 +43,14 @@ override define INSTALL_RULE $(DEFAULT_INSTALL_RULE) TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install endef clean: $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean + TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean ebb: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all @@ -54,4 +58,7 @@ ebb: sampling_tests: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests clean ebb sampling_tests +event_code_tests: + TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all + +.PHONY: all run_tests clean ebb sampling_tests event_code_tests diff --git a/tools/testing/selftests/powerpc/pmu/branch_loops.S b/tools/testing/selftests/powerpc/pmu/branch_loops.S new file mode 100644 index 000000000000..de758dd3cecf --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/branch_loops.S @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <ppc-asm.h> + + .text + +#define ITER_SHIFT 31 + +FUNC_START(indirect_branch_loop) + li r3, 1 + sldi r3, r3, ITER_SHIFT + +1: cmpdi r3, 0 + beqlr + + addi r3, r3, -1 + + ld r4, 2f@got(%r2) + mtctr r4 + bctr + + .balign 32 +2: b 1b + +FUNC_END(indirect_branch_loop) diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore index 2920fb39439b..64d8dfdac74a 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore +++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore @@ -21,3 +21,4 @@ back_to_back_ebbs_test lost_exception_test no_handler_test cycles_with_mmcr2_test +regs_access_pmccext_test diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c index 4b45a2e70f62..fc32187d483d 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c +++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c @@ -50,6 +50,7 @@ int cycles_with_mmcr2(void) expected[1] = MMCR2_EXPECTED_2; i = 0; bad_mmcr2 = false; + actual = 0; /* Make sure we loop until we take at least one EBB */ while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) || diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile new file mode 100644 index 000000000000..4e07d7046457 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -m64 + +TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \ + group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \ + group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \ + blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \ + group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test \ + group_constraint_unit_test group_constraint_thresh_ctl_test group_constraint_thresh_sel_test \ + hw_cache_event_type_test + +top_srcdir = ../../../../../.. +include ../../../lib.mk + +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c new file mode 100644 index 000000000000..fafeff19cb34 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <sys/prctl.h> +#include <limits.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_DTLB_MISS_16G 0x1c058 +#define PM_DERAT_MISS_2M 0x1c05a +#define PM_DTLB_MISS_2M 0x1c05c +#define PM_MRK_DTLB_MISS_1G 0x1d15c +#define PM_DTLB_MISS_4K 0x2c056 +#define PM_DERAT_MISS_1G 0x2c05a +#define PM_MRK_DERAT_MISS_2M 0x2d152 +#define PM_MRK_DTLB_MISS_4K 0x2d156 +#define PM_MRK_DTLB_MISS_16G 0x2d15e +#define PM_DTLB_MISS_64K 0x3c056 +#define PM_MRK_DERAT_MISS_1G 0x3d152 +#define PM_MRK_DTLB_MISS_64K 0x3d156 +#define PM_DISP_HELD_SYNC_HOLD 0x4003c +#define PM_DTLB_MISS_16M 0x4c056 +#define PM_DTLB_MISS_1G 0x4c05a +#define PM_MRK_DTLB_MISS_16M 0x4c15e +#define PM_MRK_ST_DONE_L2 0x10134 +#define PM_RADIX_PWC_L1_HIT 0x1f056 +#define PM_FLOP_CMPL 0x100f4 +#define PM_MRK_NTF_FIN 0x20112 +#define PM_RADIX_PWC_L2_HIT 0x2d024 +#define PM_IFETCH_THROTTLE 0x3405e +#define PM_MRK_L2_TM_ST_ABORT_SISTER 0x3e15c +#define PM_RADIX_PWC_L3_HIT 0x3f056 +#define PM_RUN_CYC_SMT2_MODE 0x3006c +#define PM_TM_TX_PASS_RUN_INST 0x4e014 + +#define PVR_POWER9_CUMULUS 0x00002000 + +int blacklist_events_dd21[] = { + PM_MRK_ST_DONE_L2, + PM_RADIX_PWC_L1_HIT, + PM_FLOP_CMPL, + PM_MRK_NTF_FIN, + PM_RADIX_PWC_L2_HIT, + PM_IFETCH_THROTTLE, + PM_MRK_L2_TM_ST_ABORT_SISTER, + PM_RADIX_PWC_L3_HIT, + PM_RUN_CYC_SMT2_MODE, + PM_TM_TX_PASS_RUN_INST, + PM_DISP_HELD_SYNC_HOLD, +}; + +int blacklist_events_dd22[] = { + PM_DTLB_MISS_16G, + PM_DERAT_MISS_2M, + PM_DTLB_MISS_2M, + PM_MRK_DTLB_MISS_1G, + PM_DTLB_MISS_4K, + PM_DERAT_MISS_1G, + PM_MRK_DERAT_MISS_2M, + PM_MRK_DTLB_MISS_4K, + PM_MRK_DTLB_MISS_16G, + PM_DTLB_MISS_64K, + PM_MRK_DERAT_MISS_1G, + PM_MRK_DTLB_MISS_64K, + PM_DISP_HELD_SYNC_HOLD, + PM_DTLB_MISS_16M, + PM_DTLB_MISS_1G, + PM_MRK_DTLB_MISS_16M, +}; + +int pvr_min; + +/* + * check for power9 support for 2.1 and + * 2.2 model where blacklist is applicable. + */ +int check_for_power9_version(void) +{ + pvr_min = PVR_MIN(mfspr(SPRN_PVR)); + + SKIP_IF(PVR_VER(pvr) != POWER9); + SKIP_IF(!(pvr & PVR_POWER9_CUMULUS)); + + SKIP_IF(!(3 - pvr_min)); + + return 0; +} + +/* + * Testcase to ensure that using blacklisted bits in + * event code should cause event_open to fail in power9 + */ + +static int blacklisted_events(void) +{ + struct event event; + int i = 0; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * check for power9 support for 2.1 and + * 2.2 model where blacklist is applicable. + */ + SKIP_IF(check_for_power9_version()); + + /* Skip for Generic compat mode */ + SKIP_IF(check_for_generic_compat_pmu()); + + if (pvr_min == 1) { + for (i = 0; i < ARRAY_SIZE(blacklist_events_dd21); i++) { + event_init(&event, blacklist_events_dd21[i]); + FAIL_IF(!event_open(&event)); + } + } else if (pvr_min == 2) { + for (i = 0; i < ARRAY_SIZE(blacklist_events_dd22); i++) { + event_init(&event, blacklist_events_dd22[i]); + FAIL_IF(!event_open(&event)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(blacklisted_events, "blacklisted_events"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c new file mode 100644 index 000000000000..8be7aada6523 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_RUN_CYC_ALT 0x200f4 +#define PM_INST_DISP 0x200f2 +#define PM_BR_2PATH 0x20036 +#define PM_LD_MISS_L1 0x3e054 +#define PM_RUN_INST_CMPL_ALT 0x400fa + +#define EventCode_1 0x100fc +#define EventCode_2 0x200fa +#define EventCode_3 0x300fc +#define EventCode_4 0x400fc + +/* + * Check for event alternatives. + */ + +static int event_alternatives_tests_p10(void) +{ + struct event *e, events[5]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PVR check is used here since PMU specific data like + * alternative events is handled by respective PMU driver + * code and using PVR will work correctly for all cases + * including generic compat mode. + */ + SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER10); + + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * Test for event alternative for 0x0001e + * and 0x00002. + */ + e = &events[0]; + event_init(e, 0x0001e); + + e = &events[1]; + event_init(e, EventCode_1); + + e = &events[2]; + event_init(e, EventCode_2); + + e = &events[3]; + event_init(e, EventCode_3); + + e = &events[4]; + event_init(e, EventCode_4); + + FAIL_IF(event_open(&events[0])); + + /* + * Expected to pass since 0x0001e has alternative event + * 0x600f4 in PMC6. So it can go in with other events + * in PMC1 to PMC4. + */ + for (i = 1; i < 5; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 5; i++) + event_close(&events[i]); + + e = &events[0]; + event_init(e, 0x00002); + + e = &events[1]; + event_init(e, EventCode_1); + + e = &events[2]; + event_init(e, EventCode_2); + + e = &events[3]; + event_init(e, EventCode_3); + + e = &events[4]; + event_init(e, EventCode_4); + + FAIL_IF(event_open(&events[0])); + + /* + * Expected to pass since 0x00020 has alternative event + * 0x500fa in PMC5. So it can go in with other events + * in PMC1 to PMC4. + */ + for (i = 1; i < 5; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 5; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(event_alternatives_tests_p10, "event_alternatives_tests_p10"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c new file mode 100644 index 000000000000..f7dcf0e0447c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define PM_RUN_CYC_ALT 0x200f4 +#define PM_INST_DISP 0x200f2 +#define PM_BR_2PATH 0x20036 +#define PM_LD_MISS_L1 0x3e054 +#define PM_RUN_INST_CMPL_ALT 0x400fa + +#define EventCode_1 0x200fa +#define EventCode_2 0x200fc +#define EventCode_3 0x300fc +#define EventCode_4 0x400fc + +/* + * Check for event alternatives. + */ + +static int event_alternatives_tests_p9(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PVR check is used here since PMU specific data like + * alternative events is handled by respective PMU driver + * code and using PVR will work correctly for all cases + * including generic compat mode. + */ + SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER9); + + /* Skip for generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event for PM_RUN_CYC_ALT */ + event_init(&leader, PM_RUN_CYC_ALT); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_1); + + /* + * Expected to pass since PM_RUN_CYC_ALT in PMC2 has alternative event + * 0x600f4. So it can go in with EventCode_1 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_INST_DISP); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + /* + * Expected to pass since PM_INST_DISP in PMC2 has alternative event + * 0x300f2 in PMC3. So it can go in with EventCode_2 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_BR_2PATH); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + /* + * Expected to pass since PM_BR_2PATH in PMC2 has alternative event + * 0x40036 in PMC4. So it can go in with EventCode_2 which is using PMC2 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_LD_MISS_L1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_3); + /* + * Expected to pass since PM_LD_MISS_L1 in PMC3 has alternative event + * 0x400f0 in PMC4. So it can go in with EventCode_3 which is using PMC3 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + event_init(&leader, PM_RUN_INST_CMPL_ALT); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_4); + /* + * Expected to pass since PM_RUN_INST_CMPL_ALT in PMC4 has alternative event + * 0x500fa in PMC5. So it can go in with EventCode_4 which is using PMC4 + */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(event_alternatives_tests_p9, "event_alternatives_tests_p9"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c new file mode 100644 index 000000000000..0d237c15d3f2 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <sys/prctl.h> +#include <limits.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase to ensure that using invalid event in generic + * event for PERF_TYPE_HARDWARE should fail + */ + +static int generic_events_valid_test(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* generic events is different in compat_mode */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * Invalid generic events in power10: + * - PERF_COUNT_HW_BUS_CYCLES + * - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND + * - PERF_COUNT_HW_STALLED_CYCLES_BACKEND + * - PERF_COUNT_HW_REF_CPU_CYCLES + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) { + /* + * Invalid generic events in power9: + * - PERF_COUNT_HW_BUS_CYCLES + * - PERF_COUNT_HW_REF_CPU_CYCLES + */ + event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, + PERF_TYPE_HARDWARE, "event"); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event"); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(generic_events_valid_test, "generic_events_valid_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c new file mode 100644 index 000000000000..f4be05aa3a3d --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All L1 D cache load references counted at finish, gated by reject */ +#define EventCode_1 0x1100fc +/* Load Missed L1 */ +#define EventCode_2 0x23e054 +/* Load Missed L1 */ +#define EventCode_3 0x13e054 + +/* + * Testcase for group constraint check of data and instructions + * cache qualifier bits which is used to program cache select field in + * Monitor Mode Control Register 1 (MMCR1: 16-17) for l1 cache. + * All events in the group should match cache select bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_cache(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group contraint check for l1 cache select bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't request same l1 cache select bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint l1 cache select test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling event request same l1 cache select bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_cache, "group_constraint_cache"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c new file mode 100644 index 000000000000..85a636886069 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All successful D-side store dispatches for this thread */ +#define EventCode_1 0x010000046080 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode_2 0x26880 +/* All successful D-side store dispatches for this thread that were L2 Miss */ +#define EventCode_3 0x010000026880 + +/* + * Testcase for group constraint check of l2l3_sel bits which is + * used to program l2l3 select field in Monitor Mode Control Register 0 + * (MMCR0: 56-60). + * All events in the group should match l2l3_sel bits otherwise + * event_open for the group should fail. + */ +static int group_constraint_l2l3_sel(void) +{ + struct event event, leader; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group contraint check for l2l3_sel bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't request same l2l3_sel bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint l2l3_sel test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling event request same l2l3_sel bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_l2l3_sel, "group_constraint_l2l3_sel"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c new file mode 100644 index 000000000000..ff625b5d80eb --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x353c0101ec +#define EventCode_3 0x35340101ec +/* + * Test that using different sample bits in + * event code cause failure in schedule for + * group of events. + */ + +static int group_constraint_mmcra_sample(void) +{ + struct event event, leader; + + SKIP_IF(platform_check_for_tests()); + + /* + * Events with different "sample" field values + * in a group will fail to schedule. + * Use event with load only sampling mode as + * group leader. Use event with store only sampling + * as sibling event. + */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling event doesn't use same sampling bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode_3); + + /* Expected to pass as sibling event use same sampling bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_mmcra_sample, "group_constraint_mmcra_sample"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c new file mode 100644 index 000000000000..f5ee4796d46c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for checking constraint checks for + * Performance Monitor Counter 5 (PMC5) and also + * Performance Monitor Counter 6 (PMC6). Events using + * PMC5/PMC6 shouldn't have other fields in event + * code like cache bits, thresholding or marked bit. + */ + +static int group_constraint_pmc56(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Events using PMC5 and PMC6 with cache bit + * set in event code is expected to fail. + */ + event_init(&event, 0x2500fa); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x2600f4); + FAIL_IF(!event_open(&event)); + + /* + * PMC5 and PMC6 only supports base events: + * ie 500fa and 600f4. Other combinations + * should fail. + */ + event_init(&event, 0x501e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x6001e); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x501fa); + FAIL_IF(!event_open(&event)); + + /* + * Events using PMC5 and PMC6 with random + * sampling bits set in event code should fail + * to schedule. + */ + event_init(&event, 0x35340500fa); + FAIL_IF(!event_open(&event)); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_pmc56, "group_constraint_pmc56"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c new file mode 100644 index 000000000000..af7c5c75101c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for number of counters in use. + * The number of programmable counters is from + * performance monitor counter 1 to performance + * monitor counter 4 (PMC1-PMC4). If number of + * counters in use exceeds the limit, next event + * should fail to schedule. + */ + +static int group_constraint_pmc_count(void) +{ + struct event *e, events[5]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Test for number of counters in use. + * Use PMC1 to PMC4 for leader and 3 sibling + * events. Trying to open fourth event should + * fail here. + */ + e = &events[0]; + event_init(e, 0x1001a); + + e = &events[1]; + event_init(e, 0x200fc); + + e = &events[2]; + event_init(e, 0x30080); + + e = &events[3]; + event_init(e, 0x40054); + + e = &events[4]; + event_init(e, 0x0002c); + + FAIL_IF(event_open(&events[0])); + + /* + * The event_open will fail on event 4 if constraint + * check fails + */ + for (i = 1; i < 5; i++) { + if (i == 4) + FAIL_IF(!event_open_with_group(&events[i], events[0].fd)); + else + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + } + + for (i = 1; i < 4; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_pmc_count, "group_constraint_pmc_count"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c new file mode 100644 index 000000000000..9225618b846a --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */ +#define EventCode_1 0x14242 +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L3 */ +#define EventCode_2 0x24242 + +/* + * Testcase for group constraint check for radix_scope_qual + * field which is used to program Monitor Mode Control + * egister (MMCR1) bit 18. + * All events in the group should match radix_scope_qual, + * bits otherwise event_open for the group should fail. + */ + +static int group_constraint_radix_scope_qual(void) +{ + struct event event, leader; + + /* + * Check for platform support for the test. + * This test is aplicable on power10 only. + */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group contraint check for radix_scope_qual bits */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, 0x200fc); + + /* Expected to fail as sibling event doesn't request same radix_scope_qual bits as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode_2); + /* Expected to pass as sibling event request same radix_scope_qual bits as leader */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_radix_scope_qual, + "group_constraint_radix_scope_qual"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c new file mode 100644 index 000000000000..371cd05bb3ed --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* The processor's L1 data cache was reloaded */ +#define EventCode1 0x21C040 +#define EventCode2 0x22C040 + +/* + * Testcase for group constraint check + * when using events with same PMC. + * Multiple events in a group shouldn't + * ask for same PMC. If so it should fail. + */ + +static int group_constraint_repeat(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Two events in a group using same PMC + * should fail to get scheduled. Usei same PMC2 + * for leader and sibling event which is expected + * to fail. + */ + event_init(&leader, EventCode1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode1); + + /* Expected to fail since sibling event is requesting same PMC as leader */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_init(&event, EventCode2); + + /* Expected to pass since sibling event is requesting different PMC */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_repeat, "group_constraint_repeat"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c new file mode 100644 index 000000000000..9f1197104e8c --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here is PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load or Store only sampling + */ +#define p9_EventCode_1 0x13e35340401e0 +#define p9_EventCode_2 0x17d34340101ec +#define p9_EventCode_3 0x13e35340101ec +#define p10_EventCode_1 0x35340401e0 +#define p10_EventCode_2 0x35340101ec + +/* + * Testcase for group constraint check of thresh_cmp bits which is + * used to program thresh compare field in Monitor Mode Control Register A + * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10). + * All events in the group should match thresh compare bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_thresh_cmp(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + /* Init the events for the group contraint check for thresh_cmp bits */ + event_init(&leader, p10_EventCode_1); + + /* Add the thresh_cmp value for leader in config1 */ + leader.attr.config1 = 1000; + FAIL_IF(event_open(&leader)); + + event_init(&event, p10_EventCode_2); + + /* Add the different thresh_cmp value from the leader event in config1 */ + event.attr.config1 = 2000; + + /* Expected to fail as sibling and leader event request different thresh_cmp bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh compare test */ + event_init(&event, p10_EventCode_2); + + /* Add the same thresh_cmp value for leader and sibling event in config1 */ + event.attr.config1 = 1000; + + /* Expected to succeed as sibling and leader event request same thresh_cmp bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + } else { + /* Init the events for the group contraint check for thresh_cmp bits */ + event_init(&leader, p9_EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, p9_EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_cmp bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh compare test */ + event_init(&event, p9_EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_cmp bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + } + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_cmp, "group_constraint_thresh_cmp"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c new file mode 100644 index 000000000000..e0852ebc1671 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec). + * Threshold event selection used is issue to complete and issue to + * finished for cycles + * Sampling criteria is Load or Store only sampling + */ +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x34340101ec +#define EventCode_3 0x35340101ec + +/* + * Testcase for group constraint check of thresh_ctl bits which is + * used to program thresh compare field in Monitor Mode Control Register A + * (MMCR0: 48-55). + * All events in the group should match thresh ctl bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_thresh_ctl(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group contraint thresh control test */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_ctl bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh control test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_ctl bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_ctl, "group_constraint_thresh_ctl"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c new file mode 100644 index 000000000000..50a8cd843ce7 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and + * PM_THRESH_MET (0x101ec). + * Threshold event selection used is issue to complete + * Sampling criteria is Load or Store only sampling + */ +#define EventCode_1 0x35340401e0 +#define EventCode_2 0x35540101ec +#define EventCode_3 0x35340101ec + +/* + * Testcase for group constraint check of thresh_sel bits which is + * used to program thresh select field in Monitor Mode Control Register A + * (MMCRA: 45-57). + * All events in the group should match thresh sel bits otherwise + * event_open for the group will fail. + */ +static int group_constraint_thresh_sel(void) +{ + struct event event, leader; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Init the events for the group contraint thresh select test */ + event_init(&leader, EventCode_1); + FAIL_IF(event_open(&leader)); + + event_init(&event, EventCode_2); + + /* Expected to fail as sibling and leader event request different thresh_sel bits */ + FAIL_IF(!event_open_with_group(&event, leader.fd)); + + event_close(&event); + + /* Init the event for the group contraint thresh select test */ + event_init(&event, EventCode_3); + + /* Expected to succeed as sibling and leader event request same thresh_sel bits */ + FAIL_IF(event_open_with_group(&event, leader.fd)); + + event_close(&leader); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_thresh_sel, "group_constraint_thresh_sel"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c new file mode 100644 index 000000000000..a2c18923dcec --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* All successful D-side store dispatches for this thread with PMC 2 */ +#define EventCode_1 0x26080 +/* All successful D-side store dispatches for this thread with PMC 4 */ +#define EventCode_2 0x46080 +/* All successful D-side store dispatches for this thread that were L2 Miss with PMC 3 */ +#define EventCode_3 0x36880 + +/* + * Testcase for group constraint check of unit and pmc bits which is + * used to program corresponding unit and pmc field in Monitor Mode + * Control Register 1 (MMCR1) + * One of the event in the group should use PMC 4 incase units field + * value is within 6 to 9 otherwise event_open for the group will fail. + */ +static int group_constraint_unit(void) +{ + struct event *e, events[3]; + + /* + * Check for platform support for the test. + * Constraint to use PMC4 with one of the event in group, + * when the unit is within 6 to 9 is only applicable on + * power9. + */ + SKIP_IF(platform_check_for_tests()); + SKIP_IF(have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the events for the group contraint check for unit bits */ + e = &events[0]; + event_init(e, EventCode_1); + + /* Expected to fail as PMC 4 is not used with unit field value 6 to 9 */ + FAIL_IF(!event_open(&events[0])); + + /* Init the events for the group contraint check for unit bits */ + e = &events[1]; + event_init(e, EventCode_2); + + /* Expected to pass as PMC 4 is used with unit field value 6 to 9 */ + FAIL_IF(event_open(&events[1])); + + /* Init the event for the group contraint unit test */ + e = &events[2]; + event_init(e, EventCode_3); + + /* Expected to fail as PMC4 is not being used */ + FAIL_IF(!event_open_with_group(&events[2], events[0].fd)); + + /* Expected to succeed as event using PMC4 */ + FAIL_IF(event_open_with_group(&events[2], events[1].fd)); + + event_close(&events[0]); + event_close(&events[1]); + event_close(&events[2]); + + return 0; +} + +int main(void) +{ + return test_harness(group_constraint_unit, "group_constraint_unit"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c new file mode 100644 index 000000000000..cff9ac170df6 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include <sys/prctl.h> +#include <limits.h> +#include "../sampling_tests/misc.h" + +/* + * Testcase for group constraint check for + * Performance Monitor Counter 5 (PMC5) and also + * Performance Monitor Counter 6 (PMC6). + * Test that pmc5/6 is excluded from constraint + * check when scheduled along with group of events. + */ + +static int group_pmc56_exclude_constraints(void) +{ + struct event *e, events[3]; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * PMC5/6 is excluded from constraint bit + * check along with group of events. Use + * group of events with PMC5, PMC6 and also + * event with cache bit (dc_ic) set. Test expects + * this set of events to go in as a group. + */ + e = &events[0]; + event_init(e, 0x500fa); + + e = &events[1]; + event_init(e, 0x600f4); + + e = &events[2]; + event_init(e, 0x22C040); + + FAIL_IF(event_open(&events[0])); + + /* + * The event_open will fail if constraint check fails. + * Since we are asking for events in a group and since + * PMC5/PMC6 is excluded from group constraints, even_open + * should pass. + */ + for (i = 1; i < 3; i++) + FAIL_IF(event_open_with_group(&events[i], events[0].fd)); + + for (i = 0; i < 3; i++) + event_close(&events[i]); + + return 0; +} + +int main(void) +{ + return test_harness(group_pmc56_exclude_constraints, "group_pmc56_exclude_constraints"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c new file mode 100644 index 000000000000..a45b1da5b568 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "utils.h" +#include "../sampling_tests/misc.h" + +/* + * Load Missed L1, for power9 its pointing to PM_LD_MISS_L1_FIN (0x2c04e) and + * for power10 its pointing to PM_LD_MISS_L1 (0x3e054) + * + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_MISS + */ +#define EventCode_1 0x10000 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS + */ +#define EventCode_2 0x0100 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_DTLB + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE + * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS + */ +#define EventCode_3 0x0103 +/* + * Hardware cache level : PERF_COUNT_HW_CACHE_L1D + * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ + * Hardware cache event result type : Invalid ( > PERF_COUNT_HW_CACHE_RESULT_MAX) + */ +#define EventCode_4 0x030000 + +/* + * A perf test to check valid hardware cache events. + */ +static int hw_cache_event_type_test(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_1, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to success as its pointing to L1 load miss */ + FAIL_IF(event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_2, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as the corresponding cache event entry have 0 in that index */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_3, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as the corresponding cache event entry have -1 in that index */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + /* Init the event to test hardware cache event */ + event_init_opts(&event, EventCode_4, PERF_TYPE_HW_CACHE, "event"); + + /* Expected to fail as hardware cache event result type is Invalid */ + FAIL_IF(!event_open(&event)); + event_close(&event); + + return 0; +} + +int main(void) +{ + return test_harness(hw_cache_event_type_test, "hw_cache_event_type_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c new file mode 100644 index 000000000000..f51fcab837fc --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <sys/prctl.h> +#include <limits.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode_1 0x1340000001c040 +/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */ +#define EventCode_2 0x14242 +/* Event code with IFM, EBB, BHRB bits set in event code */ +#define EventCode_3 0xf00000000000001e + +/* + * Some of the bits in the event code is + * reserved for specific platforms. + * Event code bits 52-59 are reserved in power9, + * whereas in power10, these are used for programming + * Monitor Mode Control Register 3 (MMCR3). + * Bit 9 in event code is reserved in power9, + * whereas it is used for programming "radix_scope_qual" + * bit 18 in Monitor Mode Control Register 1 (MMCR1). + * + * Testcase to ensure that using reserved bits in + * event code should cause event_open to fail. + */ + +static int invalid_event_code(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Events using MMCR3 bits and radix scope qual bits + * should fail in power9 and should succeed in power10. + * Init the events and check for pass/fail in event open. + */ + if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + event_init(&event, EventCode_1); + FAIL_IF(event_open(&event)); + event_close(&event); + + event_init(&event, EventCode_2); + FAIL_IF(event_open(&event)); + event_close(&event); + } else { + event_init(&event, EventCode_1); + FAIL_IF(!event_open(&event)); + + event_init(&event, EventCode_2); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(invalid_event_code, "invalid_event_code"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c new file mode 100644 index 000000000000..4c119c821b99 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for reserved bits in Monitor Mode Control + * Register A (MMCRA) Random Sampling Mode (SM) value. + * As per Instruction Set Architecture (ISA), the values + * 0x5, 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E are reserved + * for sampling mode field. Test that having these reserved + * bit values should cause event_open to fail. + * Input event code uses these sampling bits along with + * 401e0 (PM_MRK_INST_CMPL). + */ + +static int reserved_bits_mmcra_sample_elig_mode(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * MMCRA Random Sampling Mode (SM) values: 0x5 + * 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E is reserved. + * Expected to fail when using these reserved values. + */ + event_init(&event, 0x50401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x90401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0xD0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x190401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1D0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1A0401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x1E0401e0); + FAIL_IF(!event_open(&event)); + + /* + * MMCRA Random Sampling Mode (SM) value 0x10 + * is reserved in power10 and 0xC is reserved in + * power9. + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + event_init(&event, 0x100401e0); + FAIL_IF(!event_open(&event)); + } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) { + event_init(&event, 0xC0401e0); + FAIL_IF(!event_open(&event)); + } + + return 0; +} + +int main(void) +{ + return test_harness(reserved_bits_mmcra_sample_elig_mode, + "reserved_bits_mmcra_sample_elig_mode"); +} diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c new file mode 100644 index 000000000000..4ea1c2f8913f --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include "../event.h" +#include "../sampling_tests/misc.h" + +/* + * Testcase for reserved bits in Monitor Mode + * Control Register A (MMCRA) thresh_ctl bits. + * For MMCRA[48:51]/[52:55]) Threshold Start/Stop, + * 0b11110000/0b00001111 is reserved. + */ + +static int reserved_bits_mmcra_thresh_ctl(void) +{ + struct event event; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* Skip for Generic compat PMU */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* + * MMCRA[48:51]/[52:55]) Threshold Start/Stop + * events Selection. 0b11110000/0b00001111 is reserved. + * Expected to fail when using these reserved values. + */ + event_init(&event, 0xf0340401e0); + FAIL_IF(!event_open(&event)); + + event_init(&event, 0x0f340401e0); + FAIL_IF(!event_open(&event)); + + return 0; +} + +int main(void) +{ + return test_harness(reserved_bits_mmcra_thresh_ctl, "reserved_bits_mmcra_thresh_ctl"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile index a785c6a173b9..9e67351fb252 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile @@ -4,9 +4,12 @@ CFLAGS += -m64 TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \ - mmcr3_src_test mmcra_thresh_marked_sample_test + mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \ + mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \ + mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \ + bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test top_srcdir = ../../../../../.. include ../../../lib.mk -$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S +$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c new file mode 100644 index 000000000000..8182647c63c8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test to check bhrb filter + * map. All the branch filters are not supported + * in powerpc. Supported filters in: + * power10: any, any_call, ind_call, cond + * power9: any, any_call + * + * Testcase checks event open for invalid bhrb filter + * types should fail and valid filter types should pass. + * Testcase does validity check for these branch + * sample types. + */ + +/* Invalid types for powerpc */ +/* Valid bhrb filters in power9/power10 */ +int bhrb_filter_map_valid_common[] = { + PERF_SAMPLE_BRANCH_ANY, + PERF_SAMPLE_BRANCH_ANY_CALL, +}; + +/* Valid bhrb filters in power10 */ +int bhrb_filter_map_valid_p10[] = { + PERF_SAMPLE_BRANCH_IND_CALL, + PERF_SAMPLE_BRANCH_COND, +}; + +#define EventCode 0x1001e + +static int bhrb_filter_map_test(void) +{ + struct event event; + int i; + + /* Check for platform support for the test */ + SKIP_IF(platform_check_for_tests()); + + /* + * Skip for Generic compat PMU since + * bhrb filters is not supported + */ + SKIP_IF(check_for_generic_compat_pmu()); + + /* Init the event for the sampling test */ + event_init(&event, EventCode); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + event.attr.disabled = 1; + + /* Invalid filter maps which are expected to fail in event_open */ + for (i = PERF_SAMPLE_BRANCH_USER_SHIFT; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) { + /* Skip the valid branch sample type */ + if (i == PERF_SAMPLE_BRANCH_ANY_SHIFT || i == PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT \ + || i == PERF_SAMPLE_BRANCH_IND_CALL_SHIFT || i == PERF_SAMPLE_BRANCH_COND_SHIFT) + continue; + event.attr.branch_sample_type = 1U << i; + FAIL_IF(!event_open(&event)); + } + + /* valid filter maps for power9/power10 which are expected to pass in event_open */ + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_common); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_common[i]; + FAIL_IF(event_open(&event)); + event_close(&event); + } + + /* + * filter maps which are valid in power10 and invalid in power9. + * PVR check is used here since PMU specific data like bhrb filter + * alternative tests is handled by respective PMU driver code and + * using PVR will work correctly for all cases including generic + * compat mode. + */ + if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) { + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; + FAIL_IF(event_open(&event)); + event_close(&event); + } + } else { + for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) { + event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i]; + FAIL_IF(!event_open(&event)); + } + } + + return 0; +} + +int main(void) +{ + return test_harness(bhrb_filter_map_test, "bhrb_filter_map_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c new file mode 100644 index 000000000000..4644c6782974 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test for making sure + * enabling branch stack doesn't crash in any + * environment, say: + * - With generic compat PMU + * - without any PMU registered + * - With platform specific PMU + * A fix for bhrb sampling crash was added in kernel + * via commit: b460b512417a ("powerpc/perf: Fix crashes + * with generic_compat_pmu & BHRB") + * + * This testcase exercises this code by doing branch + * stack enable for software event. s/w event is used + * since software event will work even in platform + * without PMU. + */ +static int bhrb_no_crash_wo_pmu_test(void) +{ + struct event event; + + /* + * Init the event for the sampling test. + * This uses software event which works on + * any platform. + */ + event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles"); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK; + event.attr.disabled = 1; + + /* + * Return code of event_open is not + * considered since test just expects no crash from + * using PERF_SAMPLE_BRANCH_STACK. Also for environment + * like generic compat PMU, branch stack is unsupported. + */ + event_open(&event); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(bhrb_no_crash_wo_pmu_test, "bhrb_no_crash_wo_pmu_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c new file mode 100644 index 000000000000..839d2d225da0 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * A perf sampling test for making sure + * sampling with -intr-regs doesn't crash + * in any environment, say: + * - With generic compat PMU + * - without any PMU registered + * - With platform specific PMU. + * A fix for crash with intr_regs was + * addressed in commit: f75e7d73bdf7 in kernel. + * + * This testcase exercises this code path by doing + * intr_regs using software event. Software event is + * used since s/w event will work even in platform + * without PMU. + */ +static int intr_regs_no_crash_wo_pmu_test(void) +{ + struct event event; + + /* + * Init the event for the sampling test. + * This uses software event which works on + * any platform. + */ + event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles"); + + event.attr.sample_period = 1000; + event.attr.sample_type = PERF_SAMPLE_REGS_INTR; + event.attr.disabled = 1; + + /* + * Return code of event_open is not considered + * since test just expects no crash from using + * PERF_SAMPLE_REGS_INTR. + */ + event_open(&event); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(intr_regs_no_crash_wo_pmu_test, "intr_regs_no_crash_wo_pmu_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c index c01a31d5f4ee..eac6420abdf1 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c @@ -60,6 +60,8 @@ static void init_ev_encodes(void) switch (pvr) { case POWER10: + ev_mask_thd_cmp = 0x3ffff; + ev_shift_thd_cmp = 0; ev_mask_rsq = 1; ev_shift_rsq = 9; ev_mask_comb = 3; @@ -119,12 +121,10 @@ int check_extended_regs_support(void) return -1; } -int check_pvr_for_sampling_tests(void) +int platform_check_for_tests(void) { pvr = PVR_VER(mfspr(SPRN_PVR)); - platform_extended_mask = perf_get_platform_reg_mask(); - /* * Check for supported platforms * for sampling test @@ -136,19 +136,33 @@ int check_pvr_for_sampling_tests(void) * Check PMU driver registered by looking for * PPC_FEATURE2_EBB bit in AT_HWCAP2 */ - if (!have_hwcap2(PPC_FEATURE2_EBB)) + if (!have_hwcap2(PPC_FEATURE2_EBB) || !have_hwcap2(PPC_FEATURE2_ARCH_3_00)) goto out; + return 0; + +out: + printf("%s: Tests unsupported for this platform\n", __func__); + return -1; +} + +int check_pvr_for_sampling_tests(void) +{ + SKIP_IF(platform_check_for_tests()); + + platform_extended_mask = perf_get_platform_reg_mask(); /* check if platform supports extended regs */ if (check_extended_regs_support()) goto out; init_ev_encodes(); return 0; + out: printf("%s: Sampling tests un-supported\n", __func__); return -1; } + /* * Allocate mmap buffer of "mmap_pages" number of * pages. @@ -257,13 +271,32 @@ u64 *get_intr_regs(struct event *event, void *sample_buff) u64 *intr_regs; size_t size = 0; - if ((type ^ PERF_SAMPLE_REGS_INTR)) + if ((type ^ (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_BRANCH_STACK)) && + (type ^ PERF_SAMPLE_REGS_INTR)) return NULL; intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size); if (!intr_regs) return NULL; + if (type & PERF_SAMPLE_BRANCH_STACK) { + /* + * PERF_RECORD_SAMPLE and PERF_SAMPLE_BRANCH_STACK: + * struct { + * struct perf_event_header hdr; + * u64 number_of_branches; + * struct perf_branch_entry[number_of_branches]; + * u64 data[]; + * }; + * struct perf_branch_entry { + * u64 from; + * u64 to; + * u64 misc; + * }; + */ + intr_regs += ((*intr_regs) * 3) + 1; + } + /* * First entry in the sample buffer used to specify * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access @@ -410,3 +443,95 @@ u64 get_reg_value(u64 *intr_regs, char *register_name) return *(intr_regs + register_bit_position); } + +int get_thresh_cmp_val(struct event event) +{ + int exp = 0; + u64 result = 0; + u64 value; + + if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) + return EV_CODE_EXTRACT(event.attr.config, thd_cmp); + + value = EV_CODE_EXTRACT(event.attr.config1, thd_cmp); + + if (!value) + return value; + + /* + * Incase of P10, thresh_cmp value is not part of raw event code + * and provided via attr.config1 parameter. To program threshold in MMCRA, + * take a 18 bit number N and shift right 2 places and increment + * the exponent E by 1 until the upper 10 bits of N are zero. + * Write E to the threshold exponent and write the lower 8 bits of N + * to the threshold mantissa. + * The max threshold that can be written is 261120. + */ + if (value > 261120) + value = 261120; + while ((64 - __builtin_clzl(value)) > 8) { + exp++; + value >>= 2; + } + + /* + * Note that it is invalid to write a mantissa with the + * upper 2 bits of mantissa being zero, unless the + * exponent is also zero. + */ + if (!(value & 0xC0) && exp) + result = -1; + else + result = (exp << 8) | value; + return result; +} + +/* + * Utility function to check for generic compat PMU + * by comparing base_platform value from auxv and real + * PVR value. + */ +static bool auxv_generic_compat_pmu(void) +{ + int base_pvr = 0; + + if (!strcmp(auxv_base_platform(), "power9")) + base_pvr = POWER9; + else if (!strcmp(auxv_base_platform(), "power10")) + base_pvr = POWER10; + + return (!base_pvr); +} + +/* + * Check for generic compat PMU. + * First check for presence of pmu_name from + * "/sys/bus/event_source/devices/cpu/caps". + * If doesn't exist, fallback to using value + * auxv. + */ +bool check_for_generic_compat_pmu(void) +{ + char pmu_name[256]; + + memset(pmu_name, 0, sizeof(pmu_name)); + if (read_sysfs_file("bus/event_source/devices/cpu/caps/pmu_name", + pmu_name, sizeof(pmu_name)) < 0) + return auxv_generic_compat_pmu(); + + if (!strcmp(pmu_name, "ISAv3")) + return true; + else + return false; +} + +/* + * Check if system is booted in compat mode. + */ +bool check_for_compat_mode(void) +{ + char *platform = auxv_platform(); + char *base_platform = auxv_base_platform(); + + return strcmp(platform, base_platform); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 7675f3177725..4181755cf5a0 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -5,6 +5,7 @@ * Copyright 2022, Kajol Jain, IBM Corp. */ +#include <sys/stat.h> #include "../event.h" #define POWER10 0x80 @@ -17,6 +18,8 @@ #define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ #define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + extern int ev_mask_pmcxsel, ev_shift_pmcxsel; extern int ev_mask_marked, ev_shift_marked; extern int ev_mask_comb, ev_shift_comb; @@ -35,6 +38,7 @@ extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src; extern int pvr; extern u64 platform_extended_mask; extern int check_pvr_for_sampling_tests(void); +extern int platform_check_for_tests(void); /* * Event code field extraction macro. @@ -52,6 +56,9 @@ void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count); int collect_samples(void *sample_buff); u64 *get_intr_regs(struct event *event, void *sample_buff); u64 get_reg_value(u64 *intr_regs, char *register_name); +int get_thresh_cmp_val(struct event event); +bool check_for_generic_compat_pmu(void); +bool check_for_compat_mode(void); static inline int get_mmcr0_fc56(u64 mmcr0, int pmc) { @@ -184,7 +191,7 @@ static inline int get_mmcra_sm(u64 mmcra, int pmc) return ((mmcra >> 42) & 0x3); } -static inline int get_mmcra_bhrb_disable(u64 mmcra, int pmc) +static inline u64 get_mmcra_bhrb_disable(u64 mmcra, int pmc) { if (pvr == POWER10) return mmcra & BHRB_DISABLE; diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c new file mode 100644 index 000000000000..f0c003282630 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Athira Rajeev, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */ + +/* The data cache was reloaded from local core's L3 due to a demand load */ +#define EventCode 0x21c040 + +/* + * A perf sampling test for mmcr1 + * fields : pmcxsel, unit, cache. + */ +static int mmcr1_sel_unit_cache(void) +{ + struct event event; + u64 *intr_regs; + char *p; + int i; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + p = malloc(MALLOC_SIZE); + FAIL_IF(!p); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_period = 1; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + event_enable(&event); + + /* workload to make the event overflow */ + for (i = 0; i < MALLOC_SIZE; i += 0x10000) + p[i] = i; + + event_disable(&event); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* + * Verify that pmcxsel, unit and cache field of MMCR1 + * match with corresponding event code fields + */ + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, pmcxsel) != + get_mmcr1_pmcxsel(get_reg_value(intr_regs, "MMCR1"), 1)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, unit) != + get_mmcr1_unit(get_reg_value(intr_regs, "MMCR1"), 1)); + FAIL_IF(EV_CODE_EXTRACT(event.attr.config, cache) != + get_mmcr1_cache(get_reg_value(intr_regs, "MMCR1"), 1)); + + free(p); + event_close(&event); + return 0; +} + +int main(void) +{ + FAIL_IF(test_harness(mmcr1_sel_unit_cache, "mmcr1_sel_unit_cache")); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c new file mode 100644 index 000000000000..14854694af62 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for any branch mode */ +#define IFM_ANY_BRANCH 0x0 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb any call. + */ +static int mmcra_bhrb_any_test(void) +{ + struct event event; + u64 *intr_regs; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_ANY_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_any_test, "mmcra_bhrb_any_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c new file mode 100644 index 000000000000..3e08176eb7f8 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for conditional branch mode */ +#define IFM_COND_BRANCH 0x3 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb cond call. + */ +static int mmcra_bhrb_cond_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_COND; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_COND_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_cond_test, "mmcra_bhrb_cond_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c new file mode 100644 index 000000000000..488c865387e4 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* + * A perf sampling test for mmcra + * field: bhrb_disable. + */ +static int mmcra_bhrb_disable_no_branch_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that bhrb_disable bit is set in MMCRA for non-branch samples */ + FAIL_IF(!get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_disable_no_branch_test, "mmcra_bhrb_disable_no_branch_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c new file mode 100644 index 000000000000..186a853c0f62 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void thirty_two_instruction_loop(int loops); + +/* Instructions */ +#define EventCode 0x500fa + +/* + * A perf sampling test for mmcra + * field: bhrb_disable. + */ +static int mmcra_bhrb_disable_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop(10000); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that bhrb_disable bit is set in MMCRA */ + FAIL_IF(get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5)); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_disable_test, "mmcra_bhrb_disable_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c new file mode 100644 index 000000000000..f0706730c099 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +extern void indirect_branch_loop(void); + +/* Instructions */ +#define EventCode 0x500fa + +/* ifm field for indirect branch mode */ +#define IFM_IND_BRANCH 0x2 + +/* + * A perf sampling test for mmcra + * field: ifm for bhrb ind_call. + */ +static int mmcra_bhrb_ind_call_test(void) +{ + struct event event; + u64 *intr_regs; + + /* + * Check for platform support for the test. + * This test is only aplicable on power10 + */ + SKIP_IF(check_pvr_for_sampling_tests()); + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1)); + + /* Init the event for the sampling test */ + event_init_sampling(&event, EventCode); + event.attr.sample_regs_intr = platform_extended_mask; + event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; + event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_IND_CALL; + event.attr.exclude_kernel = 1; + + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + indirect_branch_loop(); + + FAIL_IF(event_disable(&event)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that ifm bit is set properly in MMCRA */ + FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_IND_BRANCH); + + event_close(&event); + return 0; +} + +int main(void) +{ + return test_harness(mmcra_bhrb_ind_call_test, "mmcra_bhrb_ind_call_test"); +} diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c new file mode 100644 index 000000000000..904362f172c9 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2022, Kajol Jain, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "../event.h" +#include "misc.h" +#include "utils.h" + +/* + * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0) + * Threshold event selection used is issue to complete for cycles + * Sampling criteria is Load only sampling + */ +#define p9_EventCode 0x13E35340401e0 +#define p10_EventCode 0x35340401e0 + +extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target); + +/* A perf sampling test to test mmcra fields */ +static int mmcra_thresh_cmp(void) +{ + struct event event; + u64 *intr_regs; + u64 dummy; + + /* Check for platform support for the test */ + SKIP_IF(check_pvr_for_sampling_tests()); + + /* Skip for comapt mode */ + SKIP_IF(check_for_compat_mode()); + + /* Init the event for the sampling test */ + if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) { + event_init_sampling(&event, p9_EventCode); + } else { + event_init_sampling(&event, p10_EventCode); + event.attr.config1 = 1000; + } + + event.attr.sample_regs_intr = platform_extended_mask; + FAIL_IF(event_open(&event)); + event.mmap_buffer = event_sample_buf_mmap(event.fd, 1); + + FAIL_IF(event_enable(&event)); + + /* workload to make the event overflow */ + thirty_two_instruction_loop_with_ll_sc(1000000, &dummy); + + FAIL_IF(event_disable(&event)); + + /* Check for sample count */ + FAIL_IF(!collect_samples(event.mmap_buffer)); + + intr_regs = get_intr_regs(&event, event.mmap_buffer); + + /* Check for intr_regs */ + FAIL_IF(!intr_regs); + + /* Verify that thresh cmp match with the corresponding event code fields */ + FAIL_IF(get_thresh_cmp_val(event) != + get_mmcra_thd_cmp(get_reg_value(intr_regs, "MMCRA"), 4)); + + event_close(&event); + return 0; +} + +int main(void) +{ + FAIL_IF(test_harness(mmcra_thresh_cmp, "mmcra_thresh_cmp")); +} diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index a500639da97a..2f02cb54224d 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -1,15 +1,41 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \ - ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \ - ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \ - perf-hwbreak ptrace-syscall ptrace-perf-hwbreak + +TM_TESTS := ptrace-tm-gpr +TM_TESTS += ptrace-tm-spd-gpr +TM_TESTS += ptrace-tm-spd-tar +TM_TESTS += ptrace-tm-spd-vsx +TM_TESTS += ptrace-tm-spr +TM_TESTS += ptrace-tm-tar +TM_TESTS += ptrace-tm-vsx + +TESTS_64 := $(TM_TESTS) +TESTS_64 += core-pkey +TESTS_64 += perf-hwbreak +TESTS_64 += ptrace-hwbreak +TESTS_64 += ptrace-perf-hwbreak +TESTS_64 += ptrace-pkey +TESTS_64 += ptrace-syscall +TESTS_64 += ptrace-tar +TESTS_64 += ptrace-vsx + +TESTS += ptrace-gpr + +TEST_GEN_PROGS := $(TESTS) $(TESTS_64) + +LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h)) top_srcdir = ../../../../.. include ../../lib.mk -CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie +TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS)) +TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64)) + +$(TESTS_64): CFLAGS += -m64 +$(TM_TESTS): CFLAGS += -I../tm -mhtm + +CFLAGS += -I../../../../../usr/include -fno-pie -$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h +$(OUTPUT)/ptrace-gpr: ptrace-gpr.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread -$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h +$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S new file mode 100644 index 000000000000..070e8443e3cc --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * test helper assembly functions + * + * Copyright (C) 2016 Simon Guo, IBM Corporation. + * Copyright 2022 Michael Ellerman, IBM Corporation. + */ +#include "basic_asm.h" + +#define GPR_SIZE __SIZEOF_LONG__ +#define FIRST_GPR 14 +#define NUM_GPRS (32 - FIRST_GPR) +#define STACK_SIZE (NUM_GPRS * GPR_SIZE) + +// gpr_child_loop(int *read_flag, int *write_flag, +// unsigned long *gpr_buf, double *fpr_buf); +FUNC_START(gpr_child_loop) + // r3 = read_flag + // r4 = write_flag + // r5 = gpr_buf + // r6 = fpr_buf + PUSH_BASIC_STACK(STACK_SIZE) + + // Save non-volatile GPRs + OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR + + // Load GPRs with expected values + OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR + + // Load FPRs with expected values + OP_REGS lfd, 8, 0, 31, r6 + + // Signal to parent that we're ready + li r0, 1 + stw r0, 0(r4) + + // Wait for parent to finish +1: lwz r0, 0(r3) + cmpwi r0, 0 + beq 1b // Loop while flag is zero + + // Save GPRs back to caller buffer + OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR + + // Save FPRs + OP_REGS stfd, 8, 0, 31, r6 + + // Reload non-volatile GPRs + OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR + + POP_BASIC_STACK(STACK_SIZE) + blr diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c index 17cd480c8780..9ed87d297799 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c @@ -7,72 +7,127 @@ #include "ptrace.h" #include "ptrace-gpr.h" #include "reg.h" +#include <time.h> /* Tracer and Tracee Shared Data */ int shm_id; int *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; +extern void gpr_child_loop(int *read_flag, int *write_flag, + unsigned long *gpr_buf, double *fpr_buf); -void gpr(void) +unsigned long child_gpr_val, parent_gpr_val; +double child_fpr_val, parent_fpr_val; + +static int child(void) { - unsigned long gpr_buf[18]; - float fpr_buf[32]; + unsigned long gpr_buf[32]; + double fpr_buf[32]; + int i; cptr = (int *)shmat(shm_id, NULL, 0); + memset(gpr_buf, 0, sizeof(gpr_buf)); + memset(fpr_buf, 0, sizeof(fpr_buf)); - asm __volatile__( - ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) - : - : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a) - : "memory", "r6", "r7", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15", "r16", "r17", - "r18", "r19", "r20", "r21", "r22", "r23", "r24", - "r25", "r26", "r27", "r28", "r29", "r30", "r31" - ); - - cptr[1] = 1; + for (i = 0; i < 32; i++) { + gpr_buf[i] = child_gpr_val; + fpr_buf[i] = child_fpr_val; + } - while (!cptr[0]) - asm volatile("" : : : "memory"); + gpr_child_loop(&cptr[0], &cptr[1], gpr_buf, fpr_buf); shmdt((void *)cptr); - store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); - - if (validate_gpr(gpr_buf, GPR_3)) - exit(1); - if (validate_fpr_float(fpr_buf, c)) - exit(1); + FAIL_IF(validate_gpr(gpr_buf, parent_gpr_val)); + FAIL_IF(validate_fpr_double(fpr_buf, parent_fpr_val)); - exit(0); + return 0; } int trace_gpr(pid_t child) { + __u64 tmp, fpr[32], *peeked_fprs; unsigned long gpr[18]; - unsigned long fpr[32]; FAIL_IF(start_trace(child)); + + // Check child GPRs match what we expect using GETREGS FAIL_IF(show_gpr(child, gpr)); - FAIL_IF(validate_gpr(gpr, GPR_1)); + FAIL_IF(validate_gpr(gpr, child_gpr_val)); + + // Check child FPRs match what we expect using GETFPREGS FAIL_IF(show_fpr(child, fpr)); - FAIL_IF(validate_fpr(fpr, FPR_1_REP)); - FAIL_IF(write_gpr(child, GPR_3)); - FAIL_IF(write_fpr(child, FPR_3_REP)); + memcpy(&tmp, &child_fpr_val, sizeof(tmp)); + FAIL_IF(validate_fpr(fpr, tmp)); + + // Check child FPRs match what we expect using PEEKUSR + peeked_fprs = peek_fprs(child); + FAIL_IF(!peeked_fprs); + FAIL_IF(validate_fpr(peeked_fprs, tmp)); + free(peeked_fprs); + + // Write child GPRs using SETREGS + FAIL_IF(write_gpr(child, parent_gpr_val)); + + // Write child FPRs using SETFPREGS + memcpy(&tmp, &parent_fpr_val, sizeof(tmp)); + FAIL_IF(write_fpr(child, tmp)); + + // Check child FPRs match what we just set, using PEEKUSR + peeked_fprs = peek_fprs(child); + FAIL_IF(!peeked_fprs); + FAIL_IF(validate_fpr(peeked_fprs, tmp)); + + // Write child FPRs using POKEUSR + FAIL_IF(poke_fprs(child, (unsigned long *)peeked_fprs)); + + // Child will check its FPRs match before exiting FAIL_IF(stop_trace(child)); return TEST_PASS; } +#ifndef __LONG_WIDTH__ +#define __LONG_WIDTH__ (sizeof(long) * 8) +#endif + +static uint64_t rand_reg(void) +{ + uint64_t result; + long r; + + r = random(); + + // Small values are typical + result = r & 0xffff; + if (r & 0x10000) + return result; + + // Pointers tend to have high bits set + result |= random() << (__LONG_WIDTH__ - 31); + if (r & 0x100000) + return result; + + // And sometimes we want a full 64-bit value + result ^= random() << 16; + + return result; +} + int ptrace_gpr(void) { - pid_t pid; + unsigned long seed; int ret, status; + pid_t pid; + + seed = getpid() ^ time(NULL); + printf("srand(%lu)\n", seed); + srand(seed); + + child_gpr_val = rand_reg(); + child_fpr_val = rand_reg(); + parent_gpr_val = rand_reg(); + parent_fpr_val = rand_reg(); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); @@ -81,7 +136,7 @@ int ptrace_gpr(void) return TEST_FAIL; } if (pid == 0) - gpr(); + exit(child()); if (pid) { pptr = (int *)shmat(shm_id, NULL, 0); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h index c5cd53181e2e..a5470b88bd08 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h @@ -12,10 +12,10 @@ #define FPR_3 0.003 #define FPR_4 0.004 -#define FPR_1_REP 0x3f50624de0000000 -#define FPR_2_REP 0x3f60624de0000000 -#define FPR_3_REP 0x3f689374c0000000 -#define FPR_4_REP 0x3f70624de0000000 +#define FPR_1_REP 0x3f50624dd2f1a9fcull +#define FPR_2_REP 0x3f60624dd2f1a9fcull +#define FPR_3_REP 0x3f689374bc6a7efaull +#define FPR_4_REP 0x3f70624dd2f1a9fcull /* Buffer must have 18 elements */ int validate_gpr(unsigned long *gpr, unsigned long val) @@ -36,13 +36,13 @@ int validate_gpr(unsigned long *gpr, unsigned long val) } /* Buffer must have 32 elements */ -int validate_fpr(unsigned long *fpr, unsigned long val) +int validate_fpr(__u64 *fpr, __u64 val) { int i, found = 1; for (i = 0; i < 32; i++) { if (fpr[i] != val) { - printf("FPR[%d]: %lx Expected: %lx\n", i, fpr[i], val); + printf("FPR[%d]: %llx Expected: %llx\n", i, fpr[i], val); found = 0; } } @@ -53,7 +53,7 @@ int validate_fpr(unsigned long *fpr, unsigned long val) } /* Buffer must have 32 elements */ -int validate_fpr_float(float *fpr, float val) +int validate_fpr_double(double *fpr, double val) { int i, found = 1; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c index 67ca297c5cca..5dc152b162df 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c @@ -12,15 +12,15 @@ int shm_id; unsigned long *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; +double a = FPR_1; +double b = FPR_2; +double c = FPR_3; void tm_gpr(void) { unsigned long gpr_buf[18]; unsigned long result, texasr; - float fpr_buf[32]; + double fpr_buf[32]; printf("Starting the child\n"); cptr = (unsigned long *)shmat(shm_id, NULL, 0); @@ -29,12 +29,12 @@ trans: cptr[1] = 0; asm __volatile__( ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) + ASM_LOAD_FPR(flt_1) "1: ;" "tbegin.;" "beq 2f;" ASM_LOAD_GPR_IMMED(gpr_2) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_2) + ASM_LOAD_FPR(flt_2) "tsuspend.;" "li 7, 1;" "stw 7, 0(%[cptr1]);" @@ -70,12 +70,12 @@ trans: shmdt((void *)cptr); store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); + store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); - if (validate_fpr_float(fpr_buf, c)) + if (validate_fpr_double(fpr_buf, c)) exit(1); exit(0); @@ -87,7 +87,7 @@ trans: int trace_tm_gpr(pid_t child) { unsigned long gpr[18]; - unsigned long fpr[32]; + __u64 fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 6f2bce1b6c5d..458cc1a70ccf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -12,10 +12,10 @@ int shm_id; int *cptr, *pptr; -float a = FPR_1; -float b = FPR_2; -float c = FPR_3; -float d = FPR_4; +double a = FPR_1; +double b = FPR_2; +double c = FPR_3; +double d = FPR_4; __attribute__((used)) void wait_parent(void) { @@ -28,7 +28,7 @@ void tm_spd_gpr(void) { unsigned long gpr_buf[18]; unsigned long result, texasr; - float fpr_buf[32]; + double fpr_buf[32]; cptr = (int *)shmat(shm_id, NULL, 0); @@ -36,7 +36,7 @@ trans: cptr[2] = 0; asm __volatile__( ASM_LOAD_GPR_IMMED(gpr_1) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_1) + ASM_LOAD_FPR(flt_1) "1: ;" "tbegin.;" @@ -45,7 +45,7 @@ trans: ASM_LOAD_GPR_IMMED(gpr_2) "tsuspend.;" ASM_LOAD_GPR_IMMED(gpr_4) - ASM_LOAD_FPR_SINGLE_PRECISION(flt_4) + ASM_LOAD_FPR(flt_4) "bl wait_parent;" "tresume.;" @@ -77,12 +77,12 @@ trans: shmdt((void *)cptr); store_gpr(gpr_buf); - store_fpr_single_precision(fpr_buf); + store_fpr(fpr_buf); if (validate_gpr(gpr_buf, GPR_3)) exit(1); - if (validate_fpr_float(fpr_buf, c)) + if (validate_fpr_double(fpr_buf, c)) exit(1); exit(0); } @@ -93,7 +93,7 @@ trans: int trace_tm_spd_gpr(pid_t child) { unsigned long gpr[18]; - unsigned long fpr[32]; + __u64 fpr[32]; FAIL_IF(start_trace(child)); FAIL_IF(show_gpr(child, gpr)); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h index 5181ad9b4b6c..4e0233c0f2b3 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace.h +++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h @@ -4,6 +4,9 @@ * * Copyright (C) 2015 Anshuman Khandual, IBM Corporation. */ + +#define __SANE_USERSPACE_TYPES__ + #include <inttypes.h> #include <unistd.h> #include <stdlib.h> @@ -20,6 +23,7 @@ #include <sys/ipc.h> #include <sys/shm.h> #include <sys/user.h> +#include <sys/syscall.h> #include <linux/elf.h> #include <linux/types.h> #include <linux/auxvec.h> @@ -30,8 +34,8 @@ #define TEST_FAIL 1 struct fpr_regs { - unsigned long fpr[32]; - unsigned long fpscr; + __u64 fpr[32]; + __u64 fpscr; }; struct tm_spr_regs { @@ -318,7 +322,7 @@ fail: } /* FPR */ -int show_fpr(pid_t child, unsigned long *fpr) +int show_fpr(pid_t child, __u64 *fpr) { struct fpr_regs *regs; int ret, i; @@ -337,7 +341,7 @@ int show_fpr(pid_t child, unsigned long *fpr) return TEST_PASS; } -int write_fpr(pid_t child, unsigned long val) +int write_fpr(pid_t child, __u64 val) { struct fpr_regs *regs; int ret, i; @@ -360,7 +364,7 @@ int write_fpr(pid_t child, unsigned long val) return TEST_PASS; } -int show_ckpt_fpr(pid_t child, unsigned long *fpr) +int show_ckpt_fpr(pid_t child, __u64 *fpr) { struct fpr_regs *regs; struct iovec iov; @@ -437,6 +441,70 @@ int show_gpr(pid_t child, unsigned long *gpr) return TEST_PASS; } +long sys_ptrace(enum __ptrace_request request, pid_t pid, unsigned long addr, unsigned long data) +{ + return syscall(__NR_ptrace, request, pid, (void *)addr, data); +} + +// 33 because of FPSCR +#define PT_NUM_FPRS (33 * (sizeof(__u64) / sizeof(unsigned long))) + +__u64 *peek_fprs(pid_t child) +{ + unsigned long *fprs, *p, addr; + long ret; + int i; + + fprs = malloc(sizeof(unsigned long) * PT_NUM_FPRS); + if (!fprs) { + perror("malloc() failed"); + return NULL; + } + + for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) { + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)p); + if (ret) { + perror("ptrace(PTRACE_PEEKUSR) failed"); + return NULL; + } + } + + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)&addr); + if (!ret) { + printf("ptrace(PTRACE_PEEKUSR) succeeded unexpectedly!\n"); + return NULL; + } + + return (__u64 *)fprs; +} + +int poke_fprs(pid_t child, unsigned long *fprs) +{ + unsigned long *p, addr; + long ret; + int i; + + for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) { + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_POKEUSER, child, addr, *p); + if (ret) { + perror("ptrace(PTRACE_POKEUSR) failed"); + return -1; + } + } + + addr = sizeof(unsigned long) * (PT_FPR0 + i); + ret = sys_ptrace(PTRACE_POKEUSER, child, addr, addr); + if (!ret) { + printf("ptrace(PTRACE_POKEUSR) succeeded unexpectedly!\n"); + return -1; + } + + return 0; +} + int write_gpr(pid_t child, unsigned long val) { struct pt_regs *regs; @@ -742,4 +810,3 @@ void analyse_texasr(unsigned long texasr) } void store_gpr(unsigned long *addr); -void store_fpr(float *addr); diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore index 93614b125ded..9357b186b13c 100644 --- a/tools/testing/selftests/powerpc/security/.gitignore +++ b/tools/testing/selftests/powerpc/security/.gitignore @@ -2,3 +2,4 @@ rfi_flush entry_flush spectre_v2 +uaccess_flush diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c index 28604c9f805c..e5962f4794f5 100644 --- a/tools/testing/selftests/proc/proc-pid-vm.c +++ b/tools/testing/selftests/proc/proc-pid-vm.c @@ -211,10 +211,19 @@ static int make_exe(const uint8_t *payload, size_t len) } #endif -static bool g_vsyscall = false; +/* + * 0: vsyscall VMA doesn't exist vsyscall=none + * 1: vsyscall VMA is r-xp vsyscall=emulate + * 2: vsyscall VMA is --xp vsyscall=xonly + */ +static int g_vsyscall; +static const char *str_vsyscall; -static const char str_vsyscall[] = +static const char str_vsyscall_0[] = ""; +static const char str_vsyscall_1[] = "ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"; +static const char str_vsyscall_2[] = +"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"; #ifdef __x86_64__ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) @@ -223,7 +232,7 @@ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) } /* - * vsyscall page can't be unmapped, probe it with memory load. + * vsyscall page can't be unmapped, probe it directly. */ static void vsyscall(void) { @@ -246,13 +255,52 @@ static void vsyscall(void) act.sa_sigaction = sigaction_SIGSEGV; (void)sigaction(SIGSEGV, &act, NULL); + /* gettimeofday(NULL, NULL); */ + asm volatile ( + "call %P0" + : + : "i" (0xffffffffff600000), "D" (NULL), "S" (NULL) + : "rax", "rcx", "r11" + ); + exit(0); + } + waitpid(pid, &wstatus, 0); + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { + /* vsyscall page exists and is executable. */ + } else { + /* vsyscall page doesn't exist. */ + g_vsyscall = 0; + return; + } + + pid = fork(); + if (pid < 0) { + fprintf(stderr, "fork, errno %d\n", errno); + exit(1); + } + if (pid == 0) { + struct rlimit rlim = {0, 0}; + (void)setrlimit(RLIMIT_CORE, &rlim); + + /* Hide "segfault at ffffffffff600000" messages. */ + struct sigaction act; + memset(&act, 0, sizeof(struct sigaction)); + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = sigaction_SIGSEGV; + (void)sigaction(SIGSEGV, &act, NULL); + *(volatile int *)0xffffffffff600000UL; exit(0); } waitpid(pid, &wstatus, 0); if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) { - g_vsyscall = true; + /* vsyscall page is readable and executable. */ + g_vsyscall = 1; + return; } + + /* vsyscall page is executable but unreadable. */ + g_vsyscall = 2; } int main(void) @@ -261,6 +309,19 @@ int main(void) int exec_fd; vsyscall(); + switch (g_vsyscall) { + case 0: + str_vsyscall = str_vsyscall_0; + break; + case 1: + str_vsyscall = str_vsyscall_1; + break; + case 2: + str_vsyscall = str_vsyscall_2; + break; + default: + abort(); + } atexit(ate); @@ -314,7 +375,7 @@ int main(void) /* Test /proc/$PID/maps */ { - const size_t len = strlen(buf0) + (g_vsyscall ? strlen(str_vsyscall) : 0); + const size_t len = strlen(buf0) + strlen(str_vsyscall); char buf[256]; ssize_t rv; int fd; @@ -327,7 +388,7 @@ int main(void) rv = read(fd, buf, sizeof(buf)); assert(rv == len); assert(memcmp(buf, buf0, strlen(buf0)) == 0); - if (g_vsyscall) { + if (g_vsyscall > 0) { assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0); } } @@ -374,7 +435,7 @@ int main(void) assert(memmem(buf, rv, S[i], strlen(S[i]))); } - if (g_vsyscall) { + if (g_vsyscall > 0) { assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall))); } } diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh index f17000a2ccf1..ed0ec7f0927e 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh @@ -35,7 +35,7 @@ then exit 1 fi -# Remember where we started so that we can get back and the end. +# Remember where we started so that we can get back at the end. curcommit="`git status | head -1 | awk '{ print $NF }'`" nfail=0 @@ -73,15 +73,10 @@ do # Test the specified commit. git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1 echo git checkout return code: $? "(Commit $ntry: $i)" - kvm.sh --allcpus --duration 3 --trust-make > $resdir/$ds/$idir/kvm.sh.out 2>&1 + kvm.sh --allcpus --duration 3 --trust-make --datestamp "$ds/$idir" > $resdir/$ds/$idir/kvm.sh.out 2>&1 ret=$? echo kvm.sh return code $ret for commit $i from branch $gitbr - - # Move the build products to their resting place. - runresdir="`grep -m 1 '^Results directory:' < $resdir/$ds/$idir/kvm.sh.out | sed -e 's/^Results directory://'`" - mv $runresdir $resdir/$ds/$idir - rrd="`echo $runresdir | sed -e 's,^.*/,,'`" - echo Run results: $resdir/$ds/$idir/$rrd + echo Run results: $resdir/$ds/$idir if test "$ret" -ne 0 then # Failure, so leave all evidence intact. diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index 0ff59bd8b640..9f0a5d5ff2dd 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -262,6 +262,7 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log" # Wait for all remaining scenarios to complete and collect results. for i in $systems do + echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log" while checkremotefile "$i" "$resdir/$ds/remote.run" do sleep 30 diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 263e16aeca0e..6c734818a875 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -164,7 +164,7 @@ do shift ;; --gdb) - TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG + TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"; export TORTURE_KCONFIG_GDB_ARG TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG ;; @@ -180,7 +180,7 @@ do shift ;; --kasan) - TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG if test -n "$torture_qemu_mem_default" then TORTURE_QEMU_MEM=2G @@ -192,7 +192,7 @@ do shift ;; --kcsan) - TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG + TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; --kmake-arg|--kmake-args) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index b86642f90d7f..3a391c9bf468 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -86,7 +86,7 @@ do { \ #define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \ RSEQ_INJECT_ASM(1) \ - "la "RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ + "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \ __rseq_str(label) ":\n" @@ -103,17 +103,17 @@ do { \ #define RSEQ_ASM_OP_CMPEQ(var, expect, label) \ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ - "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ __rseq_str(label) "\n" #define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \ - "lw "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ - "bne "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ + "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ __rseq_str(label) "\n" #define RSEQ_ASM_OP_CMPNE(var, expect, label) \ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \ - "beq "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ + "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \ __rseq_str(label) "\n" #define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \ @@ -127,12 +127,12 @@ do { \ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" #define RSEQ_ASM_OP_R_LOAD_OFF(offset) \ - "add "RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ + "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \ RSEQ_ASM_TMP_REG_1 "\n" \ REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n" #define RSEQ_ASM_OP_R_ADD(count) \ - "add "RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ + "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \ ", %[" __rseq_str(count) "]\n" #define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \ @@ -194,8 +194,8 @@ int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [newv] "r" (newv) @@ -251,8 +251,8 @@ int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expectnot] "r" (expectnot), [load] "m" (*load), @@ -301,8 +301,8 @@ int rseq_addv(intptr_t *v, intptr_t count, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [count] "r" (count) RSEQ_INJECT_INPUT @@ -352,8 +352,8 @@ int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "m" (*v), [newv] "r" (newv), @@ -411,8 +411,8 @@ int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "m" (*v), [newv] "r" (newv), @@ -472,8 +472,8 @@ int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [v] "m" (*v), [expect] "r" (expect), [v2] "m" (*v2), @@ -532,8 +532,8 @@ int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "m" (*v), [newv] "r" (newv), @@ -593,8 +593,8 @@ int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect, RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [expect] "r" (expect), [v] "m" (*v), [newv] "r" (newv), @@ -651,8 +651,8 @@ int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) RSEQ_ASM_DEFINE_ABORT(4, abort) : /* gcc asm goto does not allow outputs */ : [cpu_id] "r" (cpu), - [current_cpu_id] "m" (__rseq_abi.cpu_id), - [rseq_cs] "m" (__rseq_abi.rseq_cs), + [current_cpu_id] "m" (rseq_get_abi()->cpu_id), + [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr), [ptr] "r" (ptr), [off] "er" (off), [inc] "er" (inc) diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 986b9458efb2..4177f9507bbe 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -111,7 +111,8 @@ void rseq_init(void) libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); - if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p) { + if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && + *libc_rseq_size_p != 0) { /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; rseq_size = *libc_rseq_size_p; diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile index fa02c4d5ec13..e815bbf2d0f4 100644 --- a/tools/testing/selftests/safesetid/Makefile +++ b/tools/testing/selftests/safesetid/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -# Makefile for mount selftests. +# Makefile for SafeSetID selftest. CFLAGS = -Wall -O2 LDLIBS = -lcap diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c index 4b809c93ba36..eb9bf0aee951 100644 --- a/tools/testing/selftests/safesetid/safesetid-test.c +++ b/tools/testing/selftests/safesetid/safesetid-test.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <errno.h> #include <pwd.h> +#include <grp.h> #include <string.h> #include <syscall.h> #include <sys/capability.h> @@ -16,17 +17,28 @@ #include <stdbool.h> #include <stdarg.h> +/* + * NOTES about this test: + * - requries libcap-dev to be installed on test system + * - requires securityfs to me mounted at /sys/kernel/security, e.g.: + * mount -n -t securityfs -o nodev,noexec,nosuid securityfs /sys/kernel/security + * - needs CONFIG_SECURITYFS and CONFIG_SAFESETID to be enabled + */ + #ifndef CLONE_NEWUSER # define CLONE_NEWUSER 0x10000000 #endif -#define ROOT_USER 0 -#define RESTRICTED_PARENT 1 -#define ALLOWED_CHILD1 2 -#define ALLOWED_CHILD2 3 -#define NO_POLICY_USER 4 +#define ROOT_UGID 0 +#define RESTRICTED_PARENT_UGID 1 +#define ALLOWED_CHILD1_UGID 2 +#define ALLOWED_CHILD2_UGID 3 +#define NO_POLICY_UGID 4 + +#define UGID_POLICY_STRING "1:2\n1:3\n2:2\n3:3\n" -char* add_whitelist_policy_file = "/sys/kernel/security/safesetid/add_whitelist_policy"; +char* add_uid_whitelist_policy_file = "/sys/kernel/security/safesetid/uid_allowlist_policy"; +char* add_gid_whitelist_policy_file = "/sys/kernel/security/safesetid/gid_allowlist_policy"; static void die(char *fmt, ...) { @@ -106,9 +118,10 @@ static void ensure_user_exists(uid_t uid) die("couldn't open file\n"); if (fseek(fd, 0, SEEK_END)) die("couldn't fseek\n"); - snprintf(name_str, 10, "%d", uid); + snprintf(name_str, 10, "user %d", uid); p.pw_name=name_str; p.pw_uid=uid; + p.pw_gid=uid; p.pw_gecos="Test account"; p.pw_dir="/dev/null"; p.pw_shell="/bin/false"; @@ -120,9 +133,36 @@ static void ensure_user_exists(uid_t uid) } } +static void ensure_group_exists(gid_t gid) +{ + struct group g; + + FILE *fd; + char name_str[10]; + + if (getgrgid(gid) == NULL) { + memset(&g,0x00,sizeof(g)); + fd=fopen("/etc/group","a"); + if (fd == NULL) + die("couldn't open group file\n"); + if (fseek(fd, 0, SEEK_END)) + die("couldn't fseek group file\n"); + snprintf(name_str, 10, "group %d", gid); + g.gr_name=name_str; + g.gr_gid=gid; + g.gr_passwd=NULL; + g.gr_mem=NULL; + int value = putgrent(&g,fd); + if (value != 0) + die("putgrent failed\n"); + if (fclose(fd)) + die("fclose failed\n"); + } +} + static void ensure_securityfs_mounted(void) { - int fd = open(add_whitelist_policy_file, O_WRONLY); + int fd = open(add_uid_whitelist_policy_file, O_WRONLY); if (fd < 0) { if (errno == ENOENT) { // Need to mount securityfs @@ -135,39 +175,60 @@ static void ensure_securityfs_mounted(void) } else { if (close(fd) != 0) { die("close of %s failed: %s\n", - add_whitelist_policy_file, strerror(errno)); + add_uid_whitelist_policy_file, strerror(errno)); + } + } +} + +static void write_uid_policies() +{ + static char *policy_str = UGID_POLICY_STRING; + ssize_t written; + int fd; + + fd = open(add_uid_whitelist_policy_file, O_WRONLY); + if (fd < 0) + die("can't open add_uid_whitelist_policy file\n"); + written = write(fd, policy_str, strlen(policy_str)); + if (written != strlen(policy_str)) { + if (written >= 0) { + die("short write to %s\n", add_uid_whitelist_policy_file); + } else { + die("write to %s failed: %s\n", + add_uid_whitelist_policy_file, strerror(errno)); } } + if (close(fd) != 0) { + die("close of %s failed: %s\n", + add_uid_whitelist_policy_file, strerror(errno)); + } } -static void write_policies(void) +static void write_gid_policies() { - static char *policy_str = - "1:2\n" - "1:3\n" - "2:2\n" - "3:3\n"; + static char *policy_str = UGID_POLICY_STRING; ssize_t written; int fd; - fd = open(add_whitelist_policy_file, O_WRONLY); + fd = open(add_gid_whitelist_policy_file, O_WRONLY); if (fd < 0) - die("can't open add_whitelist_policy file\n"); + die("can't open add_gid_whitelist_policy file\n"); written = write(fd, policy_str, strlen(policy_str)); if (written != strlen(policy_str)) { if (written >= 0) { - die("short write to %s\n", add_whitelist_policy_file); + die("short write to %s\n", add_gid_whitelist_policy_file); } else { die("write to %s failed: %s\n", - add_whitelist_policy_file, strerror(errno)); + add_gid_whitelist_policy_file, strerror(errno)); } } if (close(fd) != 0) { die("close of %s failed: %s\n", - add_whitelist_policy_file, strerror(errno)); + add_gid_whitelist_policy_file, strerror(errno)); } } + static bool test_userns(bool expect_success) { uid_t uid; @@ -194,7 +255,7 @@ static bool test_userns(bool expect_success) printf("preparing file name string failed"); return false; } - success = write_file(map_file_name, "0 0 1", uid); + success = write_file(map_file_name, "0 %d 1", uid); return success == expect_success; } @@ -258,13 +319,144 @@ static void test_setuid(uid_t child_uid, bool expect_success) die("should not reach here\n"); } +static void test_setgid(gid_t child_gid, bool expect_success) +{ + pid_t cpid, w; + int wstatus; + + cpid = fork(); + if (cpid == -1) { + die("fork\n"); + } + + if (cpid == 0) { /* Code executed by child */ + if (setgid(child_gid) < 0) + exit(EXIT_FAILURE); + if (getgid() == child_gid) + exit(EXIT_SUCCESS); + else + exit(EXIT_FAILURE); + } else { /* Code executed by parent */ + do { + w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED); + if (w == -1) { + die("waitpid\n"); + } + + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) { + if (expect_success) { + return; + } else { + die("unexpected success\n"); + } + } else { + if (expect_success) { + die("unexpected failure\n"); + } else { + return; + } + } + } else if (WIFSIGNALED(wstatus)) { + if (WTERMSIG(wstatus) == 9) { + if (expect_success) + die("killed unexpectedly\n"); + else + return; + } else { + die("unexpected signal: %d\n", wstatus); + } + } else { + die("unexpected status: %d\n", wstatus); + } + } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus)); + } + + die("should not reach here\n"); +} + +static void test_setgroups(gid_t* child_groups, size_t len, bool expect_success) +{ + pid_t cpid, w; + int wstatus; + gid_t groupset[len]; + int i, j; + + cpid = fork(); + if (cpid == -1) { + die("fork\n"); + } + + if (cpid == 0) { /* Code executed by child */ + if (setgroups(len, child_groups) != 0) + exit(EXIT_FAILURE); + if (getgroups(len, groupset) != len) + exit(EXIT_FAILURE); + for (i = 0; i < len; i++) { + for (j = 0; j < len; j++) { + if (child_groups[i] == groupset[j]) + break; + if (j == len - 1) + exit(EXIT_FAILURE); + } + } + exit(EXIT_SUCCESS); + } else { /* Code executed by parent */ + do { + w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED); + if (w == -1) { + die("waitpid\n"); + } + + if (WIFEXITED(wstatus)) { + if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) { + if (expect_success) { + return; + } else { + die("unexpected success\n"); + } + } else { + if (expect_success) { + die("unexpected failure\n"); + } else { + return; + } + } + } else if (WIFSIGNALED(wstatus)) { + if (WTERMSIG(wstatus) == 9) { + if (expect_success) + die("killed unexpectedly\n"); + else + return; + } else { + die("unexpected signal: %d\n", wstatus); + } + } else { + die("unexpected status: %d\n", wstatus); + } + } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus)); + } + + die("should not reach here\n"); +} + + static void ensure_users_exist(void) { - ensure_user_exists(ROOT_USER); - ensure_user_exists(RESTRICTED_PARENT); - ensure_user_exists(ALLOWED_CHILD1); - ensure_user_exists(ALLOWED_CHILD2); - ensure_user_exists(NO_POLICY_USER); + ensure_user_exists(ROOT_UGID); + ensure_user_exists(RESTRICTED_PARENT_UGID); + ensure_user_exists(ALLOWED_CHILD1_UGID); + ensure_user_exists(ALLOWED_CHILD2_UGID); + ensure_user_exists(NO_POLICY_UGID); +} + +static void ensure_groups_exist(void) +{ + ensure_group_exists(ROOT_UGID); + ensure_group_exists(RESTRICTED_PARENT_UGID); + ensure_group_exists(ALLOWED_CHILD1_UGID); + ensure_group_exists(ALLOWED_CHILD2_UGID); + ensure_group_exists(NO_POLICY_UGID); } static void drop_caps(bool setid_retained) @@ -283,41 +475,52 @@ static void drop_caps(bool setid_retained) int main(int argc, char **argv) { + ensure_groups_exist(); ensure_users_exist(); ensure_securityfs_mounted(); - write_policies(); + write_uid_policies(); + write_gid_policies(); if (prctl(PR_SET_KEEPCAPS, 1L)) die("Error with set keepcaps\n"); - // First test to make sure we can write userns mappings from a user - // that doesn't have any restrictions (as long as it has CAP_SETUID); - if (setuid(NO_POLICY_USER) < 0) - die("Error with set uid(%d)\n", NO_POLICY_USER); - if (setgid(NO_POLICY_USER) < 0) - die("Error with set gid(%d)\n", NO_POLICY_USER); - + // First test to make sure we can write userns mappings from a non-root + // user that doesn't have any restrictions (as long as it has + // CAP_SETUID); + if (setgid(NO_POLICY_UGID) < 0) + die("Error with set gid(%d)\n", NO_POLICY_UGID); + if (setuid(NO_POLICY_UGID) < 0) + die("Error with set uid(%d)\n", NO_POLICY_UGID); // Take away all but setid caps drop_caps(true); - // Need PR_SET_DUMPABLE flag set so we can write /proc/[pid]/uid_map // from non-root parent process. if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0)) die("Error with set dumpable\n"); - if (!test_userns(true)) { die("test_userns failed when it should work\n"); } - if (setuid(RESTRICTED_PARENT) < 0) - die("Error with set uid(%d)\n", RESTRICTED_PARENT); - if (setgid(RESTRICTED_PARENT) < 0) - die("Error with set gid(%d)\n", RESTRICTED_PARENT); + // Now switch to a user/group with restrictions + if (setgid(RESTRICTED_PARENT_UGID) < 0) + die("Error with set gid(%d)\n", RESTRICTED_PARENT_UGID); + if (setuid(RESTRICTED_PARENT_UGID) < 0) + die("Error with set uid(%d)\n", RESTRICTED_PARENT_UGID); + + test_setuid(ROOT_UGID, false); + test_setuid(ALLOWED_CHILD1_UGID, true); + test_setuid(ALLOWED_CHILD2_UGID, true); + test_setuid(NO_POLICY_UGID, false); + + test_setgid(ROOT_UGID, false); + test_setgid(ALLOWED_CHILD1_UGID, true); + test_setgid(ALLOWED_CHILD2_UGID, true); + test_setgid(NO_POLICY_UGID, false); - test_setuid(ROOT_USER, false); - test_setuid(ALLOWED_CHILD1, true); - test_setuid(ALLOWED_CHILD2, true); - test_setuid(NO_POLICY_USER, false); + gid_t allowed_supp_groups[2] = {ALLOWED_CHILD1_UGID, ALLOWED_CHILD2_UGID}; + gid_t disallowed_supp_groups[2] = {ROOT_UGID, NO_POLICY_UGID}; + test_setgroups(allowed_supp_groups, 2, true); + test_setgroups(disallowed_supp_groups, 2, false); if (!test_userns(false)) { die("test_userns worked when it should fail\n"); @@ -328,8 +531,12 @@ int main(int argc, char **argv) test_setuid(2, false); test_setuid(3, false); test_setuid(4, false); + test_setgid(2, false); + test_setgid(3, false); + test_setgid(4, false); // NOTE: this test doesn't clean up users that were created in // /etc/passwd or flush policies that were added to the LSM. + printf("test successful!\n"); return EXIT_SUCCESS; } diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 136df5b76319..4ae6c8991307 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -809,7 +809,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata, .len = (unsigned short)ARRAY_SIZE(filter_thread), .filter = filter_thread, }; - int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA; + int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA; struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h index 02d775789ea7..d8587c971941 100644 --- a/tools/testing/selftests/sgx/defines.h +++ b/tools/testing/selftests/sgx/defines.h @@ -24,6 +24,9 @@ enum encl_op_type { ENCL_OP_PUT_TO_ADDRESS, ENCL_OP_GET_FROM_ADDRESS, ENCL_OP_NOP, + ENCL_OP_EACCEPT, + ENCL_OP_EMODPE, + ENCL_OP_INIT_TCS_PAGE, ENCL_OP_MAX, }; @@ -53,4 +56,24 @@ struct encl_op_get_from_addr { uint64_t addr; }; +struct encl_op_eaccept { + struct encl_op_header header; + uint64_t epc_addr; + uint64_t flags; + uint64_t ret; +}; + +struct encl_op_emodpe { + struct encl_op_header header; + uint64_t epc_addr; + uint64_t flags; +}; + +struct encl_op_init_tcs_page { + struct encl_op_header header; + uint64_t tcs_page; + uint64_t ssa; + uint64_t entry; +}; + #endif /* DEFINES_H */ diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c index 006b464c8fc9..94bdeac1cf04 100644 --- a/tools/testing/selftests/sgx/load.c +++ b/tools/testing/selftests/sgx/load.c @@ -130,6 +130,47 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg) return true; } +/* + * Parse the enclave code's symbol table to locate and return address of + * the provided symbol + */ +uint64_t encl_get_entry(struct encl *encl, const char *symbol) +{ + Elf64_Shdr *sections; + Elf64_Sym *symtab; + Elf64_Ehdr *ehdr; + char *sym_names; + int num_sym; + int i; + + ehdr = encl->bin; + sections = encl->bin + ehdr->e_shoff; + + for (i = 0; i < ehdr->e_shnum; i++) { + if (sections[i].sh_type == SHT_SYMTAB) { + symtab = (Elf64_Sym *)((char *)encl->bin + sections[i].sh_offset); + num_sym = sections[i].sh_size / sections[i].sh_entsize; + break; + } + } + + for (i = 0; i < ehdr->e_shnum; i++) { + if (sections[i].sh_type == SHT_STRTAB) { + sym_names = (char *)encl->bin + sections[i].sh_offset; + break; + } + } + + for (i = 0; i < num_sym; i++) { + Elf64_Sym *sym = &symtab[i]; + + if (!strcmp(symbol, sym_names + sym->st_name)) + return (uint64_t)sym->st_value; + } + + return 0; +} + bool encl_load(const char *path, struct encl *encl, unsigned long heap_size) { const char device_path[] = "/dev/sgx_enclave"; diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c index dd74fa42302e..9820b3809c69 100644 --- a/tools/testing/selftests/sgx/main.c +++ b/tools/testing/selftests/sgx/main.c @@ -25,6 +25,18 @@ static const uint64_t MAGIC = 0x1122334455667788ULL; static const uint64_t MAGIC2 = 0x8877665544332211ULL; vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave; +/* + * Security Information (SECINFO) data structure needed by a few SGX + * instructions (eg. ENCLU[EACCEPT] and ENCLU[EMODPE]) holds meta-data + * about an enclave page. &enum sgx_secinfo_page_state specifies the + * secinfo flags used for page state. + */ +enum sgx_secinfo_page_state { + SGX_SECINFO_PENDING = (1 << 3), + SGX_SECINFO_MODIFIED = (1 << 4), + SGX_SECINFO_PR = (1 << 5), +}; + struct vdso_symtab { Elf64_Sym *elf_symtab; const char *elf_symstrtab; @@ -74,6 +86,15 @@ static bool vdso_get_symtab(void *addr, struct vdso_symtab *symtab) return true; } +static inline int sgx2_supported(void) +{ + unsigned int eax, ebx, ecx, edx; + + __cpuid_count(SGX_CPUID, 0x0, eax, ebx, ecx, edx); + + return eax & 0x2; +} + static unsigned long elf_sym_hash(const char *name) { unsigned long h = 0, high; @@ -110,6 +131,24 @@ static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name) } /* + * Return the offset in the enclave where the TCS segment can be found. + * The first RW segment loaded is the TCS. + */ +static off_t encl_get_tcs_offset(struct encl *encl) +{ + int i; + + for (i = 0; i < encl->nr_segments; i++) { + struct encl_segment *seg = &encl->segment_tbl[i]; + + if (i == 0 && seg->prot == (PROT_READ | PROT_WRITE)) + return seg->offset; + } + + return -1; +} + +/* * Return the offset in the enclave where the data segment can be found. * The first RW segment loaded is the TCS, skip that to get info on the * data segment. @@ -339,7 +378,127 @@ TEST_F(enclave, unclobbered_vdso_oversubscribed) EXPECT_EQ(get_op.value, MAGIC); EXPECT_EEXIT(&self->run); EXPECT_EQ(self->run.user_data, 0); +} + +TEST_F_TIMEOUT(enclave, unclobbered_vdso_oversubscribed_remove, 900) +{ + struct sgx_enclave_remove_pages remove_ioc; + struct sgx_enclave_modify_types modt_ioc; + struct encl_op_get_from_buf get_op; + struct encl_op_eaccept eaccept_op; + struct encl_op_put_to_buf put_op; + struct encl_segment *heap; + unsigned long total_mem; + int ret, errno_save; + unsigned long addr; + unsigned long i; + + /* + * Create enclave with additional heap that is as big as all + * available physical SGX memory. + */ + total_mem = get_total_epc_mem(); + ASSERT_NE(total_mem, 0); + TH_LOG("Creating an enclave with %lu bytes heap may take a while ...", + total_mem); + ASSERT_TRUE(setup_test_encl(total_mem, &self->encl, _metadata)); + + /* + * Hardware (SGX2) and kernel support is needed for this test. Start + * with check that test has a chance of succeeding. + */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + + if (ret == -1) { + if (errno == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()"); + else if (errno == ENODEV) + SKIP(return, "System does not support SGX2"); + } + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + EXPECT_EQ(ret, -1); + + /* SGX2 is supported by kernel and hardware, test can proceed. */ + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + heap = &self->encl.segment_tbl[self->encl.nr_segments - 1]; + + put_op.header.type = ENCL_OP_PUT_TO_BUFFER; + put_op.value = MAGIC; + + EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.user_data, 0); + + get_op.header.type = ENCL_OP_GET_FROM_BUFFER; + get_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0); + + EXPECT_EQ(get_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.user_data, 0); + + /* Trim entire heap. */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + + modt_ioc.offset = heap->offset; + modt_ioc.length = heap->size; + modt_ioc.page_type = SGX_PAGE_TYPE_TRIM; + + TH_LOG("Changing type of %zd bytes to trimmed may take a while ...", + heap->size); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(modt_ioc.result, 0); + EXPECT_EQ(modt_ioc.count, heap->size); + + /* EACCEPT all removed pages. */ + addr = self->encl.encl_base + heap->offset; + + eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + TH_LOG("Entering enclave to run EACCEPT for each page of %zd bytes may take a while ...", + heap->size); + for (i = 0; i < heap->size; i += 4096) { + eaccept_op.epc_addr = addr + i; + eaccept_op.ret = 0; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + ASSERT_EQ(eaccept_op.ret, 0); + ASSERT_EQ(self->run.function, EEXIT); + } + + /* Complete page removal. */ + memset(&remove_ioc, 0, sizeof(remove_ioc)); + + remove_ioc.offset = heap->offset; + remove_ioc.length = heap->size; + + TH_LOG("Removing %zd bytes from enclave may take a while ...", + heap->size); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(remove_ioc.count, heap->size); } TEST_F(enclave, clobbered_vdso) @@ -555,4 +714,1280 @@ TEST_F(enclave, pte_permissions) EXPECT_EQ(self->run.exception_addr, 0); } +/* + * Modifying permissions of TCS page should not be possible. + */ +TEST_F(enclave, tcs_permissions) +{ + struct sgx_enclave_restrict_permissions ioc; + int ret, errno_save; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + memset(&ioc, 0, sizeof(ioc)); + + /* + * Ensure kernel supports needed ioctl() and system supports needed + * commands. + */ + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS, &ioc); + errno_save = ret == -1 ? errno : 0; + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + ASSERT_EQ(ret, -1); + + /* ret == -1 */ + if (errno_save == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS ioctl()"); + else if (errno_save == ENODEV) + SKIP(return, "System does not support SGX2"); + + /* + * Attempt to make TCS page read-only. This is not allowed and + * should be prevented by the kernel. + */ + ioc.offset = encl_get_tcs_offset(&self->encl); + ioc.length = PAGE_SIZE; + ioc.permissions = SGX_SECINFO_R; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS, &ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno_save, EINVAL); + EXPECT_EQ(ioc.result, 0); + EXPECT_EQ(ioc.count, 0); +} + +/* + * Enclave page permission test. + * + * Modify and restore enclave page's EPCM (enclave) permissions from + * outside enclave (ENCLS[EMODPR] via kernel) as well as from within + * enclave (via ENCLU[EMODPE]). Check for page fault if + * VMA allows access but EPCM permissions do not. + */ +TEST_F(enclave, epcm_permissions) +{ + struct sgx_enclave_restrict_permissions restrict_ioc; + struct encl_op_get_from_addr get_addr_op; + struct encl_op_put_to_addr put_addr_op; + struct encl_op_eaccept eaccept_op; + struct encl_op_emodpe emodpe_op; + unsigned long data_start; + int ret, errno_save; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + /* + * Ensure kernel supports needed ioctl() and system supports needed + * commands. + */ + memset(&restrict_ioc, 0, sizeof(restrict_ioc)); + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS, + &restrict_ioc); + errno_save = ret == -1 ? errno : 0; + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + ASSERT_EQ(ret, -1); + + /* ret == -1 */ + if (errno_save == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS ioctl()"); + else if (errno_save == ENODEV) + SKIP(return, "System does not support SGX2"); + + /* + * Page that will have its permissions changed is the second data + * page in the .data segment. This forms part of the local encl_buffer + * within the enclave. + * + * At start of test @data_start should have EPCM as well as PTE and + * VMA permissions of RW. + */ + + data_start = self->encl.encl_base + + encl_get_data_offset(&self->encl) + PAGE_SIZE; + + /* + * Sanity check that page at @data_start is writable before making + * any changes to page permissions. + * + * Start by writing MAGIC to test page. + */ + put_addr_op.value = MAGIC; + put_addr_op.addr = data_start; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory that was just written to, confirming that + * page is writable. + */ + get_addr_op.value = 0; + get_addr_op.addr = data_start; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Change EPCM permissions to read-only. Kernel still considers + * the page writable. + */ + memset(&restrict_ioc, 0, sizeof(restrict_ioc)); + + restrict_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE; + restrict_ioc.length = PAGE_SIZE; + restrict_ioc.permissions = SGX_SECINFO_R; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS, + &restrict_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(restrict_ioc.result, 0); + EXPECT_EQ(restrict_ioc.count, 4096); + + /* + * EPCM permissions changed from kernel, need to EACCEPT from enclave. + */ + eaccept_op.epc_addr = data_start; + eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_REG | SGX_SECINFO_PR; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* + * EPCM permissions of page is now read-only, expect #PF + * on EPCM when attempting to write to page from within enclave. + */ + put_addr_op.value = MAGIC2; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EQ(self->run.function, ERESUME); + EXPECT_EQ(self->run.exception_vector, 14); + EXPECT_EQ(self->run.exception_error_code, 0x8007); + EXPECT_EQ(self->run.exception_addr, data_start); + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + /* + * Received AEX but cannot return to enclave at same entrypoint, + * need different TCS from where EPCM permission can be made writable + * again. + */ + self->run.tcs = self->encl.encl_base + PAGE_SIZE; + + /* + * Enter enclave at new TCS to change EPCM permissions to be + * writable again and thus fix the page fault that triggered the + * AEX. + */ + + emodpe_op.epc_addr = data_start; + emodpe_op.flags = SGX_SECINFO_R | SGX_SECINFO_W; + emodpe_op.header.type = ENCL_OP_EMODPE; + + EXPECT_EQ(ENCL_CALL(&emodpe_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Attempt to return to main TCS to resume execution at faulting + * instruction, PTE should continue to allow writing to the page. + */ + self->run.tcs = self->encl.encl_base; + + /* + * Wrong page permissions that caused original fault has + * now been fixed via EPCM permissions. + * Resume execution in main TCS to re-attempt the memory access. + */ + self->run.tcs = self->encl.encl_base; + + EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0, 0, + ERESUME, 0, 0, + &self->run), + 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + get_addr_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC2); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.user_data, 0); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); +} + +/* + * Test the addition of pages to an initialized enclave via writing to + * a page belonging to the enclave's address space but was not added + * during enclave creation. + */ +TEST_F(enclave, augment) +{ + struct encl_op_get_from_addr get_addr_op; + struct encl_op_put_to_addr put_addr_op; + struct encl_op_eaccept eaccept_op; + size_t total_size = 0; + void *addr; + int i; + + if (!sgx2_supported()) + SKIP(return, "SGX2 not supported"); + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + for (i = 0; i < self->encl.nr_segments; i++) { + struct encl_segment *seg = &self->encl.segment_tbl[i]; + + total_size += seg->size; + } + + /* + * Actual enclave size is expected to be larger than the loaded + * test enclave since enclave size must be a power of 2 in bytes + * and test_encl does not consume it all. + */ + EXPECT_LT(total_size + PAGE_SIZE, self->encl.encl_size); + + /* + * Create memory mapping for the page that will be added. New + * memory mapping is for one page right after all existing + * mappings. + * Kernel will allow new mapping using any permissions if it + * falls into the enclave's address range but not backed + * by existing enclave pages. + */ + addr = mmap((void *)self->encl.encl_base + total_size, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_SHARED | MAP_FIXED, self->encl.fd, 0); + EXPECT_NE(addr, MAP_FAILED); + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + /* + * Attempt to write to the new page from within enclave. + * Expected to fail since page is not (yet) part of the enclave. + * The first #PF will trigger the addition of the page to the + * enclave, but since the new page needs an EACCEPT from within the + * enclave before it can be used it would not be possible + * to successfully return to the failing instruction. This is the + * cause of the second #PF captured here having the SGX bit set, + * it is from hardware preventing the page from being used. + */ + put_addr_op.value = MAGIC; + put_addr_op.addr = (unsigned long)addr; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EQ(self->run.function, ERESUME); + EXPECT_EQ(self->run.exception_vector, 14); + EXPECT_EQ(self->run.exception_addr, (unsigned long)addr); + + if (self->run.exception_error_code == 0x6) { + munmap(addr, PAGE_SIZE); + SKIP(return, "Kernel does not support adding pages to initialized enclave"); + } + + EXPECT_EQ(self->run.exception_error_code, 0x8007); + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + /* Handle AEX by running EACCEPT from new entry point. */ + self->run.tcs = self->encl.encl_base + PAGE_SIZE; + + eaccept_op.epc_addr = self->encl.encl_base + total_size; + eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* Can now return to main TCS to resume execution. */ + self->run.tcs = self->encl.encl_base; + + EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0, 0, + ERESUME, 0, 0, + &self->run), + 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory from newly added page that was just written to, + * confirming that data previously written (MAGIC) is present. + */ + get_addr_op.value = 0; + get_addr_op.addr = (unsigned long)addr; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + munmap(addr, PAGE_SIZE); +} + +/* + * Test for the addition of pages to an initialized enclave via a + * pre-emptive run of EACCEPT on page to be added. + */ +TEST_F(enclave, augment_via_eaccept) +{ + struct encl_op_get_from_addr get_addr_op; + struct encl_op_put_to_addr put_addr_op; + struct encl_op_eaccept eaccept_op; + size_t total_size = 0; + void *addr; + int i; + + if (!sgx2_supported()) + SKIP(return, "SGX2 not supported"); + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + for (i = 0; i < self->encl.nr_segments; i++) { + struct encl_segment *seg = &self->encl.segment_tbl[i]; + + total_size += seg->size; + } + + /* + * Actual enclave size is expected to be larger than the loaded + * test enclave since enclave size must be a power of 2 in bytes while + * test_encl does not consume it all. + */ + EXPECT_LT(total_size + PAGE_SIZE, self->encl.encl_size); + + /* + * mmap() a page at end of existing enclave to be used for dynamic + * EPC page. + * + * Kernel will allow new mapping using any permissions if it + * falls into the enclave's address range but not backed + * by existing enclave pages. + */ + + addr = mmap((void *)self->encl.encl_base + total_size, PAGE_SIZE, + PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED, + self->encl.fd, 0); + EXPECT_NE(addr, MAP_FAILED); + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + /* + * Run EACCEPT on new page to trigger the #PF->EAUG->EACCEPT(again + * without a #PF). All should be transparent to userspace. + */ + eaccept_op.epc_addr = self->encl.encl_base + total_size; + eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + if (self->run.exception_vector == 14 && + self->run.exception_error_code == 4 && + self->run.exception_addr == self->encl.encl_base + total_size) { + munmap(addr, PAGE_SIZE); + SKIP(return, "Kernel does not support adding pages to initialized enclave"); + } + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* + * New page should be accessible from within enclave - attempt to + * write to it. + */ + put_addr_op.value = MAGIC; + put_addr_op.addr = (unsigned long)addr; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory from newly added page that was just written to, + * confirming that data previously written (MAGIC) is present. + */ + get_addr_op.value = 0; + get_addr_op.addr = (unsigned long)addr; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + munmap(addr, PAGE_SIZE); +} + +/* + * SGX2 page type modification test in two phases: + * Phase 1: + * Create a new TCS, consisting out of three new pages (stack page with regular + * page type, SSA page with regular page type, and TCS page with TCS page + * type) in an initialized enclave and run a simple workload within it. + * Phase 2: + * Remove the three pages added in phase 1, add a new regular page at the + * same address that previously hosted the TCS page and verify that it can + * be modified. + */ +TEST_F(enclave, tcs_create) +{ + struct encl_op_init_tcs_page init_tcs_page_op; + struct sgx_enclave_remove_pages remove_ioc; + struct encl_op_get_from_addr get_addr_op; + struct sgx_enclave_modify_types modt_ioc; + struct encl_op_put_to_addr put_addr_op; + struct encl_op_get_from_buf get_buf_op; + struct encl_op_put_to_buf put_buf_op; + void *addr, *tcs, *stack_end, *ssa; + struct encl_op_eaccept eaccept_op; + size_t total_size = 0; + uint64_t val_64; + int errno_save; + int ret, i; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, + _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + /* + * Hardware (SGX2) and kernel support is needed for this test. Start + * with check that test has a chance of succeeding. + */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + + if (ret == -1) { + if (errno == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()"); + else if (errno == ENODEV) + SKIP(return, "System does not support SGX2"); + } + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + EXPECT_EQ(ret, -1); + + /* + * Add three regular pages via EAUG: one will be the TCS stack, one + * will be the TCS SSA, and one will be the new TCS. The stack and + * SSA will remain as regular pages, the TCS page will need its + * type changed after populated with needed data. + */ + for (i = 0; i < self->encl.nr_segments; i++) { + struct encl_segment *seg = &self->encl.segment_tbl[i]; + + total_size += seg->size; + } + + /* + * Actual enclave size is expected to be larger than the loaded + * test enclave since enclave size must be a power of 2 in bytes while + * test_encl does not consume it all. + */ + EXPECT_LT(total_size + 3 * PAGE_SIZE, self->encl.encl_size); + + /* + * mmap() three pages at end of existing enclave to be used for the + * three new pages. + */ + addr = mmap((void *)self->encl.encl_base + total_size, 3 * PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, + self->encl.fd, 0); + EXPECT_NE(addr, MAP_FAILED); + + self->run.exception_vector = 0; + self->run.exception_error_code = 0; + self->run.exception_addr = 0; + + stack_end = (void *)self->encl.encl_base + total_size; + tcs = (void *)self->encl.encl_base + total_size + PAGE_SIZE; + ssa = (void *)self->encl.encl_base + total_size + 2 * PAGE_SIZE; + + /* + * Run EACCEPT on each new page to trigger the + * EACCEPT->(#PF)->EAUG->EACCEPT(again without a #PF) flow. + */ + + eaccept_op.epc_addr = (unsigned long)stack_end; + eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + if (self->run.exception_vector == 14 && + self->run.exception_error_code == 4 && + self->run.exception_addr == (unsigned long)stack_end) { + munmap(addr, 3 * PAGE_SIZE); + SKIP(return, "Kernel does not support adding pages to initialized enclave"); + } + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + eaccept_op.epc_addr = (unsigned long)ssa; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + eaccept_op.epc_addr = (unsigned long)tcs; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* + * Three new pages added to enclave. Now populate the TCS page with + * needed data. This should be done from within enclave. Provide + * the function that will do the actual data population with needed + * data. + */ + + /* + * New TCS will use the "encl_dyn_entry" entrypoint that expects + * stack to begin in page before TCS page. + */ + val_64 = encl_get_entry(&self->encl, "encl_dyn_entry"); + EXPECT_NE(val_64, 0); + + init_tcs_page_op.tcs_page = (unsigned long)tcs; + init_tcs_page_op.ssa = (unsigned long)total_size + 2 * PAGE_SIZE; + init_tcs_page_op.entry = val_64; + init_tcs_page_op.header.type = ENCL_OP_INIT_TCS_PAGE; + + EXPECT_EQ(ENCL_CALL(&init_tcs_page_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Change TCS page type to TCS. */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + + modt_ioc.offset = total_size + PAGE_SIZE; + modt_ioc.length = PAGE_SIZE; + modt_ioc.page_type = SGX_PAGE_TYPE_TCS; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(modt_ioc.result, 0); + EXPECT_EQ(modt_ioc.count, 4096); + + /* EACCEPT new TCS page from enclave. */ + eaccept_op.epc_addr = (unsigned long)tcs; + eaccept_op.flags = SGX_SECINFO_TCS | SGX_SECINFO_MODIFIED; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* Run workload from new TCS. */ + self->run.tcs = (unsigned long)tcs; + + /* + * Simple workload to write to data buffer and read value back. + */ + put_buf_op.header.type = ENCL_OP_PUT_TO_BUFFER; + put_buf_op.value = MAGIC; + + EXPECT_EQ(ENCL_CALL(&put_buf_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + get_buf_op.header.type = ENCL_OP_GET_FROM_BUFFER; + get_buf_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_buf_op, &self->run, true), 0); + + EXPECT_EQ(get_buf_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Phase 2 of test: + * Remove pages associated with new TCS, create a regular page + * where TCS page used to be and verify it can be used as a regular + * page. + */ + + /* Start page removal by requesting change of page type to PT_TRIM. */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + + modt_ioc.offset = total_size; + modt_ioc.length = 3 * PAGE_SIZE; + modt_ioc.page_type = SGX_PAGE_TYPE_TRIM; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(modt_ioc.result, 0); + EXPECT_EQ(modt_ioc.count, 3 * PAGE_SIZE); + + /* + * Enter enclave via TCS #1 and approve page removal by sending + * EACCEPT for each of three removed pages. + */ + self->run.tcs = self->encl.encl_base; + + eaccept_op.epc_addr = (unsigned long)stack_end; + eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + eaccept_op.epc_addr = (unsigned long)tcs; + eaccept_op.ret = 0; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + eaccept_op.epc_addr = (unsigned long)ssa; + eaccept_op.ret = 0; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* Send final ioctl() to complete page removal. */ + memset(&remove_ioc, 0, sizeof(remove_ioc)); + + remove_ioc.offset = total_size; + remove_ioc.length = 3 * PAGE_SIZE; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(remove_ioc.count, 3 * PAGE_SIZE); + + /* + * Enter enclave via TCS #1 and access location where TCS #3 was to + * trigger dynamic add of regular page at that location. + */ + eaccept_op.epc_addr = (unsigned long)tcs; + eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* + * New page should be accessible from within enclave - write to it. + */ + put_addr_op.value = MAGIC; + put_addr_op.addr = (unsigned long)tcs; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory from newly added page that was just written to, + * confirming that data previously written (MAGIC) is present. + */ + get_addr_op.value = 0; + get_addr_op.addr = (unsigned long)tcs; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + munmap(addr, 3 * PAGE_SIZE); +} + +/* + * Ensure sane behavior if user requests page removal, does not run + * EACCEPT from within enclave but still attempts to finalize page removal + * with the SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl(). The latter should fail + * because the removal was not EACCEPTed from within the enclave. + */ +TEST_F(enclave, remove_added_page_no_eaccept) +{ + struct sgx_enclave_remove_pages remove_ioc; + struct encl_op_get_from_addr get_addr_op; + struct sgx_enclave_modify_types modt_ioc; + struct encl_op_put_to_addr put_addr_op; + unsigned long data_start; + int ret, errno_save; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + /* + * Hardware (SGX2) and kernel support is needed for this test. Start + * with check that test has a chance of succeeding. + */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + + if (ret == -1) { + if (errno == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()"); + else if (errno == ENODEV) + SKIP(return, "System does not support SGX2"); + } + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + EXPECT_EQ(ret, -1); + + /* + * Page that will be removed is the second data page in the .data + * segment. This forms part of the local encl_buffer within the + * enclave. + */ + data_start = self->encl.encl_base + + encl_get_data_offset(&self->encl) + PAGE_SIZE; + + /* + * Sanity check that page at @data_start is writable before + * removing it. + * + * Start by writing MAGIC to test page. + */ + put_addr_op.value = MAGIC; + put_addr_op.addr = data_start; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory that was just written to, confirming that data + * previously written (MAGIC) is present. + */ + get_addr_op.value = 0; + get_addr_op.addr = data_start; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Start page removal by requesting change of page type to PT_TRIM */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + + modt_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE; + modt_ioc.length = PAGE_SIZE; + modt_ioc.page_type = SGX_PAGE_TYPE_TRIM; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(modt_ioc.result, 0); + EXPECT_EQ(modt_ioc.count, 4096); + + /* Skip EACCEPT */ + + /* Send final ioctl() to complete page removal */ + memset(&remove_ioc, 0, sizeof(remove_ioc)); + + remove_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE; + remove_ioc.length = PAGE_SIZE; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc); + errno_save = ret == -1 ? errno : 0; + + /* Operation not permitted since EACCEPT was omitted. */ + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno_save, EPERM); + EXPECT_EQ(remove_ioc.count, 0); +} + +/* + * Request enclave page removal but instead of correctly following with + * EACCEPT a read attempt to page is made from within the enclave. + */ +TEST_F(enclave, remove_added_page_invalid_access) +{ + struct encl_op_get_from_addr get_addr_op; + struct encl_op_put_to_addr put_addr_op; + struct sgx_enclave_modify_types ioc; + unsigned long data_start; + int ret, errno_save; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + /* + * Hardware (SGX2) and kernel support is needed for this test. Start + * with check that test has a chance of succeeding. + */ + memset(&ioc, 0, sizeof(ioc)); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc); + + if (ret == -1) { + if (errno == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()"); + else if (errno == ENODEV) + SKIP(return, "System does not support SGX2"); + } + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + EXPECT_EQ(ret, -1); + + /* + * Page that will be removed is the second data page in the .data + * segment. This forms part of the local encl_buffer within the + * enclave. + */ + data_start = self->encl.encl_base + + encl_get_data_offset(&self->encl) + PAGE_SIZE; + + /* + * Sanity check that page at @data_start is writable before + * removing it. + * + * Start by writing MAGIC to test page. + */ + put_addr_op.value = MAGIC; + put_addr_op.addr = data_start; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory that was just written to, confirming that data + * previously written (MAGIC) is present. + */ + get_addr_op.value = 0; + get_addr_op.addr = data_start; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Start page removal by requesting change of page type to PT_TRIM. */ + memset(&ioc, 0, sizeof(ioc)); + + ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE; + ioc.length = PAGE_SIZE; + ioc.page_type = SGX_PAGE_TYPE_TRIM; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(ioc.result, 0); + EXPECT_EQ(ioc.count, 4096); + + /* + * Read from page that was just removed. + */ + get_addr_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + /* + * From kernel perspective the page is present but according to SGX the + * page should not be accessible so a #PF with SGX bit set is + * expected. + */ + + EXPECT_EQ(self->run.function, ERESUME); + EXPECT_EQ(self->run.exception_vector, 14); + EXPECT_EQ(self->run.exception_error_code, 0x8005); + EXPECT_EQ(self->run.exception_addr, data_start); +} + +/* + * Request enclave page removal and correctly follow with + * EACCEPT but do not follow with removal ioctl() but instead a read attempt + * to removed page is made from within the enclave. + */ +TEST_F(enclave, remove_added_page_invalid_access_after_eaccept) +{ + struct encl_op_get_from_addr get_addr_op; + struct encl_op_put_to_addr put_addr_op; + struct sgx_enclave_modify_types ioc; + struct encl_op_eaccept eaccept_op; + unsigned long data_start; + int ret, errno_save; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + /* + * Hardware (SGX2) and kernel support is needed for this test. Start + * with check that test has a chance of succeeding. + */ + memset(&ioc, 0, sizeof(ioc)); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc); + + if (ret == -1) { + if (errno == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()"); + else if (errno == ENODEV) + SKIP(return, "System does not support SGX2"); + } + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + EXPECT_EQ(ret, -1); + + /* + * Page that will be removed is the second data page in the .data + * segment. This forms part of the local encl_buffer within the + * enclave. + */ + data_start = self->encl.encl_base + + encl_get_data_offset(&self->encl) + PAGE_SIZE; + + /* + * Sanity check that page at @data_start is writable before + * removing it. + * + * Start by writing MAGIC to test page. + */ + put_addr_op.value = MAGIC; + put_addr_op.addr = data_start; + put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* + * Read memory that was just written to, confirming that data + * previously written (MAGIC) is present. + */ + get_addr_op.value = 0; + get_addr_op.addr = data_start; + get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + EXPECT_EQ(get_addr_op.value, MAGIC); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + + /* Start page removal by requesting change of page type to PT_TRIM. */ + memset(&ioc, 0, sizeof(ioc)); + + ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE; + ioc.length = PAGE_SIZE; + ioc.page_type = SGX_PAGE_TYPE_TRIM; + + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(ioc.result, 0); + EXPECT_EQ(ioc.count, 4096); + + eaccept_op.epc_addr = (unsigned long)data_start; + eaccept_op.ret = 0; + eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + /* Skip ioctl() to remove page. */ + + /* + * Read from page that was just removed. + */ + get_addr_op.value = 0; + + EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0); + + /* + * From kernel perspective the page is present but according to SGX the + * page should not be accessible so a #PF with SGX bit set is + * expected. + */ + + EXPECT_EQ(self->run.function, ERESUME); + EXPECT_EQ(self->run.exception_vector, 14); + EXPECT_EQ(self->run.exception_error_code, 0x8005); + EXPECT_EQ(self->run.exception_addr, data_start); +} + +TEST_F(enclave, remove_untouched_page) +{ + struct sgx_enclave_remove_pages remove_ioc; + struct sgx_enclave_modify_types modt_ioc; + struct encl_op_eaccept eaccept_op; + unsigned long data_start; + int ret, errno_save; + + ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata)); + + /* + * Hardware (SGX2) and kernel support is needed for this test. Start + * with check that test has a chance of succeeding. + */ + memset(&modt_ioc, 0, sizeof(modt_ioc)); + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + + if (ret == -1) { + if (errno == ENOTTY) + SKIP(return, + "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()"); + else if (errno == ENODEV) + SKIP(return, "System does not support SGX2"); + } + + /* + * Invalid parameters were provided during sanity check, + * expect command to fail. + */ + EXPECT_EQ(ret, -1); + + /* SGX2 is supported by kernel and hardware, test can proceed. */ + memset(&self->run, 0, sizeof(self->run)); + self->run.tcs = self->encl.encl_base; + + data_start = self->encl.encl_base + + encl_get_data_offset(&self->encl) + PAGE_SIZE; + + memset(&modt_ioc, 0, sizeof(modt_ioc)); + + modt_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE; + modt_ioc.length = PAGE_SIZE; + modt_ioc.page_type = SGX_PAGE_TYPE_TRIM; + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(modt_ioc.result, 0); + EXPECT_EQ(modt_ioc.count, 4096); + + /* + * Enter enclave via TCS #1 and approve page removal by sending + * EACCEPT for removed page. + */ + + eaccept_op.epc_addr = data_start; + eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED; + eaccept_op.ret = 0; + eaccept_op.header.type = ENCL_OP_EACCEPT; + + EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0); + EXPECT_EEXIT(&self->run); + EXPECT_EQ(self->run.exception_vector, 0); + EXPECT_EQ(self->run.exception_error_code, 0); + EXPECT_EQ(self->run.exception_addr, 0); + EXPECT_EQ(eaccept_op.ret, 0); + + memset(&remove_ioc, 0, sizeof(remove_ioc)); + + remove_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE; + remove_ioc.length = PAGE_SIZE; + ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc); + errno_save = ret == -1 ? errno : 0; + + EXPECT_EQ(ret, 0); + EXPECT_EQ(errno_save, 0); + EXPECT_EQ(remove_ioc.count, 4096); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h index b45c52ec7ab3..fc585be97e2f 100644 --- a/tools/testing/selftests/sgx/main.h +++ b/tools/testing/selftests/sgx/main.h @@ -38,6 +38,7 @@ void encl_delete(struct encl *ctx); bool encl_load(const char *path, struct encl *encl, unsigned long heap_size); bool encl_measure(struct encl *encl); bool encl_build(struct encl *encl); +uint64_t encl_get_entry(struct encl *encl, const char *symbol); int sgx_enter_enclave(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9, struct sgx_enclave_run *run); diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c index 4fca01cfd898..c0d6397295e3 100644 --- a/tools/testing/selftests/sgx/test_encl.c +++ b/tools/testing/selftests/sgx/test_encl.c @@ -11,6 +11,42 @@ */ static uint8_t encl_buffer[8192] = { 1 }; +enum sgx_enclu_function { + EACCEPT = 0x5, + EMODPE = 0x6, +}; + +static void do_encl_emodpe(void *_op) +{ + struct sgx_secinfo secinfo __aligned(sizeof(struct sgx_secinfo)) = {0}; + struct encl_op_emodpe *op = _op; + + secinfo.flags = op->flags; + + asm volatile(".byte 0x0f, 0x01, 0xd7" + : + : "a" (EMODPE), + "b" (&secinfo), + "c" (op->epc_addr)); +} + +static void do_encl_eaccept(void *_op) +{ + struct sgx_secinfo secinfo __aligned(sizeof(struct sgx_secinfo)) = {0}; + struct encl_op_eaccept *op = _op; + int rax; + + secinfo.flags = op->flags; + + asm volatile(".byte 0x0f, 0x01, 0xd7" + : "=a" (rax) + : "a" (EACCEPT), + "b" (&secinfo), + "c" (op->epc_addr)); + + op->ret = rax; +} + static void *memcpy(void *dest, const void *src, size_t n) { size_t i; @@ -21,6 +57,35 @@ static void *memcpy(void *dest, const void *src, size_t n) return dest; } +static void *memset(void *dest, int c, size_t n) +{ + size_t i; + + for (i = 0; i < n; i++) + ((char *)dest)[i] = c; + + return dest; +} + +static void do_encl_init_tcs_page(void *_op) +{ + struct encl_op_init_tcs_page *op = _op; + void *tcs = (void *)op->tcs_page; + uint32_t val_32; + + memset(tcs, 0, 16); /* STATE and FLAGS */ + memcpy(tcs + 16, &op->ssa, 8); /* OSSA */ + memset(tcs + 24, 0, 4); /* CSSA */ + val_32 = 1; + memcpy(tcs + 28, &val_32, 4); /* NSSA */ + memcpy(tcs + 32, &op->entry, 8); /* OENTRY */ + memset(tcs + 40, 0, 24); /* AEP, OFSBASE, OGSBASE */ + val_32 = 0xFFFFFFFF; + memcpy(tcs + 64, &val_32, 4); /* FSLIMIT */ + memcpy(tcs + 68, &val_32, 4); /* GSLIMIT */ + memset(tcs + 72, 0, 4024); /* Reserved */ +} + static void do_encl_op_put_to_buf(void *op) { struct encl_op_put_to_buf *op2 = op; @@ -62,6 +127,9 @@ void encl_body(void *rdi, void *rsi) do_encl_op_put_to_addr, do_encl_op_get_from_addr, do_encl_op_nop, + do_encl_eaccept, + do_encl_emodpe, + do_encl_init_tcs_page, }; struct encl_op_header *op = (struct encl_op_header *)rdi; diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S index 82fb0dfcbd23..03ae0f57e29d 100644 --- a/tools/testing/selftests/sgx/test_encl_bootstrap.S +++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S @@ -45,6 +45,12 @@ encl_entry: # TCS #2. By adding the value of encl_stack to it, we get # the absolute address for the stack. lea (encl_stack)(%rbx), %rax + jmp encl_entry_core +encl_dyn_entry: + # Entry point for dynamically created TCS page expected to follow + # its stack directly. + lea -1(%rbx), %rax +encl_entry_core: xchg %rsp, %rax push %rax diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config index 47ff5afc3727..64c60f38b446 100644 --- a/tools/testing/selftests/sync/config +++ b/tools/testing/selftests/sync/config @@ -1,3 +1,2 @@ CONFIG_STAGING=y -CONFIG_ANDROID=y CONFIG_SW_SYNC=y diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore index d52f65de23b4..9fe1cef72728 100644 --- a/tools/testing/selftests/tc-testing/.gitignore +++ b/tools/testing/selftests/tc-testing/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only __pycache__/ *.pyc -plugins/ *.xml *.tap tdc_config_local.py diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index 4d639279f41e..cb553eac9f41 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -5,7 +5,6 @@ top_srcdir = $(abspath ../../../..) APIDIR := $(top_scrdir)/include/uapi TEST_GEN_FILES = action.o -KSFT_KHDR_INSTALL := 1 include ../lib.mk PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index b24494c6f546..c652e8c1157d 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -609,5 +609,82 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "7f52", + "name": "Try to flush action which is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + "$TC actions add action pass index 1", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 1" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "1", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action pass.*index 1 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] + }, + { + "id": "ae1e", + "name": "Try to flush actions when last one is referenced by filter", + "category": [ + "actions", + "gact" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC qdisc add dev $DEV1 ingress", + [ + "$TC actions add action pass index 1", + 0, + 1, + 255 + ], + "$TC actions add action reclassify index 2", + "$TC actions add action drop index 3", + "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 3" + ], + "cmdUnderTest": "$TC actions flush action gact", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action gact", + "matchPattern": "total acts 1.*action order [0-9]*: gact action drop.*index 3 ref 2 bind 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress", + [ + "sleep 1; $TC actions flush action gact", + 0, + 1 + ] + ] } ] diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index 3a5936cc10ab..f0d51d4d2c87 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c new file mode 100644 index 000000000000..e6ccd900f30a --- /dev/null +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> +#include <string.h> + +#include "log.h" +#include "timens.h" + +#define OFFSET (36000) + +int main(int argc, char *argv[]) +{ + struct timespec now, tst; + int status, i; + pid_t pid; + + if (argc > 1) { + if (sscanf(argv[1], "%ld", &now.tv_sec) != 1) + return pr_perror("sscanf"); + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now.tv_sec) > 5) + return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec); + } + return 0; + } + + nscheck(); + + ksft_set_plan(1); + + clock_gettime(CLOCK_MONOTONIC, &now); + + if (unshare_timens()) + return 1; + + if (_settime(CLOCK_MONOTONIC, OFFSET)) + return 1; + + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now.tv_sec) > 5) + return pr_fail("%ld %ld\n", + now.tv_sec, tst.tv_sec); + } + + pid = vfork(); + if (pid < 0) + return pr_perror("fork"); + + if (pid == 0) { + char now_str[64]; + char *cargv[] = {"exec", now_str, NULL}; + char *cenv[] = {NULL}; + + // Check that we are still in the source timens. + for (i = 0; i < 2; i++) { + _gettime(CLOCK_MONOTONIC, &tst, i); + if (abs(tst.tv_sec - now.tv_sec) > 5) + return pr_fail("%ld %ld\n", + now.tv_sec, tst.tv_sec); + } + + /* Check for proper vvar offsets after execve. */ + snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET); + execve("/proc/self/exe", cargv, cenv); + return pr_perror("execve"); + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("waitpid"); + + if (status) + ksft_exit_fail(); + + ksft_test_result_pass("exec\n"); + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c index 54d8d87f36b3..47e05fdc32c5 100644 --- a/tools/testing/selftests/timers/adjtick.c +++ b/tools/testing/selftests/timers/adjtick.c @@ -165,7 +165,7 @@ int check_tick_adj(long tickval) return 0; } -int main(int argv, char **argc) +int main(int argc, char **argv) { struct timespec raw; long tick, max, interval, err; diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index 54da4b088f4c..4332b494103d 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -92,7 +92,7 @@ long long timespec_sub(struct timespec a, struct timespec b) return ret; } -int final_ret = 0; +int final_ret; void sigalarm(int signo) { diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c index c4eab7124990..992a77f2a74c 100644 --- a/tools/testing/selftests/timers/change_skew.c +++ b/tools/testing/selftests/timers/change_skew.c @@ -55,7 +55,7 @@ int change_skew_test(int ppm) } -int main(int argv, char **argc) +int main(int argc, char **argv) { struct timex tx; int i, ret; diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index ef8eb3604595..c5264594064c 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -23,17 +23,17 @@ */ +#include <fcntl.h> #include <stdio.h> -#include <unistd.h> #include <stdlib.h> +#include <string.h> +#include <sys/stat.h> #include <sys/time.h> #include <sys/timex.h> -#include <time.h> #include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> -#include <string.h> #include <sys/wait.h> +#include <time.h> +#include <unistd.h> #include "../kselftest.h" @@ -110,21 +110,40 @@ int run_tests(int secs) sprintf(buf, "./inconsistency-check -t %i", secs); ret = system(buf); - if (ret) - return ret; + if (WIFEXITED(ret) && WEXITSTATUS(ret)) + return WEXITSTATUS(ret); ret = system("./nanosleep"); - return ret; + return WIFEXITED(ret) ? WEXITSTATUS(ret) : 0; } char clocksource_list[10][30]; -int main(int argv, char **argc) +int main(int argc, char **argv) { char orig_clk[512]; - int count, i, status; + int count, i, status, opt; + int do_sanity_check = 1; + int runtime = 60; pid_t pid; + /* Process arguments */ + while ((opt = getopt(argc, argv, "st:")) != -1) { + switch (opt) { + case 's': + do_sanity_check = 0; + break; + case 't': + runtime = atoi(optarg); + break; + default: + printf("Usage: %s [-s] [-t <secs>]\n", argv[0]); + printf(" -s: skip sanity checks\n"); + printf(" -t: Number of seconds to run\n"); + exit(-1); + } + } + get_cur_clocksource(orig_clk, 512); count = get_clocksources(clocksource_list); @@ -135,23 +154,25 @@ int main(int argv, char **argc) } /* Check everything is sane before we start switching asynchronously */ - for (i = 0; i < count; i++) { - printf("Validating clocksource %s\n", clocksource_list[i]); - if (change_clocksource(clocksource_list[i])) { - status = -1; - goto out; - } - if (run_tests(5)) { - status = -1; - goto out; + if (do_sanity_check) { + for (i = 0; i < count; i++) { + printf("Validating clocksource %s\n", + clocksource_list[i]); + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } + if (run_tests(5)) { + status = -1; + goto out; + } } } - printf("Running Asynchronous Switching Tests...\n"); pid = fork(); if (!pid) - return run_tests(60); + return run_tests(runtime); while (pid != waitpid(pid, &status, WNOHANG)) for (i = 0; i < count; i++) @@ -162,7 +183,9 @@ int main(int argv, char **argc) out: change_clocksource(orig_clk); - if (status) - return ksft_exit_fail(); - return ksft_exit_pass(); + /* Print at the end to not mix output with child process */ + ksft_print_header(); + ksft_set_plan(1); + ksft_test_result(!status, "clocksource-switch\n"); + ksft_exit(!status); } diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index e6756d9c60a7..36a49fba6c9b 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -122,30 +122,28 @@ int consistency_test(int clock_type, unsigned long seconds) if (inconsistent >= 0) { unsigned long long delta; - printf("\%s\n", start_str); + ksft_print_msg("\%s\n", start_str); for (i = 0; i < CALLS_PER_LOOP; i++) { if (i == inconsistent) - printf("--------------------\n"); - printf("%lu:%lu\n", list[i].tv_sec, + ksft_print_msg("--------------------\n"); + ksft_print_msg("%lu:%lu\n", list[i].tv_sec, list[i].tv_nsec); if (i == inconsistent + 1) - printf("--------------------\n"); + ksft_print_msg("--------------------\n"); } delta = list[inconsistent].tv_sec * NSEC_PER_SEC; delta += list[inconsistent].tv_nsec; delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC; delta -= list[inconsistent+1].tv_nsec; - printf("Delta: %llu ns\n", delta); + ksft_print_msg("Delta: %llu ns\n", delta); fflush(0); /* timestamp inconsistency*/ t = time(0); - printf("%s\n", ctime(&t)); - printf("[FAILED]\n"); + ksft_print_msg("%s\n", ctime(&t)); return -1; } now = list[0].tv_sec; } - printf("[OK]\n"); return 0; } @@ -178,16 +176,22 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); + ksft_print_header(); + ksft_set_plan(maxclocks - userclock); + for (clockid = userclock; clockid < maxclocks; clockid++) { - if (clockid == CLOCK_HWSPECIFIC) + if (clockid == CLOCK_HWSPECIFIC || clock_gettime(clockid, &ts)) { + ksft_test_result_skip("%-31s\n", clockstring(clockid)); continue; + } - if (!clock_gettime(clockid, &ts)) { - printf("Consistent %-30s ", clockstring(clockid)); - if (consistency_test(clockid, runtime)) - return ksft_exit_fail(); + if (consistency_test(clockid, runtime)) { + ksft_test_result_fail("%-31s\n", clockstring(clockid)); + ksft_exit_fail(); + } else { + ksft_test_result_pass("%-31s\n", clockstring(clockid)); } } - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index 71b5441c2fd9..df1d03516e7b 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -133,33 +133,37 @@ int main(int argc, char **argv) long long length; int clockid, ret; + ksft_print_header(); + ksft_set_plan(NR_CLOCKIDS); + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { /* Skip cputime clockids since nanosleep won't increment cputime */ if (clockid == CLOCK_PROCESS_CPUTIME_ID || clockid == CLOCK_THREAD_CPUTIME_ID || - clockid == CLOCK_HWSPECIFIC) + clockid == CLOCK_HWSPECIFIC) { + ksft_test_result_skip("%-31s\n", clockstring(clockid)); continue; + } - printf("Nanosleep %-31s ", clockstring(clockid)); fflush(stdout); length = 10; while (length <= (NSEC_PER_SEC * 10)) { ret = nanosleep_test(clockid, length); if (ret == UNSUPPORTED) { - printf("[UNSUPPORTED]\n"); + ksft_test_result_skip("%-31s\n", clockstring(clockid)); goto next; } if (ret < 0) { - printf("[FAILED]\n"); - return ksft_exit_fail(); + ksft_test_result_fail("%-31s\n", clockstring(clockid)); + ksft_exit_fail(); } length *= 100; } - printf("[OK]\n"); + ksft_test_result_pass("%-31s\n", clockstring(clockid)); next: ret = 0; } - return ksft_exit_pass(); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index b41d8dd0c40c..5beceeed0d11 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -89,7 +89,7 @@ void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw) } } -int main(int argv, char **argc) +int main(int argc, char **argv) { struct timespec mon, raw, start, end; long long delta1, delta2, interval, eppm, ppm; diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c index 8066be9aff11..63913f75b384 100644 --- a/tools/testing/selftests/timers/skew_consistency.c +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -38,7 +38,7 @@ #define NSEC_PER_SEC 1000000000LL -int main(int argv, char **argc) +int main(int argc, char **argv) { struct timex tx; int ret, ppm; diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index 5397de708d3c..48b9a803235a 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -40,7 +40,7 @@ #define ADJ_SETOFFSET 0x0100 #include <sys/syscall.h> -static int clock_adjtime(clockid_t id, struct timex *tx) +int clock_adjtime(clockid_t id, struct timex *tx) { return syscall(__NR_clock_adjtime, id, tx); } diff --git a/tools/testing/selftests/tpm2/settings b/tools/testing/selftests/tpm2/settings new file mode 100644 index 000000000000..a62d2fa1275c --- /dev/null +++ b/tools/testing/selftests/tpm2/settings @@ -0,0 +1 @@ +timeout=600 diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 44f25acfbeca..d9fa6a9ea584 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -93,8 +93,8 @@ TEST_PROGS := run_vmtests.sh TEST_FILES := test_vmalloc.sh TEST_FILES += test_hmm.sh +TEST_FILES += va_128TBswitch.sh -KSFT_KHDR_INSTALL := 1 include ../lib.mk $(OUTPUT)/madv_populate: vm_util.c diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 6bb36ca71cb5..a309876d832f 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -209,7 +209,7 @@ int main(int argc, char **argv) if (write) gup.gup_flags |= FOLL_WRITE; - gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); + gup_fd = open(GUP_TEST_FILE, O_RDWR); if (gup_fd == -1) { switch (errno) { case EACCES: @@ -224,7 +224,7 @@ int main(int argc, char **argv) printf("check if CONFIG_GUP_TEST is enabled in kernel config\n"); break; default: - perror("failed to open /sys/kernel/debug/gup_test"); + perror("failed to open " GUP_TEST_FILE); break; } exit(KSFT_SKIP); diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 203323967b50..529f53b40296 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -36,6 +36,7 @@ * in the usual include/uapi/... directory. */ #include "../../../../lib/test_hmm_uapi.h" +#include "../../../../mm/gup_test.h" struct hmm_buffer { void *ptr; @@ -46,12 +47,22 @@ struct hmm_buffer { uint64_t faults; }; +enum { + HMM_PRIVATE_DEVICE_ONE, + HMM_PRIVATE_DEVICE_TWO, + HMM_COHERENCE_DEVICE_ONE, + HMM_COHERENCE_DEVICE_TWO, +}; + #define TWOMEG (1 << 21) #define HMM_BUFFER_SIZE (1024 << 12) #define HMM_PATH_MAX 64 #define NTIMES 10 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) +/* Just the flags we need, copied from mm.h: */ +#define FOLL_WRITE 0x01 /* check pte is writable */ +#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */ FIXTURE(hmm) { @@ -60,6 +71,21 @@ FIXTURE(hmm) unsigned int page_shift; }; +FIXTURE_VARIANT(hmm) +{ + int device_number; +}; + +FIXTURE_VARIANT_ADD(hmm, hmm_device_private) +{ + .device_number = HMM_PRIVATE_DEVICE_ONE, +}; + +FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent) +{ + .device_number = HMM_COHERENCE_DEVICE_ONE, +}; + FIXTURE(hmm2) { int fd0; @@ -68,6 +94,24 @@ FIXTURE(hmm2) unsigned int page_shift; }; +FIXTURE_VARIANT(hmm2) +{ + int device_number0; + int device_number1; +}; + +FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private) +{ + .device_number0 = HMM_PRIVATE_DEVICE_ONE, + .device_number1 = HMM_PRIVATE_DEVICE_TWO, +}; + +FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent) +{ + .device_number0 = HMM_COHERENCE_DEVICE_ONE, + .device_number1 = HMM_COHERENCE_DEVICE_TWO, +}; + static int hmm_open(int unit) { char pathname[HMM_PATH_MAX]; @@ -81,12 +125,19 @@ static int hmm_open(int unit) return fd; } +static bool hmm_is_coherent_type(int dev_num) +{ + return (dev_num >= HMM_COHERENCE_DEVICE_ONE); +} + FIXTURE_SETUP(hmm) { self->page_size = sysconf(_SC_PAGE_SIZE); self->page_shift = ffs(self->page_size) - 1; - self->fd = hmm_open(0); + self->fd = hmm_open(variant->device_number); + if (self->fd < 0 && hmm_is_coherent_type(variant->device_number)) + SKIP(exit(0), "DEVICE_COHERENT not available"); ASSERT_GE(self->fd, 0); } @@ -95,9 +146,11 @@ FIXTURE_SETUP(hmm2) self->page_size = sysconf(_SC_PAGE_SIZE); self->page_shift = ffs(self->page_size) - 1; - self->fd0 = hmm_open(0); + self->fd0 = hmm_open(variant->device_number0); + if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0)) + SKIP(exit(0), "DEVICE_COHERENT not available"); ASSERT_GE(self->fd0, 0); - self->fd1 = hmm_open(1); + self->fd1 = hmm_open(variant->device_number1); ASSERT_GE(self->fd1, 0); } @@ -211,6 +264,20 @@ static void hmm_nanosleep(unsigned int n) nanosleep(&t, NULL); } +static int hmm_migrate_sys_to_dev(int fd, + struct hmm_buffer *buffer, + unsigned long npages) +{ + return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages); +} + +static int hmm_migrate_dev_to_sys(int fd, + struct hmm_buffer *buffer, + unsigned long npages) +{ + return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages); +} + /* * Simple NULL test of device open/close. */ @@ -875,7 +942,7 @@ TEST_F(hmm, migrate) ptr[i] = i; /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, npages); @@ -923,7 +990,7 @@ TEST_F(hmm, migrate_fault) ptr[i] = i; /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, npages); @@ -936,7 +1003,7 @@ TEST_F(hmm, migrate_fault) ASSERT_EQ(ptr[i], i); /* Migrate memory to the device again. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, npages); @@ -976,7 +1043,7 @@ TEST_F(hmm, migrate_shared) ASSERT_NE(buffer->ptr, MAP_FAILED); /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, -ENOENT); hmm_buffer_free(buffer); @@ -1015,7 +1082,7 @@ TEST_F(hmm2, migrate_mixed) p = buffer->ptr; /* Migrating a protected area should be an error. */ - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages); + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages); ASSERT_EQ(ret, -EINVAL); /* Punch a hole after the first page address. */ @@ -1023,7 +1090,7 @@ TEST_F(hmm2, migrate_mixed) ASSERT_EQ(ret, 0); /* We expect an error if the vma doesn't cover the range. */ - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3); + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3); ASSERT_EQ(ret, -EINVAL); /* Page 2 will be a read-only zero page. */ @@ -1055,13 +1122,13 @@ TEST_F(hmm2, migrate_mixed) /* Now try to migrate pages 2-5 to device 1. */ buffer->ptr = p + 2 * self->page_size; - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4); + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, 4); /* Page 5 won't be migrated to device 0 because it's on device 1. */ buffer->ptr = p + 5 * self->page_size; - ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); + ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1); ASSERT_EQ(ret, -ENOENT); buffer->ptr = p; @@ -1070,8 +1137,12 @@ TEST_F(hmm2, migrate_mixed) } /* - * Migrate anonymous memory to device private memory and fault it back to system - * memory multiple times. + * Migrate anonymous memory to device memory and back to system memory + * multiple times. In case of private zone configuration, this is done + * through fault pages accessed by CPU. In case of coherent zone configuration, + * the pages from the device should be explicitly migrated back to system memory. + * The reason is Coherent device zone has coherent access by CPU, therefore + * it will not generate any page fault. */ TEST_F(hmm, migrate_multiple) { @@ -1107,8 +1178,7 @@ TEST_F(hmm, migrate_multiple) ptr[i] = i; /* Migrate memory to device. */ - ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, - npages); + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, npages); @@ -1116,7 +1186,13 @@ TEST_F(hmm, migrate_multiple) for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ASSERT_EQ(ptr[i], i); - /* Fault pages back to system memory and check them. */ + /* Migrate back to system memory and check them. */ + if (hmm_is_coherent_type(variant->device_number)) { + ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + } + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) ASSERT_EQ(ptr[i], i); @@ -1354,13 +1430,13 @@ TEST_F(hmm2, snapshot) /* Page 5 will be migrated to device 0. */ buffer->ptr = p + 5 * self->page_size; - ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1); + ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, 1); /* Page 6 will be migrated to device 1. */ buffer->ptr = p + 6 * self->page_size; - ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1); + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1); ASSERT_EQ(ret, 0); ASSERT_EQ(buffer->cpages, 1); @@ -1377,9 +1453,16 @@ TEST_F(hmm2, snapshot) ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ); ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ); ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE); - ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | - HMM_DMIRROR_PROT_WRITE); - ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); + if (!hmm_is_coherent_type(variant->device_number0)) { + ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL | + HMM_DMIRROR_PROT_WRITE); + ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE); + } else { + ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | + HMM_DMIRROR_PROT_WRITE); + ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE | + HMM_DMIRROR_PROT_WRITE); + } hmm_buffer_free(buffer); } @@ -1520,9 +1603,19 @@ TEST_F(hmm2, double_map) for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) ASSERT_EQ(ptr[i], i); - /* Punch a hole after the first page address. */ - ret = munmap(buffer->ptr + self->page_size, self->page_size); + /* Migrate pages to device 1 and try to read from device 0. */ + ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages); ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + ASSERT_EQ(buffer->faults, 1); + + /* Check what device 0 read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); hmm_buffer_free(buffer); } @@ -1685,4 +1778,190 @@ TEST_F(hmm, exclusive_cow) hmm_buffer_free(buffer); } +static int gup_test_exec(int gup_fd, unsigned long addr, int cmd, + int npages, int size, int flags) +{ + struct gup_test gup = { + .nr_pages_per_call = npages, + .addr = addr, + .gup_flags = FOLL_WRITE | flags, + .size = size, + }; + + if (ioctl(gup_fd, cmd, &gup)) { + perror("ioctl on error\n"); + return errno; + } + + return 0; +} + +/* + * Test get user device pages through gup_test. Setting PIN_LONGTERM flag. + * This should trigger a migration back to system memory for both, private + * and coherent type pages. + * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added + * to your configuration before you run it. + */ +TEST_F(hmm, hmm_gup_test) +{ + struct hmm_buffer *buffer; + int gup_fd; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + unsigned char *m; + + gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); + if (gup_fd == -1) + SKIP(return, "Skipping test, could not find gup_test driver"); + + npages = 4; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + /* Check what the device read. */ + for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr, + GUP_BASIC_TEST, 1, self->page_size, 0), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 1 * self->page_size, + GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 2 * self->page_size, + PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0); + ASSERT_EQ(gup_test_exec(gup_fd, + (unsigned long)buffer->ptr + 3 * self->page_size, + PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0); + + /* Take snapshot to CPU pagetables */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + m = buffer->mirror; + if (hmm_is_coherent_type(variant->device_number)) { + ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]); + ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]); + } else { + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]); + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]); + } + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]); + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]); + /* + * Check again the content on the pages. Make sure there's no + * corrupted data. + */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ASSERT_EQ(ptr[i], i); + + close(gup_fd); + hmm_buffer_free(buffer); +} + +/* + * Test copy-on-write in device pages. + * In case of writing to COW private page(s), a page fault will migrate pages + * back to system memory first. Then, these pages will be duplicated. In case + * of COW device coherent type, pages are duplicated directly from device + * memory. + */ +TEST_F(hmm, hmm_cow_in_device) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned long i; + int *ptr; + int ret; + unsigned char *m; + pid_t pid; + int status; + + npages = 4; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(size); + ASSERT_NE(buffer->mirror, NULL); + + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + buffer->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Initialize buffer in system memory. */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Migrate memory to device. */ + + ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + pid = fork(); + if (pid == -1) + ASSERT_EQ(pid, 0); + if (!pid) { + /* Child process waitd for SIGTERM from the parent. */ + while (1) { + } + perror("Should not reach this\n"); + exit(0); + } + /* Parent process writes to COW pages(s) and gets a + * new copy in system. In case of device private pages, + * this write causes a migration to system mem first. + */ + for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i) + ptr[i] = i; + + /* Terminate child and wait */ + EXPECT_EQ(0, kill(pid, SIGTERM)); + EXPECT_EQ(pid, waitpid(pid, &status, 0)); + EXPECT_NE(0, WIFSIGNALED(status)); + EXPECT_EQ(SIGTERM, WTERMSIG(status)); + + /* Take snapshot to CPU pagetables */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + m = buffer->mirror; + for (i = 0; i < npages; i++) + ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]); + + hmm_buffer_free(buffer); +} TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index 585978f181ed..e63a0214f639 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -107,7 +107,7 @@ static void register_region_with_uffd(char *addr, size_t len) int main(int argc, char *argv[]) { - size_t length; + size_t length = 0; if (argc != 2 && argc != 3) { printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]); diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c index 6c6af40f5747..3c9943131881 100644 --- a/tools/testing/selftests/vm/hugetlb-madvise.c +++ b/tools/testing/selftests/vm/hugetlb-madvise.c @@ -89,10 +89,11 @@ void write_fault_pages(void *addr, unsigned long nr_pages) void read_fault_pages(void *addr, unsigned long nr_pages) { - unsigned long i, tmp; + unsigned long dummy = 0; + unsigned long i; for (i = 0; i < nr_pages; i++) - tmp += *((unsigned long *)(addr + (i * huge_page_size))); + dummy += *((unsigned long *)(addr + (i * huge_page_size))); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c index 2fcf24312da8..f5e4e0bbd081 100644 --- a/tools/testing/selftests/vm/ksm_tests.c +++ b/tools/testing/selftests/vm/ksm_tests.c @@ -54,6 +54,7 @@ static int ksm_write_sysfs(const char *file_path, unsigned long val) } if (fprintf(f, "%lu", val) < 0) { perror("fprintf"); + fclose(f); return 1; } fclose(f); @@ -72,6 +73,7 @@ static int ksm_read_sysfs(const char *file_path, unsigned long *val) } if (fscanf(f, "%lu", val) != 1) { perror("fscanf"); + fclose(f); return 1; } fclose(f); diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c index 96671c2f7d48..6c62966ab5db 100644 --- a/tools/testing/selftests/vm/mrelease_test.c +++ b/tools/testing/selftests/vm/mrelease_test.c @@ -62,19 +62,22 @@ static int alloc_noexit(unsigned long nr_pages, int pipefd) /* The process_mrelease calls in this test are expected to fail */ static void run_negative_tests(int pidfd) { + int res; /* Test invalid flags. Expect to fail with EINVAL error code. */ if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) || errno != EINVAL) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease with wrong flags"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } /* * Test reaping while process is alive with no pending SIGKILL. * Expect to fail with EINVAL error code. */ if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease on a live process"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } } @@ -100,8 +103,9 @@ int main(void) /* Test a wrong pidfd */ if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease with wrong pidfd"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } /* Start the test with 1MB child memory allocation */ @@ -156,8 +160,9 @@ retry: run_negative_tests(pidfd); if (kill(pid, SIGKILL)) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("kill"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } success = (syscall(__NR_process_mrelease, pidfd, 0) == 0); @@ -172,9 +177,10 @@ retry: if (errno == ESRCH) { retry = (size <= MAX_SIZE_MB); } else { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease"); waitpid(pid, NULL, 0); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } } diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh index 41fce8bea929..de86983b8a0f 100755 --- a/tools/testing/selftests/vm/run_vmtests.sh +++ b/tools/testing/selftests/vm/run_vmtests.sh @@ -151,7 +151,7 @@ if [ $VADDR64 -ne 0 ]; then run_test ./virtual_address_range # virtual address 128TB switch test - run_test ./va_128TBswitch + run_test ./va_128TBswitch.sh fi # VADDR64 # vmalloc stability smoke test @@ -179,4 +179,17 @@ run_test ./ksm_tests -N -m 1 # KSM test with 2 NUMA nodes and merge_across_nodes = 0 run_test ./ksm_tests -N -m 0 +# protection_keys tests +if [ -x ./protection_keys_32 ] +then + run_test ./protection_keys_32 +fi + +if [ -x ./protection_keys_64 ] +then + run_test ./protection_keys_64 +fi + +run_test ./soft-dirty + exit $exitcode diff --git a/tools/testing/selftests/vm/soft-dirty.c b/tools/testing/selftests/vm/soft-dirty.c index 08ab62a4a9d0..e3a43f5d4fa2 100644 --- a/tools/testing/selftests/vm/soft-dirty.c +++ b/tools/testing/selftests/vm/soft-dirty.c @@ -121,13 +121,76 @@ static void test_hugepage(int pagemap_fd, int pagesize) free(map); } +static void test_mprotect(int pagemap_fd, int pagesize, bool anon) +{ + const char *type[] = {"file", "anon"}; + const char *fname = "./soft-dirty-test-file"; + int test_fd; + char *map; + + if (anon) { + map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (!map) + ksft_exit_fail_msg("anon mmap failed\n"); + } else { + test_fd = open(fname, O_RDWR | O_CREAT); + if (test_fd < 0) { + ksft_test_result_skip("Test %s open() file failed\n", __func__); + return; + } + unlink(fname); + ftruncate(test_fd, pagesize); + map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, + MAP_SHARED, test_fd, 0); + if (!map) + ksft_exit_fail_msg("file mmap failed\n"); + } + + *map = 1; + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1, + "Test %s-%s dirty bit of new written page\n", + __func__, type[anon]); + clear_softdirty(); + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0, + "Test %s-%s soft-dirty clear after clear_refs\n", + __func__, type[anon]); + mprotect(map, pagesize, PROT_READ); + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0, + "Test %s-%s soft-dirty clear after marking RO\n", + __func__, type[anon]); + mprotect(map, pagesize, PROT_READ|PROT_WRITE); + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0, + "Test %s-%s soft-dirty clear after marking RW\n", + __func__, type[anon]); + *map = 2; + ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1, + "Test %s-%s soft-dirty after rewritten\n", + __func__, type[anon]); + + munmap(map, pagesize); + + if (!anon) + close(test_fd); +} + +static void test_mprotect_anon(int pagemap_fd, int pagesize) +{ + test_mprotect(pagemap_fd, pagesize, true); +} + +static void test_mprotect_file(int pagemap_fd, int pagesize) +{ + test_mprotect(pagemap_fd, pagesize, false); +} + int main(int argc, char **argv) { int pagemap_fd; int pagesize; ksft_print_header(); - ksft_set_plan(5); + ksft_set_plan(15); pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY); if (pagemap_fd < 0) @@ -138,6 +201,8 @@ int main(int argc, char **argv) test_simple(pagemap_fd, pagesize); test_vma_reuse(pagemap_fd, pagesize); test_hugepage(pagemap_fd, pagesize); + test_mprotect_anon(pagemap_fd, pagesize); + test_mprotect_file(pagemap_fd, pagesize); close(pagemap_fd); diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh index 0647b525a625..539c9371e592 100755 --- a/tools/testing/selftests/vm/test_hmm.sh +++ b/tools/testing/selftests/vm/test_hmm.sh @@ -40,11 +40,26 @@ check_test_requirements() load_driver() { - modprobe $DRIVER > /dev/null 2>&1 + if [ $# -eq 0 ]; then + modprobe $DRIVER > /dev/null 2>&1 + else + if [ $# -eq 2 ]; then + modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2 + > /dev/null 2>&1 + else + echo "Missing module parameters. Make sure pass"\ + "spm_addr_dev0 and spm_addr_dev1" + usage + fi + fi if [ $? == 0 ]; then major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices) mknod /dev/hmm_dmirror0 c $major 0 mknod /dev/hmm_dmirror1 c $major 1 + if [ $# -eq 2 ]; then + mknod /dev/hmm_dmirror2 c $major 2 + mknod /dev/hmm_dmirror3 c $major 3 + fi fi } @@ -58,7 +73,7 @@ run_smoke() { echo "Running smoke test. Note, this test provides basic coverage." - load_driver + load_driver $1 $2 $(dirname "${BASH_SOURCE[0]}")/hmm-tests unload_driver } @@ -75,6 +90,9 @@ usage() echo "# Smoke testing" echo "./${TEST_NAME}.sh smoke" echo + echo "# Smoke testing with SPM enabled" + echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>" + echo exit 0 } @@ -84,7 +102,7 @@ function run_test() usage else if [ "$1" = "smoke" ]; then - run_smoke + run_smoke $2 $3 else usage fi diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 0bdfc1955229..7c3f1b0ab468 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -860,7 +860,7 @@ static int stress(struct uffd_stats *uffd_stats) /* * Be strict and immediately zap area_src, the whole area has * been transferred already by the background treads. The - * area_src could then be faulted in in a racy way by still + * area_src could then be faulted in a racy way by still * running uffdio_threads reading zeropages after we zapped * area_src (but they're guaranteed to get -EEXIST from * UFFDIO_COPY without writing zero pages into area_dst @@ -931,7 +931,7 @@ static int faulting_process(int signal_test) unsigned long split_nr_pages; unsigned long lastnr; struct sigaction act; - unsigned long signalled = 0; + volatile unsigned long signalled = 0; split_nr_pages = (nr_pages + 1) / 2; @@ -946,7 +946,7 @@ static int faulting_process(int signal_test) } for (nr = 0; nr < split_nr_pages; nr++) { - int steps = 1; + volatile int steps = 1; unsigned long offset = nr * page_size; if (signal_test) { diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c index da6ec3b53ea8..1d2068989883 100644 --- a/tools/testing/selftests/vm/va_128TBswitch.c +++ b/tools/testing/selftests/vm/va_128TBswitch.c @@ -231,7 +231,7 @@ static struct testcase hugetlb_testcases[] = { static int run_test(struct testcase *test, int count) { void *p; - int i, ret = 0; + int i, ret = KSFT_PASS; for (i = 0; i < count; i++) { struct testcase *t = test + i; @@ -242,13 +242,13 @@ static int run_test(struct testcase *test, int count) if (p == MAP_FAILED) { printf("FAILED\n"); - ret = 1; + ret = KSFT_FAIL; continue; } if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) { printf("FAILED\n"); - ret = 1; + ret = KSFT_FAIL; } else { /* * Do a dereference of the address returned so that we catch @@ -280,7 +280,7 @@ int main(int argc, char **argv) int ret; if (!supported_arch()) - return 0; + return KSFT_SKIP; ret = run_test(testcases, ARRAY_SIZE(testcases)); if (argc == 2 && !strcmp(argv[1], "--run-hugetlb")) diff --git a/tools/testing/selftests/vm/va_128TBswitch.sh b/tools/testing/selftests/vm/va_128TBswitch.sh new file mode 100755 index 000000000000..41580751dc51 --- /dev/null +++ b/tools/testing/selftests/vm/va_128TBswitch.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2022 Adam Sindelar (Meta) <adam@wowsignal.io> +# +# This is a test for mmap behavior with 5-level paging. This script wraps the +# real test to check that the kernel is configured to support at least 5 +# pagetable levels. + +# 1 means the test failed +exitcode=1 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +fail() +{ + echo "$1" + exit $exitcode +} + +check_supported_x86_64() +{ + local config="/proc/config.gz" + [[ -f "${config}" ]] || config="/boot/config-$(uname -r)" + [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot" + + # gzip -dcfq automatically handles both compressed and plaintext input. + # See man 1 gzip under '-f'. + local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2) + + if [[ "${pg_table_levels}" -lt 5 ]]; then + echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test" + exit $ksft_skip + fi +} + +check_test_requirements() +{ + # The test supports x86_64 and powerpc64. We currently have no useful + # eligibility check for powerpc64, and the test itself will reject other + # architectures. + case `uname -m` in + "x86_64") + check_supported_x86_64 + ;; + *) + return 0 + ;; + esac +} + +check_test_requirements +./va_128TBswitch diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile index 7d1b80988d8a..fda76282d34b 100644 --- a/tools/testing/selftests/wireguard/qemu/Makefile +++ b/tools/testing/selftests/wireguard/qemu/Makefile @@ -19,8 +19,6 @@ endif MIRROR := https://download.wireguard.com/qemu-test/distfiles/ KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug) -rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) -WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*) default: qemu @@ -109,20 +107,22 @@ CHOST := x86_64-linux-musl QEMU_ARCH := x86_64 KERNEL_ARCH := x86_64 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host -machine q35,accel=kvm +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi else -QEMU_MACHINE := -cpu max -machine q35 +QEMU_MACHINE := -cpu max -machine microvm -no-acpi endif else ifeq ($(ARCH),i686) CHOST := i686-linux-musl QEMU_ARCH := i386 KERNEL_ARCH := x86 KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH)) -QEMU_MACHINE := -cpu host -machine q35,accel=kvm +QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi else -QEMU_MACHINE := -cpu max -machine q35 +QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi endif else ifeq ($(ARCH),mips64) CHOST := mips64-linux-musl @@ -208,10 +208,11 @@ QEMU_ARCH := m68k KERNEL_ARCH := m68k KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config) +QEMU_VPORT_RESULT := virtio-serial-device ifeq ($(HOST_ARCH),$(ARCH)) -QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE) +QEMU_MACHINE := -cpu host,accel=kvm -machine virt -append $(KERNEL_CMDLINE) else -QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE) +QEMU_MACHINE := -machine virt -smp 1 -append $(KERNEL_CMDLINE) endif else ifeq ($(ARCH),riscv64) CHOST := riscv64-linux-musl @@ -247,8 +248,13 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CM else QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE) endif +else ifeq ($(ARCH),um) +CHOST := $(HOST_ARCH)-linux-musl +KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux +KERNEL_ARCH := um +KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/um.config) else -$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x) +$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x, um) endif TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz @@ -261,7 +267,9 @@ $(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_ STRIP := $(CHOST)-strip CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST) $(info Building for $(CHOST) using $(CBUILD)) +ifneq ($(ARCH),um) export CROSS_COMPILE := $(CHOST)- +endif export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH) export CC := $(CHOST)-gcc CCACHE_PATH := $(shell which ccache 2>/dev/null) @@ -278,6 +286,7 @@ comma := , build: $(KERNEL_BZIMAGE) qemu: $(KERNEL_BZIMAGE) rm -f $(BUILD_PATH)/result +ifneq ($(ARCH),um) timeout --foreground 20m qemu-system-$(QEMU_ARCH) \ -nodefaults \ -nographic \ @@ -290,6 +299,13 @@ qemu: $(KERNEL_BZIMAGE) -no-reboot \ -monitor none \ -kernel $< +else + timeout --foreground 20m $< \ + $(KERNEL_CMDLINE) \ + mem=$$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \ + noreboot \ + con1=fd:51 51>$(BUILD_PATH)/result </dev/null 2>&1 | cat +endif grep -Fq success $(BUILD_PATH)/result $(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init @@ -322,8 +338,9 @@ $(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$( cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,) -$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES) +$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) +.PHONY: $(KERNEL_BZIMAGE) $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config index fc7959bef9c2..0579c66be83e 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/arm.config +++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config @@ -7,6 +7,7 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config index f3066be81c19..2a3307bbe534 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config +++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config @@ -7,6 +7,7 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_VIRTIO_MENU=y CONFIG_VIRTIO_MMIO=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1" CONFIG_CPU_BIG_ENDIAN=y diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config index 6d90892a85a2..35b06502606f 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/i686.config +++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config @@ -1,6 +1,10 @@ -CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config index 82c925e49beb..39c48cba56b7 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config +++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config @@ -1,9 +1,7 @@ CONFIG_MMU=y +CONFIG_VIRT=y CONFIG_M68KCLASSIC=y -CONFIG_M68040=y -CONFIG_MAC=y -CONFIG_SERIAL_PMACZILOG=y -CONFIG_SERIAL_PMACZILOG_TTYS=y -CONFIG_SERIAL_PMACZILOG_CONSOLE=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_VIRTIO_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y +CONFIG_CMDLINE="console=ttyGF0 wg.success=vport0p1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config index d7ec63c17b30..2a84402353ab 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mips.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config @@ -6,6 +6,7 @@ CONFIG_POWER_RESET=y CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config index 18a498293737..56146a101e7e 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config +++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config @@ -7,6 +7,7 @@ CONFIG_POWER_RESET=y CONFIG_POWER_RESET_SYSCON=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" CONFIG_FRAME_WARN=1024 diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config index 5e04882e8e35..174a9ffe2a36 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config +++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config @@ -4,6 +4,7 @@ CONFIG_PPC_85xx=y CONFIG_PHYS_64BIT=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_COMPAT_32BIT_TIME=y CONFIG_MATH_EMULATION=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" diff --git a/tools/testing/selftests/wireguard/qemu/arch/um.config b/tools/testing/selftests/wireguard/qemu/arch/um.config new file mode 100644 index 000000000000..c8b229e0810e --- /dev/null +++ b/tools/testing/selftests/wireguard/qemu/arch/um.config @@ -0,0 +1,3 @@ +CONFIG_64BIT=y +CONFIG_CMDLINE="wg.success=tty1 panic_on_warn=1" +CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config index efa00693e08b..cf2d1376d121 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config +++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config @@ -1,6 +1,9 @@ -CONFIG_ACPI=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_VIRTIO_MENU=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1" +CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t" CONFIG_FRAME_WARN=1280 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 2b321b8a96cf..9d172210e2c6 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -18,15 +18,12 @@ CONFIG_DEBUG_VM=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_HAVE_DEBUG_STACKOVERFLOW=y CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_HAVE_ARCH_KMEMCHECK=y CONFIG_HAVE_ARCH_KASAN=y CONFIG_KASAN=y CONFIG_KASAN_INLINE=y CONFIG_UBSAN=y CONFIG_UBSAN_SANITIZE_ALL=y -CONFIG_UBSAN_NULL=y CONFIG_DEBUG_KMEMLEAK=y -CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192 CONFIG_DEBUG_STACK_USAGE=y CONFIG_DEBUG_SHIRQ=y CONFIG_WQ_WATCHDOG=y @@ -35,7 +32,6 @@ CONFIG_SCHED_INFO=y CONFIG_SCHEDSTATS=y CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_TIMER_STATS=y CONFIG_DEBUG_PREEMPT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y @@ -49,7 +45,6 @@ CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y -CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 CONFIG_RCU_TRACE=y CONFIG_RCU_EQS_DEBUG=y diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c index c9e128436546..3e49924dd77e 100644 --- a/tools/testing/selftests/wireguard/qemu/init.c +++ b/tools/testing/selftests/wireguard/qemu/init.c @@ -11,6 +11,7 @@ #include <stdlib.h> #include <stdbool.h> #include <fcntl.h> +#include <time.h> #include <sys/wait.h> #include <sys/mount.h> #include <sys/stat.h> @@ -70,6 +71,15 @@ static void seed_rng(void) close(fd); } +static void set_time(void) +{ + if (time(NULL)) + return; + pretty_message("[+] Setting fake time..."); + if (stime(&(time_t){1433512680}) < 0) + panic("settimeofday()"); +} + static void mount_filesystems(void) { pretty_message("[+] Mounting filesystems..."); @@ -259,6 +269,7 @@ int main(int argc, char *argv[]) print_banner(); mount_filesystems(); seed_rng(); + set_time(); kmod_selftests(); enable_logging(); clear_leaks(); diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index bad88f4b0a03..ce2a04717300 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -19,7 +19,6 @@ CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y -CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y @@ -58,7 +57,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_HZ_PERIODIC=n CONFIG_HIGH_RES_TIMERS=y -CONFIG_ARCH_RANDOM=y CONFIG_FILE_LOCKING=y CONFIG_POSIX_TIMERS=y CONFIG_DEVTMPFS=y |