aboutsummaryrefslogtreecommitdiff
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/Makefile3
-rw-r--r--tools/testing/selftests/alsa/alsa-local.h10
-rw-r--r--tools/testing/selftests/alsa/conf.c100
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c10
-rwxr-xr-xtools/testing/selftests/amd-pstate/gitsource.sh17
-rwxr-xr-xtools/testing/selftests/amd-pstate/run.sh21
-rwxr-xr-xtools/testing/selftests/amd-pstate/tbench.sh4
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c54
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S19
-rw-r--r--tools/testing/selftests/bpf/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h19
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c6
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c116
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/config1
-rw-r--r--tools/testing/selftests/bpf/config.aarch6417
-rw-r--r--tools/testing/selftests/bpf/config.s390x9
-rw-r--r--tools/testing/selftests/bpf/config.vm12
-rw-r--r--tools/testing/selftests/bpf/config.x86_6412
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_percpu_stats.c59
-rw-r--r--tools/testing/selftests/bpf/netlink_helpers.c358
-rw-r--r--tools/testing/selftests/bpf/netlink_helpers.h46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bind_perm.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c204
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c158
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_iter.c33
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/iters.c151
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c33
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_buf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c2124
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c113
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_helpers.h4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c687
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c137
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c317
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpffs.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vmlinux.c16
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c5
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tasks.c (renamed from tools/testing/selftests/bpf/progs/bpf_iter_task.c)0
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop_bench.c13
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c1
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c40
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c717
-rw-r--r--tools/testing/selftests/bpf/progs/iters_css.c72
-rw-r--r--tools/testing/selftests/bpf/progs/iters_css_task.c102
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task.c41
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c105
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_vma.c1
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash.c71
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf180.c22
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c19
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h78
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_ma.c180
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_ldsx_insn.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c13
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bswap.c3
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cfg.c62
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c3
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c242
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c3
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c9
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c3
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c133
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sdiv.c3
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c86
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_hw_metadata.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c88
-rw-r--r--tools/testing/selftests/bpf/test_loader.c35
-rw-r--r--tools/testing/selftests/bpf/test_maps.c17
-rw-r--r--tools/testing/selftests/bpf/test_maps.h5
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c2
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.c33
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c32
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/ld_imm64.c8
-rw-r--r--tools/testing/selftests/bpf/veristat.c89
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh4
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c78
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c19
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c2
-rw-r--r--tools/testing/selftests/capabilities/Makefile2
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c8
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c8
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh467
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c234
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c48
-rw-r--r--tools/testing/selftests/clone3/clone3.c272
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c4
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h13
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c2
-rw-r--r--tools/testing/selftests/core/close_range_test.c28
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/debugfs_attrs.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/debugfs_empty_targets.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/debugfs_huge_count_read_write.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/debugfs_rm_non_contexts.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/debugfs_schemes.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/debugfs_target_ids.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/lru_sort.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/reclaim.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/sysfs.sh1
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh0
-rw-r--r--tools/testing/selftests/dmabuf-heaps/.gitignore1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/pci_reset.sh58
-rw-r--r--tools/testing/selftests/dt/.gitignore1
-rw-r--r--tools/testing/selftests/dt/Makefile21
-rw-r--r--tools/testing/selftests/dt/compatible_ignore_list1
-rw-r--r--tools/testing/selftests/dt/ktap_helpers.sh70
-rwxr-xr-xtools/testing/selftests/dt/test_unprobed_devices.sh83
-rw-r--r--tools/testing/selftests/efivarfs/create-read.c2
-rw-r--r--tools/testing/selftests/exec/execveat.c87
-rw-r--r--tools/testing/selftests/firmware/fw_namespace.c4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc13
-rw-r--r--tools/testing/selftests/hid/Makefile10
-rw-r--r--tools/testing/selftests/hid/progs/hid.c3
-rw-r--r--tools/testing/selftests/hid/progs/hid_bpf_helpers.h77
-rw-r--r--tools/testing/selftests/iommu/iommufd.c379
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c7
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h233
-rw-r--r--tools/testing/selftests/kselftest.h46
-rw-r--r--tools/testing/selftests/kvm/Makefile25
-rw-r--r--tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c4
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c12
-rw-r--r--tools/testing/selftests/kvm/aarch64/page_fault_test.c11
-rw-r--r--tools/testing/selftests/kvm/aarch64/set_id_regs.c481
-rw-r--r--tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c670
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h1
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h8
-rw-r--r--tools/testing/selftests/kvm/include/ucall_common.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h23
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c6
-rw-r--r--tools/testing/selftests/kvm/lib/guest_sprintf.c7
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/apic.c2
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c9
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c233
-rw-r--r--tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c47
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c2
-rwxr-xr-xtools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh1
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c110
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c4
-rw-r--r--tools/testing/selftests/landlock/base_test.c2
-rw-r--r--tools/testing/selftests/landlock/common.h13
-rw-r--r--tools/testing/selftests/landlock/config4
-rw-r--r--tools/testing/selftests/landlock/fs_test.c69
-rw-r--r--tools/testing/selftests/landlock/net_test.c1738
-rw-r--r--tools/testing/selftests/lkdtm/config1
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt3
-rw-r--r--tools/testing/selftests/mm/.gitignore3
-rw-r--r--tools/testing/selftests/mm/Makefile4
-rw-r--r--tools/testing/selftests/mm/config1
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c3
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c19
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c73
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c66
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c137
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c1
-rw-r--r--tools/testing/selftests/mm/mremap_test.c303
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c1661
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h2
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh12
-rw-r--r--tools/testing/selftests/mm/vm_util.c19
-rw-r--r--tools/testing/selftests/mm/vm_util.h1
-rw-r--r--tools/testing/selftests/net/Makefile3
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c50
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh184
-rwxr-xr-xtools/testing/selftests/net/fq_band_pktlimit.sh57
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c268
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh23
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh112
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh400
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh91
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh40
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh19
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh143
-rwxr-xr-xtools/testing/selftests/net/net_helper.sh22
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh87
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh21
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py48
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh2
-rwxr-xr-xtools/testing/selftests/net/route_localnet.sh6
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh2
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh108
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh13
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh5
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh5
-rw-r--r--tools/testing/selftests/netfilter/Makefile2
-rwxr-xr-xtools/testing/selftests/netfilter/nft_audit.sh52
-rwxr-xr-xtools/testing/selftests/netfilter/xt_string.sh128
-rw-r--r--tools/testing/selftests/nolibc/.gitignore1
-rw-r--r--tools/testing/selftests/nolibc/Makefile57
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c26
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.h9
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c28
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c12
-rw-r--r--tools/testing/selftests/proc/proc-empty-vm.c182
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/rcutorture/bin/functions.sh29
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh17
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh9
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE021
-rw-r--r--tools/testing/selftests/resctrl/Makefile2
-rw-r--r--tools/testing/selftests/resctrl/cache.c7
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c21
-rw-r--r--tools/testing/selftests/resctrl/cmt_test.c37
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c6
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c11
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h24
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c180
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c86
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c162
-rw-r--r--tools/testing/selftests/riscv/hwprobe/Makefile9
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c228
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c64
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.h15
-rw-r--r--tools/testing/selftests/riscv/mm/Makefile6
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c (renamed from tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c)2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c (renamed from tools/testing/selftests/riscv/mm/testcases/mmap_default.c)2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h (renamed from tools/testing/selftests/riscv/mm/testcases/mmap_test.h)0
-rwxr-xr-xtools/testing/selftests/riscv/mm/run_mmap.sh (renamed from tools/testing/selftests/riscv/mm/testcases/run_mmap.sh)0
-rw-r--r--tools/testing/selftests/rseq/param_test.c20
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c2
-rwxr-xr-xtools/testing/selftests/static_keys/test_static_keys.sh8
-rw-r--r--tools/testing/selftests/tc-testing/Makefile29
-rw-r--r--tools/testing/selftests/tc-testing/README2
-rw-r--r--tools/testing/selftests/tc-testing/action-ebpfbin0 -> 856 bytes
-rw-r--r--tools/testing/selftests/tc-testing/config9
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py67
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py210
-rwxr-xr-xtools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh (renamed from tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh)0
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json14
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json10
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/u32.json57
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json32
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json8
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py14
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh69
-rw-r--r--tools/testing/selftests/tdx/.gitignore1
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c108
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c157
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c26
-rw-r--r--tools/testing/selftests/timers/posix_timers.c81
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c8
-rw-r--r--tools/testing/selftests/user_events/.gitignore4
-rw-r--r--tools/testing/selftests/user_events/abi_test.c69
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c54
-rw-r--r--tools/testing/selftests/x86/Makefile4
-rw-r--r--tools/testing/selftests/x86/lam.c6
-rw-r--r--tools/testing/selftests/x86/nx_stack.c212
280 files changed, 17274 insertions, 2914 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 1a21d6beebc6..3b2061d1c1a5 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -18,6 +18,7 @@ TARGETS += drivers/dma-buf
TARGETS += drivers/s390x/uvdevice
TARGETS += drivers/net/bonding
TARGETS += drivers/net/team
+TARGETS += dt
TARGETS += efivarfs
TARGETS += exec
TARGETS += fchmodat2
@@ -85,6 +86,8 @@ TARGETS += syscall_user_dispatch
TARGETS += sysctl
TARGETS += tc-testing
TARGETS += tdx
+TARGETS += thermal/intel/power_floor
+TARGETS += thermal/intel/workload_hint
TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
diff --git a/tools/testing/selftests/alsa/alsa-local.h b/tools/testing/selftests/alsa/alsa-local.h
index de030dc23bd1..29143ef52101 100644
--- a/tools/testing/selftests/alsa/alsa-local.h
+++ b/tools/testing/selftests/alsa/alsa-local.h
@@ -24,4 +24,14 @@ int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int de
void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2,
const char **array, int array_size, const char *def);
+struct card_cfg_data {
+ int card;
+ snd_config_t *config;
+ const char *filename;
+ const char *config_id;
+ struct card_cfg_data *next;
+};
+
+extern struct card_cfg_data *conf_cards;
+
#endif /* __ALSA_LOCAL_H */
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
index 2f1685a3eae1..00925eb8d9f4 100644
--- a/tools/testing/selftests/alsa/conf.c
+++ b/tools/testing/selftests/alsa/conf.c
@@ -19,14 +19,7 @@
#define SYSFS_ROOT "/sys"
-struct card_data {
- int card;
- snd_config_t *config;
- const char *filename;
- struct card_data *next;
-};
-
-static struct card_data *conf_cards;
+struct card_cfg_data *conf_cards;
static const char *alsa_config =
"ctl.hw {\n"
@@ -97,9 +90,9 @@ snd_config_t *get_alsalib_config(void)
return config;
}
-static struct card_data *conf_data_by_card(int card, bool msg)
+static struct card_cfg_data *conf_data_by_card(int card, bool msg)
{
- struct card_data *conf;
+ struct card_cfg_data *conf;
for (conf = conf_cards; conf; conf = conf->next) {
if (conf->card == card) {
@@ -229,55 +222,31 @@ static bool sysfs_match(const char *sysfs_root, snd_config_t *config)
return iter > 0;
}
-static bool test_filename1(int card, const char *filename, const char *sysfs_card_root)
+static void assign_card_config(int card, const char *sysfs_card_root)
{
- struct card_data *data, *data2;
- snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node;
- snd_config_iterator_t i, next;
+ struct card_cfg_data *data;
+ snd_config_t *sysfs_card_config;
- config = conf_load_from_file(filename);
- if (snd_config_search(config, "sysfs", &sysfs_config) ||
- snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND)
- ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename);
- if (snd_config_search(config, "card", &card_config) ||
- snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND)
- ksft_exit_fail_msg("Missing global card block in filename %s\n", filename);
- if (!sysfs_match(SYSFS_ROOT, sysfs_config))
- return false;
- snd_config_for_each(i, next, card_config) {
- node = snd_config_iterator_entry(i);
- if (snd_config_search(node, "sysfs", &sysfs_card_config) ||
- snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND)
- ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename);
+ for (data = conf_cards; data; data = data->next) {
+ snd_config_search(data->config, "sysfs", &sysfs_card_config);
if (!sysfs_match(sysfs_card_root, sysfs_card_config))
continue;
- data = malloc(sizeof(*data));
- if (!data)
- ksft_exit_fail_msg("Out of memory\n");
- data2 = conf_data_by_card(card, false);
- if (data2)
- ksft_exit_fail_msg("Duplicate card '%s' <-> '%s'\n", filename, data2->filename);
+
data->card = card;
- data->filename = filename;
- data->config = node;
- data->next = conf_cards;
- conf_cards = data;
- return true;
+ break;
}
- return false;
}
-static bool test_filename(const char *filename)
+static void assign_card_configs(void)
{
char fn[128];
int card;
for (card = 0; card < 32; card++) {
snprintf(fn, sizeof(fn), "%s/class/sound/card%d", SYSFS_ROOT, card);
- if (access(fn, R_OK) == 0 && test_filename1(card, filename, fn))
- return true;
+ if (access(fn, R_OK) == 0)
+ assign_card_config(card, fn);
}
- return false;
}
static int filename_filter(const struct dirent *dirent)
@@ -296,6 +265,41 @@ static int filename_filter(const struct dirent *dirent)
return 0;
}
+static bool match_config(const char *filename)
+{
+ struct card_cfg_data *data;
+ snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node;
+ snd_config_iterator_t i, next;
+
+ config = conf_load_from_file(filename);
+ if (snd_config_search(config, "sysfs", &sysfs_config) ||
+ snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename);
+ if (snd_config_search(config, "card", &card_config) ||
+ snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing global card block in filename %s\n", filename);
+ if (!sysfs_match(SYSFS_ROOT, sysfs_config))
+ return false;
+ snd_config_for_each(i, next, card_config) {
+ node = snd_config_iterator_entry(i);
+ if (snd_config_search(node, "sysfs", &sysfs_card_config) ||
+ snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename);
+
+ data = malloc(sizeof(*data));
+ if (!data)
+ ksft_exit_fail_msg("Out of memory\n");
+ data->filename = filename;
+ data->config = node;
+ data->card = -1;
+ if (snd_config_get_id(node, &data->config_id))
+ ksft_exit_fail_msg("snd_config_get_id failed for card\n");
+ data->next = conf_cards;
+ conf_cards = data;
+ }
+ return true;
+}
+
void conf_load(void)
{
const char *fn = "conf.d";
@@ -311,17 +315,19 @@ void conf_load(void)
if (filename == NULL)
ksft_exit_fail_msg("Out of memory\n");
sprintf(filename, "%s/%s", fn, namelist[j]->d_name);
- if (test_filename(filename))
+ if (match_config(filename))
filename = NULL;
free(filename);
free(namelist[j]);
}
free(namelist);
+
+ assign_card_configs();
}
void conf_free(void)
{
- struct card_data *conf;
+ struct card_cfg_data *conf;
while (conf_cards) {
conf = conf_cards;
@@ -332,7 +338,7 @@ void conf_free(void)
snd_config_t *conf_by_card(int card)
{
- struct card_data *conf;
+ struct card_cfg_data *conf;
conf = conf_data_by_card(card, true);
if (conf)
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
index c0a39818c5a4..de664dedb541 100644
--- a/tools/testing/selftests/alsa/pcm-test.c
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -566,6 +566,7 @@ void *card_thread(void *data)
int main(void)
{
struct card_data *card;
+ struct card_cfg_data *conf;
struct pcm_data *pcm;
snd_config_t *global_config, *cfg;
int num_pcm_tests = 0, num_tests, num_std_pcm_tests;
@@ -583,6 +584,10 @@ int main(void)
find_pcms();
+ for (conf = conf_cards; conf; conf = conf->next)
+ if (conf->card < 0)
+ num_missing++;
+
num_std_pcm_tests = conf_get_count(default_pcm_config, "test", NULL);
for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) {
@@ -598,6 +603,11 @@ int main(void)
ksft_set_plan(num_missing + num_pcm_tests);
+ for (conf = conf_cards; conf; conf = conf->next)
+ if (conf->card < 0)
+ ksft_test_result_fail("test.missing.%s.%s\n",
+ conf->filename, conf->config_id);
+
for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) {
ksft_test_result(false, "test.missing.%d.%d.%d.%s\n",
pcm->card, pcm->device, pcm->subdevice,
diff --git a/tools/testing/selftests/amd-pstate/gitsource.sh b/tools/testing/selftests/amd-pstate/gitsource.sh
index 5f2171f0116d..4cde62f90468 100755
--- a/tools/testing/selftests/amd-pstate/gitsource.sh
+++ b/tools/testing/selftests/amd-pstate/gitsource.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Testing and monitor the cpu desire performance, frequency, load,
@@ -66,12 +66,15 @@ post_clear_gitsource()
install_gitsource()
{
- if [ ! -d $git_name ]; then
+ if [ ! -d $SCRIPTDIR/$git_name ]; then
+ pushd $(pwd) > /dev/null 2>&1
+ cd $SCRIPTDIR
printf "Download gitsource, please wait a moment ...\n\n"
wget -O $git_tar $gitsource_url > /dev/null 2>&1
printf "Tar gitsource ...\n\n"
tar -xzf $git_tar
+ popd > /dev/null 2>&1
fi
}
@@ -79,12 +82,14 @@ install_gitsource()
run_gitsource()
{
echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
- ./amd_pstate_trace.py -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+ $TRACER -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
printf "Make and test gitsource for $1 #$2 make_cpus: $MAKE_CPUS\n"
- cd $git_name
- perf stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o ../$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > ../$OUTFILE_GIT-perf-$1-$2.log 2>&1
- cd ..
+ BACKUP_DIR=$(pwd)
+ pushd $BACKUP_DIR > /dev/null 2>&1
+ cd $SCRIPTDIR/$git_name
+ $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o $BACKUP_DIR/$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > $BACKUP_DIR/$OUTFILE_GIT-perf-$1-$2.log 2>&1
+ popd > /dev/null 2>&1
for job in `jobs -p`
do
diff --git a/tools/testing/selftests/amd-pstate/run.sh b/tools/testing/selftests/amd-pstate/run.sh
index de4d8e9c9565..b053eea8bb19 100755
--- a/tools/testing/selftests/amd-pstate/run.sh
+++ b/tools/testing/selftests/amd-pstate/run.sh
@@ -8,9 +8,12 @@ else
FILE_MAIN=DONE
fi
-source basic.sh
-source tbench.sh
-source gitsource.sh
+SCRIPTDIR=`dirname "$0"`
+TRACER=$SCRIPTDIR/../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
+
+source $SCRIPTDIR/basic.sh
+source $SCRIPTDIR/tbench.sh
+source $SCRIPTDIR/gitsource.sh
# amd-pstate-ut only run on x86/x86_64 AMD systems.
ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/')
@@ -22,6 +25,7 @@ OUTFILE=selftest
OUTFILE_TBENCH="$OUTFILE.tbench"
OUTFILE_GIT="$OUTFILE.gitsource"
+PERF=/usr/bin/perf
SYSFS=
CPUROOT=
CPUFREQROOT=
@@ -151,6 +155,7 @@ help()
[-p <tbench process number>]
[-l <loop times for tbench>]
[-i <amd tracer interval>]
+ [-b <perf binary>]
[-m <comparative test: acpi-cpufreq>]
\n"
exit 2
@@ -158,7 +163,7 @@ help()
parse_arguments()
{
- while getopts ho:c:t:p:l:i:m: arg
+ while getopts ho:c:t:p:l:i:b:m: arg
do
case $arg in
h) # --help
@@ -189,6 +194,10 @@ parse_arguments()
TRACER_INTERVAL=$OPTARG
;;
+ b) # --perf-binary
+ PERF=`realpath $OPTARG`
+ ;;
+
m) # --comparative-test
COMPARATIVE_TEST=$OPTARG
;;
@@ -202,8 +211,8 @@ parse_arguments()
command_perf()
{
- if ! command -v perf > /dev/null; then
- echo $msg please install perf. >&2
+ if ! $PERF -v; then
+ echo $msg please install perf or provide perf binary path as argument >&2
exit $ksft_skip
fi
}
diff --git a/tools/testing/selftests/amd-pstate/tbench.sh b/tools/testing/selftests/amd-pstate/tbench.sh
index 49c9850341f6..2a98d9c9202e 100755
--- a/tools/testing/selftests/amd-pstate/tbench.sh
+++ b/tools/testing/selftests/amd-pstate/tbench.sh
@@ -64,11 +64,11 @@ post_clear_tbench()
run_tbench()
{
echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
- ./amd_pstate_trace.py -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+ $TRACER -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
printf "Test tbench for $1 #$2 time_limit: $TIME_LIMIT procs_num: $PROCESS_NUM\n"
tbench_srv > /dev/null 2>&1 &
- perf stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1
+ $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1
pid=`pidof tbench_srv`
kill $pid
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index e3d262831d91..1189e77c8152 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -81,6 +81,20 @@ static void lrcpc_sigill(void)
asm volatile(".inst 0xb8bfc3e0" : : : );
}
+static void lse128_sigill(void)
+{
+ u64 __attribute__ ((aligned (16))) mem[2] = { 10, 20 };
+ register u64 *memp asm ("x0") = mem;
+ register u64 val0 asm ("x1") = 5;
+ register u64 val1 asm ("x2") = 4;
+
+ /* SWPP X1, X2, [X0] */
+ asm volatile(".inst 0x19228001"
+ : "+r" (memp), "+r" (val0), "+r" (val1)
+ :
+ : "cc", "memory");
+}
+
static void mops_sigill(void)
{
char dst[1], src[1];
@@ -226,6 +240,12 @@ static void sveaes_sigill(void)
asm volatile(".inst 0x4522e400" : : : "z0");
}
+static void sveb16b16_sigill(void)
+{
+ /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
+ asm volatile(".inst 0xC1E41C00" : : : );
+}
+
static void svepmull_sigill(void)
{
/* PMULLB Z0.Q, Z0.D, Z0.D */
@@ -289,6 +309,19 @@ static void uscat_sigbus(void)
asm volatile(".inst 0xb820003f" : : : );
}
+static void lrcpc3_sigill(void)
+{
+ int data[2] = { 1, 2 };
+
+ register int *src asm ("x0") = data;
+ register int data0 asm ("w2") = 0;
+ register int data1 asm ("w3") = 0;
+
+ /* LDIAPP w2, w3, [x0] */
+ asm volatile(".inst 0x99431802"
+ : "=r" (data0), "=r" (data1) : "r" (src) :);
+}
+
static const struct hwcap_data {
const char *name;
unsigned long at_hwcap;
@@ -349,6 +382,13 @@ static const struct hwcap_data {
.sigill_fn = ilrcpc_sigill,
},
{
+ .name = "LRCPC3",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LRCPC3,
+ .cpuinfo = "lrcpc3",
+ .sigill_fn = lrcpc3_sigill,
+ },
+ {
.name = "LSE",
.at_hwcap = AT_HWCAP,
.hwcap_bit = HWCAP_ATOMICS,
@@ -365,6 +405,13 @@ static const struct hwcap_data {
.sigbus_reliable = true,
},
{
+ .name = "LSE128",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LSE128,
+ .cpuinfo = "lse128",
+ .sigill_fn = lse128_sigill,
+ },
+ {
.name = "MOPS",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_MOPS,
@@ -494,6 +541,13 @@ static const struct hwcap_data {
.sigill_fn = sveaes_sigill,
},
{
+ .name = "SVE2 B16B16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE_B16B16,
+ .cpuinfo = "sveb16b16",
+ .sigill_fn = sveb16b16_sigill,
+ },
+ {
.name = "SVE2 PMULL",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SVEPMULL,
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 4328895dfc87..547d077e3517 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -473,6 +473,13 @@ function _start
// mov x8, #__NR_sched_yield // Encourage preemption
// svc #0
+#ifdef SSVE
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1
+ and x1, x0, #3
+ cmp x1, #1
+ b.ne svcr_barf
+#endif
+
mov x21, #0
0: mov x0, x21
bl check_zreg
@@ -553,3 +560,15 @@ function vl_barf
mov x1, #1
svc #0
endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4225f975fce3..9c27b67bc7b1 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -585,11 +585,20 @@ endef
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
-TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
- network_helpers.c testing_helpers.c \
- btf_helpers.c flow_dissector_load.h \
- cap_helpers.c test_loader.c xsk.c disasm.c \
- json_writer.c unpriv_helpers.c \
+TRUNNER_EXTRA_SOURCES := test_progs.c \
+ cgroup_helpers.c \
+ trace_helpers.c \
+ network_helpers.c \
+ testing_helpers.c \
+ btf_helpers.c \
+ cap_helpers.c \
+ unpriv_helpers.c \
+ netlink_helpers.c \
+ test_loader.c \
+ xsk.c \
+ disasm.c \
+ json_writer.c \
+ flow_dissector_load.h \
ip_check_defrag_frags.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
$(OUTPUT)/liburandom_read.so \
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index 2c8cb3f61529..1386baf9ae4a 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -458,4 +458,23 @@ extern void bpf_throw(u64 cookie) __ksym;
__bpf_assert_op(LHS, <=, END, value, false); \
})
+struct bpf_iter_css_task;
+struct cgroup_subsys_state;
+extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
+ struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
+extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;
+
+struct bpf_iter_task;
+extern int bpf_iter_task_new(struct bpf_iter_task *it,
+ struct task_struct *task, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
+extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
+
+struct bpf_iter_css;
+extern int bpf_iter_css_new(struct bpf_iter_css *it,
+ struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym;
+extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
+extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index a5e246f7b202..91907b321f91 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -39,9 +39,7 @@ struct bpf_testmod_struct_arg_4 {
int b;
};
-__diag_push();
-__diag_ignore_all("-Wmissing-prototypes",
- "Global functions as their definitions will be in bpf_testmod.ko BTF");
+__bpf_hook_start();
noinline int
bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
@@ -335,7 +333,7 @@ noinline int bpf_fentry_shadow_test(int a)
}
EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
-__diag_pop();
+__bpf_hook_end();
static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.attr = { .name = "bpf_testmod", .mode = 0666, },
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 5b1da2a32ea7..5aa133bf3688 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -45,9 +45,12 @@
#define format_parent_cgroup_path(buf, path) \
format_cgroup_path_pid(buf, path, getppid())
-#define format_classid_path(buf) \
- snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \
- CGROUP_WORK_DIR)
+#define format_classid_path_pid(buf, pid) \
+ snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \
+ CGROUP_WORK_DIR, pid)
+
+#define format_classid_path(buf) \
+ format_classid_path_pid(buf, getpid())
static __thread bool cgroup_workdir_mounted;
@@ -419,26 +422,23 @@ int create_and_get_cgroup(const char *relative_path)
}
/**
- * get_cgroup_id() - Get cgroup id for a particular cgroup path
- * @relative_path: The cgroup path, relative to the workdir, to join
+ * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path
+ * @cgroup_workdir: The absolute cgroup path
*
* On success, it returns the cgroup id. On failure it returns 0,
* which is an invalid cgroup id.
* If there is a failure, it prints the error to stderr.
*/
-unsigned long long get_cgroup_id(const char *relative_path)
+unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
int dirfd, err, flags, mount_id, fhsize;
union {
unsigned long long cgid;
unsigned char raw_bytes[8];
} id;
- char cgroup_workdir[PATH_MAX + 1];
struct file_handle *fhp, *fhp2;
unsigned long long ret = 0;
- format_cgroup_path(cgroup_workdir, relative_path);
-
dirfd = AT_FDCWD;
flags = 0;
fhsize = sizeof(*fhp);
@@ -474,6 +474,14 @@ free_mem:
return ret;
}
+unsigned long long get_cgroup_id(const char *relative_path)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, relative_path);
+ return get_cgroup_id_from_path(cgroup_workdir);
+}
+
int cgroup_setup_and_join(const char *path) {
int cg_fd;
@@ -523,10 +531,20 @@ int setup_classid_environment(void)
return 1;
}
- if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
- errno != EBUSY) {
- log_err("mount cgroup net_cls");
- return 1;
+ if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
+ if (errno != EBUSY) {
+ log_err("mount cgroup net_cls");
+ return 1;
+ }
+
+ if (rmdir(NETCLS_MOUNT_PATH)) {
+ log_err("rmdir cgroup net_cls");
+ return 1;
+ }
+ if (umount(CGROUP_MOUNT_DFLT)) {
+ log_err("umount cgroup base");
+ return 1;
+ }
}
cleanup_classid_environment();
@@ -541,15 +559,16 @@ int setup_classid_environment(void)
/**
* set_classid() - Set a cgroupv1 net_cls classid
- * @id: the numeric classid
*
- * Writes the passed classid into the cgroup work dir's net_cls.classid
+ * Writes the classid into the cgroup work dir's net_cls.classid
* file in order to later on trigger socket tagging.
*
+ * We leverage the current pid as the classid, ensuring unique identification.
+ *
* On success, it returns 0, otherwise on failure it returns 1. If there
* is a failure, it prints the error to stderr.
*/
-int set_classid(unsigned int id)
+int set_classid(void)
{
char cgroup_workdir[PATH_MAX - 42];
char cgroup_classid_path[PATH_MAX + 1];
@@ -565,7 +584,7 @@ int set_classid(unsigned int id)
return 1;
}
- if (dprintf(fd, "%u\n", id) < 0) {
+ if (dprintf(fd, "%u\n", getpid()) < 0) {
log_err("Setting cgroup classid");
rc = 1;
}
@@ -607,3 +626,66 @@ void cleanup_classid_environment(void)
join_cgroup_from_top(NETCLS_MOUNT_PATH);
nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
}
+
+/**
+ * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup
+ */
+unsigned long long get_classid_cgroup_id(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return get_cgroup_id_from_path(cgroup_workdir);
+}
+
+/**
+ * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
+ * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
+ * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-contollers like
+ * "net_cls,net_prio".
+ */
+int get_cgroup1_hierarchy_id(const char *subsys_name)
+{
+ char *c, *c2, *c3, *c4;
+ bool found = false;
+ char line[1024];
+ FILE *file;
+ int i, id;
+
+ if (!subsys_name)
+ return -1;
+
+ file = fopen("/proc/self/cgroup", "r");
+ if (!file) {
+ log_err("fopen /proc/self/cgroup");
+ return -1;
+ }
+
+ while (fgets(line, 1024, file)) {
+ i = 0;
+ for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {
+ if (i == 0) {
+ id = strtol(c, NULL, 10);
+ } else if (i == 1) {
+ if (!strcmp(c, subsys_name)) {
+ found = true;
+ break;
+ }
+
+ /* Multiple subsystems may share one single mount point */
+ for (c3 = strtok_r(c, ",", &c4); c3;
+ c3 = strtok_r(NULL, ",", &c4)) {
+ if (!strcmp(c, subsys_name)) {
+ found = true;
+ break;
+ }
+ }
+ }
+ i++;
+ }
+ if (found)
+ break;
+ }
+ fclose(file);
+ return found ? id : -1;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5c2cb9c8b546..ee053641c026 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -20,6 +20,7 @@ int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
void remove_cgroup(const char *relative_path);
unsigned long long get_cgroup_id(const char *relative_path);
+int get_cgroup1_hierarchy_id(const char *subsys_name);
int join_cgroup(const char *relative_path);
int join_root_cgroup(void);
@@ -29,8 +30,9 @@ int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
/* cgroupv1 related */
-int set_classid(unsigned int id);
+int set_classid(void);
int join_classid(void);
+unsigned long long get_classid_cgroup_id(void);
int setup_classid_environment(void);
void cleanup_classid_environment(void);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 02dd4409200e..3ec5927ec3e5 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -71,6 +71,7 @@ CONFIG_NETFILTER_SYNPROXY=y
CONFIG_NETFILTER_XT_CONNMARK=y
CONFIG_NETFILTER_XT_MATCH_STATE=y
CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETKIT=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_DEFRAG_IPV4=y
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 253821494884..29c8635c5722 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -1,4 +1,3 @@
-CONFIG_9P_FS=y
CONFIG_ARCH_VEXPRESS=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
CONFIG_ARM_SMMU_V3=y
@@ -37,6 +36,7 @@ CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_INFO_REDUCED=n
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_NOTIFIERS=y
@@ -46,7 +46,6 @@ CONFIG_DEBUG_SG=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEVTMPFS=y
-CONFIG_DRM_VIRTIO_GPU=y
CONFIG_DRM=y
CONFIG_DUMMY=y
CONFIG_EXPERT=y
@@ -67,7 +66,6 @@ CONFIG_HAVE_KRETPROBES=y
CONFIG_HEADERS_INSTALL=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_HUGETLBFS=y
-CONFIG_HW_RANDOM_VIRTIO=y
CONFIG_HW_RANDOM=y
CONFIG_HZ_100=y
CONFIG_IDLE_PAGE_TRACKING=y
@@ -99,8 +97,6 @@ CONFIG_MEMCG=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
-CONFIG_NET_9P_VIRTIO=y
-CONFIG_NET_9P=y
CONFIG_NET_ACT_BPF=y
CONFIG_NET_ACT_GACT=y
CONFIG_NETDEVICES=y
@@ -140,7 +136,6 @@ CONFIG_SCHED_TRACER=y
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SCAN_ASYNC=y
-CONFIG_SCSI_VIRTIO=y
CONFIG_SCSI=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
@@ -167,16 +162,6 @@ CONFIG_UPROBES=y
CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_CONSOLE=y
-CONFIG_VIRTIO_FS=y
-CONFIG_VIRTIO_INPUT=y
-CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
-CONFIG_VIRTIO_MMIO=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 2ba92167be35..e93330382849 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -1,4 +1,3 @@
-CONFIG_9P_FS=y
CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
CONFIG_AUDIT=y
CONFIG_BLK_CGROUP=y
@@ -84,8 +83,6 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
-CONFIG_NET_9P=y
-CONFIG_NET_9P_VIRTIO=y
CONFIG_NET_ACT_BPF=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_KEY=y
@@ -114,7 +111,6 @@ CONFIG_SAMPLE_SECCOMP=y
CONFIG_SAMPLES=y
CONFIG_SCHED_TRACER=y
CONFIG_SCSI=y
-CONFIG_SCSI_VIRTIO=y
CONFIG_SECURITY_NETWORK=y
CONFIG_STACK_TRACER=y
CONFIG_STATIC_KEYS_SELFTEST=y
@@ -136,11 +132,6 @@ CONFIG_UPROBES=y
CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm
new file mode 100644
index 000000000000..a9746ca78777
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.vm
@@ -0,0 +1,12 @@
+CONFIG_9P_FS=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 2e70a6048278..f7bfb2b09c82 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -1,6 +1,3 @@
-CONFIG_9P_FS=y
-CONFIG_9P_FS_POSIX_ACL=y
-CONFIG_9P_FS_SECURITY=y
CONFIG_AGP=y
CONFIG_AGP_AMD64=y
CONFIG_AGP_INTEL=y
@@ -45,7 +42,6 @@ CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
-CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
CONFIG_DCB=y
@@ -145,8 +141,6 @@ CONFIG_MEMORY_FAILURE=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
-CONFIG_NET_9P=y
-CONFIG_NET_9P_VIRTIO=y
CONFIG_NET_ACT_BPF=y
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_EMATCH=y
@@ -228,12 +222,6 @@ CONFIG_USER_NS=y
CONFIG_VALIDATE_FS_PARSER=y
CONFIG_VETH=y
CONFIG_VIRT_DRIVERS=y
-CONFIG_VIRTIO_BALLOON=y
-CONFIG_VIRTIO_BLK=y
-CONFIG_VIRTIO_CONSOLE=y
-CONFIG_VIRTIO_NET=y
-CONFIG_VIRTIO_PCI=y
-CONFIG_VIRTIO_VSOCKETS_COMMON=y
CONFIG_VLAN_8021Q=y
CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
index 1a9eeefda9a8..2ea36408816b 100644
--- a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -131,10 +131,17 @@ static bool is_lru(__u32 map_type)
map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
}
+static bool is_percpu(__u32 map_type)
+{
+ return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
struct upsert_opts {
__u32 map_type;
int map_fd;
__u32 n;
+ bool retry_for_nomem;
};
static int create_small_hash(void)
@@ -148,19 +155,38 @@ static int create_small_hash(void)
return map_fd;
}
+static bool retry_for_nomem_fn(int err)
+{
+ return err == ENOMEM;
+}
+
static void *patch_map_thread(void *arg)
{
+ /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
+ static __u8 blob[8 << 10];
struct upsert_opts *opts = arg;
+ void *val_ptr;
int val;
int ret;
int i;
for (i = 0; i < opts->n; i++) {
- if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
val = create_small_hash();
- else
+ val_ptr = &val;
+ } else if (is_percpu(opts->map_type)) {
+ val_ptr = blob;
+ } else {
val = rand();
- ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0);
+ val_ptr = &val;
+ }
+
+ /* 2 seconds may be enough ? */
+ if (opts->retry_for_nomem)
+ ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
+ 40, retry_for_nomem_fn);
+ else
+ ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
@@ -281,6 +307,13 @@ static void __test(int map_fd)
else
opts.n /= 2;
+ /* per-cpu bpf memory allocator may not be able to allocate per-cpu
+ * pointer successfully and it can not refill free llist timely, and
+ * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
+ * the problem temporarily.
+ */
+ opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
+
/*
* Upsert keys [0, n) under some competition: with random values from
* N_THREADS threads. Check values, then delete all elements and check
@@ -326,20 +359,14 @@ static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *
static int create_hash(void)
{
- struct bpf_map_create_opts map_opts = {
- .sz = sizeof(map_opts),
- .map_flags = BPF_F_NO_PREALLOC,
- };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
}
static int create_percpu_hash(void)
{
- struct bpf_map_create_opts map_opts = {
- .sz = sizeof(map_opts),
- .map_flags = BPF_F_NO_PREALLOC,
- };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
}
@@ -356,21 +383,17 @@ static int create_percpu_hash_prealloc(void)
static int create_lru_hash(__u32 type, __u32 map_flags)
{
- struct bpf_map_create_opts map_opts = {
- .sz = sizeof(map_opts),
- .map_flags = map_flags,
- };
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
return map_create(type, "lru_hash", &map_opts);
}
static int create_hash_of_maps(void)
{
- struct bpf_map_create_opts map_opts = {
- .sz = sizeof(map_opts),
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
.map_flags = BPF_F_NO_PREALLOC,
.inner_map_fd = create_small_hash(),
- };
+ );
int ret;
ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
diff --git a/tools/testing/selftests/bpf/netlink_helpers.c b/tools/testing/selftests/bpf/netlink_helpers.c
new file mode 100644
index 000000000000..caf36eb1d032
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Taken & modified from iproute2's libnetlink.c
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/socket.h>
+
+#include "netlink_helpers.h"
+
+static int rcvbuf = 1024 * 1024;
+
+void rtnl_close(struct rtnl_handle *rth)
+{
+ if (rth->fd >= 0) {
+ close(rth->fd);
+ rth->fd = -1;
+ }
+}
+
+int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
+ int protocol)
+{
+ socklen_t addr_len;
+ int sndbuf = 32768;
+ int one = 1;
+
+ memset(rth, 0, sizeof(*rth));
+ rth->proto = protocol;
+ rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
+ if (rth->fd < 0) {
+ perror("Cannot open netlink socket");
+ return -1;
+ }
+ if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf, sizeof(sndbuf)) < 0) {
+ perror("SO_SNDBUF");
+ goto err;
+ }
+ if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF,
+ &rcvbuf, sizeof(rcvbuf)) < 0) {
+ perror("SO_RCVBUF");
+ goto err;
+ }
+
+ /* Older kernels may no support extended ACK reporting */
+ setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one));
+
+ memset(&rth->local, 0, sizeof(rth->local));
+ rth->local.nl_family = AF_NETLINK;
+ rth->local.nl_groups = subscriptions;
+
+ if (bind(rth->fd, (struct sockaddr *)&rth->local,
+ sizeof(rth->local)) < 0) {
+ perror("Cannot bind netlink socket");
+ goto err;
+ }
+ addr_len = sizeof(rth->local);
+ if (getsockname(rth->fd, (struct sockaddr *)&rth->local,
+ &addr_len) < 0) {
+ perror("Cannot getsockname");
+ goto err;
+ }
+ if (addr_len != sizeof(rth->local)) {
+ fprintf(stderr, "Wrong address length %d\n", addr_len);
+ goto err;
+ }
+ if (rth->local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "Wrong address family %d\n",
+ rth->local.nl_family);
+ goto err;
+ }
+ rth->seq = time(NULL);
+ return 0;
+err:
+ rtnl_close(rth);
+ return -1;
+}
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+{
+ return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE);
+}
+
+static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(fd, msg, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+ if (len < 0) {
+ fprintf(stderr, "netlink receive error %s (%d)\n",
+ strerror(errno), errno);
+ return -errno;
+ }
+ if (len == 0) {
+ fprintf(stderr, "EOF on netlink\n");
+ return -ENODATA;
+ }
+ return len;
+}
+
+static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
+{
+ struct iovec *iov = msg->msg_iov;
+ char *buf;
+ int len;
+
+ iov->iov_base = NULL;
+ iov->iov_len = 0;
+
+ len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC);
+ if (len < 0)
+ return len;
+ if (len < 32768)
+ len = 32768;
+ buf = malloc(len);
+ if (!buf) {
+ fprintf(stderr, "malloc error: not enough buffer\n");
+ return -ENOMEM;
+ }
+ iov->iov_base = buf;
+ iov->iov_len = len;
+ len = __rtnl_recvmsg(fd, msg, 0);
+ if (len < 0) {
+ free(buf);
+ return len;
+ }
+ if (answer)
+ *answer = buf;
+ else
+ free(buf);
+ return len;
+}
+
+static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
+ nl_ext_ack_fn_t errfn)
+{
+ fprintf(stderr, "RTNETLINK answers: %s\n",
+ strerror(-err->error));
+}
+
+static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
+ size_t iovlen, struct nlmsghdr **answer,
+ bool show_rtnl_err, nl_ext_ack_fn_t errfn)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct iovec riov;
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
+ };
+ unsigned int seq = 0;
+ struct nlmsghdr *h;
+ int i, status;
+ char *buf;
+
+ for (i = 0; i < iovlen; i++) {
+ h = iov[i].iov_base;
+ h->nlmsg_seq = seq = ++rtnl->seq;
+ if (answer == NULL)
+ h->nlmsg_flags |= NLM_F_ACK;
+ }
+ status = sendmsg(rtnl->fd, &msg, 0);
+ if (status < 0) {
+ perror("Cannot talk to rtnetlink");
+ return -1;
+ }
+ /* change msg to use the response iov */
+ msg.msg_iov = &riov;
+ msg.msg_iovlen = 1;
+ i = 0;
+ while (1) {
+next:
+ status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
+ ++i;
+ if (status < 0)
+ return status;
+ if (msg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "Sender address length == %d!\n",
+ msg.msg_namelen);
+ exit(1);
+ }
+ for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
+ int len = h->nlmsg_len;
+ int l = len - sizeof(*h);
+
+ if (l < 0 || len > status) {
+ if (msg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "Truncated message!\n");
+ free(buf);
+ return -1;
+ }
+ fprintf(stderr,
+ "Malformed message: len=%d!\n",
+ len);
+ exit(1);
+ }
+ if (nladdr.nl_pid != 0 ||
+ h->nlmsg_pid != rtnl->local.nl_pid ||
+ h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) {
+ /* Don't forget to skip that message. */
+ status -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+ continue;
+ }
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+ int error = err->error;
+
+ if (l < sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "ERROR truncated\n");
+ free(buf);
+ return -1;
+ }
+ if (error) {
+ errno = -error;
+ if (rtnl->proto != NETLINK_SOCK_DIAG &&
+ show_rtnl_err)
+ rtnl_talk_error(h, err, errfn);
+ }
+ if (i < iovlen) {
+ free(buf);
+ goto next;
+ }
+ if (error) {
+ free(buf);
+ return -i;
+ }
+ if (answer)
+ *answer = (struct nlmsghdr *)buf;
+ else
+ free(buf);
+ return 0;
+ }
+ if (answer) {
+ *answer = (struct nlmsghdr *)buf;
+ return 0;
+ }
+ fprintf(stderr, "Unexpected reply!\n");
+ status -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+ }
+ free(buf);
+ if (msg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "Message truncated!\n");
+ continue;
+ }
+ if (status) {
+ fprintf(stderr, "Remnant of size %d!\n", status);
+ exit(1);
+ }
+ }
+}
+
+static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer, bool show_rtnl_err,
+ nl_ext_ack_fn_t errfn)
+{
+ struct iovec iov = {
+ .iov_base = n,
+ .iov_len = n->nlmsg_len,
+ };
+
+ return __rtnl_talk_iov(rtnl, &iov, 1, answer, show_rtnl_err, errfn);
+}
+
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer)
+{
+ return __rtnl_talk(rtnl, n, answer, true, NULL);
+}
+
+int addattr(struct nlmsghdr *n, int maxlen, int type)
+{
+ return addattr_l(n, maxlen, type, NULL, 0);
+}
+
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u8));
+}
+
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u16));
+}
+
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u32));
+}
+
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u64));
+}
+
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *str)
+{
+ return addattr_l(n, maxlen, type, str, strlen(str)+1);
+}
+
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
+ int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
+ fprintf(stderr, "%s: Message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -1;
+ }
+ rta = NLMSG_TAIL(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+ if (alen)
+ memcpy(RTA_DATA(rta), data, alen);
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+ return 0;
+}
+
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len)
+{
+ if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
+ fprintf(stderr, "%s: Message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -1;
+ }
+
+ memcpy(NLMSG_TAIL(n), data, len);
+ memset((void *) NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);
+ return 0;
+}
+
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type)
+{
+ struct rtattr *nest = NLMSG_TAIL(n);
+
+ addattr_l(n, maxlen, type, NULL, 0);
+ return nest;
+}
+
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
+{
+ nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest;
+ return n->nlmsg_len;
+}
diff --git a/tools/testing/selftests/bpf/netlink_helpers.h b/tools/testing/selftests/bpf/netlink_helpers.h
new file mode 100644
index 000000000000..68116818a47e
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef NETLINK_HELPERS_H
+#define NETLINK_HELPERS_H
+
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+struct rtnl_handle {
+ int fd;
+ struct sockaddr_nl local;
+ struct sockaddr_nl peer;
+ __u32 seq;
+ __u32 dump;
+ int proto;
+ FILE *dump_fp;
+#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01
+#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02
+#define RTNL_HANDLE_F_STRICT_CHK 0x04
+ int flags;
+};
+
+#define NLMSG_TAIL(nmsg) \
+ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+
+typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off,
+ const struct nlmsghdr *inner_nlh);
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+ __attribute__((warn_unused_result));
+void rtnl_close(struct rtnl_handle *rth);
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer)
+ __attribute__((warn_unused_result));
+
+int addattr(struct nlmsghdr *n, int maxlen, int type);
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data);
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data);
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data);
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data);
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen);
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len);
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type);
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest);
+#endif /* NETLINK_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 465c1c3a3d3c..4ebd0da898f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -40,7 +40,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "2"},
{1, "R3_w", "4"},
@@ -68,7 +68,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "1"},
{1, "R3_w", "2"},
@@ -97,7 +97,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "4"},
{1, "R3_w", "8"},
@@ -119,7 +119,7 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {0, "R1", "ctx(off=0,imm=0)"},
+ {0, "R1", "ctx()"},
{0, "R10", "fp0"},
{0, "R3_w", "7"},
{1, "R3_w", "7"},
@@ -162,13 +162,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w", "pkt(off=8,r=8,imm=0)"},
+ {6, "R0_w", "pkt(off=8,r=8)"},
{6, "R3_w", "var_off=(0x0; 0xff)"},
{7, "R3_w", "var_off=(0x0; 0x1fe)"},
{8, "R3_w", "var_off=(0x0; 0x3fc)"},
{9, "R3_w", "var_off=(0x0; 0x7f8)"},
{10, "R3_w", "var_off=(0x0; 0xff0)"},
- {12, "R3_w", "pkt_end(off=0,imm=0)"},
+ {12, "R3_w", "pkt_end()"},
{17, "R4_w", "var_off=(0x0; 0xff)"},
{18, "R4_w", "var_off=(0x0; 0x1fe0)"},
{19, "R4_w", "var_off=(0x0; 0xff0)"},
@@ -235,11 +235,11 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w", "pkt(off=0,r=0,imm=0)"},
- {4, "R5_w", "pkt(off=14,r=0,imm=0)"},
- {5, "R4_w", "pkt(off=14,r=0,imm=0)"},
- {9, "R2", "pkt(off=0,r=18,imm=0)"},
- {10, "R5", "pkt(off=14,r=18,imm=0)"},
+ {2, "R5_w", "pkt(r=0)"},
+ {4, "R5_w", "pkt(off=14,r=0)"},
+ {5, "R4_w", "pkt(off=14,r=0)"},
+ {9, "R2", "pkt(r=18)"},
+ {10, "R5", "pkt(off=14,r=18)"},
{10, "R4_w", "var_off=(0x0; 0xff)"},
{13, "R4_w", "var_off=(0x0; 0xffff)"},
{14, "R4_w", "var_off=(0x0; 0xffff)"},
@@ -299,7 +299,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
@@ -337,7 +337,7 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w", "pkt(off=14,r=8,"},
+ {26, "R5_w", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
@@ -397,7 +397,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
{8, "R6_w", "var_off=(0x2; 0x7fc)"},
@@ -459,7 +459,7 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w", "pkt_end(off=0,imm=0)"},
+ {3, "R5_w", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
{5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
@@ -513,7 +513,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{8, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
{9, "R6_w", "var_off=(0x2; 0x7fc)"},
@@ -566,7 +566,7 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(off=0,r=8,imm=0)"},
+ {6, "R2_w", "pkt(r=8)"},
{9, "R6_w", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
{10, "R6_w", "var_off=(0x2; 0x7c)"},
@@ -659,14 +659,14 @@ static int do_test_single(struct bpf_align_test *test)
/* Check the next line as well in case the previous line
* did not have a corresponding bpf insn. Example:
* func#0 @0
- * 0: R1=ctx(off=0,imm=0) R10=fp0
+ * 0: R1=ctx() R10=fp0
* 0: (b7) r3 = 2 ; R3_w=2
*
* Sometimes it's actually two lines below, e.g. when
* searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
- * from 4 to 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
- * 6: R0_w=pkt(off=8,r=8,imm=0) R1=ctx(off=0,imm=0) R2_w=pkt(off=0,r=8,imm=0) R3_w=pkt_end(off=0,imm=0) R10=fp0
- * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(off=0,r=8,imm=0) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
+ * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
*/
while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
cur_line = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
index a1766a298bb7..f7cd129cb82b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bind_perm.c
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -9,8 +9,6 @@
#include "cap_helpers.h"
#include "bind_perm.skel.h"
-static int duration;
-
static int create_netns(void)
{
if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
@@ -27,7 +25,7 @@ void try_bind(int family, int port, int expected_errno)
int fd = -1;
fd = socket(family, SOCK_STREAM, 0);
- if (CHECK(fd < 0, "fd", "errno %d", errno))
+ if (!ASSERT_GE(fd, 0, "socket"))
goto close_socket;
if (family == AF_INET) {
@@ -60,7 +58,7 @@ void test_bind_perm(void)
return;
cgroup_fd = test__join_cgroup("/bind_perm");
- if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
skel = bind_perm__open_and_load();
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 41aba139b20b..618af9dfae9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -7,7 +7,7 @@
#include "bpf_iter_ipv6_route.skel.h"
#include "bpf_iter_netlink.skel.h"
#include "bpf_iter_bpf_map.skel.h"
-#include "bpf_iter_task.skel.h"
+#include "bpf_iter_tasks.skel.h"
#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
#include "bpf_iter_task_vmas.skel.h"
@@ -34,8 +34,6 @@
#include "bpf_iter_ksym.skel.h"
#include "bpf_iter_sockmap.skel.h"
-static int duration;
-
static void test_btf_id_or_null(void)
{
struct bpf_iter_test_kern3 *skel;
@@ -64,7 +62,7 @@ static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_
/* not check contents, but ensure read() ends without error */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+ ASSERT_GE(len, 0, "read");
close(iter_fd);
@@ -215,12 +213,12 @@ static void *do_nothing_wait(void *arg)
static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts,
int *num_unknown, int *num_known)
{
- struct bpf_iter_task *skel;
+ struct bpf_iter_tasks *skel;
pthread_t thread_id;
void *ret;
- skel = bpf_iter_task__open_and_load();
- if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+ skel = bpf_iter_tasks__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
return;
ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
@@ -239,7 +237,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts,
ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
"pthread_join");
- bpf_iter_task__destroy(skel);
+ bpf_iter_tasks__destroy(skel);
}
static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known)
@@ -307,10 +305,10 @@ static void test_task_pidfd(void)
static void test_task_sleepable(void)
{
- struct bpf_iter_task *skel;
+ struct bpf_iter_tasks *skel;
- skel = bpf_iter_task__open_and_load();
- if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
+ skel = bpf_iter_tasks__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
return;
do_dummy_read(skel->progs.dump_task_sleepable);
@@ -320,7 +318,7 @@ static void test_task_sleepable(void)
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");
- bpf_iter_task__destroy(skel);
+ bpf_iter_tasks__destroy(skel);
}
static void test_task_stack(void)
@@ -334,6 +332,8 @@ static void test_task_stack(void)
do_dummy_read(skel->progs.dump_task_stack);
do_dummy_read(skel->progs.get_task_user_stacks);
+ ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks");
+
bpf_iter_task_stack__destroy(skel);
}
@@ -413,7 +413,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
goto free_link;
}
- if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "read"))
goto free_link;
ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)",
@@ -526,11 +526,11 @@ static int do_read_with_fd(int iter_fd, const char *expected,
start = 0;
while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
start += len;
- if (CHECK(start >= 16, "read", "read len %d\n", len))
+ if (!ASSERT_LT(start, 16, "read"))
return -1;
read_buf_len = read_one_char ? 1 : 16 - start;
}
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
return -1;
if (!ASSERT_STREQ(buf, expected, "read"))
@@ -571,8 +571,7 @@ static int do_read(const char *path, const char *expected)
int err, iter_fd;
iter_fd = open(path, O_RDONLY);
- if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
- path, strerror(errno)))
+ if (!ASSERT_GE(iter_fd, 0, "open"))
return -1;
err = do_read_with_fd(iter_fd, expected, false);
@@ -600,7 +599,7 @@ static void test_file_iter(void)
unlink(path);
err = bpf_link__pin(link, path);
- if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+ if (!ASSERT_OK(err, "pin_iter"))
goto free_link;
err = do_read(path, "abcd");
@@ -651,12 +650,10 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
* overflow and needs restart.
*/
map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
- if (CHECK(map1_fd < 0, "bpf_map_create",
- "map_creation failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(map1_fd, 0, "bpf_map_create"))
goto out;
map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
- if (CHECK(map2_fd < 0, "bpf_map_create",
- "map_creation failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(map2_fd, 0, "bpf_map_create"))
goto free_map1;
/* bpf_seq_printf kernel buffer is 8 pages, so one map
@@ -685,14 +682,12 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
/* setup filtering map_id in bpf program */
map_info_len = sizeof(map_info);
err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map1_id = map_info.id;
err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map2_id = map_info.id;
@@ -705,7 +700,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
goto free_link;
buf = malloc(expected_read_len);
- if (!buf)
+ if (!ASSERT_OK_PTR(buf, "malloc"))
goto close_iter;
/* do read */
@@ -714,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- CHECK(len != -1 || errno != E2BIG, "read",
- "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
- len, strerror(errno));
+ ASSERT_EQ(len, -1, "read");
+ ASSERT_EQ(errno, E2BIG, "read");
goto free_buf;
} else if (!ret1) {
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
} else {
do {
@@ -732,8 +725,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
total_read_len += len;
} while (len > 0 || len == -EAGAIN);
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
}
@@ -836,7 +828,7 @@ static void test_bpf_hash_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
@@ -878,6 +870,8 @@ static void test_bpf_percpu_hash_map(void)
skel->rodata->num_cpus = bpf_num_possible_cpus();
val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
err = bpf_iter_bpf_percpu_hash_map__load(skel);
if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load"))
@@ -917,7 +911,7 @@ static void test_bpf_percpu_hash_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
@@ -983,17 +977,14 @@ static void test_bpf_array_map(void)
start = 0;
while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
start += len;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
res_first_key = *(__u32 *)buf;
res_first_val = *(__u64 *)(buf + sizeof(__u32));
- if (CHECK(res_first_key != 0 || res_first_val != first_val,
- "bpf_seq_write",
- "seq_write failure: first key %u vs expected 0, "
- " first value %llu vs expected %llu\n",
- res_first_key, res_first_val, first_val))
+ if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") ||
+ !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write"))
goto close_iter;
if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
@@ -1057,6 +1048,8 @@ static void test_bpf_percpu_array_map(void)
skel->rodata->num_cpus = bpf_num_possible_cpus();
val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
err = bpf_iter_bpf_percpu_array_map__load(skel);
if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load"))
@@ -1092,7 +1085,7 @@ static void test_bpf_percpu_array_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
@@ -1131,6 +1124,7 @@ static void test_bpf_sk_storage_delete(void)
sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
if (!ASSERT_GE(sock_fd, 0, "socket"))
goto out;
+
err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
if (!ASSERT_OK(err, "map_update"))
goto out;
@@ -1151,14 +1145,19 @@ static void test_bpf_sk_storage_delete(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- if (CHECK(!err || errno != ENOENT, "bpf_map_lookup_elem",
- "map value wasn't deleted (err=%d, errno=%d)\n", err, errno))
- goto close_iter;
+
+ /* Note: The following assertions serve to ensure
+ * the value was deleted. It does so by asserting
+ * that bpf_map_lookup_elem has failed. This might
+ * seem counterintuitive at first.
+ */
+ ASSERT_ERR(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem");
close_iter:
close(iter_fd);
@@ -1203,17 +1202,15 @@ static void test_bpf_sk_storage_get(void)
do_dummy_read(skel->progs.fill_socket_owner);
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- if (CHECK(err || val != getpid(), "bpf_map_lookup_elem",
- "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
- getpid(), val, err))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem") ||
+ !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem"))
goto close_socket;
do_dummy_read(skel->progs.negate_socket_local_storage);
err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
- CHECK(err || val != -getpid(), "bpf_map_lookup_elem",
- "map value wasn't set correctly (expected %d, got %d, err=%d)\n",
- -getpid(), val, err);
+ ASSERT_OK(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem");
close_socket:
close(sock_fd);
@@ -1290,7 +1287,7 @@ static void test_bpf_sk_storage_map(void)
/* do some tests */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto close_iter;
/* test results */
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 675b90b15280..f09d6ac2ef09 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -25,7 +25,7 @@ void serial_test_bpf_obj_id(void)
*/
__u32 map_ids[nr_iters + 1];
char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
- __u32 i, next_id, info_len, nr_id_found, duration = 0;
+ __u32 i, next_id, info_len, nr_id_found;
struct timespec real_time_ts, boot_time_ts;
int err = 0;
__u64 array_value;
@@ -33,16 +33,16 @@ void serial_test_bpf_obj_id(void)
time_t now, load_time;
err = bpf_prog_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id");
err = bpf_map_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
err = bpf_link_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
/* Check bpf_map_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
@@ -53,25 +53,26 @@ void serial_test_bpf_obj_id(void)
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_prog_test_load"))
continue;
/* Insert a magic value to the map */
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- if (CHECK_FAIL(map_fds[i] < 0))
+ if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map"))
goto done;
+
err = bpf_map_update_elem(map_fds[i], &array_key,
&array_magic_value, 0);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto done;
- prog = bpf_object__find_program_by_name(objs[i],
- "test_obj_id");
- if (CHECK_FAIL(!prog))
+ prog = bpf_object__find_program_by_name(objs[i], "test_obj_id");
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto done;
+
links[i] = bpf_program__attach(prog);
err = libbpf_get_error(links[i]);
- if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+ if (!ASSERT_OK(err, "bpf_program__attach")) {
links[i] = NULL;
goto done;
}
@@ -81,24 +82,14 @@ void serial_test_bpf_obj_id(void)
bzero(&map_infos[i], info_len);
err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i],
&info_len);
- if (CHECK(err ||
- map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
- map_infos[i].key_size != sizeof(__u32) ||
- map_infos[i].value_size != sizeof(__u64) ||
- map_infos[i].max_entries != 1 ||
- map_infos[i].map_flags != 0 ||
- info_len != sizeof(struct bpf_map_info) ||
- strcmp((char *)map_infos[i].name, expected_map_name),
- "get-map-info(fd)",
- "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
- err, errno,
- map_infos[i].type, BPF_MAP_TYPE_ARRAY,
- info_len, sizeof(struct bpf_map_info),
- map_infos[i].key_size,
- map_infos[i].value_size,
- map_infos[i].max_entries,
- map_infos[i].map_flags,
- map_infos[i].name, expected_map_name))
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") ||
+ !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") ||
+ !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") ||
+ !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") ||
+ !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") ||
+ !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") ||
+ !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name"))
goto done;
/* Check getting prog info */
@@ -112,48 +103,34 @@ void serial_test_bpf_obj_id(void)
prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
prog_infos[i].nr_map_ids = 2;
+
err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
+
err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
+
err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i],
&info_len);
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ (prog_infos[i].load_time / nsec_per_sec);
- if (CHECK(err ||
- prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
- info_len != sizeof(struct bpf_prog_info) ||
- (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
- (env.jit_enabled &&
- !memcmp(jited_insns, zeros, sizeof(zeros))) ||
- !prog_infos[i].xlated_prog_len ||
- !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
- load_time < now - 60 || load_time > now + 60 ||
- prog_infos[i].created_by_uid != my_uid ||
- prog_infos[i].nr_map_ids != 1 ||
- *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
- strcmp((char *)prog_infos[i].name, expected_prog_name),
- "get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
- "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
- "jited_prog %d xlated_prog %d load_time %lu(%lu) "
- "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
- "name %s(%s)\n",
- err, errno, i,
- prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
- info_len, sizeof(struct bpf_prog_info),
- env.jit_enabled,
- prog_infos[i].jited_prog_len,
- prog_infos[i].xlated_prog_len,
- !!memcmp(jited_insns, zeros, sizeof(zeros)),
- !!memcmp(xlated_insns, zeros, sizeof(zeros)),
- load_time, now,
- prog_infos[i].created_by_uid, my_uid,
- prog_infos[i].nr_map_ids, 1,
- *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
- prog_infos[i].name, expected_prog_name))
+
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))),
+ "jited_insns") ||
+ !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") ||
+ !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") ||
+ !ASSERT_GE(load_time, (now - 60), "load_time") ||
+ !ASSERT_LE(load_time, (now + 60), "load_time") ||
+ !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") ||
+ !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") ||
+ !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") ||
+ !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name"))
goto done;
/* Check getting link info */
@@ -163,25 +140,12 @@ void serial_test_bpf_obj_id(void)
link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]),
&link_infos[i], &info_len);
- if (CHECK(err ||
- link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
- link_infos[i].prog_id != prog_infos[i].id ||
- link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) ||
- strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
- "sys_enter") ||
- info_len != sizeof(struct bpf_link_info),
- "get-link-info(fd)",
- "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
- "prog_id %d (%d) tp_name %s(%s)\n",
- err, errno,
- info_len, sizeof(struct bpf_link_info),
- link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
- link_infos[i].id,
- link_infos[i].prog_id, prog_infos[i].id,
- (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
- "sys_enter"))
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") ||
+ !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") ||
+ !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") ||
+ !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") ||
+ !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name"))
goto done;
-
}
/* Check bpf_prog_get_next_id() */
@@ -190,7 +154,7 @@ void serial_test_bpf_obj_id(void)
while (!bpf_prog_get_next_id(next_id, &next_id)) {
struct bpf_prog_info prog_info = {};
__u32 saved_map_id;
- int prog_fd;
+ int prog_fd, cmp_res;
info_len = sizeof(prog_info);
@@ -198,9 +162,7 @@ void serial_test_bpf_obj_id(void)
if (prog_fd < 0 && errno == ENOENT)
/* The bpf_prog is in the dead row */
continue;
- if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
- "prog_fd %d next_id %d errno %d\n",
- prog_fd, next_id, errno))
+ if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -218,9 +180,8 @@ void serial_test_bpf_obj_id(void)
*/
prog_info.nr_map_ids = 1;
err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
- if (CHECK(!err || errno != EFAULT,
- "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
- err, errno, EFAULT))
+ if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd"))
break;
bzero(&prog_info, sizeof(prog_info));
info_len = sizeof(prog_info);
@@ -231,27 +192,22 @@ void serial_test_bpf_obj_id(void)
err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
prog_infos[i].jited_prog_insns = 0;
prog_infos[i].xlated_prog_insns = 0;
- CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
- memcmp(&prog_info, &prog_infos[i], info_len) ||
- *(int *)(long)prog_info.map_ids != saved_map_id,
- "get-prog-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
- err, errno, info_len, sizeof(struct bpf_prog_info),
- memcmp(&prog_info, &prog_infos[i], info_len),
- *(int *)(long)prog_info.map_ids, saved_map_id);
+ cmp_res = memcmp(&prog_info, &prog_infos[i], info_len);
+
+ ASSERT_OK(err, "bpf_prog_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id");
close(prog_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total prog id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found");
/* Check bpf_map_get_next_id() */
nr_id_found = 0;
next_id = 0;
while (!bpf_map_get_next_id(next_id, &next_id)) {
struct bpf_map_info map_info = {};
- int map_fd;
+ int map_fd, cmp_res;
info_len = sizeof(map_info);
@@ -259,9 +215,7 @@ void serial_test_bpf_obj_id(void)
if (map_fd < 0 && errno == ENOENT)
/* The bpf_map is in the dead row */
continue;
- if (CHECK(map_fd < 0, "get-map-fd(next_id)",
- "map_fd %d next_id %u errno %d\n",
- map_fd, next_id, errno))
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -274,25 +228,19 @@ void serial_test_bpf_obj_id(void)
nr_id_found++;
err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
goto done;
err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
- CHECK(err || info_len != sizeof(struct bpf_map_info) ||
- memcmp(&map_info, &map_infos[i], info_len) ||
- array_value != array_magic_value,
- "check get-map-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
- err, errno, info_len, sizeof(struct bpf_map_info),
- memcmp(&map_info, &map_infos[i], info_len),
- array_value, array_magic_value);
+ cmp_res = memcmp(&map_info, &map_infos[i], info_len);
+ ASSERT_OK(err, "bpf_map_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(array_value, array_magic_value, "array_value");
close(map_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total map id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found");
/* Check bpf_link_get_next_id() */
nr_id_found = 0;
@@ -308,9 +256,7 @@ void serial_test_bpf_obj_id(void)
if (link_fd < 0 && errno == ENOENT)
/* The bpf_link is in the dead row */
continue;
- if (CHECK(link_fd < 0, "get-link-fd(next_id)",
- "link_fd %d next_id %u errno %d\n",
- link_fd, next_id, errno))
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -325,17 +271,13 @@ void serial_test_bpf_obj_id(void)
err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len);
cmp_res = memcmp(&link_info, &link_infos[i],
offsetof(struct bpf_link_info, raw_tracepoint));
- CHECK(err || info_len != sizeof(link_info) || cmp_res,
- "check get-link-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d\n",
- err, errno, info_len, sizeof(struct bpf_link_info),
- cmp_res);
+ ASSERT_OK(err, "bpf_link_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(link_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
close(link_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total link id found by get_next_id",
- "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found");
done:
for (i = 0; i < nr_iters; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 4aabeaa525d4..a88e6e07e4f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -20,15 +20,14 @@
static const unsigned int total_bytes = 10 * 1024 * 1024;
static int expected_stg = 0xeB9F;
-static int stop, duration;
+static int stop;
static int settcpca(int fd, const char *tcp_ca)
{
int err;
err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
- if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n",
- errno))
+ if (!ASSERT_NEQ(err, -1, "setsockopt"))
return -1;
return 0;
@@ -65,8 +64,7 @@ static void *server(void *arg)
bytes += nr_sent;
}
- CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
- bytes, total_bytes, nr_sent, errno);
+ ASSERT_EQ(bytes, total_bytes, "send");
done:
if (fd >= 0)
@@ -92,10 +90,11 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
WRITE_ONCE(stop, 0);
lfd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(lfd, -1, "socket"))
return;
+
fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
+ if (!ASSERT_NEQ(fd, -1, "socket")) {
close(lfd);
return;
}
@@ -108,26 +107,27 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
sa6.sin6_family = AF_INET6;
sa6.sin6_addr = in6addr_loopback;
err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "bind"))
goto done;
+
err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
- if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "getsockname"))
goto done;
+
err = listen(lfd, 1);
- if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "listen"))
goto done;
if (sk_stg_map) {
err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
&expected_stg, BPF_NOEXIST);
- if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)"))
goto done;
}
/* connect to server */
err = connect(fd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "connect", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "connect"))
goto done;
if (sk_stg_map) {
@@ -135,14 +135,13 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
&tmp_stg);
- if (CHECK(!err || errno != ENOENT,
- "bpf_map_lookup_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") ||
+ !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)"))
goto done;
}
err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_OK(err, "pthread_create"))
goto done;
/* recv total_bytes */
@@ -156,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
bytes += nr_recv;
}
- CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
- bytes, total_bytes, nr_recv, errno);
+ ASSERT_EQ(bytes, total_bytes, "recv");
WRITE_ONCE(stop, 1);
pthread_join(srv_thread, &thread_ret);
- CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
- PTR_ERR(thread_ret));
+ ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
+
done:
close(lfd);
close(fd);
@@ -174,7 +172,7 @@ static void test_cubic(void)
struct bpf_link *link;
cubic_skel = bpf_cubic__open_and_load();
- if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load"))
return;
link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
@@ -197,7 +195,7 @@ static void test_dctcp(void)
struct bpf_link *link;
dctcp_skel = bpf_dctcp__open_and_load();
- if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
return;
link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
@@ -207,9 +205,7 @@ static void test_dctcp(void)
}
do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
- CHECK(dctcp_skel->bss->stg_result != expected_stg,
- "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
- dctcp_skel->bss->stg_result, expected_stg);
+ ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result");
bpf_link__destroy(link);
bpf_dctcp__destroy(dctcp_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 731c343897d8..e770912fc1d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type)
}
bpf_program__set_type(prog, type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
err = bpf_object__load(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 92d51f377fe5..8fb4a04fbbc0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -5265,6 +5265,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind)
#endif
assert(0);
+ return 0;
}
static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind,
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
new file mode 100644
index 000000000000..74d6d7546f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup1_hierarchy.skel.h"
+
+static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success"))
+ goto cleanup;
+
+ err = bpf_link__destroy(fentry_link);
+ ASSERT_OK(err, "destroy_lsm");
+
+cleanup:
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_fentry");
+}
+
+void test_cgroup1_hierarchy(void)
+{
+ struct test_cgroup1_hierarchy *skel;
+ __u64 current_cgid;
+ int hid, err;
+
+ skel = test_cgroup1_hierarchy__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ skel->bss->target_pid = getpid();
+
+ err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1");
+ if (!ASSERT_OK(err, "fentry_set_target"))
+ goto destroy;
+
+ err = test_cgroup1_hierarchy__load(skel);
+ if (!ASSERT_OK(err, "load"))
+ goto destroy;
+
+ /* Setup cgroup1 hierarchy */
+ err = setup_classid_environment();
+ if (!ASSERT_OK(err, "setup_classid_environment"))
+ goto destroy;
+
+ err = join_classid();
+ if (!ASSERT_OK(err, "join_cgroup1"))
+ goto cleanup;
+
+ current_cgid = get_classid_cgroup_id();
+ if (!ASSERT_GE(current_cgid, 0, "cgroup1 id"))
+ goto cleanup;
+
+ hid = get_cgroup1_hierarchy_id("net_cls");
+ if (!ASSERT_GE(hid, 0, "cgroup1 id"))
+ goto cleanup;
+ skel->bss->target_hid = hid;
+
+ if (test__start_subtest("test_cgroup1_hierarchy")) {
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_root_cgid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_invalid_level")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgid")) {
+ skel->bss->target_ancestor_cgid = 0;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_hid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ skel->bss->target_hid = -1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name2")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_sleepable_prog")) {
+ skel->bss->target_hid = hid;
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_sleepable(skel);
+ }
+
+cleanup:
+ cleanup_classid_environment();
+destroy:
+ test_cgroup1_hierarchy__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index e02feb5fae97..574d9a0cdc8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -4,6 +4,7 @@
#include <test_progs.h>
#include <bpf/libbpf.h>
#include <bpf/btf.h>
+#include "iters_css_task.skel.h"
#include "cgroup_iter.skel.h"
#include "cgroup_helpers.h"
@@ -263,6 +264,35 @@ close_cgrp:
close(cgrp_fd);
}
+static void test_walk_self_only_css_task(void)
+{
+ struct iters_css_task *skel;
+ int err;
+
+ skel = iters_css_task__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.cgroup_id_printer, true);
+
+ err = iters_css_task__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = join_cgroup(cg_path[CHILD2]);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]);
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2],
+ BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task");
+ ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+cleanup:
+ iters_css_task__destroy(skel);
+}
+
void test_cgroup_iter(void)
{
struct cgroup_iter *skel = NULL;
@@ -293,6 +323,9 @@ void test_cgroup_iter(void)
test_walk_self_only(skel);
if (test__start_subtest("cgroup_iter__dead_self_only"))
test_walk_dead_self_only(skel);
+ if (test__start_subtest("cgroup_iter__self_only_css_task"))
+ test_walk_self_only_css_task();
+
out:
cgroup_iter__destroy(skel);
cleanup_cgroups();
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index 9026b42914d3..addf720428f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -71,7 +71,7 @@ void test_cgroup_v1v2(void)
}
ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
setup_classid_environment();
- set_classid(42);
+ set_classid();
ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
cleanup_classid_environment();
close(server_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
index b696873c5455..bf84d4a1d9ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/iters.c
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -1,7 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdlib.h>
#include <test_progs.h>
+#include "cgroup_helpers.h"
#include "iters.skel.h"
#include "iters_state_safety.skel.h"
@@ -9,6 +16,10 @@
#include "iters_num.skel.h"
#include "iters_testmod_seq.skel.h"
#include "iters_task_vma.skel.h"
+#include "iters_task.skel.h"
+#include "iters_css_task.skel.h"
+#include "iters_css.skel.h"
+#include "iters_task_failure.skel.h"
static void subtest_num_iters(void)
{
@@ -146,11 +157,144 @@ cleanup:
iters_task_vma__destroy(skel);
}
+static pthread_mutex_t do_nothing_mutex;
+
+static void *do_nothing_wait(void *arg)
+{
+ pthread_mutex_lock(&do_nothing_mutex);
+ pthread_mutex_unlock(&do_nothing_mutex);
+
+ pthread_exit(arg);
+}
+
+#define thread_num 2
+
+static void subtest_task_iters(void)
+{
+ struct iters_task *skel = NULL;
+ pthread_t thread_ids[thread_num];
+ void *ret;
+ int err;
+
+ skel = iters_task__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+ skel->bss->target_pid = getpid();
+ err = iters_task__attach(skel);
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+ pthread_mutex_lock(&do_nothing_mutex);
+ for (int i = 0; i < thread_num; i++)
+ ASSERT_OK(pthread_create(&thread_ids[i], NULL, &do_nothing_wait, NULL),
+ "pthread_create");
+
+ syscall(SYS_getpgid);
+ iters_task__detach(skel);
+ ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
+ ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
+ ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
+ pthread_mutex_unlock(&do_nothing_mutex);
+ for (int i = 0; i < thread_num; i++)
+ ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
+cleanup:
+ iters_task__destroy(skel);
+}
+
+extern int stack_mprotect(void);
+
+static void subtest_css_task_iters(void)
+{
+ struct iters_css_task *skel = NULL;
+ int err, cg_fd, cg_id;
+ const char *cgrp_path = "/cg1";
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+ cg_fd = create_and_get_cgroup(cgrp_path);
+ if (!ASSERT_GE(cg_fd, 0, "create_and_get_cgroup"))
+ goto cleanup;
+ cg_id = get_cgroup_id(cgrp_path);
+ err = join_cgroup(cgrp_path);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ goto cleanup;
+
+ skel = iters_css_task__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->cg_id = cg_id;
+ err = iters_css_task__attach(skel);
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+ err = stack_mprotect();
+ if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+ !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ goto cleanup;
+ iters_css_task__detach(skel);
+ ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+
+cleanup:
+ cleanup_cgroup_environment();
+ iters_css_task__destroy(skel);
+}
+
+static void subtest_css_iters(void)
+{
+ struct iters_css *skel = NULL;
+ struct {
+ const char *path;
+ int fd;
+ } cgs[] = {
+ { "/cg1" },
+ { "/cg1/cg2" },
+ { "/cg1/cg2/cg3" },
+ { "/cg1/cg2/cg3/cg4" },
+ };
+ int err, cg_nr = ARRAY_SIZE(cgs);
+ int i;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+ for (i = 0; i < cg_nr; i++) {
+ cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+ if (!ASSERT_GE(cgs[i].fd, 0, "create_and_get_cgroup"))
+ goto cleanup;
+ }
+
+ skel = iters_css__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->root_cg_id = get_cgroup_id(cgs[0].path);
+ skel->bss->leaf_cg_id = get_cgroup_id(cgs[cg_nr - 1].path);
+ err = iters_css__attach(skel);
+
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+
+ syscall(SYS_getpgid);
+ ASSERT_EQ(skel->bss->pre_order_cnt, cg_nr, "pre_order_cnt");
+ ASSERT_EQ(skel->bss->first_cg_id, get_cgroup_id(cgs[0].path), "first_cg_id");
+
+ ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt");
+ ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id");
+ ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high");
+ iters_css__detach(skel);
+cleanup:
+ cleanup_cgroup_environment();
+ iters_css__destroy(skel);
+}
+
void test_iters(void)
{
RUN_TESTS(iters_state_safety);
RUN_TESTS(iters_looping);
RUN_TESTS(iters);
+ RUN_TESTS(iters_css_task);
if (env.has_testmod)
RUN_TESTS(iters_testmod_seq);
@@ -161,4 +305,11 @@ void test_iters(void)
subtest_testmod_seq_iters();
if (test__start_subtest("task_vma"))
subtest_task_vma_iters();
+ if (test__start_subtest("task"))
+ subtest_task_iters();
+ if (test__start_subtest("css_task"))
+ subtest_css_task_iters();
+ if (test__start_subtest("css"))
+ subtest_css_iters();
+ RUN_TESTS(iters_task_failure);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 69dc31383b78..2fb89de63bd2 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -94,14 +94,8 @@ static struct {
{ "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
{ "incorrect_head_off1", "bpf_list_head not found at offset=25" },
{ "incorrect_head_off2", "bpf_list_head not found at offset=1" },
- { "pop_front_off",
- "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
- "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
- "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
- { "pop_back_off",
- "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) "
- "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n"
- "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" },
+ { "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
+ { "pop_back_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
};
static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg)
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
index b25b870f87ba..e6e50a394472 100644
--- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void)
local_kptr_stash__destroy(skel);
}
+static void test_refcount_acquire_without_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash run");
+ ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts);
+ ASSERT_OK(ret, "stash_refcounted_node run");
+ ASSERT_OK(opts.retval, "stash_refcounted_node retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run");
+ ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
static void test_local_kptr_stash_fail(void)
{
RUN_TESTS(local_kptr_stash_fail);
@@ -86,6 +117,8 @@ void test_local_kptr_stash(void)
test_local_kptr_stash_plain();
if (test__start_subtest("local_kptr_stash_unstash"))
test_local_kptr_stash_unstash();
+ if (test__start_subtest("refcount_acquire_without_unstash"))
+ test_refcount_acquire_without_unstash();
if (test__start_subtest("local_kptr_stash_fail"))
test_local_kptr_stash_fail();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index fe9a23e65ef4..0f7ea4d7d9f6 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -78,7 +78,7 @@ static void obj_load_log_buf(void)
ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"),
"libbpf_log_not_empty");
ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty");
- ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"),
+ ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"),
"good_log_verbose");
ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"),
"bad_log_not_empty");
@@ -175,7 +175,7 @@ static void bpf_prog_load_log_buf(void)
opts.log_level = 2;
fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
good_prog_insns, good_prog_insn_cnt, &opts);
- ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx(off=0,imm=0) R10=fp0"), "good_log_2");
+ ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2");
ASSERT_GE(fd, 0, "good_fd2");
if (fd >= 0)
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
new file mode 100644
index 000000000000..0c9abd279e18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -0,0 +1,2124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <test_progs.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+/* =================================
+ * SHORT AND CONSISTENT NUMBER TYPES
+ * =================================
+ */
+#define U64_MAX ((u64)UINT64_MAX)
+#define U32_MAX ((u32)UINT_MAX)
+#define U16_MAX ((u32)UINT_MAX)
+#define S64_MIN ((s64)INT64_MIN)
+#define S64_MAX ((s64)INT64_MAX)
+#define S32_MIN ((s32)INT_MIN)
+#define S32_MAX ((s32)INT_MAX)
+#define S16_MIN ((s16)0x80000000)
+#define S16_MAX ((s16)0x7fffffff)
+
+typedef unsigned long long ___u64;
+typedef unsigned int ___u32;
+typedef long long ___s64;
+typedef int ___s32;
+
+/* avoid conflicts with already defined types in kernel headers */
+#define u64 ___u64
+#define u32 ___u32
+#define s64 ___s64
+#define s32 ___s32
+
+/* ==================================
+ * STRING BUF ABSTRACTION AND HELPERS
+ * ==================================
+ */
+struct strbuf {
+ size_t buf_sz;
+ int pos;
+ char buf[0];
+};
+
+#define DEFINE_STRBUF(name, N) \
+ struct { struct strbuf buf; char data[(N)]; } ___##name; \
+ struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf)
+
+__printf(2, 3)
+static inline void snappendf(struct strbuf *s, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ s->pos += vsnprintf(s->buf + s->pos,
+ s->pos < s->buf_sz ? s->buf_sz - s->pos : 0,
+ fmt, args);
+ va_end(args);
+}
+
+/* ==================================
+ * GENERIC NUMBER TYPE AND OPERATIONS
+ * ==================================
+ */
+enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 };
+
+static __always_inline u64 min_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x < (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x < (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x < (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x < (s32)y ? (s32)x : (s32)y;
+ default: printf("min_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 max_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x > (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x > (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x > (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y;
+ default: printf("max_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 cast_t(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x;
+ case U32: return (u32)x;
+ case S64: return (s64)x;
+ case S32: return (u32)(s32)x;
+ default: printf("cast_t!\n"); exit(1);
+ }
+}
+
+static const char *t_str(enum num_t t)
+{
+ switch (t) {
+ case U64: return "u64";
+ case U32: return "u32";
+ case S64: return "s64";
+ case S32: return "s32";
+ default: printf("t_str!\n"); exit(1);
+ }
+}
+
+static enum num_t t_is_32(enum num_t t)
+{
+ switch (t) {
+ case U64: return false;
+ case U32: return true;
+ case S64: return false;
+ case S32: return true;
+ default: printf("t_is_32!\n"); exit(1);
+ }
+}
+
+static enum num_t t_signed(enum num_t t)
+{
+ switch (t) {
+ case U64: return S64;
+ case U32: return S32;
+ case S64: return S64;
+ case S32: return S32;
+ default: printf("t_signed!\n"); exit(1);
+ }
+}
+
+static enum num_t t_unsigned(enum num_t t)
+{
+ switch (t) {
+ case U64: return U64;
+ case U32: return U32;
+ case S64: return U64;
+ case S32: return U32;
+ default: printf("t_unsigned!\n"); exit(1);
+ }
+}
+
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
+static bool num_is_small(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x <= UNUM_MAX_DECIMAL;
+ case U32: return (u32)x <= UNUM_MAX_DECIMAL;
+ case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL;
+ case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL;
+ default: printf("num_is_small!\n"); exit(1);
+ }
+}
+
+static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x)
+{
+ bool is_small = num_is_small(t, x);
+
+ if (is_small) {
+ switch (t) {
+ case U64: return snappendf(sb, "%llu", (u64)x);
+ case U32: return snappendf(sb, "%u", (u32)x);
+ case S64: return snappendf(sb, "%lld", (s64)x);
+ case S32: return snappendf(sb, "%d", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ } else {
+ switch (t) {
+ case U64:
+ if (x == U64_MAX)
+ return snappendf(sb, "U64_MAX");
+ else if (x >= U64_MAX - 256)
+ return snappendf(sb, "U64_MAX-%llu", U64_MAX - x);
+ else
+ return snappendf(sb, "%#llx", (u64)x);
+ case U32:
+ if ((u32)x == U32_MAX)
+ return snappendf(sb, "U32_MAX");
+ else if ((u32)x >= U32_MAX - 256)
+ return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x);
+ else
+ return snappendf(sb, "%#x", (u32)x);
+ case S64:
+ if ((s64)x == S64_MAX)
+ return snappendf(sb, "S64_MAX");
+ else if ((s64)x >= S64_MAX - 256)
+ return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x);
+ else if ((s64)x == S64_MIN)
+ return snappendf(sb, "S64_MIN");
+ else if ((s64)x <= S64_MIN + 256)
+ return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN);
+ else
+ return snappendf(sb, "%#llx", (s64)x);
+ case S32:
+ if ((s32)x == S32_MAX)
+ return snappendf(sb, "S32_MAX");
+ else if ((s32)x >= S32_MAX - 256)
+ return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x);
+ else if ((s32)x == S32_MIN)
+ return snappendf(sb, "S32_MIN");
+ else if ((s32)x <= S32_MIN + 256)
+ return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN);
+ else
+ return snappendf(sb, "%#x", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ }
+}
+
+/* ===================================
+ * GENERIC RANGE STRUCT AND OPERATIONS
+ * ===================================
+ */
+struct range {
+ u64 a, b;
+};
+
+static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x)
+{
+ if (x.a == x.b)
+ return snprintf_num(t, sb, x.a);
+
+ snappendf(sb, "[");
+ snprintf_num(t, sb, x.a);
+ snappendf(sb, "; ");
+ snprintf_num(t, sb, x.b);
+ snappendf(sb, "]");
+}
+
+static void print_range(enum num_t t, struct range x, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 128);
+
+ snprintf_range(t, sb, x);
+ printf("%s%s", sb->buf, sfx);
+}
+
+static const struct range unkn[] = {
+ [U64] = { 0, U64_MAX },
+ [U32] = { 0, U32_MAX },
+ [S64] = { (u64)S64_MIN, (u64)S64_MAX },
+ [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX },
+};
+
+static struct range unkn_subreg(enum num_t t)
+{
+ switch (t) {
+ case U64: return unkn[U32];
+ case U32: return unkn[U32];
+ case S64: return unkn[U32];
+ case S32: return unkn[S32];
+ default: printf("unkn_subreg!\n"); exit(1);
+ }
+}
+
+static struct range range(enum num_t t, u64 a, u64 b)
+{
+ switch (t) {
+ case U64: return (struct range){ (u64)a, (u64)b };
+ case U32: return (struct range){ (u32)a, (u32)b };
+ case S64: return (struct range){ (s64)a, (s64)b };
+ case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b };
+ default: printf("range!\n"); exit(1);
+ }
+}
+
+static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; }
+static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; }
+static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); }
+static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; }
+
+static bool range_eq(struct range x, struct range y)
+{
+ return x.a == y.a && x.b == y.b;
+}
+
+static struct range range_cast_to_s32(struct range x)
+{
+ u64 a = x.a, b = x.b;
+
+ /* if upper 32 bits are constant, lower 32 bits should form a proper
+ * s32 range to be correct
+ */
+ if (upper32(a) == upper32(b) && (s32)a <= (s32)b)
+ return range(S32, a, b);
+
+ /* Special case where upper bits form a small sequence of two
+ * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
+ * 0x00000000 is also valid), while lower bits form a proper s32 range
+ * going from negative numbers to positive numbers.
+ *
+ * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating
+ * over full 64-bit numbers range will form a proper [-16, 16]
+ * ([0xffffff00; 0x00000010]) range in its lower 32 bits.
+ */
+ if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0)
+ return range(S32, a, b);
+
+ /* otherwise we can't derive much meaningful information */
+ return unkn[S32];
+}
+
+static struct range range_cast_u64(enum num_t to_t, struct range x)
+{
+ u64 a = (u64)x.a, b = (u64)x.b;
+
+ switch (to_t) {
+ case U64:
+ return x;
+ case U32:
+ if (upper32(a) != upper32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ if (sign64(a) != sign64(b))
+ return unkn[S64];
+ return range(S64, a, b);
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_u64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s64(enum num_t to_t, struct range x)
+{
+ s64 a = (s64)x.a, b = (s64)x.b;
+
+ switch (to_t) {
+ case U64:
+ /* equivalent to (s64)a <= (s64)b check */
+ if (sign64(a) != sign64(b))
+ return unkn[U64];
+ return range(U64, a, b);
+ case U32:
+ if (upper32(a) != upper32(b) || sign32(a) != sign32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ return x;
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_s64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_u32(enum num_t to_t, struct range x)
+{
+ u32 a = (u32)x.a, b = (u32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case S64:
+ /* u32 is always a valid zero-extended u64/s64 */
+ return range(to_t, a, b);
+ case U32:
+ return x;
+ case S32:
+ return range_cast_to_s32(range(U32, a, b));
+ default: printf("range_cast_u32!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s32(enum num_t to_t, struct range x)
+{
+ s32 a = (s32)x.a, b = (s32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case U32:
+ case S64:
+ if (sign32(a) != sign32(b))
+ return unkn[to_t];
+ return range(to_t, a, b);
+ case S32:
+ return x;
+ default: printf("range_cast_s32!\n"); exit(1);
+ }
+}
+
+/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving
+ * all possible information. Worst case, it will be unknown range within
+ * *to_t* domain, if nothing more specific can be guaranteed during the
+ * conversion
+ */
+static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from)
+{
+ switch (from_t) {
+ case U64: return range_cast_u64(to_t, from);
+ case U32: return range_cast_u32(to_t, from);
+ case S64: return range_cast_s64(to_t, from);
+ case S32: return range_cast_s32(to_t, from);
+ default: printf("range_cast!\n"); exit(1);
+ }
+}
+
+static bool is_valid_num(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return true;
+ case U32: return upper32(x) == 0;
+ case S64: return true;
+ case S32: return upper32(x) == 0;
+ default: printf("is_valid_num!\n"); exit(1);
+ }
+}
+
+static bool is_valid_range(enum num_t t, struct range x)
+{
+ if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b))
+ return false;
+
+ switch (t) {
+ case U64: return (u64)x.a <= (u64)x.b;
+ case U32: return (u32)x.a <= (u32)x.b;
+ case S64: return (s64)x.a <= (s64)x.b;
+ case S32: return (s32)x.a <= (s32)x.b;
+ default: printf("is_valid_range!\n"); exit(1);
+ }
+}
+
+static struct range range_improve(enum num_t t, struct range old, struct range new)
+{
+ return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b));
+}
+
+static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y)
+{
+ struct range y_cast;
+
+ y_cast = range_cast(y_t, x_t, y);
+
+ /* the case when new range knowledge, *y*, is a 32-bit subregister
+ * range, while previous range knowledge, *x*, is a full register
+ * 64-bit range, needs special treatment to take into account upper 32
+ * bits of full register range
+ */
+ if (t_is_32(y_t) && !t_is_32(x_t)) {
+ struct range x_swap;
+
+ /* some combinations of upper 32 bits and sign bit can lead to
+ * invalid ranges, in such cases it's easier to detect them
+ * after cast/swap than try to enumerate all the conditions
+ * under which transformation and knowledge transfer is valid
+ */
+ x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b));
+ if (!is_valid_range(x_t, x_swap))
+ return x;
+ return range_improve(x_t, x, x_swap);
+ }
+
+ /* otherwise, plain range cast and intersection works */
+ return range_improve(x_t, x, y_cast);
+}
+
+/* =======================
+ * GENERIC CONDITIONAL OPS
+ * =======================
+ */
+enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE };
+
+static enum op complement_op(enum op op)
+{
+ switch (op) {
+ case OP_LT: return OP_GE;
+ case OP_LE: return OP_GT;
+ case OP_GT: return OP_LE;
+ case OP_GE: return OP_LT;
+ case OP_EQ: return OP_NE;
+ case OP_NE: return OP_EQ;
+ default: printf("complement_op!\n"); exit(1);
+ }
+}
+
+static const char *op_str(enum op op)
+{
+ switch (op) {
+ case OP_LT: return "<";
+ case OP_LE: return "<=";
+ case OP_GT: return ">";
+ case OP_GE: return ">=";
+ case OP_EQ: return "==";
+ case OP_NE: return "!=";
+ default: printf("op_str!\n"); exit(1);
+ }
+}
+
+/* Can register with range [x.a, x.b] *EVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+#define range_canbe(T) do { \
+ switch (op) { \
+ case OP_LT: return (T)x.a < (T)y.b; \
+ case OP_LE: return (T)x.a <= (T)y.b; \
+ case OP_GT: return (T)x.b > (T)y.a; \
+ case OP_GE: return (T)x.b >= (T)y.a; \
+ case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \
+ case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \
+ default: printf("range_canbe op %d\n", op); exit(1); \
+ } \
+} while (0)
+
+ switch (t) {
+ case U64: { range_canbe(u64); }
+ case U32: { range_canbe(u32); }
+ case S64: { range_canbe(s64); }
+ case S32: { range_canbe(s32); }
+ default: printf("range_canbe!\n"); exit(1);
+ }
+#undef range_canbe
+}
+
+/* Does register with range [x.a, x.b] *ALWAYS* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ /* always op <=> ! canbe complement(op) */
+ return !range_canbe_op(t, x, y, complement_op(op));
+}
+
+/* Does register with range [x.a, x.b] *NEVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ return !range_canbe_op(t, x, y, op);
+}
+
+/* similar to verifier's is_branch_taken():
+ * 1 - always taken;
+ * 0 - never taken,
+ * -1 - unsure.
+ */
+static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ if (range_always_op(t, x, y, op))
+ return 1;
+ if (range_never_op(t, x, y, op))
+ return 0;
+ return -1;
+}
+
+/* What would be the new estimates for register x and y ranges assuming truthful
+ * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy.
+ *
+ * We assume "interesting" cases where ranges overlap. Cases where it's
+ * obvious that (x OP y) is either always true or false should be filtered with
+ * range_never and range_always checks.
+ */
+static void range_cond(enum num_t t, struct range x, struct range y,
+ enum op op, struct range *newx, struct range *newy)
+{
+ if (!range_canbe_op(t, x, y, op)) {
+ /* nothing to adjust, can't happen, return original values */
+ *newx = x;
+ *newy = y;
+ return;
+ }
+ switch (op) {
+ case OP_LT:
+ *newx = range(t, x.a, min_t(t, x.b, y.b - 1));
+ *newy = range(t, max_t(t, x.a + 1, y.a), y.b);
+ break;
+ case OP_LE:
+ *newx = range(t, x.a, min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), y.b);
+ break;
+ case OP_GT:
+ *newx = range(t, max_t(t, x.a, y.a + 1), x.b);
+ *newy = range(t, y.a, min_t(t, x.b - 1, y.b));
+ break;
+ case OP_GE:
+ *newx = range(t, max_t(t, x.a, y.a), x.b);
+ *newy = range(t, y.a, min_t(t, x.b, y.b));
+ break;
+ case OP_EQ:
+ *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ break;
+ case OP_NE:
+ /* generic case, can't derive more information */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b);
+ break;
+
+ /* below extended logic is not supported by verifier just yet */
+ if (x.a == x.b && x.a == y.a) {
+ /* X is a constant matching left side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a + 1, y.b);
+ } else if (x.a == x.b && x.b == y.b) {
+ /* X is a constant matching rigth side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b - 1);
+ } else if (y.a == y.b && x.a == y.a) {
+ /* Y is a constant matching left side of X */
+ *newx = range(t, x.a + 1, x.b);
+ *newy = range(t, y.a, y.b);
+ } else if (y.a == y.b && x.b == y.b) {
+ /* Y is a constant matching rigth side of X */
+ *newx = range(t, x.a, x.b - 1);
+ *newy = range(t, y.a, y.b);
+ } else {
+ /* generic case, can't derive more information */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b);
+ }
+
+ break;
+ default:
+ break;
+ }
+}
+
+/* =======================
+ * REGISTER STATE HANDLING
+ * =======================
+ */
+struct reg_state {
+ struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */
+ bool valid;
+};
+
+static void print_reg_state(struct reg_state *r, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 512);
+ enum num_t t;
+ int cnt = 0;
+
+ if (!r->valid) {
+ printf("<not found>%s", sfx);
+ return;
+ }
+
+ snappendf(sb, "scalar(");
+ for (t = first_t; t <= last_t; t++) {
+ snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t));
+ snprintf_range(t, sb, r->r[t]);
+ }
+ snappendf(sb, ")");
+
+ printf("%s%s", sb->buf, sfx);
+}
+
+static void print_refinement(enum num_t s_t, struct range src,
+ enum num_t d_t, struct range old, struct range new,
+ const char *ctx)
+{
+ printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t));
+ print_range(s_t, src, "");
+ printf(" (%s)DST_OLD=", t_str(d_t));
+ print_range(d_t, old, "");
+ printf(" (%s)DST_NEW=", t_str(d_t));
+ print_range(d_t, new, "\n");
+}
+
+static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx)
+{
+ enum num_t d_t, s_t;
+ struct range old;
+ bool keep_going = false;
+
+again:
+ /* try to derive new knowledge from just learned range x of type t */
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], t, x);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(t, x, d_t, old, r->r[d_t], ctx);
+ }
+ }
+
+ /* now see if we can derive anything new from updated reg_state's ranges */
+ for (s_t = first_t; s_t <= last_t; s_t++) {
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx);
+ }
+ }
+ }
+
+ /* keep refining until we converge */
+ if (keep_going) {
+ keep_going = false;
+ goto again;
+ }
+}
+
+static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val)
+{
+ enum num_t tt;
+
+ rs->valid = true;
+ for (tt = first_t; tt <= last_t; tt++)
+ rs->r[tt] = tt == t ? range(t, val, val) : unkn[tt];
+
+ reg_state_refine(rs, t, rs->r[t], "CONST");
+}
+
+static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op,
+ struct reg_state *newx, struct reg_state *newy, const char *ctx)
+{
+ char buf[32];
+ enum num_t ts[2];
+ struct reg_state xx = *x, yy = *y;
+ int i, t_cnt;
+ struct range z1, z2;
+
+ if (op == OP_EQ || op == OP_NE) {
+ /* OP_EQ and OP_NE are sign-agnostic, so we need to process
+ * both signed and unsigned domains at the same time
+ */
+ ts[0] = t_unsigned(t);
+ ts[1] = t_signed(t);
+ t_cnt = 2;
+ } else {
+ ts[0] = t;
+ t_cnt = 1;
+ }
+
+ for (i = 0; i < t_cnt; i++) {
+ t = ts[i];
+ z1 = x->r[t];
+ z2 = y->r[t];
+
+ range_cond(t, z1, z2, op, &z1, &z2);
+
+ if (newx) {
+ snprintf(buf, sizeof(buf), "%s R1", ctx);
+ reg_state_refine(&xx, t, z1, buf);
+ }
+ if (newy) {
+ snprintf(buf, sizeof(buf), "%s R2", ctx);
+ reg_state_refine(&yy, t, z2, buf);
+ }
+ }
+
+ if (newx)
+ *newx = xx;
+ if (newy)
+ *newy = yy;
+}
+
+static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y,
+ enum op op)
+{
+ if (op == OP_EQ || op == OP_NE) {
+ /* OP_EQ and OP_NE are sign-agnostic */
+ enum num_t tu = t_unsigned(t);
+ enum num_t ts = t_signed(t);
+ int br_u, br_s, br;
+
+ br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op);
+ br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op);
+
+ if (br_u >= 0 && br_s >= 0 && br_u != br_s)
+ ASSERT_FALSE(true, "branch taken inconsistency!\n");
+
+ /* if 64-bit ranges are indecisive, use 32-bit subranges to
+ * eliminate always/never taken branches, if possible
+ */
+ if (br_u == -1 && (t == U64 || t == S64)) {
+ br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op);
+ /* we can only reject for OP_EQ, never take branch
+ * based on lower 32 bits
+ */
+ if (op == OP_EQ && br == 0)
+ return 0;
+ /* for OP_NEQ we can be conclusive only if lower 32 bits
+ * differ and thus inequality branch is always taken
+ */
+ if (op == OP_NE && br == 1)
+ return 1;
+
+ br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op);
+ if (op == OP_EQ && br == 0)
+ return 0;
+ if (op == OP_NE && br == 1)
+ return 1;
+ }
+
+ return br_u >= 0 ? br_u : br_s;
+ }
+ return range_branch_taken_op(t, x->r[t], y->r[t], op);
+}
+
+/* =====================================
+ * BPF PROGS GENERATION AND VERIFICATION
+ * =====================================
+ */
+struct case_spec {
+ /* whether to init full register (r1) or sub-register (w1) */
+ bool init_subregs;
+ /* whether to establish initial value range on full register (r1) or
+ * sub-register (w1)
+ */
+ bool setup_subregs;
+ /* whether to establish initial value range using signed or unsigned
+ * comparisons (i.e., initialize umin/umax or smin/smax directly)
+ */
+ bool setup_signed;
+ /* whether to perform comparison on full registers or sub-registers */
+ bool compare_subregs;
+ /* whether to perform comparison using signed or unsigned operations */
+ bool compare_signed;
+};
+
+/* Generate test BPF program based on provided test ranges, operation, and
+ * specifications about register bitness and signedness.
+ */
+static int load_range_cmp_prog(struct range x, struct range y, enum op op,
+ int branch_taken, struct case_spec spec,
+ char *log_buf, size_t log_sz,
+ int *false_pos, int *true_pos)
+{
+#define emit(insn) ({ \
+ struct bpf_insn __insns[] = { insn }; \
+ int __i; \
+ for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \
+ insns[cur_pos + __i] = __insns[__i]; \
+ cur_pos += __i; \
+})
+#define JMP_TO(target) (target - cur_pos - 1)
+ int cur_pos = 0, exit_pos, fd, op_code;
+ struct bpf_insn insns[64];
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_level = 2,
+ .log_buf = log_buf,
+ .log_size = log_sz,
+ .prog_flags = BPF_F_TEST_REG_INVARIANTS,
+ );
+
+ /* ; skip exit block below
+ * goto +2;
+ */
+ emit(BPF_JMP_A(2));
+ exit_pos = cur_pos;
+ /* ; exit block for all the preparatory conditionals
+ * out:
+ * r0 = 0;
+ * exit;
+ */
+ emit(BPF_MOV64_IMM(BPF_REG_0, 0));
+ emit(BPF_EXIT_INSN());
+ /*
+ * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value
+ * call bpf_get_current_pid_tgid;
+ * r6 = r0; | w6 = w0;
+ * call bpf_get_current_pid_tgid;
+ * r7 = r0; | w7 = w0;
+ */
+ emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+ if (spec.init_subregs)
+ emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0));
+ else
+ emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0));
+ emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+ if (spec.init_subregs)
+ emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0));
+ else
+ emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ /* ; setup initial r6/w6 possible value range ([x.a, x.b])
+ * r1 = %[x.a] ll; | w1 = %[x.a];
+ * r2 = %[x.b] ll; | w2 = %[x.b];
+ * if r6 < r1 goto out; | if w6 < w1 goto out;
+ * if r6 > r2 goto out; | if w6 > w2 goto out;
+ */
+ if (spec.setup_subregs) {
+ emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a));
+ emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+ } else {
+ emit(BPF_LD_IMM64(BPF_REG_1, x.a));
+ emit(BPF_LD_IMM64(BPF_REG_2, x.b));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+ }
+ /* ; setup initial r7/w7 possible value range ([y.a, y.b])
+ * r1 = %[y.a] ll; | w1 = %[y.a];
+ * r2 = %[y.b] ll; | w2 = %[y.b];
+ * if r7 < r1 goto out; | if w7 < w1 goto out;
+ * if r7 > r2 goto out; | if w7 > w2 goto out;
+ */
+ if (spec.setup_subregs) {
+ emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a));
+ emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+ } else {
+ emit(BPF_LD_IMM64(BPF_REG_1, y.a));
+ emit(BPF_LD_IMM64(BPF_REG_2, y.b));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+ }
+ /* ; range test instruction
+ * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3;
+ */
+ switch (op) {
+ case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break;
+ case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break;
+ case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break;
+ case OP_GE: op_code = spec.compare_signed ? BPF_JSGE : BPF_JGE; break;
+ case OP_EQ: op_code = BPF_JEQ; break;
+ case OP_NE: op_code = BPF_JNE; break;
+ default:
+ printf("unrecognized op %d\n", op);
+ return -ENOTSUP;
+ }
+ /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * ; this is used for debugging, as verifier doesn't always print
+ * ; registers states as of condition jump instruction (e.g., when
+ * ; precision marking happens)
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ */
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (spec.compare_subregs)
+ emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+ else
+ emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+ /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ * exit;
+ */
+ *false_pos = cur_pos;
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (branch_taken == 1) /* false branch is never taken */
+ emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+ else
+ emit(BPF_EXIT_INSN());
+ /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ * exit;
+ */
+ *true_pos = cur_pos;
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (branch_taken == 0) /* true branch is never taken */
+ emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+ emit(BPF_EXIT_INSN()); /* last instruction has to be exit */
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test",
+ "GPL", insns, cur_pos, &opts);
+ if (fd < 0)
+ return fd;
+
+ close(fd);
+ return 0;
+#undef emit
+#undef JMP_TO
+}
+
+#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0)
+
+/* Parse register state from verifier log.
+ * `s` should point to the start of "Rx = ..." substring in the verifier log.
+ */
+static int parse_reg_state(const char *s, struct reg_state *reg)
+{
+ /* There are two generic forms for SCALAR register:
+ * - known constant: R6_rwD=P%lld
+ * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated
+ * list of optional range specifiers:
+ * - umin=%llu, if missing, assumed 0;
+ * - umax=%llu, if missing, assumed U64_MAX;
+ * - smin=%lld, if missing, assumed S64_MIN;
+ * - smax=%lld, if missing, assummed S64_MAX;
+ * - umin32=%d, if missing, assumed 0;
+ * - umax32=%d, if missing, assumed U32_MAX;
+ * - smin32=%d, if missing, assumed S32_MIN;
+ * - smax32=%d, if missing, assummed S32_MAX;
+ * - var_off=(%#llx; %#llx), tnum part, we don't care about it.
+ *
+ * If some of the values are equal, they will be grouped (but min/max
+ * are not mixed together, and similarly negative values are not
+ * grouped with non-negative ones). E.g.:
+ *
+ * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000)
+ *
+ * _rwD part is optional (and any of the letters can be missing).
+ * P (precision mark) is optional as well.
+ *
+ * Anything inside scalar() is optional, including id, of course.
+ */
+ struct {
+ const char *pfx;
+ u64 *dst, def;
+ bool is_32, is_set;
+ } *f, fields[8] = {
+ {"smin=", &reg->r[S64].a, S64_MIN},
+ {"smax=", &reg->r[S64].b, S64_MAX},
+ {"umin=", &reg->r[U64].a, 0},
+ {"umax=", &reg->r[U64].b, U64_MAX},
+ {"smin32=", &reg->r[S32].a, (u32)S32_MIN, true},
+ {"smax32=", &reg->r[S32].b, (u32)S32_MAX, true},
+ {"umin32=", &reg->r[U32].a, 0, true},
+ {"umax32=", &reg->r[U32].b, U32_MAX, true},
+ };
+ const char *p;
+ int i;
+
+ p = strchr(s, '=');
+ if (!p)
+ return -EINVAL;
+ p++;
+ if (*p == 'P')
+ p++;
+
+ if (!str_has_pfx(p, "scalar(")) {
+ long long sval;
+ enum num_t t;
+
+ if (p[0] == '0' && p[1] == 'x') {
+ if (sscanf(p, "%llx", &sval) != 1)
+ return -EINVAL;
+ } else {
+ if (sscanf(p, "%lld", &sval) != 1)
+ return -EINVAL;
+ }
+
+ reg->valid = true;
+ for (t = first_t; t <= last_t; t++) {
+ reg->r[t] = range(t, sval, sval);
+ }
+ return 0;
+ }
+
+ p += sizeof("scalar");
+ while (p) {
+ int midxs[ARRAY_SIZE(fields)], mcnt = 0;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ if (!str_has_pfx(p, f->pfx))
+ continue;
+ midxs[mcnt++] = i;
+ p += strlen(f->pfx);
+ }
+
+ if (mcnt) {
+ /* populate all matched fields */
+ if (p[0] == '0' && p[1] == 'x') {
+ if (sscanf(p, "%llx", &val) != 1)
+ return -EINVAL;
+ } else {
+ if (sscanf(p, "%lld", &val) != 1)
+ return -EINVAL;
+ }
+
+ for (i = 0; i < mcnt; i++) {
+ f = &fields[midxs[i]];
+ f->is_set = true;
+ *f->dst = f->is_32 ? (u64)(u32)val : val;
+ }
+ } else if (str_has_pfx(p, "var_off")) {
+ /* skip "var_off=(0x0; 0x3f)" part completely */
+ p = strchr(p, ')');
+ if (!p)
+ return -EINVAL;
+ p++;
+ }
+
+ p = strpbrk(p, ",)");
+ if (*p == ')')
+ break;
+ if (p)
+ p++;
+ }
+
+ reg->valid = true;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ if (!f->is_set)
+ *f->dst = f->def;
+ }
+
+ return 0;
+}
+
+
+/* Parse all register states (TRUE/FALSE branches and DST/SRC registers)
+ * out of the verifier log for a corresponding test case BPF program.
+ */
+static int parse_range_cmp_log(const char *log_buf, struct case_spec spec,
+ int false_pos, int true_pos,
+ struct reg_state *false1_reg, struct reg_state *false2_reg,
+ struct reg_state *true1_reg, struct reg_state *true2_reg)
+{
+ struct {
+ int insn_idx;
+ int reg_idx;
+ const char *reg_upper;
+ struct reg_state *state;
+ } specs[] = {
+ {false_pos, 6, "R6=", false1_reg},
+ {false_pos + 1, 7, "R7=", false2_reg},
+ {true_pos, 6, "R6=", true1_reg},
+ {true_pos + 1, 7, "R7=", true2_reg},
+ };
+ char buf[32];
+ const char *p = log_buf, *q;
+ int i, err;
+
+ for (i = 0; i < 4; i++) {
+ sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx,
+ spec.compare_subregs ? "bc" : "bf",
+ spec.compare_subregs ? "w0" : "r0",
+ spec.compare_subregs ? "w" : "r", specs[i].reg_idx);
+
+ q = strstr(p, buf);
+ if (!q) {
+ *specs[i].state = (struct reg_state){.valid = false};
+ continue;
+ }
+ p = strstr(q, specs[i].reg_upper);
+ if (!p)
+ return -EINVAL;
+ err = parse_reg_state(p, specs[i].state);
+ if (err)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Validate ranges match, and print details if they don't */
+static bool assert_range_eq(enum num_t t, struct range x, struct range y,
+ const char *ctx1, const char *ctx2)
+{
+ DEFINE_STRBUF(sb, 512);
+
+ if (range_eq(x, y))
+ return true;
+
+ snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2);
+ snprintf_range(t, sb, x);
+ snappendf(sb, " != ");
+ snprintf_range(t, sb, y);
+
+ printf("%s\n", sb->buf);
+
+ return false;
+}
+
+/* Validate that register states match, and print details if they don't */
+static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx)
+{
+ bool ok = true;
+ enum num_t t;
+
+ if (r->valid != e->valid) {
+ printf("MISMATCH %s: actual %s != expected %s\n", ctx,
+ r->valid ? "<valid>" : "<invalid>",
+ e->valid ? "<valid>" : "<invalid>");
+ return false;
+ }
+
+ if (!r->valid)
+ return true;
+
+ for (t = first_t; t <= last_t; t++) {
+ if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t)))
+ ok = false;
+ }
+
+ return ok;
+}
+
+/* Printf verifier log, filtering out irrelevant noise */
+static void print_verifier_log(const char *buf)
+{
+ const char *p;
+
+ while (buf[0]) {
+ p = strchrnul(buf, '\n');
+
+ /* filter out irrelevant precision backtracking logs */
+ if (str_has_pfx(buf, "mark_precise: "))
+ goto skip_line;
+
+ printf("%.*s\n", (int)(p - buf), buf);
+
+skip_line:
+ buf = *p == '\0' ? p : p + 1;
+ }
+}
+
+/* Simulate provided test case purely with our own range-based logic.
+ * This is done to set up expectations for verifier's branch_taken logic and
+ * verifier's register states in the verifier log.
+ */
+static void sim_case(enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, enum op op,
+ struct reg_state *fr1, struct reg_state *fr2,
+ struct reg_state *tr1, struct reg_state *tr2,
+ int *branch_taken)
+{
+ const u64 A = x.a;
+ const u64 B = x.b;
+ const u64 C = y.a;
+ const u64 D = y.b;
+ struct reg_state rc;
+ enum op rev_op = complement_op(op);
+ enum num_t t;
+
+ fr1->valid = fr2->valid = true;
+ tr1->valid = tr2->valid = true;
+ for (t = first_t; t <= last_t; t++) {
+ /* if we are initializing using 32-bit subregisters,
+ * full registers get upper 32 bits zeroed automatically
+ */
+ struct range z = t_is_32(init_t) ? unkn_subreg(t) : unkn[t];
+
+ fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z;
+ }
+
+ /* step 1: r1 >= A, r2 >= C */
+ reg_state_set_const(&rc, init_t, A);
+ reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A");
+ reg_state_set_const(&rc, init_t, C);
+ reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C");
+ *tr1 = *fr1;
+ *tr2 = *fr2;
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+ printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+ }
+
+ /* step 2: r1 <= B, r2 <= D */
+ reg_state_set_const(&rc, init_t, B);
+ reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B");
+ reg_state_set_const(&rc, init_t, D);
+ reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D");
+ *tr1 = *fr1;
+ *tr2 = *fr2;
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+ printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+ }
+
+ /* step 3: r1 <op> r2 */
+ *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op);
+ fr1->valid = fr2->valid = false;
+ tr1->valid = tr2->valid = false;
+ if (*branch_taken != 1) { /* FALSE is possible */
+ fr1->valid = fr2->valid = true;
+ reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE");
+ }
+ if (*branch_taken != 0) { /* TRUE is possible */
+ tr1->valid = tr2->valid = true;
+ reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE");
+ }
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n");
+ printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n");
+ printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n");
+ printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n");
+ }
+}
+
+/* ===============================
+ * HIGH-LEVEL TEST CASE VALIDATION
+ * ===============================
+ */
+static u32 upper_seeds[] = {
+ 0,
+ 1,
+ U32_MAX,
+ U32_MAX - 1,
+ S32_MAX,
+ (u32)S32_MIN,
+};
+
+static u32 lower_seeds[] = {
+ 0,
+ 1,
+ 2, (u32)-2,
+ 255, (u32)-255,
+ UINT_MAX,
+ UINT_MAX - 1,
+ INT_MAX,
+ (u32)INT_MIN,
+};
+
+struct ctx {
+ int val_cnt, subval_cnt, range_cnt, subrange_cnt;
+ u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+ s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+ u32 usubvals[ARRAY_SIZE(lower_seeds)];
+ s32 ssubvals[ARRAY_SIZE(lower_seeds)];
+ struct range *uranges, *sranges;
+ struct range *usubranges, *ssubranges;
+ int max_failure_cnt, cur_failure_cnt;
+ int total_case_cnt, case_cnt;
+ int rand_case_cnt;
+ unsigned rand_seed;
+ __u64 start_ns;
+ char progress_ctx[64];
+};
+
+static void cleanup_ctx(struct ctx *ctx)
+{
+ free(ctx->uranges);
+ free(ctx->sranges);
+ free(ctx->usubranges);
+ free(ctx->ssubranges);
+}
+
+struct subtest_case {
+ enum num_t init_t;
+ enum num_t cond_t;
+ struct range x;
+ struct range y;
+ enum op op;
+};
+
+static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op)
+{
+ snappendf(sb, "(%s)", t_str(t->init_t));
+ snprintf_range(t->init_t, sb, t->x);
+ snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? op_str(t->op) : "<op>");
+ snprintf_range(t->init_t, sb, t->y);
+}
+
+/* Generate and validate test case based on specific combination of setup
+ * register ranges (including their expected num_t domain), and conditional
+ * operation to perform (including num_t domain in which it has to be
+ * performed)
+ */
+static int verify_case_op(enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, enum op op)
+{
+ char log_buf[256 * 1024];
+ size_t log_sz = sizeof(log_buf);
+ int err, false_pos = 0, true_pos = 0, branch_taken;
+ struct reg_state fr1, fr2, tr1, tr2;
+ struct reg_state fe1, fe2, te1, te2;
+ bool failed = false;
+ struct case_spec spec = {
+ .init_subregs = (init_t == U32 || init_t == S32),
+ .setup_subregs = (init_t == U32 || init_t == S32),
+ .setup_signed = (init_t == S64 || init_t == S32),
+ .compare_subregs = (cond_t == U32 || cond_t == S32),
+ .compare_signed = (cond_t == S64 || cond_t == S32),
+ };
+
+ log_buf[0] = '\0';
+
+ sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken);
+
+ err = load_range_cmp_prog(x, y, op, branch_taken, spec,
+ log_buf, log_sz, &false_pos, &true_pos);
+ if (err) {
+ ASSERT_OK(err, "load_range_cmp_prog");
+ failed = true;
+ }
+
+ err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos,
+ &fr1, &fr2, &tr1, &tr2);
+ if (err) {
+ ASSERT_OK(err, "parse_range_cmp_log");
+ failed = true;
+ }
+
+ if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") ||
+ !assert_reg_state_eq(&fr2, &fe2, "false_reg2") ||
+ !assert_reg_state_eq(&tr1, &te1, "true_reg1") ||
+ !assert_reg_state_eq(&tr2, &te2, "true_reg2")) {
+ failed = true;
+ }
+
+ if (failed || env.verbosity >= VERBOSE_NORMAL) {
+ if (failed || env.verbosity >= VERBOSE_VERY) {
+ printf("VERIFIER LOG:\n========================\n");
+ print_verifier_log(log_buf);
+ printf("=====================\n");
+ }
+ printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n");
+ printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n");
+ printf("ACTUAL FALSE2: "); print_reg_state(&fr2, "\n");
+ printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n");
+ printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n");
+ printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n");
+ printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n");
+ printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n");
+
+ return failed ? -EINVAL : 0;
+ }
+
+ return 0;
+}
+
+/* Given setup ranges and number types, go over all supported operations,
+ * generating individual subtest for each allowed combination
+ */
+static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, bool is_subtest)
+{
+ DEFINE_STRBUF(sb, 256);
+ int err;
+ struct subtest_case sub = {
+ .init_t = init_t,
+ .cond_t = cond_t,
+ .x = x,
+ .y = y,
+ };
+
+ sb->pos = 0; /* reset position in strbuf */
+ subtest_case_str(sb, &sub, false /* ignore op */);
+ if (is_subtest && !test__start_subtest(sb->buf))
+ return 0;
+
+ for (sub.op = first_op; sub.op <= last_op; sub.op++) {
+ sb->pos = 0; /* reset position in strbuf */
+ subtest_case_str(sb, &sub, true /* print op */);
+
+ if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */
+ printf("TEST CASE: %s\n", sb->buf);
+
+ err = verify_case_op(init_t, cond_t, x, y, sub.op);
+ if (err || env.verbosity >= VERBOSE_NORMAL)
+ ASSERT_OK(err, sb->buf);
+ if (err) {
+ ctx->cur_failure_cnt++;
+ if (ctx->cur_failure_cnt > ctx->max_failure_cnt)
+ return err;
+ return 0; /* keep testing other cases */
+ }
+ ctx->case_cnt++;
+ if ((ctx->case_cnt % 10000) == 0) {
+ double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt;
+ u64 elapsed_ns = get_time_ns() - ctx->start_ns;
+ double remain_ns = elapsed_ns / progress * (1 - progress);
+
+ fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), "
+ "elapsed %llu mins (%.2lf hrs), "
+ "ETA %.0lf mins (%.2lf hrs)\n",
+ ctx->progress_ctx,
+ ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress,
+ elapsed_ns / 1000000000 / 60,
+ elapsed_ns / 1000000000.0 / 3600,
+ remain_ns / 1000000000.0 / 60,
+ remain_ns / 1000000000.0 / 3600);
+ }
+ }
+
+ return 0;
+}
+
+static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y)
+{
+ return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */);
+}
+
+/* ================================
+ * GENERATED CASES FROM SEED VALUES
+ * ================================
+ */
+static int u64_cmp(const void *p1, const void *p2)
+{
+ u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int u32_cmp(const void *p1, const void *p2)
+{
+ u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s64_cmp(const void *p1, const void *p2)
+{
+ s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s32_cmp(const void *p1, const void *p2)
+{
+ s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+/* Generate valid unique constants from seeds, both signed and unsigned */
+static void gen_vals(struct ctx *ctx)
+{
+ int i, j, cnt = 0;
+
+ for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) {
+ for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) {
+ ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j];
+ }
+ }
+
+ /* sort and compact uvals (i.e., it's `sort | uniq`) */
+ qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp);
+ for (i = 1, j = 0; i < cnt; i++) {
+ if (ctx->uvals[j] == ctx->uvals[i])
+ continue;
+ j++;
+ ctx->uvals[j] = ctx->uvals[i];
+ }
+ ctx->val_cnt = j + 1;
+
+ /* we have exactly the same number of s64 values, they are just in
+ * a different order than u64s, so just sort them differently
+ */
+ for (i = 0; i < ctx->val_cnt; i++)
+ ctx->svals[i] = ctx->uvals[i];
+ qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp);
+
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ for (i = 0; i < ctx->val_cnt; i++) {
+ sb1->pos = sb2->pos = 0;
+ snprintf_num(U64, sb1, ctx->uvals[i]);
+ snprintf_num(S64, sb2, ctx->svals[i]);
+ printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf);
+ }
+ }
+
+ /* 32-bit values are generated separately */
+ cnt = 0;
+ for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) {
+ ctx->usubvals[cnt++] = lower_seeds[i];
+ }
+
+ /* sort and compact usubvals (i.e., it's `sort | uniq`) */
+ qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp);
+ for (i = 1, j = 0; i < cnt; i++) {
+ if (ctx->usubvals[j] == ctx->usubvals[i])
+ continue;
+ j++;
+ ctx->usubvals[j] = ctx->usubvals[i];
+ }
+ ctx->subval_cnt = j + 1;
+
+ for (i = 0; i < ctx->subval_cnt; i++)
+ ctx->ssubvals[i] = ctx->usubvals[i];
+ qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp);
+
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ sb1->pos = sb2->pos = 0;
+ snprintf_num(U32, sb1, ctx->usubvals[i]);
+ snprintf_num(S32, sb2, ctx->ssubvals[i]);
+ printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf);
+ }
+ }
+}
+
+/* Generate valid ranges from upper/lower seeds */
+static int gen_ranges(struct ctx *ctx)
+{
+ int i, j, cnt = 0;
+
+ for (i = 0; i < ctx->val_cnt; i++) {
+ for (j = i; j < ctx->val_cnt; j++) {
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ sb1->pos = sb2->pos = 0;
+ snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j]));
+ snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j]));
+ printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf);
+ }
+ cnt++;
+ }
+ }
+ ctx->range_cnt = cnt;
+
+ ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges));
+ if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc"))
+ return -EINVAL;
+ ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges));
+ if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc"))
+ return -EINVAL;
+
+ cnt = 0;
+ for (i = 0; i < ctx->val_cnt; i++) {
+ for (j = i; j < ctx->val_cnt; j++) {
+ ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]);
+ ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]);
+ cnt++;
+ }
+ }
+
+ cnt = 0;
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ for (j = i; j < ctx->subval_cnt; j++) {
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ sb1->pos = sb2->pos = 0;
+ snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j]));
+ snprintf_range(S32, sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j]));
+ printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf);
+ }
+ cnt++;
+ }
+ }
+ ctx->subrange_cnt = cnt;
+
+ ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges));
+ if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc"))
+ return -EINVAL;
+ ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges));
+ if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc"))
+ return -EINVAL;
+
+ cnt = 0;
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ for (j = i; j < ctx->subval_cnt; j++) {
+ ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]);
+ ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]);
+ cnt++;
+ }
+ }
+
+ return 0;
+}
+
+static int parse_env_vars(struct ctx *ctx)
+{
+ const char *s;
+
+ if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) {
+ errno = 0;
+ ctx->max_failure_cnt = strtol(s, NULL, 10);
+ if (errno || ctx->max_failure_cnt < 0) {
+ ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT");
+ return -EINVAL;
+ }
+ }
+
+ if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) {
+ errno = 0;
+ ctx->rand_case_cnt = strtol(s, NULL, 10);
+ if (errno || ctx->rand_case_cnt < 0) {
+ ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT");
+ return -EINVAL;
+ }
+ }
+
+ if ((s = getenv("REG_BOUNDS_RAND_SEED"))) {
+ errno = 0;
+ ctx->rand_seed = strtoul(s, NULL, 10);
+ if (errno) {
+ ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int prepare_gen_tests(struct ctx *ctx)
+{
+ const char *s;
+ int err;
+
+ if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) {
+ test__skip();
+ return -ENOTSUP;
+ }
+
+ err = parse_env_vars(ctx);
+ if (err)
+ return err;
+
+ gen_vals(ctx);
+ err = gen_ranges(ctx);
+ if (err) {
+ ASSERT_OK(err, "gen_ranges");
+ return err;
+ }
+
+ return 0;
+}
+
+/* Go over generated constants and ranges and validate various supported
+ * combinations of them
+ */
+static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ struct range rconst;
+ const struct range *ranges;
+ const u64 *vals;
+ int i, j;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ ranges = init_t == U64 ? ctx.uranges : ctx.sranges;
+ vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals;
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x CONST, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.val_cnt; i++) {
+ for (j = 0; j < ctx.range_cnt; j++) {
+ rconst = range(init_t, vals[i], vals[i]);
+
+ /* (u64|s64)(<range> x <const>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+ goto cleanup;
+ /* (u64|s64)(<const> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ struct range rconst;
+ const struct range *ranges;
+ const u32 *vals;
+ int i, j;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges;
+ vals = init_t == U32 ? ctx.usubvals : (const u32 *)ctx.ssubvals;
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x CONST, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.subval_cnt; i++) {
+ for (j = 0; j < ctx.subrange_cnt; j++) {
+ rconst = range(init_t, vals[i], vals[i]);
+
+ /* (u32|s32)(<range> x <const>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+ goto cleanup;
+ /* (u32|s32)(<const> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ const struct range *ranges;
+ int i, j, rcnt;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ switch (init_t)
+ {
+ case U64:
+ ranges = ctx.uranges;
+ rcnt = ctx.range_cnt;
+ break;
+ case U32:
+ ranges = ctx.usubranges;
+ rcnt = ctx.subrange_cnt;
+ break;
+ case S64:
+ ranges = ctx.sranges;
+ rcnt = ctx.range_cnt;
+ break;
+ case S32:
+ ranges = ctx.ssubranges;
+ rcnt = ctx.subrange_cnt;
+ break;
+ default:
+ printf("validate_gen_range_vs_range!\n");
+ exit(1);
+ }
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x RANGE, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < rcnt; i++) {
+ for (j = i; j < rcnt; j++) {
+ /* (<range> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j]))
+ goto cleanup;
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+/* Go over thousands of test cases generated from initial seed values.
+ * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If
+ * envvar is not set, this test is skipped during test_progs testing.
+ *
+ * We split this up into smaller subsets based on initialization and
+ * conditiona numeric domains to get an easy parallelization with test_progs'
+ * -j argument.
+ */
+
+/* RANGE x CONST, U64 initial range */
+void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); }
+void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); }
+void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); }
+void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); }
+/* RANGE x CONST, S64 initial range */
+void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); }
+void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); }
+void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); }
+void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); }
+/* RANGE x CONST, U32 initial range */
+void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); }
+void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); }
+void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); }
+void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); }
+/* RANGE x CONST, S32 initial range */
+void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); }
+void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); }
+void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); }
+void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); }
+
+/* RANGE x RANGE, U64 initial range */
+void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); }
+void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); }
+void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); }
+void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); }
+/* RANGE x RANGE, S64 initial range */
+void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); }
+void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); }
+void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); }
+void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); }
+/* RANGE x RANGE, U32 initial range */
+void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); }
+void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); }
+void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); }
+void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); }
+/* RANGE x RANGE, S32 initial range */
+void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); }
+void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); }
+void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); }
+void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); }
+
+#define DEFAULT_RAND_CASE_CNT 100
+
+#define RAND_21BIT_MASK ((1 << 22) - 1)
+
+static u64 rand_u64()
+{
+ /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it
+ * seems to be 1<<31, so we need to call it thrice to get full u64;
+ * we'll use rougly equal split: 22 + 21 + 21 bits
+ */
+ return ((u64)random() << 42) |
+ (((u64)random() & RAND_21BIT_MASK) << 21) |
+ (random() & RAND_21BIT_MASK);
+}
+
+static u64 rand_const(enum num_t t)
+{
+ return cast_t(t, rand_u64());
+}
+
+static struct range rand_range(enum num_t t)
+{
+ u64 x = rand_const(t), y = rand_const(t);
+
+ return range(t, min_t(t, x, y), max_t(t, x, y));
+}
+
+static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range)
+{
+ struct ctx ctx;
+ struct range range1, range2;
+ int err, i;
+ u64 t;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ err = parse_env_vars(&ctx);
+ if (err) {
+ ASSERT_OK(err, "parse_env_vars");
+ return;
+ }
+
+ if (ctx.rand_case_cnt == 0)
+ ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT;
+ if (ctx.rand_seed == 0)
+ ctx.rand_seed = (unsigned)get_time_ns();
+
+ srandom(ctx.rand_seed);
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "[RANDOM SEED %u] RANGE x %s, %s -> %s",
+ ctx.rand_seed, const_range ? "CONST" : "RANGE",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.rand_case_cnt; i++) {
+ range1 = rand_range(init_t);
+ if (const_range) {
+ t = rand_const(init_t);
+ range2 = range(init_t, t, t);
+ } else {
+ range2 = rand_range(init_t);
+ }
+
+ /* <range1> x <range2> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */))
+ goto cleanup;
+ /* <range2> x <range1> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */))
+ goto cleanup;
+ }
+
+cleanup:
+ /* make sure we report random seed for reproducing */
+ ASSERT_TRUE(true, ctx.progress_ctx);
+ cleanup_ctx(&ctx);
+}
+
+/* [RANDOM] RANGE x CONST, U64 initial range */
+void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S64 initial range */
+void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, U32 initial range */
+void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S32 initial range */
+void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); }
+
+/* [RANDOM] RANGE x RANGE, U64 initial range */
+void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S64 initial range */
+void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, U32 initial range */
+void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S32 initial range */
+void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); }
+
+/* A set of hard-coded "interesting" cases to validate as part of normal
+ * test_progs test runs
+ */
+static struct subtest_case crafted_cases[] = {
+ {U64, U64, {0, 0xffffffff}, {0, 0}},
+ {U64, U64, {0, 0x80000000}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}},
+ {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}},
+
+ /* single point overlap, interesting BPF_EQ and BPF_NE interactions */
+ {U64, U64, {0, 1}, {1, 0x80000000}},
+ {U64, S64, {0, 1}, {1, 0x80000000}},
+ {U64, U32, {0, 1}, {1, 0x80000000}},
+ {U64, S32, {0, 1}, {1, 0x80000000}},
+
+ {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0, 0xffffffffULL}, {1, 1}},
+ {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
+
+ {U64, U32, {0, 0x100000000}, {0, 0}},
+ {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}},
+
+ {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}},
+ /* these are tricky cases where lower 32 bits allow to tighten 64
+ * bit boundaries based on tightened lower 32 bit boundaries
+ */
+ {U64, S32, {0, 0x0ffffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x100000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x100000001ULL}, {0, 0}},
+ {U64, S32, {0, 0x180000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x17fffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x180000001ULL}, {0, 0}},
+
+ /* verifier knows about [-1, 0] range for s32 for this case already */
+ {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+ /* but didn't know about these cases initially */
+ {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */
+ {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */
+
+ /* longer convergence case: learning from u64 -> s64 -> u64 -> u32,
+ * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX])
+ */
+ {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+
+ {U32, U32, {1, U32_MAX}, {0, 0}},
+
+ {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}},
+
+ {S32, U64, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)-255, 0}},
+ {S32, S64, {(u32)(s32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}},
+ {S32, S64, {0, 1}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}},
+ {S32, U32, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}, {(u32)(s32)S32_MIN, (u32)(s32)S32_MIN}},
+};
+
+/* Go over crafted hard-coded cases. This is fast, so we do it as part of
+ * normal test_progs run.
+ */
+void test_reg_bounds_crafted(void)
+{
+ struct ctx ctx;
+ int i;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) {
+ struct subtest_case *c = &crafted_cases[i];
+
+ verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y);
+ verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x);
+ }
+
+ cleanup_ctx(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 9e6a5e3ed4de..5a4491d4edfe 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <io_uring/mini_liburing.h>
#include "cgroup_helpers.h"
static char bpf_log_buf[4096];
@@ -38,6 +39,7 @@ static struct sockopt_test {
socklen_t get_optlen_ret;
enum sockopt_test_error error;
+ bool io_uring_support;
} tests[] = {
/* ==================== getsockopt ==================== */
@@ -251,7 +253,9 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .get_level = SOL_SOCKET,
.get_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: deny bigger ctx->optlen",
@@ -276,6 +280,7 @@ static struct sockopt_test {
.get_optlen = 64,
.error = EFAULT_GETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: ignore >PAGE_SIZE optlen",
@@ -318,6 +323,7 @@ static struct sockopt_test {
.get_optval = {}, /* the changes are ignored */
.get_optlen = PAGE_SIZE + 1,
.error = EOPNOTSUPP_GETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: support smaller ctx->optlen",
@@ -337,8 +343,10 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .get_level = SOL_SOCKET,
.get_optlen = 64,
.get_optlen_ret = 32,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: deny writing to ctx->optval",
@@ -518,6 +526,7 @@ static struct sockopt_test {
.set_level = 123,
.set_optlen = 1,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: allow changing ctx->level",
@@ -572,6 +581,7 @@ static struct sockopt_test {
.set_optname = 123,
.set_optlen = 1,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: allow changing ctx->optname",
@@ -624,6 +634,7 @@ static struct sockopt_test {
.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
.set_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: ctx->optlen == -1 is ok",
@@ -640,6 +651,7 @@ static struct sockopt_test {
.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
.set_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: deny ctx->optlen < 0 (except -1)",
@@ -658,6 +670,7 @@ static struct sockopt_test {
.set_optlen = 4,
.error = EFAULT_SETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: deny ctx->optlen > input optlen",
@@ -675,6 +688,7 @@ static struct sockopt_test {
.set_optlen = 64,
.error = EFAULT_SETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: ignore >PAGE_SIZE optlen",
@@ -940,7 +954,89 @@ static int load_prog(const struct bpf_insn *insns,
return fd;
}
-static int run_test(int cgroup_fd, struct sockopt_test *test)
+/* Core function that handles io_uring ring initialization,
+ * sending SQE with sockopt command and waiting for the CQE.
+ */
+static int uring_sockopt(int op, int fd, int level, int optname,
+ const void *optval, socklen_t optlen)
+{
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct io_uring ring;
+ int err;
+
+ err = io_uring_queue_init(1, &ring, 0);
+ if (!ASSERT_OK(err, "io_uring initialization"))
+ return err;
+
+ sqe = io_uring_get_sqe(&ring);
+ if (!ASSERT_NEQ(sqe, NULL, "Get an SQE")) {
+ err = -1;
+ goto fail;
+ }
+
+ io_uring_prep_cmd(sqe, op, fd, level, optname, optval, optlen);
+
+ err = io_uring_submit(&ring);
+ if (!ASSERT_EQ(err, 1, "Submit SQE"))
+ goto fail;
+
+ err = io_uring_wait_cqe(&ring, &cqe);
+ if (!ASSERT_OK(err, "Wait for CQE"))
+ goto fail;
+
+ err = cqe->res;
+
+fail:
+ io_uring_queue_exit(&ring);
+
+ return err;
+}
+
+static int uring_setsockopt(int fd, int level, int optname, const void *optval,
+ socklen_t optlen)
+{
+ return uring_sockopt(SOCKET_URING_OP_SETSOCKOPT, fd, level, optname,
+ optval, optlen);
+}
+
+static int uring_getsockopt(int fd, int level, int optname, void *optval,
+ socklen_t *optlen)
+{
+ int ret = uring_sockopt(SOCKET_URING_OP_GETSOCKOPT, fd, level, optname,
+ optval, *optlen);
+ if (ret < 0)
+ return ret;
+
+ /* Populate optlen back to be compatible with systemcall interface,
+ * and simplify the test.
+ */
+ *optlen = ret;
+
+ return 0;
+}
+
+/* Execute the setsocktopt operation */
+static int call_setsockopt(bool use_io_uring, int fd, int level, int optname,
+ const void *optval, socklen_t optlen)
+{
+ if (use_io_uring)
+ return uring_setsockopt(fd, level, optname, optval, optlen);
+
+ return setsockopt(fd, level, optname, optval, optlen);
+}
+
+/* Execute the getsocktopt operation */
+static int call_getsockopt(bool use_io_uring, int fd, int level, int optname,
+ void *optval, socklen_t *optlen)
+{
+ if (use_io_uring)
+ return uring_getsockopt(fd, level, optname, optval, optlen);
+
+ return getsockopt(fd, level, optname, optval, optlen);
+}
+
+static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring)
{
int sock_fd, err, prog_fd;
void *optval = NULL;
@@ -980,8 +1076,9 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
}
- err = setsockopt(sock_fd, test->set_level, test->set_optname,
- test->set_optval, test->set_optlen);
+ err = call_setsockopt(use_io_uring, sock_fd, test->set_level,
+ test->set_optname, test->set_optval,
+ test->set_optlen);
if (err) {
if (errno == EPERM && test->error == EPERM_SETSOCKOPT)
goto close_sock_fd;
@@ -1008,8 +1105,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
socklen_t expected_get_optlen = test->get_optlen_ret ?:
test->get_optlen;
- err = getsockopt(sock_fd, test->get_level, test->get_optname,
- optval, &optlen);
+ err = call_getsockopt(use_io_uring, sock_fd, test->get_level,
+ test->get_optname, optval, &optlen);
if (err) {
if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT)
goto free_optval;
@@ -1063,7 +1160,11 @@ void test_sockopt(void)
if (!test__start_subtest(tests[i].descr))
continue;
- ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false),
+ tests[i].descr);
+ if (tests[i].io_uring_support)
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], true),
+ tests[i].descr);
}
close(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index f29c08d93beb..18d451be57c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) "
- "R1_w=ptr_foo(id=2,ref_obj_id=2,off=0,imm=0) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
+ "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
- "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)"
- " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n"
+ " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_innermapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)"
- " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n"
+ " R0=map_value(id=2,ks=4,vs=8)"
+ " R1_w=map_value(id=2,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
index 4224727fb364..626d76fe43a2 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -30,8 +30,15 @@ void test_task_under_cgroup(void)
if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
goto cleanup;
- ret = test_task_under_cgroup__attach(skel);
- if (!ASSERT_OK(ret, "test_task_under_cgroup__attach"))
+ /* First, attach the LSM program, and then it will be triggered when the
+ * TP_BTF program is attached.
+ */
+ skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(skel->links.lsm_run, "attach_lsm"))
+ goto cleanup;
+
+ skel->links.tp_btf_run = bpf_program__attach_trace(skel->progs.tp_btf_run);
+ if (!ASSERT_OK_PTR(skel->links.tp_btf_run, "attach_tp_btf"))
goto cleanup;
pid = fork();
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
index 67f985f7d215..924d0e25320c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -4,6 +4,10 @@
#define TC_HELPERS
#include <test_progs.h>
+#ifndef loopback
+# define loopback 1
+#endif
+
static inline __u32 id_from_prog_fd(int fd)
{
struct bpf_prog_info prog_info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
new file mode 100644
index 000000000000..15ee7b2fc410
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -0,0 +1,687 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define netkit_peer "nk0"
+#define netkit_name "nk1"
+
+#define ping_addr_neigh 0x0a000002 /* 10.0.0.2 */
+#define ping_addr_noneigh 0x0a000003 /* 10.0.0.3 */
+
+#include "test_tc_link.skel.h"
+#include "netlink_helpers.h"
+#include "tc_helpers.h"
+
+#define ICMP_ECHO 8
+
+struct icmphdr {
+ __u8 type;
+ __u8 code;
+ __sum16 checksum;
+ struct {
+ __be16 id;
+ __be16 sequence;
+ } echo;
+};
+
+struct iplink_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+};
+
+static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
+ bool same_netns)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct iplink_req req = {};
+ struct rtattr *linkinfo, *data;
+ const char *type = "netkit";
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+ req.i.ifi_family = AF_UNSPEC;
+
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, netkit_name,
+ strlen(netkit_name));
+ linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+ addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+ data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ addattr_nest_end(&req.n, data);
+ addattr_nest_end(&req.n, linkinfo);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ *ifindex = if_nametoindex(netkit_name);
+
+ ASSERT_GT(*ifindex, 0, "retrieve_ifindex");
+ ASSERT_OK(system("ip netns add foo"), "create netns");
+ ASSERT_OK(system("ip link set dev " netkit_name " up"),
+ "up primary");
+ ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
+ "addr primary");
+ if (same_netns) {
+ ASSERT_OK(system("ip link set dev " netkit_peer " up"),
+ "up peer");
+ ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
+ "addr peer");
+ } else {
+ ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+ "move peer");
+ ASSERT_OK(system("ip netns exec foo ip link set dev "
+ netkit_peer " up"), "up peer");
+ ASSERT_OK(system("ip netns exec foo ip addr add dev "
+ netkit_peer " 10.0.0.2/24"), "addr peer");
+ }
+ return err;
+}
+
+static void destroy_netkit(void)
+{
+ ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
+ ASSERT_OK(system("ip netns del foo"), "delete netns");
+ ASSERT_EQ(if_nametoindex(netkit_name), 0, netkit_name "_ifindex");
+}
+
+static int __send_icmp(__u32 dest)
+{
+ struct sockaddr_in addr;
+ struct icmphdr icmp;
+ int sock, ret;
+
+ ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0");
+ if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)"))
+ return ret;
+
+ sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+ if (!ASSERT_GE(sock, 0, "icmp_socket"))
+ return -errno;
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+ netkit_name, strlen(netkit_name) + 1);
+ if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)"))
+ goto out;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(dest);
+
+ memset(&icmp, 0, sizeof(icmp));
+ icmp.type = ICMP_ECHO;
+ icmp.echo.id = 1234;
+ icmp.echo.sequence = 1;
+
+ ret = sendto(sock, &icmp, sizeof(icmp), 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ if (!ASSERT_GE(ret, 0, "icmp_sendto"))
+ ret = -errno;
+ else
+ ret = 0;
+out:
+ close(sock);
+ return ret;
+}
+
+static int send_icmp(void)
+{
+ return __send_icmp(ping_addr_neigh);
+}
+
+void serial_test_tc_netkit_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ BPF_NETKIT_PEER), 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PEER, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+ destroy_netkit();
+}
+
+static void serial_test_tc_netkit_multi_links_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[3], link_ids[3];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ target), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ target), 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_links(void)
+{
+ serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PEER);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 pid1, pid2, fd1, fd2;
+ __u32 prog_ids[3];
+ struct test_tc_link *skel;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ pid1 = id_from_prog_fd(fd1);
+ pid2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd1, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_fd1;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd2, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_fd1;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_fd2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+cleanup_fd2:
+ err = bpf_prog_detach_opts(fd2, ifindex, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count_ifindex(ifindex, target, 1);
+cleanup_fd1:
+ err = bpf_prog_detach_opts(fd1, ifindex, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count_ifindex(ifindex, target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_opts(void)
+{
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PEER);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+void serial_test_tc_netkit_device(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex, ifindex2;
+
+ err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, true);
+ if (err)
+ return;
+
+ ifindex2 = if_nametoindex(netkit_peer);
+ ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ BPF_NETKIT_PEER), 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3,
+ BPF_NETKIT_PRIMARY), 0, "tc3_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PRIMARY, &optq);
+ ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+ err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PEER, &optq);
+ ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex2, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_netkit(skel->progs.tc3, ifindex2, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+ destroy_netkit();
+}
+
+static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(__send_icmp(ping_addr_noneigh), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true /* L2: ARP */, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, mode == NETKIT_L3, "seen_eth");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_neigh_links(void)
+{
+ serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index ca506d2fcf58..196abf223465 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -2387,12 +2387,9 @@ static int generate_dummy_prog(void)
const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
LIBBPF_OPTS(bpf_prog_load_opts, opts);
const size_t log_buf_sz = 256;
- char *log_buf;
+ char log_buf[log_buf_sz];
int fd = -1;
- log_buf = malloc(log_buf_sz);
- if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
- return fd;
opts.log_buf = log_buf;
opts.log_size = log_buf_sz;
@@ -2402,7 +2399,6 @@ static int generate_dummy_prog(void)
prog_insns, prog_insn_cnt, &opts);
ASSERT_STREQ(log_buf, "", "log_0");
ASSERT_GE(fd, 0, "prog_fd");
- free(log_buf);
return fd;
}
@@ -2471,7 +2467,7 @@ static void test_tc_opts_query_target(int target)
__u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
struct test_tc_link *skel;
union bpf_attr attr;
- __u32 prog_ids[5];
+ __u32 prog_ids[10];
int err;
skel = test_tc_link__open_and_load();
@@ -2599,6 +2595,135 @@ static void test_tc_opts_query_target(int target)
ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+ /* Test 3: Query with smaller prog_ids array */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+ attr.query.count = 2;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, ENOSPC, "prog_query_should_fail");
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 4: Query with larger prog_ids array */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+ attr.query.count = 10;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 5: Query with NULL prog_ids array but with count > 0 */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.count = sizeof(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 6: Query with non-NULL prog_ids array but with count == 0 */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 7: Query with invalid flags */
+ attr.query.attach_flags = 0;
+ attr.query.query_flags = 1;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
+ attr.query.attach_flags = 1;
+ attr.query.query_flags = 0;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
cleanup4:
err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
ASSERT_OK(err, "prog_detach");
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 6ee22c3b251a..518f143c5b0f 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -24,6 +24,7 @@
#include "test_progs.h"
#include "network_helpers.h"
+#include "netlink_helpers.h"
#include "test_tc_neigh_fib.skel.h"
#include "test_tc_neigh.skel.h"
#include "test_tc_peer.skel.h"
@@ -110,11 +111,17 @@ static void netns_setup_namespaces_nofail(const char *verb)
}
}
+enum dev_mode {
+ MODE_VETH,
+ MODE_NETKIT,
+};
+
struct netns_setup_result {
- int ifindex_veth_src;
- int ifindex_veth_src_fwd;
- int ifindex_veth_dst;
- int ifindex_veth_dst_fwd;
+ enum dev_mode dev_mode;
+ int ifindex_src;
+ int ifindex_src_fwd;
+ int ifindex_dst;
+ int ifindex_dst_fwd;
};
static int get_ifaddr(const char *name, char *ifaddr)
@@ -137,58 +144,110 @@ static int get_ifaddr(const char *name, char *ifaddr)
return 0;
}
+static int create_netkit(int mode, char *prim, char *peer)
+{
+ struct rtattr *linkinfo, *data, *peer_info;
+ struct rtnl_handle rth = { .fd = -1 };
+ const char *type = "netkit";
+ struct {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+ } req = {};
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+ req.i.ifi_family = AF_UNSPEC;
+
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
+ linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+ addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+ data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
+ req.n.nlmsg_len += sizeof(struct ifinfomsg);
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
+ addattr_nest_end(&req.n, peer_info);
+ addattr_nest_end(&req.n, data);
+ addattr_nest_end(&req.n, linkinfo);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
static int netns_setup_links_and_routes(struct netns_setup_result *result)
{
struct nstoken *nstoken = NULL;
- char veth_src_fwd_addr[IFADDR_STR_LEN+1] = {};
-
- SYS(fail, "ip link add veth_src type veth peer name veth_src_fwd");
- SYS(fail, "ip link add veth_dst type veth peer name veth_dst_fwd");
+ char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+ int err;
- SYS(fail, "ip link set veth_dst_fwd address " MAC_DST_FWD);
- SYS(fail, "ip link set veth_dst address " MAC_DST);
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip link add src type veth peer name src_fwd");
+ SYS(fail, "ip link add dst type veth peer name dst_fwd");
+
+ SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
+ SYS(fail, "ip link set dst address " MAC_DST);
+ } else if (result->dev_mode == MODE_NETKIT) {
+ err = create_netkit(NETKIT_L3, "src", "src_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_src"))
+ goto fail;
+ err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_dst"))
+ goto fail;
+ }
- if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
+ if (get_ifaddr("src_fwd", src_fwd_addr))
goto fail;
- result->ifindex_veth_src = if_nametoindex("veth_src");
- if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
+ result->ifindex_src = if_nametoindex("src");
+ if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
goto fail;
- result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
- if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
+ result->ifindex_src_fwd = if_nametoindex("src_fwd");
+ if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
goto fail;
- result->ifindex_veth_dst = if_nametoindex("veth_dst");
- if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
+ result->ifindex_dst = if_nametoindex("dst");
+ if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
goto fail;
- result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
- if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
+ result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
+ if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
goto fail;
- SYS(fail, "ip link set veth_src netns " NS_SRC);
- SYS(fail, "ip link set veth_src_fwd netns " NS_FWD);
- SYS(fail, "ip link set veth_dst_fwd netns " NS_FWD);
- SYS(fail, "ip link set veth_dst netns " NS_DST);
+ SYS(fail, "ip link set src netns " NS_SRC);
+ SYS(fail, "ip link set src_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst netns " NS_DST);
/** setup in 'src' namespace */
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto fail;
- SYS(fail, "ip addr add " IP4_SRC "/32 dev veth_src");
- SYS(fail, "ip addr add " IP6_SRC "/128 dev veth_src nodad");
- SYS(fail, "ip link set dev veth_src up");
+ SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
+ SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
+ SYS(fail, "ip link set dev src up");
- SYS(fail, "ip route add " IP4_DST "/32 dev veth_src scope global");
- SYS(fail, "ip route add " IP4_NET "/16 dev veth_src scope global");
- SYS(fail, "ip route add " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
- SYS(fail, "ip neigh add " IP4_DST " dev veth_src lladdr %s",
- veth_src_fwd_addr);
- SYS(fail, "ip neigh add " IP6_DST " dev veth_src lladdr %s",
- veth_src_fwd_addr);
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
+ src_fwd_addr);
+ SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
+ src_fwd_addr);
+ }
close_netns(nstoken);
@@ -201,15 +260,15 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
* needs v4 one in order to start ARP probing. IP4_NET route is added
* to the endpoints so that the ARP processing will reply.
*/
- SYS(fail, "ip addr add " IP4_SLL "/32 dev veth_src_fwd");
- SYS(fail, "ip addr add " IP4_DLL "/32 dev veth_dst_fwd");
- SYS(fail, "ip link set dev veth_src_fwd up");
- SYS(fail, "ip link set dev veth_dst_fwd up");
+ SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
+ SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
+ SYS(fail, "ip link set dev src_fwd up");
+ SYS(fail, "ip link set dev dst_fwd up");
- SYS(fail, "ip route add " IP4_SRC "/32 dev veth_src_fwd scope global");
- SYS(fail, "ip route add " IP6_SRC "/128 dev veth_src_fwd scope global");
- SYS(fail, "ip route add " IP4_DST "/32 dev veth_dst_fwd scope global");
- SYS(fail, "ip route add " IP6_DST "/128 dev veth_dst_fwd scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
close_netns(nstoken);
@@ -218,16 +277,18 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
if (!ASSERT_OK_PTR(nstoken, "setns dst"))
goto fail;
- SYS(fail, "ip addr add " IP4_DST "/32 dev veth_dst");
- SYS(fail, "ip addr add " IP6_DST "/128 dev veth_dst nodad");
- SYS(fail, "ip link set dev veth_dst up");
+ SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
+ SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
+ SYS(fail, "ip link set dev dst up");
- SYS(fail, "ip route add " IP4_SRC "/32 dev veth_dst scope global");
- SYS(fail, "ip route add " IP4_NET "/16 dev veth_dst scope global");
- SYS(fail, "ip route add " IP6_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
- SYS(fail, "ip neigh add " IP4_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS(fail, "ip neigh add " IP6_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
+ }
close_netns(nstoken);
@@ -293,23 +354,23 @@ static int netns_load_bpf(const struct bpf_program *src_prog,
const struct bpf_program *chk_prog,
const struct netns_setup_result *setup_result)
{
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
int err;
- /* tc qdisc add dev veth_src_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
- /* tc filter add dev veth_src_fwd ingress bpf da src_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
- /* tc filter add dev veth_src_fwd egress bpf da chk_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress bpf da src_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+ /* tc filter add dev src_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
- /* tc qdisc add dev veth_dst_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
- /* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
- /* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da dst_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+ /* tc filter add dev dst_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
return 0;
fail:
@@ -539,10 +600,10 @@ done:
static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
const struct netns_setup_result *setup_result)
{
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
struct nstoken *nstoken;
int err;
@@ -550,58 +611,58 @@ static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
nstoken = open_netns(NS_SRC);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
return -1;
- /* tc qdisc add dev veth_src clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
- /* tc filter add dev veth_src ingress bpf da ingress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
- /* tc filter add dev veth_src egress bpf da egress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ /* tc qdisc add dev src clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+ /* tc filter add dev src ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev src egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken);
/* setup ns_dst tc progs */
nstoken = open_netns(NS_DST);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
return -1;
- /* tc qdisc add dev veth_dst clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
- /* tc filter add dev veth_dst ingress bpf da ingress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
- /* tc filter add dev veth_dst egress bpf da egress_host */
- XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ /* tc qdisc add dev dst clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
+ /* tc filter add dev dst ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev dst egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
close_netns(nstoken);
/* setup ns_fwd tc progs */
nstoken = open_netns(NS_FWD);
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
return -1;
- /* tc qdisc add dev veth_dst_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
- /* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio100, 100);
- /* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
+ /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio101, 101);
- /* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio100, 100);
- /* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio101, 101);
- /* tc qdisc add dev veth_src_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
- /* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio100, 100);
- /* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
+ /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
skel->progs.ingress_fwdns_prio101, 101);
- /* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio100, 100);
- /* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
- XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
+ /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
skel->progs.egress_fwdns_prio101, 101);
close_netns(nstoken);
return 0;
@@ -777,8 +838,8 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
return;
- skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_dtime__load(skel);
if (!ASSERT_OK(err, "test_tc_dtime__load"))
@@ -868,8 +929,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
goto done;
- skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_neigh__load(skel);
if (!ASSERT_OK(err, "test_tc_neigh__load"))
@@ -904,8 +965,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
goto done;
- skel->rodata->IFINDEX_SRC = setup_result->ifindex_veth_src_fwd;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_peer__load(skel);
if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -996,7 +1057,7 @@ static int tun_relay_loop(int src_fd, int target_fd)
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
{
LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
- LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
struct test_tc_peer *skel = NULL;
struct nstoken *nstoken = NULL;
int err;
@@ -1045,7 +1106,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
goto fail;
skel->rodata->IFINDEX_SRC = ifindex;
- skel->rodata->IFINDEX_DST = setup_result->ifindex_veth_dst_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
err = test_tc_peer__load(skel);
if (!ASSERT_OK(err, "test_tc_peer__load"))
@@ -1053,19 +1114,19 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
* towards dst, and "tc_dst" to redirect packets
- * and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
+ * and "tc_chk" on dst_fwd to drop non-redirected packets.
*/
/* tc qdisc add dev tun_fwd clsact */
QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
- /* tc qdisc add dev veth_dst_fwd clsact */
- QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
- /* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
- /* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
- XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+ /* tc filter add dev dst_fwd egress bpf da tc_chk */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
/* Setup route and neigh tables */
SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
@@ -1074,17 +1135,17 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
- SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
" dev tun_src scope global");
- SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev veth_dst scope global");
- SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev veth_src scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
" dev tun_src scope global");
- SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev veth_dst scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
- SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
- SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev veth_dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
goto fail;
@@ -1106,9 +1167,9 @@ fail:
close_netns(nstoken);
}
-#define RUN_TEST(name) \
+#define RUN_TEST(name, mode) \
({ \
- struct netns_setup_result setup_result; \
+ struct netns_setup_result setup_result = { .dev_mode = mode, }; \
if (test__start_subtest(#name)) \
if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
@@ -1122,11 +1183,13 @@ static void *test_tc_redirect_run_tests(void *arg)
{
netns_setup_namespaces_nofail("delete");
- RUN_TEST(tc_redirect_peer);
- RUN_TEST(tc_redirect_peer_l3);
- RUN_TEST(tc_redirect_neigh);
- RUN_TEST(tc_redirect_neigh_fib);
- RUN_TEST(tc_redirect_dtime);
+ RUN_TEST(tc_redirect_peer, MODE_VETH);
+ RUN_TEST(tc_redirect_peer, MODE_NETKIT);
+ RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+ RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
+ RUN_TEST(tc_redirect_neigh, MODE_VETH);
+ RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
+ RUN_TEST(tc_redirect_dtime, MODE_VETH);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
index 0cca4e8ae38e..d3491a84b3b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
@@ -9,9 +9,10 @@
#include "test_bpf_ma.skel.h"
-void test_test_bpf_ma(void)
+static void do_bpf_ma_test(const char *name)
{
struct test_bpf_ma *skel;
+ struct bpf_program *prog;
struct btf *btf;
int i, err;
@@ -34,6 +35,11 @@ void test_test_bpf_ma(void)
skel->rodata->data_btf_ids[i] = id;
}
+ prog = bpf_object__find_program_by_name(skel->obj, name);
+ if (!ASSERT_OK_PTR(prog, "invalid prog name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+
err = test_bpf_ma__load(skel);
if (!ASSERT_OK(err, "load"))
goto out;
@@ -48,3 +54,15 @@ void test_test_bpf_ma(void)
out:
test_bpf_ma__destroy(skel);
}
+
+void test_test_bpf_ma(void)
+{
+ if (test__start_subtest("batch_alloc_free"))
+ do_bpf_ma_test("test_batch_alloc_free");
+ if (test__start_subtest("free_through_map_free"))
+ do_bpf_ma_test("test_free_through_map_free");
+ if (test__start_subtest("batch_percpu_alloc_free"))
+ do_bpf_ma_test("test_batch_percpu_alloc_free");
+ if (test__start_subtest("percpu_free_through_map_free"))
+ do_bpf_ma_test("test_percpu_free_through_map_free");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
index 214d9f4a94a5..ea933fd151c3 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -8,7 +8,8 @@
#include <sys/types.h>
#include <test_progs.h>
-#define TDIR "/sys/kernel/debug"
+/* TDIR must be in a location we can create a directory in. */
+#define TDIR "/tmp/test_bpffs_testdir"
static int read_iter(char *file)
{
@@ -43,8 +44,11 @@ static int fn(void)
if (!ASSERT_OK(err, "mount /"))
goto out;
- err = umount(TDIR);
- if (!ASSERT_OK(err, "umount " TDIR))
+ err = mkdir(TDIR, 0777);
+ /* If the directory already exists we can carry on. It may be left over
+ * from a previous run.
+ */
+ if ((err && errno != EEXIST) && !ASSERT_OK(err, "mkdir " TDIR))
goto out;
err = mount("none", TDIR, "tmpfs", 0, NULL);
@@ -138,6 +142,7 @@ out:
rmdir(TDIR "/fs1");
rmdir(TDIR "/fs2");
umount(TDIR);
+ rmdir(TDIR);
exit(err);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e3e68c97b40c..5cfa7a6316b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -31,6 +31,7 @@
#include "verifier_helper_restricted.skel.h"
#include "verifier_helper_value_access.skel.h"
#include "verifier_int_ptr.skel.h"
+#include "verifier_iterating_callbacks.skel.h"
#include "verifier_jeq_infer_not_null.skel.h"
#include "verifier_ld_ind.skel.h"
#include "verifier_ldsx.skel.h"
@@ -46,6 +47,7 @@
#include "verifier_movsx.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
+#include "verifier_precision.skel.h"
#include "verifier_prevent_map_lookup.skel.h"
#include "verifier_raw_stack.skel.h"
#include "verifier_raw_tp_writable.skel.h"
@@ -138,6 +140,7 @@ void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_acces
void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
+void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
@@ -153,6 +156,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_precision(void) { RUN(verifier_precision); }
void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
index 72310cfc6474..6fb2217d940b 100644
--- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -16,27 +16,27 @@ static void nsleep()
void test_vmlinux(void)
{
- int duration = 0, err;
+ int err;
struct test_vmlinux* skel;
struct test_vmlinux__bss *bss;
skel = test_vmlinux__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load"))
return;
bss = skel->bss;
err = test_vmlinux__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_vmlinux__attach"))
goto cleanup;
/* trigger everything */
nsleep();
- CHECK(!bss->tp_called, "tp", "not called\n");
- CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
- CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
- CHECK(!bss->kprobe_called, "kprobe", "not called\n");
- CHECK(!bss->fentry_called, "fentry", "not called\n");
+ ASSERT_TRUE(bss->tp_called, "tp");
+ ASSERT_TRUE(bss->raw_tp_called, "raw_tp");
+ ASSERT_TRUE(bss->tp_btf_called, "tp_btf");
+ ASSERT_TRUE(bss->kprobe_called, "kprobe");
+ ASSERT_TRUE(bss->fentry_called, "fentry");
cleanup:
test_vmlinux__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index f2b8167b72a8..442f4ca39fd7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -35,6 +35,8 @@ int dump_task_stack(struct bpf_iter__task *ctx)
return 0;
}
+int num_user_stacks = 0;
+
SEC("iter/task")
int get_task_user_stacks(struct bpf_iter__task *ctx)
{
@@ -51,6 +53,9 @@ int get_task_user_stacks(struct bpf_iter__task *ctx)
if (res <= 0)
return 0;
+ /* Only one task, the current one, should succeed */
+ ++num_user_stacks;
+
buf_sz += res;
/* If the verifier doesn't refine bpf_get_task_stack res, and instead
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
index 96131b9a1caa..96131b9a1caa 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
index 4ce76eb064c4..d461746fd3c1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -15,13 +15,16 @@ static int empty_callback(__u32 index, void *data)
return 0;
}
+static int outer_loop(__u32 index, void *data)
+{
+ bpf_loop(nr_loops, empty_callback, NULL, 0);
+ __sync_add_and_fetch(&hits, nr_loops);
+ return 0;
+}
+
SEC("fentry/" SYS_PREFIX "sys_getpgid")
int benchmark(void *ctx)
{
- for (int i = 0; i < 1000; i++) {
- bpf_loop(nr_loops, empty_callback, NULL, 0);
-
- __sync_add_and_fetch(&hits, nr_loops);
- }
+ bpf_loop(1000, outer_loop, NULL, 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 76d661b20e87..56c764df8196 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -33,6 +33,7 @@ int underflow_prog(void *ctx)
if (!p)
return 0;
bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+ bpf_kfunc_call_test_release(p);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index e1e5c54a6a11..49efaed143fc 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,48 +18,48 @@
return *(u64 *)num; \
}
-__msg(": R0_w=-2147483648 R10=fp0")
+__msg(": R0_w=0xffffffff80000000 R10=fp0")
check_assert(s64, eq, int_min, INT_MIN);
-__msg(": R0_w=2147483647 R10=fp0")
+__msg(": R0_w=0x7fffffff R10=fp0")
check_assert(s64, eq, int_max, INT_MAX);
__msg(": R0_w=0 R10=fp0")
check_assert(s64, eq, zero, 0);
-__msg(": R0_w=-9223372036854775808 R1_w=-9223372036854775808 R10=fp0")
+__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000 R10=fp0")
check_assert(s64, eq, llong_min, LLONG_MIN);
-__msg(": R0_w=9223372036854775807 R1_w=9223372036854775807 R10=fp0")
+__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff R10=fp0")
check_assert(s64, eq, llong_max, LLONG_MAX);
-__msg(": R0_w=scalar(smax=2147483646) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7ffffffe) R10=fp0")
check_assert(s64, lt, pos, INT_MAX);
-__msg(": R0_w=scalar(smax=-1,umin=9223372036854775808,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, lt, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483649,umin=9223372036854775808,umax=18446744071562067967,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff7fffffff,umin=0x8000000000000000,umax=0xffffffff7fffffff,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, lt, neg, INT_MIN);
-__msg(": R0_w=scalar(smax=2147483647) R10=fp0")
+__msg(": R0_w=scalar(smax=0x7fffffff) R10=fp0")
check_assert(s64, le, pos, INT_MAX);
__msg(": R0_w=scalar(smax=0) R10=fp0")
check_assert(s64, le, zero, 0);
-__msg(": R0_w=scalar(smax=-2147483648,umin=9223372036854775808,umax=18446744071562067968,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smax=0xffffffff80000000,umin=0x8000000000000000,umax=0xffffffff80000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, le, neg, INT_MIN);
-__msg(": R0_w=scalar(smin=umin=2147483648,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, gt, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=umin=1,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, gt, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483647) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000001) R10=fp0")
check_assert(s64, gt, neg, INT_MIN);
-__msg(": R0_w=scalar(smin=umin=2147483647,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0_w=scalar(smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, ge, pos, INT_MAX);
-__msg(": R0_w=scalar(smin=0,umax=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
+__msg(": R0_w=scalar(smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff)) R10=fp0")
check_assert(s64, ge, zero, 0);
-__msg(": R0_w=scalar(smin=-2147483648) R10=fp0")
+__msg(": R0_w=scalar(smin=0xffffffff80000000) R10=fp0")
check_assert(s64, ge, neg, INT_MIN);
SEC("?tc")
__log_level(2) __failure
-__msg(": R0=0 R1=ctx(off=0,imm=0) R2=scalar(smin=smin32=-2147483646,smax=smax32=2147483645) R10=fp0")
+__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
int check_assert_range_s64(struct __sk_buff *ctx)
{
struct bpf_sock *sk = ctx->sk;
@@ -75,7 +75,7 @@ int check_assert_range_s64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R1=ctx(off=0,imm=0) R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
+__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
int check_assert_range_u64(struct __sk_buff *ctx)
{
u64 num = ctx->len;
@@ -86,7 +86,7 @@ int check_assert_range_u64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R0=0 R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+__msg(": R0=0 R1=ctx() R2=4096 R10=fp0")
int check_assert_single_range_s64(struct __sk_buff *ctx)
{
struct bpf_sock *sk = ctx->sk;
@@ -103,7 +103,7 @@ int check_assert_single_range_s64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R1=ctx(off=0,imm=0) R2=4096 R10=fp0")
+__msg(": R1=ctx() R2=4096 R10=fp0")
int check_assert_single_range_u64(struct __sk_buff *ctx)
{
u64 num = ctx->len;
@@ -114,7 +114,7 @@ int check_assert_single_range_u64(struct __sk_buff *ctx)
SEC("?tc")
__log_level(2) __failure
-__msg(": R1=pkt(off=64,r=64,imm=0) R2=pkt_end(off=0,imm=0) R6=pkt(off=0,r=64,imm=0) R10=fp0")
+__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
int check_assert_generic(struct __sk_buff *ctx)
{
u8 *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
index 4c39e920dac2..8c0ef2742208 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_fail.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -171,6 +171,7 @@ int reject_with_rbtree_add_throw(void *ctx)
return 0;
bpf_spin_lock(&lock);
bpf_rbtree_add(&rbtree, &f->node, rbless);
+ bpf_spin_unlock(&lock);
return 0;
}
@@ -214,6 +215,7 @@ int reject_with_cb_reference(void *ctx)
if (!f)
return 0;
bpf_loop(5, subprog_cb_ref, NULL, 0);
+ bpf_obj_drop(f);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 6b9b3c56f009..b2181f850d3e 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -14,6 +14,13 @@ int my_pid;
int arr[256];
int small_arr[16] SEC(".data.small_arr");
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 10);
+ __type(key, int);
+ __type(value, int);
+} amap SEC(".maps");
+
#ifdef REAL_TEST
#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
#else
@@ -716,4 +723,714 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx)
return 0;
}
+SEC("?raw_tp")
+__failure
+__msg("R1 type=scalar expected=fp")
+__naked int delayed_read_mark(void)
+{
+ /* This is equivalent to C program below.
+ * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead.
+ * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch.
+ * At this point iterator next() call is reached with r7 that has no read mark.
+ * Loop body with r7=0xdead would only be visited if verifier would decide to continue
+ * with second loop iteration. Absence of read mark on r7 might affect state
+ * equivalent logic used for iterator convergence tracking.
+ *
+ * r7 = &fp[-16]
+ * fp[-16] = 0
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * r6++
+ * if (r6 != 42) {
+ * r7 = 0xdead
+ * continue;
+ * }
+ * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r7 = r10;"
+ "r7 += -16;"
+ "r0 = 0;"
+ "*(u64 *)(r7 + 0) = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "r6 += 1;"
+ "if r6 != 42 goto 3f;"
+ "r7 = 0xdead;"
+ "goto 1b;"
+ "3:"
+ "r1 = r7;"
+ "r2 = 8;"
+ "r3 = 0xdeadbeef;"
+ "call %[bpf_probe_read_user];"
+ "goto 1b;"
+ "2:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_probe_read_user)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__naked int delayed_precision_mark(void)
+{
+ /* This is equivalent to C program below.
+ * The test is similar to delayed_iter_mark but verifies that incomplete
+ * precision don't fool verifier.
+ * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32.
+ * State with r7=-16 is visited first and follows r6 != 42 ... continue branch.
+ * At this point iterator next() call is reached with r7 that has no read
+ * and precision marks.
+ * Loop body with r7=-32 would only be visited if verifier would decide to continue
+ * with second loop iteration. Absence of precision mark on r7 might affect state
+ * equivalent logic used for iterator convergence tracking.
+ *
+ * r8 = 0
+ * fp[-16] = 0
+ * r7 = -16
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (r6 != 42) {
+ * r7 = -32
+ * r6 = bpf_get_prandom_u32()
+ * continue;
+ * }
+ * r0 = r10
+ * r0 += r7
+ * r8 = *(u64 *)(r0 + 0) // this is not safe
+ * r6 = bpf_get_prandom_u32()
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return r8
+ */
+ asm volatile (
+ "r8 = 0;"
+ "*(u64 *)(r10 - 16) = r8;"
+ "r7 = -16;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;\n"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "if r6 != 42 goto 3f;"
+ "r7 = -32;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "goto 1b;\n"
+ "3:"
+ "r0 = r10;"
+ "r0 += r7;"
+ "r8 = *(u64 *)(r0 + 0);"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "goto 1b;\n"
+ "2:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = r8;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_probe_read_user)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps1(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * The case turns out to be tricky in a sense that:
+ * - states with c=-25 are explored only on a second iteration
+ * of the outer loop;
+ * - states with read+precise mark on c are explored only on
+ * second iteration of the inner loop and in a state which
+ * is pushed to states stack first.
+ *
+ * Depending on the details of iterator convergence logic
+ * verifier might stop states traversal too early and miss
+ * unsafe c=-25 memory access.
+ *
+ * j = iter_new(); // fp[-16]
+ * a = 0; // r6
+ * b = 0; // r7
+ * c = -24; // r8
+ * while (iter_next(j)) {
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * *(r10 + c) = 7; // this is not safe
+ * iter_destroy(i);
+ * iter_destroy(j);
+ * return;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * a = 0;
+ * b = 0;
+ * c = -25;
+ * }
+ * iter_destroy(j);
+ * return;
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -16;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -24;"
+ "j_loop_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto j_loop_end_%=;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i_loop_end_%=;"
+ "check_one_r6_%=:"
+ "if r6 != 1 goto check_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i_loop_%=;"
+ "check_zero_r6_%=:"
+ "if r6 != 0 goto i_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check_one_r7_%=;"
+ "goto i_loop_%=;"
+ "check_one_r7_%=:"
+ "if r7 != 1 goto i_loop_%=;"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "i_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -25;"
+ "goto j_loop_%=;"
+ "j_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * The case turns out to be tricky in a sense that:
+ * - states with read+precise mark on c are explored only on a second
+ * iteration of the first inner loop and in a state which is pushed to
+ * states stack first.
+ * - states with c=-25 are explored only on a second iteration of the
+ * second inner loop and in a state which is pushed to states stack
+ * first.
+ *
+ * Depending on the details of iterator convergence logic
+ * verifier might stop states traversal too early and miss
+ * unsafe c=-25 memory access.
+ *
+ * j = iter_new(); // fp[-16]
+ * a = 0; // r6
+ * b = 0; // r7
+ * c = -24; // r8
+ * while (iter_next(j)) {
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * *(r10 + c) = 7; // this is not safe
+ * iter_destroy(i);
+ * iter_destroy(j);
+ * return;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * a = 0;
+ * c = -25;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * }
+ * iter_destroy(j);
+ * return;
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -16;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -24;"
+ "j_loop_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto j_loop_end_%=;"
+
+ /* first inner loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i_loop_end_%=;"
+ "check_one_r6_%=:"
+ "if r6 != 1 goto check_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i_loop_%=;"
+ "check_zero_r6_%=:"
+ "if r6 != 0 goto i_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check_one_r7_%=;"
+ "goto i_loop_%=;"
+ "check_one_r7_%=:"
+ "if r7 != 1 goto i_loop_%=;"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "i_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+
+ /* second inner loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i2_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i2_loop_end_%=;"
+ "check2_one_r6_%=:"
+ "if r6 != 1 goto check2_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i2_loop_%=;"
+ "check2_zero_r6_%=:"
+ "if r6 != 0 goto i2_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check2_one_r7_%=;"
+ "goto i2_loop_%=;"
+ "check2_one_r7_%=:"
+ "if r7 != 1 goto i2_loop_%=;"
+ "r6 = 0;"
+ "r8 = -25;"
+ "goto i2_loop_%=;"
+ "i2_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+
+ "r6 = 0;"
+ "r7 = 0;"
+ "goto j_loop_%=;"
+ "j_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__success
+__naked int triple_continue(void)
+{
+ /* This is equivalent to C program below.
+ * High branching factor of the loop body turned out to be
+ * problematic for one of the iterator convergence tracking
+ * algorithms explored.
+ *
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * r0 += 0;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "r0 += 0;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__success
+__naked int widen_spill(void)
+{
+ /* This is equivalent to C program below.
+ * The counter is stored in fp[-16], if this counter is not widened
+ * verifier states representing loop iterations would never converge.
+ *
+ * fp[-16] = 0
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * r0 = fp[-16];
+ * r0 += 1;
+ * fp[-16] = r0;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "r0 = *(u64 *)(r10 - 16);"
+ "r0 += 1;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("raw_tp")
+__success
+__naked int checkpoint_states_deletion(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * int *a, *b, *c, *d, *e, *f;
+ * int i, sum = 0;
+ * bpf_for(i, 0, 10) {
+ * a = bpf_map_lookup_elem(&amap, &i);
+ * b = bpf_map_lookup_elem(&amap, &i);
+ * c = bpf_map_lookup_elem(&amap, &i);
+ * d = bpf_map_lookup_elem(&amap, &i);
+ * e = bpf_map_lookup_elem(&amap, &i);
+ * f = bpf_map_lookup_elem(&amap, &i);
+ * if (a) sum += 1;
+ * if (b) sum += 1;
+ * if (c) sum += 1;
+ * if (d) sum += 1;
+ * if (e) sum += 1;
+ * if (f) sum += 1;
+ * }
+ * return 0;
+ *
+ * The body of the loop spawns multiple simulation paths
+ * with different combination of NULL/non-NULL information for a/b/c/d/e/f.
+ * Each combination is unique from states_equal() point of view.
+ * Explored states checkpoint is created after each iterator next call.
+ * Iterator convergence logic expects that eventually current state
+ * would get equal to one of the explored states and thus loop
+ * exploration would be finished (at-least for a specific path).
+ * Verifier evicts explored states with high miss to hit ratio
+ * to to avoid comparing current state with too many explored
+ * states per instruction.
+ * This test is designed to "stress test" eviction policy defined using formula:
+ *
+ * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted
+ *
+ * Currently N is set to 64, which allows for 6 variables in this test.
+ */
+ asm volatile (
+ "r6 = 0;" /* a */
+ "r7 = 0;" /* b */
+ "r8 = 0;" /* c */
+ "*(u64 *)(r10 - 24) = r6;" /* d */
+ "*(u64 *)(r10 - 32) = r6;" /* e */
+ "*(u64 *)(r10 - 40) = r6;" /* f */
+ "r9 = 0;" /* sum */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+
+ "*(u64 *)(r10 - 16) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r6 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r7 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r8 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 24) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 32) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 40) = r0;"
+
+ "if r6 == 0 goto +1;"
+ "r9 += 1;"
+ "if r7 == 0 goto +1;"
+ "r9 += 1;"
+ "if r8 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 24);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 32);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 40);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm_addr(amap)
+ : __clobber_all
+ );
+}
+
+struct {
+ int data[32];
+ int n;
+} loop_data;
+
+SEC("raw_tp")
+__success
+int iter_arr_with_actual_elem_count(const void *ctx)
+{
+ int i, n = loop_data.n, sum = 0;
+
+ if (n > ARRAY_SIZE(loop_data.data))
+ return 0;
+
+ bpf_for(i, 0, n) {
+ /* no rechecking of i against ARRAY_SIZE(loop_data.n) */
+ sum += loop_data.data[i];
+ }
+
+ return sum;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_css.c b/tools/testing/selftests/bpf/progs/iters_css.c
new file mode 100644
index 000000000000..ec1f6c2f590b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+u64 root_cg_id, leaf_cg_id;
+u64 first_cg_id, last_cg_id;
+
+int pre_order_cnt, post_order_cnt, tree_high;
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_css_for_each(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct cgroup_subsys_state *root_css, *leaf_css, *pos;
+ struct cgroup *root_cgrp, *leaf_cgrp, *cur_cgrp;
+
+ if (cur_task->pid != target_pid)
+ return 0;
+
+ root_cgrp = bpf_cgroup_from_id(root_cg_id);
+
+ if (!root_cgrp)
+ return 0;
+
+ leaf_cgrp = bpf_cgroup_from_id(leaf_cg_id);
+
+ if (!leaf_cgrp) {
+ bpf_cgroup_release(root_cgrp);
+ return 0;
+ }
+ root_css = &root_cgrp->self;
+ leaf_css = &leaf_cgrp->self;
+ pre_order_cnt = post_order_cnt = tree_high = 0;
+ first_cg_id = last_cg_id = 0;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+ cur_cgrp = pos->cgroup;
+ post_order_cnt++;
+ last_cg_id = cur_cgrp->kn->id;
+ }
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_PRE) {
+ cur_cgrp = pos->cgroup;
+ pre_order_cnt++;
+ if (!first_cg_id)
+ first_cg_id = cur_cgrp->kn->id;
+ }
+
+ bpf_for_each(css, pos, leaf_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+ tree_high++;
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+ tree_high--;
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(root_cgrp);
+ bpf_cgroup_release(leaf_cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_css_task.c b/tools/testing/selftests/bpf/progs/iters_css_task.c
new file mode 100644
index 000000000000..9ac758649cb8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css_task.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+
+pid_t target_pid;
+int css_task_cnt;
+u64 cg_id;
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ if (cur_task->pid != target_pid)
+ return ret;
+
+ cgrp = bpf_cgroup_from_id(cg_id);
+
+ if (!cgrp)
+ return -EPERM;
+
+ css = &cgrp->self;
+ css_task_cnt = 0;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS)
+ if (task->pid == target_pid)
+ css_task_cnt++;
+
+ bpf_cgroup_release(cgrp);
+
+ return -EPERM;
+}
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+ return cgrp->kn->id;
+}
+
+SEC("?iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct cgroup *cgrp = ctx->cgroup;
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ /* epilogue */
+ if (cgrp == NULL) {
+ BPF_SEQ_PRINTF(seq, "epilogue\n");
+ return 0;
+ }
+
+ /* prologue */
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "prologue\n");
+
+ BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+ css = &cgrp->self;
+ css_task_cnt = 0;
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+ if (task->pid == target_pid)
+ css_task_cnt++;
+ }
+
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int BPF_PROG(iter_css_task_for_each_sleep)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id);
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ if (cgrp == NULL)
+ return 0;
+ css = &cgrp->self;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c
new file mode 100644
index 000000000000..c9b4055cd410
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+int procs_cnt, threads_cnt, proc_threads_cnt;
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_task_for_each_sleep(void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct task_struct *pos;
+
+ if (cur_task->pid != target_pid)
+ return 0;
+ procs_cnt = threads_cnt = proc_threads_cnt = 0;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
+ if (pos->pid == target_pid)
+ procs_cnt++;
+
+ bpf_for_each(task, pos, cur_task, BPF_TASK_ITER_PROC_THREADS)
+ proc_threads_cnt++;
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_THREADS)
+ if (pos->tgid == target_pid)
+ threads_cnt++;
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
new file mode 100644
index 000000000000..6b1588d70652
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+int BPF_PROG(iter_tasks_without_lock)
+{
+ struct task_struct *pos;
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+
+ }
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+int BPF_PROG(iter_css_without_lock)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *root_css, *pos;
+
+ if (!cgrp)
+ return 0;
+ root_css = &cgrp->self;
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+int BPF_PROG(iter_tasks_lock_and_unlock)
+{
+ struct task_struct *pos;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ }
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+int BPF_PROG(iter_css_lock_and_unlock)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *root_css, *pos;
+
+ if (!cgrp)
+ return 0;
+ root_css = &cgrp->self;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ }
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs")
+int BPF_PROG(iter_css_task_for_each)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ if (cgrp == NULL)
+ return 0;
+ css = &cgrp->self;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c
index 44edecfdfaee..e085a51d153e 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c
@@ -30,6 +30,7 @@ int iter_task_vma_for_each(const void *ctx)
bpf_for_each(task_vma, vma, task, 0) {
if (seen >= 1000)
break;
+ barrier_var(seen);
vm_ranges[seen].vm_start = vma->vm_start;
vm_ranges[seen].vm_end = vma->vm_end;
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index f4c63daba229..6438982b928b 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -591,7 +591,9 @@ int pop_ptr_off(void *(*op)(void *head))
n = op(&p->head);
bpf_spin_unlock(&p->lock);
- bpf_this_cpu_ptr(n);
+ if (!n)
+ return 0;
+ bpf_spin_lock((void *)n);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index b567a666d2b8..1769fdff6aea 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -14,6 +14,24 @@ struct node_data {
struct bpf_rb_node node;
};
+struct refcounted_node {
+ long data;
+ struct bpf_rb_node rb_node;
+ struct bpf_refcount refcount;
+};
+
+struct stash {
+ struct bpf_spin_lock l;
+ struct refcounted_node __kptr *stashed;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct stash);
+ __uint(max_entries, 10);
+} refcounted_node_stash SEC(".maps");
+
struct plain_local {
long key;
long data;
@@ -38,6 +56,7 @@ struct map_value {
* Had to do the same w/ bpf_kfunc_call_test_release below
*/
struct node_data *just_here_because_btf_bug;
+struct refcounted_node *just_here_because_btf_bug2;
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx)
return 0;
}
+SEC("tc")
+long refcount_acquire_without_unstash(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int ret = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &ret);
+ if (!s)
+ return 1;
+
+ if (!s->stashed)
+ /* refcount_acquire failure is expected when no refcounted_node
+ * has been stashed before this program executes
+ */
+ return 2;
+
+ p = bpf_refcount_acquire(s->stashed);
+ if (!p)
+ return 3;
+
+ ret = s->stashed ? s->stashed->data : -1;
+ bpf_obj_drop(p);
+ return ret;
+}
+
+/* Helper for refcount_acquire_without_unstash test */
+SEC("tc")
+long stash_refcounted_node(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int key = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &key);
+ if (!s)
+ return 1;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 2;
+ p->data = 42;
+
+ p = bpf_kptr_xchg(&s->stashed, p);
+ if (p) {
+ bpf_obj_drop(p);
+ return 3;
+ }
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
index c39f559d3100..42c4a8b62e36 100644
--- a/tools/testing/selftests/bpf/progs/pyperf180.c
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -1,4 +1,26 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 180
+
+/* llvm upstream commit at clang18
+ * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e
+ * changed inlining behavior and caused compilation failure as some branch
+ * target distance exceeded 16bit representation which is the maximum for
+ * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18
+ * to specify which cpu version is used for compilation. So a smaller
+ * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which
+ * reduced some branch target distances and resolved the compilation failure.
+ *
+ * To capture the case where a developer/ci uses clang18 but the corresponding
+ * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count
+ * will be set as well to prevent potential compilation failures.
+ */
+#ifdef __BPF_CPU_VERSION__
+#if __BPF_CPU_VERSION__ < 4
+#define UNROLL_COUNT 90
+#endif
+#elif __clang_major__ == 18
+#define UNROLL_COUNT 90
+#endif
+
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 1ef07f6ee580..1553b9c16aa7 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -54,6 +54,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx)
}
SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+long refcount_acquire_maybe_null(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ /* Intentionally not testing !n
+ * it's MAYBE_NULL for refcount_acquire
+ */
+ m = bpf_refcount_acquire(n);
+ if (m)
+ bpf_obj_drop(m);
+ if (n)
+ bpf_obj_drop(n);
+
+ return 0;
+}
+
+SEC("?tc")
__failure __msg("Unreleased reference id=3 alloc_insn=9")
long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index e02cfd380746..40df2cc26eaf 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -24,9 +24,11 @@ struct task_struct {};
#define STACK_TABLE_EPOCH_SHIFT 20
#define STROBE_MAX_STR_LEN 1
#define STROBE_MAX_CFGS 32
+#define READ_MAP_VAR_PAYLOAD_CAP \
+ ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
#define STROBE_MAX_PAYLOAD \
(STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
- STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+ STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
struct strobe_value_header {
/*
@@ -355,7 +357,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base,
struct strobe_value_generic *value,
struct strobemeta_payload *data,
- void *payload)
+ size_t off)
{
void *location;
uint64_t len;
@@ -366,7 +368,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
/*
* if bpf_probe_read_user_str returns error (<0), due to casting to
* unsinged int, it will become big number, so next check is
@@ -378,14 +380,14 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
data->str_lens[idx] = len;
- return len;
+ return off + len;
}
-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data,
- void *payload)
+static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data,
+ size_t off)
{
struct strobe_map_descr* descr = &data->map_descrs[idx];
struct strobe_map_raw map;
@@ -397,11 +399,11 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
location = calc_location(&cfg->map_locs[idx], tls_base);
if (!location)
- return payload;
+ return off;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
- return payload;
+ return off;
descr->id = map.id;
descr->cnt = map.cnt;
@@ -410,10 +412,10 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1;
}
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len;
- payload += len;
+ off += len;
}
#ifdef NO_UNROLL
@@ -426,22 +428,22 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
break;
descr->key_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len;
- payload += len;
+ off += len;
}
descr->val_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len;
- payload += len;
+ off += len;
}
}
- return payload;
+ return off;
}
#ifdef USE_BPF_LOOP
@@ -455,14 +457,20 @@ struct read_var_ctx {
struct strobemeta_payload *data;
void *tls_base;
struct strobemeta_cfg *cfg;
- void *payload;
+ size_t payload_off;
/* value gets mutated */
struct strobe_value_generic *value;
enum read_type type;
};
-static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
{
+ /* lose precision info for ctx->payload_off, verifier won't track
+ * double xor, barrier_var() is needed to force clang keep both xors.
+ */
+ ctx->payload_off ^= index;
+ barrier_var(ctx->payload_off);
+ ctx->payload_off ^= index;
switch (ctx->type) {
case READ_INT_VAR:
if (index >= STROBE_MAX_INTS)
@@ -472,14 +480,18 @@ static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
case READ_MAP_VAR:
if (index >= STROBE_MAX_MAPS)
return 1;
- ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
- ctx->value, ctx->data, ctx->payload);
+ if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
+ return 1;
+ ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
break;
case READ_STR_VAR:
if (index >= STROBE_MAX_STRS)
return 1;
- ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
- ctx->value, ctx->data, ctx->payload);
+ if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
+ return 1;
+ ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
break;
}
return 0;
@@ -501,7 +513,8 @@ static void *read_strobe_meta(struct task_struct *task,
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
struct strobemeta_cfg *cfg;
- void *tls_base, *payload;
+ size_t payload_off;
+ void *tls_base;
cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
if (!cfg)
@@ -509,7 +522,7 @@ static void *read_strobe_meta(struct task_struct *task,
data->int_vals_set_mask = 0;
data->req_meta_valid = 0;
- payload = data->payload;
+ payload_off = 0;
/*
* we don't have struct task_struct definition, it should be:
* tls_base = (void *)task->thread.fsbase;
@@ -522,7 +535,7 @@ static void *read_strobe_meta(struct task_struct *task,
.tls_base = tls_base,
.value = &value,
.data = data,
- .payload = payload,
+ .payload_off = 0,
};
int err;
@@ -540,6 +553,11 @@ static void *read_strobe_meta(struct task_struct *task,
err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
if (err != STROBE_MAX_MAPS)
return NULL;
+
+ payload_off = ctx.payload_off;
+ /* this should not really happen, here only to satisfy verifer */
+ if (payload_off > sizeof(data->payload))
+ payload_off = sizeof(data->payload);
#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
@@ -555,7 +573,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
- payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
}
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
@@ -563,7 +581,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma unroll
#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
- payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
}
#endif /* USE_BPF_LOOP */
@@ -571,7 +589,7 @@ static void *read_strobe_meta(struct task_struct *task,
* return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent
*/
- return payload;
+ return &data->payload[payload_off];
}
SEC("raw_tracepoint/kfree_skb")
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
index ecde41ae0fc8..b685a4aba6bd 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -37,10 +37,20 @@ int pid = 0;
__type(key, int); \
__type(value, struct map_value_##_size); \
__uint(max_entries, 128); \
- } array_##_size SEC(".maps");
+ } array_##_size SEC(".maps")
-static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch,
- unsigned int idx)
+#define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \
+ struct map_value_percpu_##_size { \
+ struct bin_data_##_size __percpu_kptr * data; \
+ }; \
+ struct { \
+ __uint(type, BPF_MAP_TYPE_ARRAY); \
+ __type(key, int); \
+ __type(value, struct map_value_percpu_##_size); \
+ __uint(max_entries, 128); \
+ } array_percpu_##_size SEC(".maps")
+
+static __always_inline void batch_alloc(struct bpf_map *map, unsigned int batch, unsigned int idx)
{
struct generic_map_value *value;
unsigned int i, key;
@@ -65,6 +75,14 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b
return;
}
}
+}
+
+static __always_inline void batch_free(struct bpf_map *map, unsigned int batch, unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old;
+
for (i = 0; i < batch; i++) {
key = i;
value = bpf_map_lookup_elem(map, &key);
@@ -81,8 +99,72 @@ static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int b
}
}
+static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int batch,
+ unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old, *new;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 1;
+ return;
+ }
+ /* per-cpu allocator may not be able to refill in time */
+ new = bpf_percpu_obj_new_impl(data_btf_ids[idx], NULL);
+ if (!new)
+ continue;
+
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old) {
+ bpf_percpu_obj_drop(old);
+ err = 2;
+ return;
+ }
+ }
+}
+
+static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch,
+ unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 3;
+ return;
+ }
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (!old)
+ continue;
+ bpf_percpu_obj_drop(old);
+ }
+}
+
+#define CALL_BATCH_ALLOC(size, batch, idx) \
+ batch_alloc((struct bpf_map *)(&array_##size), batch, idx)
+
#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
- batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx)
+ do { \
+ batch_alloc((struct bpf_map *)(&array_##size), batch, idx); \
+ batch_free((struct bpf_map *)(&array_##size), batch, idx); \
+ } while (0)
+
+#define CALL_BATCH_PERCPU_ALLOC(size, batch, idx) \
+ batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx)
+
+#define CALL_BATCH_PERCPU_ALLOC_FREE(size, batch, idx) \
+ do { \
+ batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+ batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+ } while (0)
DEFINE_ARRAY_WITH_KPTR(8);
DEFINE_ARRAY_WITH_KPTR(16);
@@ -97,8 +179,21 @@ DEFINE_ARRAY_WITH_KPTR(1024);
DEFINE_ARRAY_WITH_KPTR(2048);
DEFINE_ARRAY_WITH_KPTR(4096);
-SEC("fentry/" SYS_PREFIX "sys_nanosleep")
-int test_bpf_mem_alloc_free(void *ctx)
+/* per-cpu kptr doesn't support bin_data_8 which is a zero-sized array */
+DEFINE_ARRAY_WITH_PERCPU_KPTR(16);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(32);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(64);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(96);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(128);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(192);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(256);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(512);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(1024);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(2048);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(4096);
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_alloc_free(void *ctx)
{
if ((u32)bpf_get_current_pid_tgid() != pid)
return 0;
@@ -121,3 +216,76 @@ int test_bpf_mem_alloc_free(void *ctx)
return 0;
}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_free_through_map_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 8-bytes objects in batch to trigger refilling,
+ * then free these objects through map free.
+ */
+ CALL_BATCH_ALLOC(8, 128, 0);
+ CALL_BATCH_ALLOC(16, 128, 1);
+ CALL_BATCH_ALLOC(32, 128, 2);
+ CALL_BATCH_ALLOC(64, 128, 3);
+ CALL_BATCH_ALLOC(96, 128, 4);
+ CALL_BATCH_ALLOC(128, 128, 5);
+ CALL_BATCH_ALLOC(192, 128, 6);
+ CALL_BATCH_ALLOC(256, 128, 7);
+ CALL_BATCH_ALLOC(512, 64, 8);
+ CALL_BATCH_ALLOC(1024, 32, 9);
+ CALL_BATCH_ALLOC(2048, 16, 10);
+ CALL_BATCH_ALLOC(4096, 8, 11);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_percpu_alloc_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling,
+ * then free 128 16-bytes per-cpu objects in batch to trigger freeing.
+ */
+ CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1);
+ CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2);
+ CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3);
+ CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4);
+ CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5);
+ CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6);
+ CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7);
+ CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8);
+ CALL_BATCH_PERCPU_ALLOC_FREE(1024, 32, 9);
+ CALL_BATCH_PERCPU_ALLOC_FREE(2048, 16, 10);
+ CALL_BATCH_PERCPU_ALLOC_FREE(4096, 8, 11);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_percpu_free_through_map_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 16-bytes per-cpu objects in batch to trigger refilling,
+ * then free these object through map free.
+ */
+ CALL_BATCH_PERCPU_ALLOC(16, 128, 1);
+ CALL_BATCH_PERCPU_ALLOC(32, 128, 2);
+ CALL_BATCH_PERCPU_ALLOC(64, 128, 3);
+ CALL_BATCH_PERCPU_ALLOC(96, 128, 4);
+ CALL_BATCH_PERCPU_ALLOC(128, 128, 5);
+ CALL_BATCH_PERCPU_ALLOC(192, 128, 6);
+ CALL_BATCH_PERCPU_ALLOC(256, 128, 7);
+ CALL_BATCH_PERCPU_ALLOC(512, 64, 8);
+ CALL_BATCH_PERCPU_ALLOC(1024, 32, 9);
+ CALL_BATCH_PERCPU_ALLOC(2048, 16, 10);
+ CALL_BATCH_PERCPU_ALLOC(4096, 8, 11);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
new file mode 100644
index 000000000000..44628865fe1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+__u32 target_ancestor_level;
+__u64 target_ancestor_cgid;
+int target_pid, target_hid;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static int bpf_link_create_verify(int cmd)
+{
+ struct cgroup *cgrp, *ancestor;
+ struct task_struct *task;
+ int ret = 0;
+
+ if (cmd != BPF_LINK_CREATE)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+
+ /* Then it can run in parallel with others */
+ if (task->pid != target_pid)
+ return 0;
+
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ /* Refuse it if its cgid or its ancestor's cgid is the target cgid */
+ if (cgrp->kn->id == target_ancestor_cgid)
+ ret = -1;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level);
+ if (!ancestor)
+ goto out;
+
+ if (ancestor->kn->id == target_ancestor_cgid)
+ ret = -1;
+ bpf_cgroup_release(ancestor);
+
+out:
+ bpf_cgroup_release(cgrp);
+ return ret;
+}
+
+SEC("lsm/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("fentry")
+int BPF_PROG(fentry_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
index 3ddcb3777912..2a2a942737d7 100644
--- a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -7,7 +7,8 @@
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_s390)) && __clang_major__ >= 18
+ defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
const volatile int skip = 0;
#else
const volatile int skip = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
index 56cdc0a553f0..7e750309ce27 100644
--- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -18,7 +18,7 @@ const volatile __u64 cgid;
int remote_pid;
SEC("tp_btf/task_newtask")
-int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags)
+int BPF_PROG(tp_btf_run, struct task_struct *task, u64 clone_flags)
{
struct cgroup *cgrp = NULL;
struct task_struct *acquired;
@@ -48,4 +48,30 @@ out:
return 0;
}
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ struct cgroup *cgrp = NULL;
+ struct task_struct *task;
+ int ret = 0;
+
+ task = bpf_get_current_task_btf();
+ if (local_pid != task->pid)
+ return 0;
+
+ if (cmd != BPF_LINK_CREATE)
+ return 0;
+
+ /* 1 is the root cgroup */
+ cgrp = bpf_cgroup_from_id(1);
+ if (!cgrp)
+ goto out;
+ if (!bpf_task_under_cgroup(task, cgrp))
+ ret = -1;
+ bpf_cgroup_release(cgrp);
+
+out:
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 30e7124c49a1..992400acb957 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -1,7 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Isovalent */
#include <stdbool.h>
+
#include <linux/bpf.h>
+#include <linux/if_ether.h>
+
+#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
char LICENSE[] SEC("license") = "GPL";
@@ -12,10 +16,19 @@ bool seen_tc3;
bool seen_tc4;
bool seen_tc5;
bool seen_tc6;
+bool seen_eth;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
{
+ struct ethhdr eth = {};
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ goto out;
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+ goto out;
+ seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+out:
seen_tc1 = true;
return TCX_NEXT;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index c5588a14fe2e..ec430b71730b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -965,6 +965,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
__success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
__naked void crossing_64_bit_signed_boundary_2(void)
{
asm volatile (" \
@@ -1046,6 +1047,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP32_JSLT for crossing 32-bit signed boundary")
__success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
__naked void crossing_32_bit_signed_boundary_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
index 107525fb4a6a..e61755656e8d 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bswap.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -6,7 +6,8 @@
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c
index df7697b94007..c1f55e1d80a4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_cfg.c
+++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c
@@ -97,4 +97,66 @@ l0_%=: r2 = r0; \
" ::: __clobber_all);
}
+SEC("socket")
+__description("conditional loop (2)")
+__success
+__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
+__naked void conditional_loop2(void)
+{
+ asm volatile (" \
+ r9 = 2 ll; \
+ r3 = 0x20 ll; \
+ r4 = 0x35 ll; \
+ r8 = r4; \
+ goto l1_%=; \
+l0_%=: r9 -= r3; \
+ r9 -= r4; \
+ r9 -= r8; \
+l1_%=: r8 += r4; \
+ if r8 < 0x64 goto l0_%=; \
+ r0 = r9; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
+__naked void uncond_loop_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 = 1; \
+ goto l0_%=; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+
+__naked __noinline __used
+static unsigned long never_ending_subprog()
+{
+ asm volatile (" \
+ r0 = r1; \
+ goto -1; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+/* infinite loop is detected *after* check_cfg() */
+__failure __msg("infinite loop detected")
+__naked void uncond_loop_in_subprog_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 += 1; \
+ call never_ending_subprog; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 9f202eda952f..d1edbcff9a18 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -6,7 +6,8 @@
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
new file mode 100644
index 000000000000..5905e036e0ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 8);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+ __uint(max_entries, 8);
+} ringbuf SEC(".maps");
+
+struct vm_area_struct;
+struct bpf_map;
+
+struct buf_context {
+ char *buf;
+};
+
+struct num_context {
+ __u64 i;
+ __u64 j;
+};
+
+__u8 choice_arr[2] = { 0, 1 };
+
+static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
+{
+ if (idx == 0) {
+ ctx->buf = (char *)(0xDEAD);
+ return 0;
+ }
+
+ if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+ return 1;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=fp")
+int unsafe_on_2nd_iter(void *unused)
+{
+ char buf[4];
+ struct buf_context loop_ctx = { .buf = buf };
+
+ bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i = 0;
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_on_zero_iter(void *unused)
+{
+ struct num_context loop_ctx = { .i = 32 };
+
+ bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int widening_cb(__u32 idx, struct num_context *ctx)
+{
+ ++ctx->i;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int widening(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0, .j = 1 };
+
+ bpf_loop(100, widening_cb, &loop_ctx, 0);
+ /* loop_ctx.j is not changed during callback iteration,
+ * verifier should not apply widening to it.
+ */
+ return choice_arr[loop_ctx.j];
+}
+
+static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+{
+ for (;;) {}
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("infinite loop detected")
+int loop_detection(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static __always_inline __u64 oob_state_machine(struct num_context *ctx)
+{
+ switch (ctx->i) {
+ case 0:
+ ctx->i = 1;
+ break;
+ case 1:
+ ctx->i = 32;
+ break;
+ }
+ return 0;
+}
+
+static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_for_each_map_elem(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_ringbuf_drain(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_find_vma(void *unused)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int iter_limit_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i++;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int bpf_loop_iter_limit_ok(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(1, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
+int bpf_loop_iter_limit_overflow(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(2, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 100;
+ return 0;
+}
+
+static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 10;
+ return 0;
+}
+
+static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 1;
+ bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
+ bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
+ return 0;
+}
+
+/* Check that path visiting every callback function once had been
+ * reached by verifier. Variables 'ctx{1,2}i' below serve as flags,
+ * with each decimal digit corresponding to a callback visit marker.
+ */
+SEC("socket")
+__success __retval(111111)
+int bpf_loop_iter_limit_nested(void *unused)
+{
+ struct num_context ctx1 = { .i = 0 };
+ struct num_context ctx2 = { .i = 0 };
+ __u64 a, b, c;
+
+ bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+ bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
+ a = ctx1.i;
+ b = ctx2.i;
+ /* Force 'ctx1.i' and 'ctx2.i' precise. */
+ c = choice_arr[(a + b) % 2];
+ /* This makes 'c' zero, but neither clang nor verifier know it. */
+ c /= 10;
+ /* Make sure that verifier does not visit 'impossible' states:
+ * enumerate all possible callback visit masks.
+ */
+ if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
+ b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
+ asm volatile ("r0 /= 0;" ::: "r0");
+ return 1000 * a + b + c;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 375525329637..d4427d8e1217 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -6,7 +6,8 @@
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index 5bc86af80a9a..71735dbf33d4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -75,9 +75,10 @@ l0_%=: r0 += 1; \
" ::: __clobber_all);
}
-SEC("tracepoint")
+SEC("socket")
__description("bounded loop, start in the middle")
-__failure __msg("back-edge")
+__success
+__failure_unpriv __msg_unpriv("back-edge")
__naked void loop_start_in_the_middle(void)
{
asm volatile (" \
@@ -136,7 +137,9 @@ l0_%=: exit; \
SEC("tracepoint")
__description("bounded recursion")
-__failure __msg("back-edge")
+__failure
+/* verifier limitation in detecting max stack depth */
+__msg("the call stack of 8 frames is too deep !")
__naked void bounded_recursion(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index b2a04d1179d0..cbb9d6714f53 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -6,7 +6,8 @@
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
new file mode 100644
index 000000000000..6b564d4c0986
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 SUSE LLC */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0xfffffff8 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (87) r2 = -r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 8")
+__naked int bpf_neg(void)
+{
+ asm volatile (
+ "r2 = 8;"
+ "r2 = -r2;"
+ "if r2 != -8 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d4) r2 = le16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_le(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = le16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (dc) r2 = be16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_be(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = be16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d7) r2 = bswap16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_bswap(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = bswap16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#endif /* v4 instruction */
+
+SEC("?raw_tp")
+__success __log_level(2)
+/*
+ * Without the bug fix there will be no history between "last_idx 3 first_idx 3"
+ * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * expected log messages to the one specific mark_chain_precision operation.
+ *
+ * This is quite fragile: if verifier checkpointing heuristic changes, this
+ * might need adjusting.
+ */
+__msg("2: (07) r0 += 1 ; R0_w=6")
+__msg("3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
+__msg("3: R0_w=6")
+__naked int state_loop_first_last_equal(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "l0_%=:"
+ "r0 += 1;"
+ "r0 += 1;"
+ /* every few iterations we'll have a checkpoint here with
+ * first_idx == last_idx, potentially confusing precision
+ * backtracking logic
+ */
+ "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */
+ "goto l0_%=;"
+ "l1_%=:"
+ "exit;"
+ ::: __clobber_common
+ );
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
index 8fc5174808b2..2a2271cf0294 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sdiv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -6,7 +6,8 @@
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index db6b3143338b..f61d623b1ce8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -119,15 +119,41 @@ __naked int global_subprog_result_precise(void)
SEC("?raw_tp")
__success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
__msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
__msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
-__msg("mark_precise: frame0: parent state regs=r0 stack=:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
__naked int callback_result_precise(void)
{
asm volatile (
@@ -233,20 +259,36 @@ __naked int parent_callee_saved_reg_precise_global(void)
SEC("?raw_tp")
__success __log_level(2)
+/* First simulated path does not include callback body */
__msg("12: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 12 first_idx 10")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
__msg("mark_precise: frame0: parent state regs=r6 stack=:")
-__msg("mark_precise: frame0: last_idx 16 first_idx 0")
-__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
__naked int parent_callee_saved_reg_precise_with_callback(void)
{
asm volatile (
@@ -373,22 +415,38 @@ __naked int parent_stack_slot_precise_global(void)
SEC("?raw_tp")
__success __log_level(2)
+/* First simulated path does not include callback body */
__msg("14: (0f) r1 += r6")
-__msg("mark_precise: frame0: last_idx 14 first_idx 11")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
__msg("mark_precise: frame0: parent state regs= stack=-8:")
-__msg("mark_precise: frame0: last_idx 18 first_idx 0")
-__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
-__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
-__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
__naked int parent_stack_slot_precise_with_callback(void)
{
asm volatile (
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
index b2dfd7066c6e..f6d1cc9ad892 100644
--- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -21,7 +21,7 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;
-SEC("xdp")
+SEC("xdp.frags")
int rx(struct xdp_md *ctx)
{
void *data, *data_meta, *data_end;
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 07d786329105..80f620602d50 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -53,6 +53,8 @@
#define DEFAULT_TTL 64
#define MAX_ALLOWED_PORTS 8
+#define MAX_PACKET_OFF 0xffff
+
#define swap(a, b) \
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
@@ -177,69 +179,82 @@ static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
return ns / (NSEC_PER_SEC / TCP_TS_HZ);
}
-static __always_inline __u32 tcp_time_stamp_raw(void)
+static __always_inline __u32 tcp_clock_ms(void)
{
return tcp_ns_to_ts(tcp_clock_ns());
}
struct tcpopt_context {
- __u8 *ptr;
- __u8 *end;
+ void *data;
void *data_end;
__be32 *tsecr;
__u8 wscale;
bool option_timestamp;
bool option_sack;
+ __u32 off;
};
-static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
{
- __u8 opcode, opsize;
+ __u64 off = ctx->off;
+ __u8 *data;
- if (ctx->ptr >= ctx->end)
- return 1;
- if (ctx->ptr >= ctx->data_end)
- return 1;
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
- opcode = ctx->ptr[0];
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
- if (opcode == TCPOPT_EOL)
- return 1;
- if (opcode == TCPOPT_NOP) {
- ++ctx->ptr;
- return 0;
- }
+ ctx->off += sz;
+ return data;
+}
- if (ctx->ptr + 1 >= ctx->end)
- return 1;
- if (ctx->ptr + 1 >= ctx->data_end)
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+ __u8 *opcode, *opsize, *wscale, *tsecr;
+ __u32 off = ctx->off;
+
+ opcode = next(ctx, 1);
+ if (!opcode)
return 1;
- opsize = ctx->ptr[1];
- if (opsize < 2)
+
+ if (*opcode == TCPOPT_EOL)
return 1;
+ if (*opcode == TCPOPT_NOP)
+ return 0;
- if (ctx->ptr + opsize > ctx->end)
+ opsize = next(ctx, 1);
+ if (!opsize || *opsize < 2)
return 1;
- switch (opcode) {
+ switch (*opcode) {
case TCPOPT_WINDOW:
- if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end)
- ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE;
+ wscale = next(ctx, 1);
+ if (!wscale)
+ return 1;
+ if (*opsize == TCPOLEN_WINDOW)
+ ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
break;
case TCPOPT_TIMESTAMP:
- if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) {
+ tsecr = next(ctx, 4);
+ if (!tsecr)
+ return 1;
+ if (*opsize == TCPOLEN_TIMESTAMP) {
ctx->option_timestamp = true;
/* Client's tsval becomes our tsecr. */
- *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2));
+ *ctx->tsecr = get_unaligned((__be32 *)tsecr);
}
break;
case TCPOPT_SACK_PERM:
- if (opsize == TCPOLEN_SACK_PERM)
+ if (*opsize == TCPOLEN_SACK_PERM)
ctx->option_sack = true;
break;
}
- ctx->ptr += opsize;
+ ctx->off = off + *opsize;
return 0;
}
@@ -256,16 +271,21 @@ static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
__u16 tcp_len, __be32 *tsval,
- __be32 *tsecr, void *data_end)
+ __be32 *tsecr, void *data, void *data_end)
{
struct tcpopt_context loop_ctx = {
- .ptr = (__u8 *)(tcp_header + 1),
- .end = (__u8 *)tcp_header + tcp_len,
+ .data = data,
.data_end = data_end,
.tsecr = tsecr,
.wscale = TS_OPT_WSCALE_MASK,
.option_timestamp = false,
.option_sack = false,
+ /* Note: currently verifier would track .off as unbound scalar.
+ * In case if verifier would at some point get smarter and
+ * compute bounded value for this var, beware that it might
+ * hinder bpf_loop() convergence validation.
+ */
+ .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
};
u32 cookie;
@@ -274,7 +294,7 @@ static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
if (!loop_ctx.option_timestamp)
return false;
- cookie = tcp_time_stamp_raw() & ~TSMASK;
+ cookie = tcp_clock_ms() & ~TSMASK;
cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
if (loop_ctx.option_sack)
cookie |= TS_OPT_SACK;
@@ -635,7 +655,7 @@ static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
cookie = (__u32)value;
if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
- &tsopt_buf[0], &tsopt_buf[1], data_end))
+ &tsopt_buf[0], &tsopt_buf[1], data, data_end))
tsopt = tsopt_buf;
/* Check that there is enough space for a SYNACK. It also covers
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 37ffa57f28a1..a350ecdfba4a 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -153,6 +153,14 @@ static int parse_retval(const char *str, int *val, const char *name)
return parse_int(str, val, name);
}
+static void update_flags(int *flags, int flag, bool clear)
+{
+ if (clear)
+ *flags &= ~flag;
+ else
+ *flags |= flag;
+}
+
/* Uses btf_decl_tag attributes to describe the expected test
* behavior, see bpf_misc.h for detailed description of each attribute
* and attribute combinations.
@@ -171,6 +179,7 @@ static int parse_test_spec(struct test_loader *tester,
memset(spec, 0, sizeof(*spec));
spec->prog_name = bpf_program__name(prog);
+ spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */
btf = bpf_object__btf(obj);
if (!btf) {
@@ -187,7 +196,8 @@ static int parse_test_spec(struct test_loader *tester,
for (i = 1; i < btf__type_cnt(btf); i++) {
const char *s, *val, *msg;
const struct btf_type *t;
- int tmp;
+ bool clear;
+ int flags;
t = btf__type_by_id(btf, i);
if (!btf_is_decl_tag(t))
@@ -253,23 +263,30 @@ static int parse_test_spec(struct test_loader *tester,
goto cleanup;
} else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
+
+ clear = val[0] == '!';
+ if (clear)
+ val++;
+
if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
- spec->prog_flags |= BPF_F_STRICT_ALIGNMENT;
+ update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear);
} else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) {
- spec->prog_flags |= BPF_F_ANY_ALIGNMENT;
+ update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear);
} else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) {
- spec->prog_flags |= BPF_F_TEST_RND_HI32;
+ update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear);
} else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) {
- spec->prog_flags |= BPF_F_TEST_STATE_FREQ;
+ update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear);
} else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) {
- spec->prog_flags |= BPF_F_SLEEPABLE;
+ update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear);
} else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
- spec->prog_flags |= BPF_F_XDP_HAS_FRAGS;
+ update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear);
+ } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear);
} else /* assume numeric value */ {
- err = parse_int(val, &tmp, "test prog flags");
+ err = parse_int(val, &flags, "test prog flags");
if (err)
goto cleanup;
- spec->prog_flags |= tmp;
+ update_flags(&spec->prog_flags, flags, clear);
}
}
}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 7fc00e423e4d..767e0693df10 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1396,13 +1396,18 @@ static void test_map_stress(void)
#define MAX_DELAY_US 50000
#define MIN_DELAY_RANGE_US 5000
-static int map_update_retriable(int map_fd, const void *key, const void *value,
- int flags, int attempts)
+static bool retry_for_again_or_busy(int err)
+{
+ return (err == EAGAIN || err == EBUSY);
+}
+
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry)
{
int delay = rand() % MIN_DELAY_RANGE_US;
while (bpf_map_update_elem(map_fd, key, value, flags)) {
- if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ if (!attempts || !need_retry(errno))
return -errno;
if (delay <= MAX_DELAY_US / 2)
@@ -1445,11 +1450,13 @@ static void test_update_delete(unsigned int fn, void *data)
key = value = i;
if (do_update) {
- err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES);
+ err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
+ retry_for_again_or_busy);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
- err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES);
+ err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
+ retry_for_again_or_busy);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
index f6fbca761732..e4ac704a536c 100644
--- a/tools/testing/selftests/bpf/test_maps.h
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
@@ -16,4 +17,8 @@
extern int skips;
+typedef bool (*retry_for_error_fn)(int err);
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry);
+
#endif
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 2c89674fc62c..b0068a9d2cfe 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -679,7 +679,7 @@ static int load_path(const struct sock_addr_test *test, const char *path)
bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
- bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32);
+ bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS);
err = bpf_object__load(obj);
if (err) {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 98107e0452d3..f36e41435be7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1588,7 +1588,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (fixup_skips != skips)
return;
- pflags = BPF_F_TEST_RND_HI32;
+ pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 8d994884c7b4..d2458c1b1671 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -276,7 +276,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
bpf_program__set_type(prog, type);
- flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32;
+ flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS;
bpf_program__set_flags(prog, flags);
err = bpf_object__load(obj);
@@ -299,7 +299,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
.kern_version = kern_version,
- .prog_flags = BPF_F_TEST_RND_HI32,
+ .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS,
.log_level = extra_prog_load_log_flags,
.log_buf = log_buf,
.log_size = log_buf_sz,
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
index 2a6efbd0401e..b6d016461fb0 100644
--- a/tools/testing/selftests/bpf/unpriv_helpers.c
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -4,9 +4,40 @@
#include <stdlib.h>
#include <error.h>
#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
#include "unpriv_helpers.h"
+static bool get_mitigations_off(void)
+{
+ char cmdline[4096], *c;
+ int fd, ret = false;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0) {
+ perror("open /proc/cmdline");
+ return false;
+ }
+
+ if (read(fd, cmdline, sizeof(cmdline) - 1) < 0) {
+ perror("read /proc/cmdline");
+ goto out;
+ }
+
+ cmdline[sizeof(cmdline) - 1] = '\0';
+ for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
+ if (strncmp(c, "mitigations=off", strlen(c)))
+ continue;
+ ret = true;
+ break;
+ }
+out:
+ close(fd);
+ return ret;
+}
+
bool get_unpriv_disabled(void)
{
bool disabled;
@@ -22,5 +53,5 @@ bool get_unpriv_disabled(void)
disabled = true;
}
- return disabled;
+ return disabled ? true : get_mitigations_off();
}
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index 3af2501082b2..b616575c3b00 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -65,3 +65,35 @@
.expected_attach_type = BPF_SK_LOOKUP,
.runs = -1,
},
+{
+ "BPF_ST_MEM stack imm sign",
+ /* Check if verifier correctly reasons about sign of an
+ * immediate spilled to stack by BPF_ST instruction.
+ *
+ * fp[-8] = -44;
+ * r0 = fp[-8];
+ * if r0 s< 0 goto ret0;
+ * r0 = -1;
+ * exit;
+ * ret0:
+ * r0 = 0;
+ * exit;
+ */
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, -44),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .result = VERBOSE_ACCEPT,
+ .runs = -1,
+ .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+ 2: (c5) if r0 s< 0x0 goto pc+2\
+ R0_w=-44",
+},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 1bdf2b43e49e..3d5cd51071f0 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -442,7 +442,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge from insn 0 to 0",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -799,7 +799,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -811,7 +811,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index f9297900cea6..78f19c255f20 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -9,8 +9,8 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
@@ -23,8 +23,8 @@
BPF_LD_IMM64(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 655095810d4a..1d418d66e375 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -18,6 +18,7 @@
#include <libelf.h>
#include <gelf.h>
#include <float.h>
+#include <math.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -99,6 +100,7 @@ struct stat_specs {
enum stat_id ids[ALL_STATS_CNT];
enum stat_variant variants[ALL_STATS_CNT];
bool asc[ALL_STATS_CNT];
+ bool abs[ALL_STATS_CNT];
int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
};
@@ -133,6 +135,7 @@ struct filter {
int stat_id;
enum stat_variant stat_var;
long value;
+ bool abs;
};
static struct env {
@@ -142,10 +145,12 @@ static struct env {
bool debug;
bool quiet;
bool force_checkpoints;
+ bool force_reg_invariants;
enum resfmt out_fmt;
bool show_version;
bool comparison_mode;
bool replay_mode;
+ int top_n;
int log_level;
int log_size;
@@ -210,8 +215,7 @@ static const struct argp_option opts[] = {
{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
- { "test-states", 't', NULL, 0,
- "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+ { "top-n", 'n', "N", 0, "Emit only up to first N results." },
{ "quiet", 'q', NULL, 0, "Quiet mode" },
{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
{ "sort", 's', "SPEC", 0, "Specify sort order" },
@@ -219,6 +223,10 @@ static const struct argp_option opts[] = {
{ "compare", 'C', NULL, 0, "Comparison mode" },
{ "replay", 'R', NULL, 0, "Replay mode" },
{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
+ { "test-states", 't', NULL, 0,
+ "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+ { "test-reg-invariants", 'r', NULL, 0,
+ "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{},
};
@@ -290,6 +298,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 't':
env.force_checkpoints = true;
break;
+ case 'r':
+ env.force_reg_invariants = true;
+ break;
+ case 'n':
+ errno = 0;
+ env.top_n = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid top N specifier: %s\n", arg);
+ argp_usage(state);
+ }
case 'C':
env.comparison_mode = true;
break;
@@ -455,7 +473,8 @@ static struct {
{ OP_EQ, "=" },
};
-static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs);
static int append_filter(struct filter **filters, int *cnt, const char *str)
{
@@ -488,13 +507,14 @@ static int append_filter(struct filter **filters, int *cnt, const char *str)
long val;
const char *end = str;
const char *op_str;
+ bool is_abs;
op_str = operators[i].op_str;
p = strstr(str, op_str);
if (!p)
continue;
- if (!parse_stat_id_var(str, p - str, &id, &var)) {
+ if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
return -EINVAL;
}
@@ -533,6 +553,7 @@ static int append_filter(struct filter **filters, int *cnt, const char *str)
f->stat_id = id;
f->stat_var = var;
f->op = operators[i].op_kind;
+ f->abs = true;
f->value = val;
*cnt += 1;
@@ -657,7 +678,8 @@ static struct stat_def {
[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
};
-static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs)
{
static const char *var_sfxs[] = {
[VARIANT_A] = "_a",
@@ -667,6 +689,14 @@ static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_v
};
int i, j, k;
+ /* |<stat>| means we take absolute value of given stat */
+ *is_abs = false;
+ if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
+ *is_abs = true;
+ name += 1;
+ len -= 2;
+ }
+
for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
struct stat_def *def = &stat_defs[i];
size_t alias_len, sfx_len;
@@ -722,7 +752,7 @@ static bool is_desc_sym(char c)
static int parse_stat(const char *stat_name, struct stat_specs *specs)
{
int id;
- bool has_order = false, is_asc = false;
+ bool has_order = false, is_asc = false, is_abs = false;
size_t len = strlen(stat_name);
enum stat_variant var;
@@ -737,7 +767,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs)
len -= 1;
}
- if (!parse_stat_id_var(stat_name, len, &id, &var)) {
+ if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
return -ESRCH;
}
@@ -745,6 +775,7 @@ static int parse_stat(const char *stat_name, struct stat_specs *specs)
specs->ids[specs->spec_cnt] = id;
specs->variants[specs->spec_cnt] = var;
specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
+ specs->abs[specs->spec_cnt] = is_abs;
specs->spec_cnt++;
return 0;
@@ -997,6 +1028,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (env.force_checkpoints)
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
+ if (env.force_reg_invariants)
+ bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
err = bpf_object__load(obj);
env.progs_processed++;
@@ -1103,7 +1136,7 @@ cleanup:
}
static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
- enum stat_id id, bool asc)
+ enum stat_id id, bool asc, bool abs)
{
int cmp = 0;
@@ -1124,6 +1157,11 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
long v1 = s1->stats[id];
long v2 = s2->stats[id];
+ if (abs) {
+ v1 = v1 < 0 ? -v1 : v1;
+ v2 = v2 < 0 ? -v2 : v2;
+ }
+
if (v1 != v2)
cmp = v1 < v2 ? -1 : 1;
break;
@@ -1142,7 +1180,8 @@ static int cmp_prog_stats(const void *v1, const void *v2)
int i, cmp;
for (i = 0; i < env.sort_spec.spec_cnt; i++) {
- cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
+ cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
+ env.sort_spec.asc[i], env.sort_spec.abs[i]);
if (cmp != 0)
return cmp;
}
@@ -1211,7 +1250,8 @@ static void fetch_join_stat_value(const struct verif_stats_join *s,
static int cmp_join_stat(const struct verif_stats_join *s1,
const struct verif_stats_join *s2,
- enum stat_id id, enum stat_variant var, bool asc)
+ enum stat_id id, enum stat_variant var,
+ bool asc, bool abs)
{
const char *str1 = NULL, *str2 = NULL;
double v1, v2;
@@ -1220,6 +1260,11 @@ static int cmp_join_stat(const struct verif_stats_join *s1,
fetch_join_stat_value(s1, id, var, &str1, &v1);
fetch_join_stat_value(s2, id, var, &str2, &v2);
+ if (abs) {
+ v1 = fabs(v1);
+ v2 = fabs(v2);
+ }
+
if (str1)
cmp = strcmp(str1, str2);
else if (v1 != v2)
@@ -1237,7 +1282,8 @@ static int cmp_join_stats(const void *v1, const void *v2)
cmp = cmp_join_stat(s1, s2,
env.sort_spec.ids[i],
env.sort_spec.variants[i],
- env.sort_spec.asc[i]);
+ env.sort_spec.asc[i],
+ env.sort_spec.abs[i]);
if (cmp != 0)
return cmp;
}
@@ -1720,6 +1766,9 @@ static bool is_join_stat_filter_matched(struct filter *f, const struct verif_sta
fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
+ if (f->abs)
+ value = fabs(value);
+
switch (f->op) {
case OP_EQ: return value > f->value - eps && value < f->value + eps;
case OP_NEQ: return value < f->value - eps || value > f->value + eps;
@@ -1766,7 +1815,7 @@ static int handle_comparison_mode(void)
struct stat_specs base_specs = {}, comp_specs = {};
struct stat_specs tmp_sort_spec;
enum resfmt cur_fmt;
- int err, i, j, last_idx;
+ int err, i, j, last_idx, cnt;
if (env.filename_cnt != 2) {
fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
@@ -1879,7 +1928,7 @@ static int handle_comparison_mode(void)
env.join_stat_cnt += 1;
}
- /* now sort joined results accorsing to sort spec */
+ /* now sort joined results according to sort spec */
qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
/* for human-readable table output we need to do extra pass to
@@ -1896,16 +1945,22 @@ one_more_time:
output_comp_headers(cur_fmt);
last_idx = -1;
+ cnt = 0;
for (i = 0; i < env.join_stat_cnt; i++) {
const struct verif_stats_join *join = &env.join_stats[i];
if (!should_output_join_stats(join))
continue;
+ if (env.top_n && cnt >= env.top_n)
+ break;
+
if (cur_fmt == RESFMT_TABLE_CALCLEN)
last_idx = i;
output_comp_stats(join, cur_fmt, i == last_idx);
+
+ cnt++;
}
if (cur_fmt == RESFMT_TABLE_CALCLEN) {
@@ -1920,6 +1975,9 @@ static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *s
{
long value = stats->stats[f->stat_id];
+ if (f->abs)
+ value = value < 0 ? -value : value;
+
switch (f->op) {
case OP_EQ: return value == f->value;
case OP_NEQ: return value != f->value;
@@ -1964,7 +2022,7 @@ static bool should_output_stats(const struct verif_stats *stats)
static void output_prog_stats(void)
{
const struct verif_stats *stats;
- int i, last_stat_idx = 0;
+ int i, last_stat_idx = 0, cnt = 0;
if (env.out_fmt == RESFMT_TABLE) {
/* calculate column widths */
@@ -1984,7 +2042,10 @@ static void output_prog_stats(void)
stats = &env.prog_stats[i];
if (!should_output_stats(stats))
continue;
+ if (env.top_n && cnt >= env.top_n)
+ break;
output_stats(stats, env.out_fmt, i == last_stat_idx);
+ cnt++;
}
}
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 685034528018..65d14f3bbe30 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -36,7 +36,9 @@ DEFAULT_COMMAND="./test_progs"
MOUNT_DIR="mnt"
ROOTFS_IMAGE="root.img"
OUTPUT_DIR="$HOME/.bpf_selftests"
-KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config" "tools/testing/selftests/bpf/config.${ARCH}")
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config"
+ "tools/testing/selftests/bpf/config.vm"
+ "tools/testing/selftests/bpf/config.${ARCH}")
INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
NUM_COMPILE_JOBS="$(nproc)"
LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 17c980138796..c3ba40d0b9de 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -26,6 +26,7 @@
#include <linux/sockios.h>
#include <sys/mman.h>
#include <net/if.h>
+#include <ctype.h>
#include <poll.h>
#include <time.h>
@@ -47,6 +48,7 @@ struct xsk {
};
struct xdp_hw_metadata *bpf_obj;
+__u16 bind_flags = XDP_COPY;
struct xsk *rx_xsk;
const char *ifname;
int ifindex;
@@ -60,7 +62,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
const struct xsk_socket_config socket_config = {
.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
- .bind_flags = XDP_COPY,
+ .bind_flags = bind_flags,
};
const struct xsk_umem_config umem_config = {
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -263,11 +265,14 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
verify_skb_metadata(server_fd);
for (i = 0; i < rxq; i++) {
+ bool first_seg = true;
+ bool is_eop = true;
+
if (fds[i].revents == 0)
continue;
struct xsk *xsk = &rx_xsk[i];
-
+peek:
ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
printf("xsk_ring_cons__peek: %d\n", ret);
if (ret != 1)
@@ -276,12 +281,19 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
comp_addr = xsk_umem__extract_addr(rx_desc->addr);
addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
- printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
- xsk, idx, rx_desc->addr, addr, comp_addr);
- verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
- clock_id);
+ is_eop = !(rx_desc->options & XDP_PKT_CONTD);
+ printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
+ xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
+ if (first_seg) {
+ verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
+ clock_id);
+ first_seg = false;
+ }
+
xsk_ring_cons__release(&xsk->rx, 1);
refill_rx(xsk, comp_addr);
+ if (!is_eop)
+ goto peek;
}
}
@@ -404,6 +416,53 @@ static void timestamping_enable(int fd, int val)
error(1, errno, "setsockopt(SO_TIMESTAMPING)");
}
+static void print_usage(void)
+{
+ const char *usage =
+ "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
+ " -m Enable multi-buffer XDP for larger MTU\n"
+ " -h Display this help and exit\n\n"
+ "Generate test packets on the other machine with:\n"
+ " echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
+
+ printf("%s", usage);
+}
+
+static void read_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "mh")) != -1) {
+ switch (opt) {
+ case 'm':
+ bind_flags |= XDP_USE_SG;
+ break;
+ case 'h':
+ print_usage();
+ exit(0);
+ case '?':
+ if (isprint(optopt))
+ fprintf(stderr, "Unknown option: -%c\n", optopt);
+ fallthrough;
+ default:
+ print_usage();
+ error(-1, opterr, "Command line options error");
+ }
+ }
+
+ if (optind >= argc) {
+ fprintf(stderr, "No device name provided\n");
+ print_usage();
+ exit(-1);
+ }
+
+ ifname = argv[optind];
+ ifindex = if_nametoindex(ifname);
+
+ if (!ifname)
+ error(-1, errno, "Invalid interface name");
+}
+
int main(int argc, char *argv[])
{
clockid_t clock_id = CLOCK_TAI;
@@ -413,13 +472,8 @@ int main(int argc, char *argv[])
struct bpf_program *prog;
- if (argc != 2) {
- fprintf(stderr, "pass device name\n");
- return -1;
- }
+ read_args(argc, argv);
- ifname = argv[1];
- ifindex = if_nametoindex(ifname);
rxq = rxq_num(ifname);
printf("rxq: %d\n", rxq);
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 591ca9637b23..b604c570309a 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -908,8 +908,9 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
struct xdp_info *meta = data - sizeof(struct xdp_info);
if (meta->count != pkt->pkt_nb) {
- ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n",
- __func__, pkt->pkt_nb, meta->count);
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+ __func__, pkt->pkt_nb,
+ (unsigned long long)meta->count);
return false;
}
@@ -926,11 +927,13 @@ static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 exp
if (addr >= umem->num_frames * umem->frame_size ||
addr + len > umem->num_frames * umem->frame_size) {
- ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len);
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
return false;
}
if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
- ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len);
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
return false;
}
@@ -1029,7 +1032,8 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
ksft_print_msg("[%s] Too many packets completed\n", __func__);
- ksft_print_msg("Last completion address: %llx\n", addr);
+ ksft_print_msg("Last completion address: %llx\n",
+ (unsigned long long)addr);
return TEST_FAILURE;
}
@@ -1513,8 +1517,9 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
}
if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
- ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
- __func__, stats.tx_invalid_descs,
+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+ __func__,
+ (unsigned long long)stats.tx_invalid_descs,
ifobject->xsk->pkt_stream->nb_pkts);
return TEST_FAILURE;
}
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index 4804c7dc7b31..b171fd53b004 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -27,7 +27,7 @@ static const char * const dev_files[] = {
void print_cachestat(struct cachestat *cs)
{
ksft_print_msg(
- "Using cachestat: Cached: %lu, Dirty: %lu, Writeback: %lu, Evicted: %lu, Recently Evicted: %lu\n",
+ "Using cachestat: Cached: %llu, Dirty: %llu, Writeback: %llu, Evicted: %llu, Recently Evicted: %llu\n",
cs->nr_cache, cs->nr_dirty, cs->nr_writeback,
cs->nr_evicted, cs->nr_recently_evicted);
}
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
index 6e9d98d457d5..411ac098308f 100644
--- a/tools/testing/selftests/capabilities/Makefile
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -2,7 +2,7 @@
TEST_GEN_FILES := validate_cap
TEST_GEN_PROGS := test_execve
-CFLAGS += -O2 -g -std=gnu99 -Wall
+CFLAGS += -O2 -g -std=gnu99 -Wall $(KHDR_INCLUDES)
LDLIBS += -lcap-ng -lrt -ldl
include ../lib.mk
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index df0ef02b4036..e3a352b020a7 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -20,14 +20,6 @@
#include "../kselftest.h"
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT 47
-# define PR_CAP_AMBIENT_IS_SET 1
-# define PR_CAP_AMBIENT_RAISE 2
-# define PR_CAP_AMBIENT_LOWER 3
-# define PR_CAP_AMBIENT_CLEAR_ALL 4
-#endif
-
static int nerrs;
static pid_t mpid; /* main() pid is used to avoid duplicate test counts */
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index cdfc94268fe6..60b4e7b716a7 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -9,14 +9,6 @@
#include "../kselftest.h"
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT 47
-# define PR_CAP_AMBIENT_IS_SET 1
-# define PR_CAP_AMBIENT_RAISE 2
-# define PR_CAP_AMBIENT_LOWER 3
-# define PR_CAP_AMBIENT_CLEAR_ALL 4
-#endif
-
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
# define HAVE_GETAUXVAL
#endif
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index af8c3f30b9c1..2732e0b29271 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -7,4 +7,5 @@ test_kill
test_cpu
test_cpuset
test_zswap
+test_hugetlb_memcg
wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index c27f05f6ce9b..00b441928909 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -14,6 +14,7 @@ TEST_GEN_PROGS += test_kill
TEST_GEN_PROGS += test_cpu
TEST_GEN_PROGS += test_cpuset
TEST_GEN_PROGS += test_zswap
+TEST_GEN_PROGS += test_hugetlb_memcg
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
@@ -27,3 +28,4 @@ $(OUTPUT)/test_kill: cgroup_util.c
$(OUTPUT)/test_cpu: cgroup_util.c
$(OUTPUT)/test_cpuset: cgroup_util.c
$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 4afb132e4e4f..a6e9848189d6 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -3,7 +3,7 @@
#
# Test for cpuset v2 partition root state (PRS)
#
-# The sched verbose flag is set, if available, so that the console log
+# The sched verbose flag can be optionally set so that the console log
# can be examined for the correct setting of scheduling domain.
#
@@ -22,27 +22,27 @@ WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
# Find cgroup v2 mount point
CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"
+SUBPARTS_CPUS=$CGROUP2/.__DEBUG__.cpuset.cpus.subpartitions
+CPULIST=$(cat $CGROUP2/cpuset.cpus.effective)
-CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
-[[ $CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
+NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
+[[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
# Set verbose flag and delay factor
PROG=$1
-VERBOSE=
+VERBOSE=0
DELAY_FACTOR=1
SCHED_DEBUG=
while [[ "$1" = -* ]]
do
case "$1" in
- -v) VERBOSE=1
+ -v) ((VERBOSE++))
# Enable sched/verbose can slow thing down
[[ $DELAY_FACTOR -eq 1 ]] &&
DELAY_FACTOR=2
- break
;;
-d) DELAY_FACTOR=$2
shift
- break
;;
*) echo "Usage: $PROG [-v] [-d <delay-factor>"
exit
@@ -52,7 +52,7 @@ do
done
# Set sched verbose flag if available when "-v" option is specified
-if [[ -n "$VERBOSE" && -d /sys/kernel/debug/sched ]]
+if [[ $VERBOSE -gt 0 && -d /sys/kernel/debug/sched ]]
then
# Used to restore the original setting during cleanup
SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
@@ -61,14 +61,26 @@ fi
cd $CGROUP2
echo +cpuset > cgroup.subtree_control
+
+#
+# If cpuset has been set up and used in child cgroups, we may not be able to
+# create partition under root cgroup because of the CPU exclusivity rule.
+# So we are going to skip the test if this is the case.
+#
[[ -d test ]] || mkdir test
-cd test
+echo 0-6 > test/cpuset.cpus
+echo root > test/cpuset.cpus.partition
+cat test/cpuset.cpus.partition | grep -q invalid
+RESULT=$?
+echo member > test/cpuset.cpus.partition
+echo "" > test/cpuset.cpus
+[[ $RESULT -eq 0 ]] && skip_test "Child cgroups are using cpuset!"
cleanup()
{
online_cpus
+ cd $CGROUP2
rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- cd ..
rmdir test > /dev/null 2>&1
[[ -n "$SCHED_DEBUG" ]] &&
echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
@@ -103,7 +115,7 @@ test_partition()
[[ $? -eq 0 ]] || exit 1
ACTUAL_VAL=$(cat cpuset.cpus.partition)
[[ $ACTUAL_VAL != $EXPECTED_VAL ]] && {
- echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $EXPECTED_VAL"
+ echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $ACTUAL_VAL"
echo "Test FAILED"
exit 1
}
@@ -114,7 +126,7 @@ test_effective_cpus()
EXPECTED_VAL=$1
ACTUAL_VAL=$(cat cpuset.cpus.effective)
[[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]] && {
- echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$EXPECTED_VAL'"
+ echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$ACTUAL_VAL'"
echo "Test FAILED"
exit 1
}
@@ -139,6 +151,7 @@ test_add_proc()
#
test_isolated()
{
+ cd $CGROUP2/test
echo 2-3 > cpuset.cpus
TYPE=$(cat cpuset.cpus.partition)
[[ $TYPE = member ]] || echo member > cpuset.cpus.partition
@@ -203,125 +216,220 @@ test_isolated()
#
# Cgroup test hierarchy
#
-# test -- A1 -- A2 -- A3
-# \- B1
+# root -- A1 -- A2 -- A3
+# +- B1
#
-# P<v> = set cpus.partition (0:member, 1:root, 2:isolated, -1:root invalid)
-# C<l> = add cpu-list
+# P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
+# C<l> = add cpu-list to cpuset.cpus
+# X<l> = add cpu-list to cpuset.cpus.exclusive
# S<p> = use prefix in subtree_control
# T = put a task into cgroup
-# O<c>-<v> = Write <v> to CPU online file of <c>
+# O<c>=<v> = Write <v> to CPU online file of <c>
#
SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
TEST_MATRIX=(
- # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
- # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
- " S+ C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1"
- " S+ C0-1 . . C2-3 P1 . . . 0 "
- " S+ C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 "
- " S+ C0-1 . . C2-3 P1:S+ C1:P1 . . 0 "
- " S+ C0-1:S+ . . C2-3 . . . P1 0 "
- " S+ C0-1:P1 . . C2-3 S+ C1 . . 0 "
- " S+ C0-1:P1 . . C2-3 S+ C1:P1 . . 0 "
- " S+ C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 "
- " S+ C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5"
- " S+ C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
- " S+ C0-1 . . C2-3:P1 . . . C2 0 "
- " S+ C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
- " S+ C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
- " S+ C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
- " S+ C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- " S+ C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
- " S+ C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
- " S+ C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
- " S+ $SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1"
+ " C0-1 . . C2-3 P1 . . . 0 "
+ " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 "
+ " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 "
+ " C0-1:S+ . . C2-3 . . . P1 0 "
+ " C0-1:P1 . . C2-3 S+ C1 . . 0 "
+ " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 "
+ " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 "
+ " C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5"
+ " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
+ " C0-1 . . C2-3:P1 . . . C2 0 "
+ " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
+ "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
+ "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
+ "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
+ "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
# CPU offlining cases:
- " S+ C0-1 . . C2-3 S+ C4-5 . O2-0 0 A1:0-1,B1:3"
- " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 . . . 0 A1:0-1,A2:3"
- " S+ C0-3:P1:S+ C2-3:P1 . . O2-0 O2-1 . . 0 A1:0-1,A2:2-3"
- " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 . . . 0 A1:0,A2:2-3"
- " S+ C0-3:P1:S+ C2-3:P1 . . O1-0 O1-1 . . 0 A1:0-1,A2:2-3"
- " S+ C2-3:P1:S+ C3:P1 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- " S+ C2-3:P1:S+ C3:P2 . . O3-0 O3-1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- " S+ C2-3:P1:S+ C3:P1 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
- " S+ C2-3:P1:S+ C3:P2 . . O2-0 O2-1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
- " S+ C2-3:P1:S+ C3:P1 . . O2-0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
- " S+ C2-3:P1:S+ C3:P1 . . O3-0 . . . 0 A1:2,A2: A1:P1,A2:P1"
- " S+ C2-3:P1:S+ C3:P1 . . T:O2-0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- " S+ C2-3:P1:S+ C3:P1 . . . T:O3-0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
- " S+ $SETUP_A123_PARTITIONS . O1-0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- " S+ $SETUP_A123_PARTITIONS . O2-0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- " S+ $SETUP_A123_PARTITIONS . O3-0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
- " S+ $SETUP_A123_PARTITIONS . T:O1-0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- " S+ $SETUP_A123_PARTITIONS . . T:O2-0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
- " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
- " S+ $SETUP_A123_PARTITIONS . T:O1-0 O1-1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- " S+ $SETUP_A123_PARTITIONS . . T:O2-0 O2-1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- " S+ $SETUP_A123_PARTITIONS . . . T:O3-0 O3-1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
- " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O1-1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
- " S+ $SETUP_A123_PARTITIONS . T:O1-0 O2-0 O2-1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
-
- # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
- # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
+ " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
+ "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
+ "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ #
+ # Remote partition and cpuset.cpus.exclusive tests
+ #
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-2,A2:1-2,A3:3 A1:P0,A3:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2"
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5"
+
+ # Nested remote/local partition tests
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \
+ A1:P0,A2:P1,A3:P2,B1:P1 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \
+ A1:P0,A2:P1,A3:P2,B1:P1 2-4"
+ " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \
+ A1:P0,A2:P1,A3:P2,B1:P1 2-4"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \
+ A1:P0,A2:P2,A3:P1 2-4"
+ " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \
+ A1:P0,A2:P-2,A3:P-1 ."
+ " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \
+ A1:P0,A2:P2,A3:P-1 2-4"
+
+ # Remote partition offline tests
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3"
+
+ # An invalidated remote partition cannot self-recover from hotplug
+ " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2"
+
+ # cpus.exclusive.effective clearing test
+ " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:"
+
+ # Invalid to valid remote partition transition test
+ " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2"
+ " C0-3:S+ C1-3:X3:P2
+ . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3"
+
+ # Invalid to valid local partition direct transition tests
+ " C1-3:S+:P2 C2-3:X1:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:2-3:XA2: A1:P2,A2:P-2 1-3"
+ " C1-3:S+:P2 C2-3:X1:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0"
+ " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3"
+ " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3"
+
+ # Local partition invalidation tests
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3"
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . C4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ # Local partition CPU change tests
+ " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2"
+ " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3"
+
+ # cpus_allowed/exclusive_cpus update tests
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \
+ A1:P0,A3:P-2 ."
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \
+ A1:P0,A3:P-2 ."
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \
+ A1:P0,A3:P2 3"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \
+ A1:P0,A3:P2 3"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \
+ A1:P0,A3:P-2 ."
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \
+ A1:P0,A3:P-2 ."
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \
+ A1:P0,A3:P-2 ."
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
#
# Incorrect change to cpuset.cpus invalidates partition root
#
# Adding CPUs to partition root that are not in parent's
# cpuset.cpus is allowed, but those extra CPUs are ignored.
- " S+ C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
# Taking away all CPUs from parent or itself if there are tasks
# will make the partition invalid.
- " S+ C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
- " S+ C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1"
- " S+ $SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
- " S+ $SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
+ " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
# Changing a partition root to member makes child partitions invalid
- " S+ C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
- " S+ $SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
+ "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
+ "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
# cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
# as they overlap.
- " S+ C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
# Deletion of CPUs distributed to child cgroup is allowed.
- " S+ C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
+ "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
# To become a valid partition root, cpuset.cpus must overlap parent's
# cpuset.cpus.
- " S+ C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
+ " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
# Enabling partition with child cpusets is allowed
- " S+ C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
+ " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
# A partition root with non-partition root parent is invalid, but it
# can be made valid if its parent becomes a partition root too.
- " S+ C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
- " S+ C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
+ " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
+ " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
# A non-exclusive cpuset.cpus change will invalidate partition and its siblings
- " S+ C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
- " S+ C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
- " S+ C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+ " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
+ " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
+ " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
- # test old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate
- # ---- ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
# A task cannot be added to a partition with no cpu
- " S+ C2-3:P1:S+ C3:P1 . . O2-0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
+
+ # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
+ " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5"
)
#
# Write to the cpu online file
-# $1 - <c>-<v> where <c> = cpu number, <v> value to be written
+# $1 - <c>=<v> where <c> = cpu number, <v> value to be written
#
write_cpu_online()
{
- CPU=${1%-*}
- VAL=${1#*-}
+ CPU=${1%=*}
+ VAL=${1#*=}
CPUFILE=//sys/devices/system/cpu/cpu${CPU}/online
if [[ $VAL -eq 0 ]]
then
@@ -349,11 +457,12 @@ set_ctrl_state()
TMPMSG=/tmp/.msg_$$
CGRP=$1
STATE=$2
- SHOWERR=${3}${VERBOSE}
+ SHOWERR=${3}
CTRL=${CTRL:=$CONTROLLER}
HASERR=0
REDIRECT="2> $TMPMSG"
[[ -z "$STATE" || "$STATE" = '.' ]] && return 0
+ [[ $VERBOSE -gt 0 ]] && SHOWERR=1
rm -f $TMPMSG
for CMD in $(echo $STATE | sed -e "s/:/ /g")
@@ -362,12 +471,18 @@ set_ctrl_state()
SFILE=$CGRP/cgroup.subtree_control
PFILE=$CGRP/cpuset.cpus.partition
CFILE=$CGRP/cpuset.cpus
+ XFILE=$CGRP/cpuset.cpus.exclusive
S=$(expr substr $CMD 1 1)
if [[ $S = S ]]
then
PREFIX=${CMD#?}
COMM="echo ${PREFIX}${CTRL} > $SFILE"
eval $COMM $REDIRECT
+ elif [[ $S = X ]]
+ then
+ CPUS=${CMD#?}
+ COMM="echo $CPUS > $XFILE"
+ eval $COMM $REDIRECT
elif [[ $S = C ]]
then
CPUS=${CMD#?}
@@ -430,7 +545,7 @@ online_cpus()
[[ -n "OFFLINE_CPUS" ]] && {
for C in $OFFLINE_CPUS
do
- write_cpu_online ${C}-1
+ write_cpu_online ${C}=1
done
}
}
@@ -443,18 +558,27 @@ reset_cgroup_states()
echo 0 > $CGROUP2/cgroup.procs
online_cpus
rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
- set_ctrl_state . S-
+ pause 0.02
+ set_ctrl_state . R-
pause 0.01
}
dump_states()
{
- for DIR in A1 A1/A2 A1/A2/A3 B1
+ for DIR in . A1 A1/A2 A1/A2/A3 B1
do
+ CPUS=$DIR/cpuset.cpus
ECPUS=$DIR/cpuset.cpus.effective
+ XCPUS=$DIR/cpuset.cpus.exclusive
+ XECPUS=$DIR/cpuset.cpus.exclusive.effective
PRS=$DIR/cpuset.cpus.partition
- [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)"
- [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)"
+ PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions
+ [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)"
+ [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)"
+ [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)"
+ [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)"
+ [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)"
+ [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)"
done
}
@@ -470,11 +594,17 @@ check_effective_cpus()
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
CPUS=$2
+ if [[ $CGRP = X* ]]
+ then
+ CGRP=${CGRP#X}
+ FILE=cpuset.cpus.exclusive.effective
+ else
+ FILE=cpuset.cpus.effective
+ fi
[[ $CGRP = A2 ]] && CGRP=A1/A2
[[ $CGRP = A3 ]] && CGRP=A1/A2/A3
- FILE=$CGRP/cpuset.cpus.effective
- [[ -e $FILE ]] || return 1
- [[ $CPUS = $(cat $FILE) ]] || return 1
+ [[ -e $CGRP/$FILE ]] || return 1
+ [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1
done
}
@@ -525,6 +655,65 @@ check_cgroup_states()
}
#
+# Get isolated (including offline) CPUs by looking at
+# /sys/kernel/debug/sched/domains and compare that with the expected value.
+#
+# Note that a sched domain of just 1 CPU will be considered isolated.
+#
+# $1 - expected isolated cpu list
+#
+check_isolcpus()
+{
+ EXPECT_VAL=$1
+ ISOLCPUS=
+ LASTISOLCPU=
+ SCHED_DOMAINS=/sys/kernel/debug/sched/domains
+ [[ -d $SCHED_DOMAINS ]] || return 0
+ [[ $EXPECT_VAL = . ]] && EXPECT_VAL=
+
+ for ((CPU=0; CPU < $NR_CPUS; CPU++))
+ do
+ [[ -n "$(ls ${SCHED_DOMAINS}/cpu$CPU)" ]] && continue
+
+ if [[ -z "$LASTISOLCPU" ]]
+ then
+ ISOLCPUS=$CPU
+ LASTISOLCPU=$CPU
+ elif [[ "$LASTISOLCPU" -eq $((CPU - 1)) ]]
+ then
+ echo $ISOLCPUS | grep -q "\<$LASTISOLCPU\$"
+ if [[ $? -eq 0 ]]
+ then
+ ISOLCPUS=${ISOLCPUS}-
+ fi
+ LASTISOLCPU=$CPU
+ else
+ if [[ $ISOLCPUS = *- ]]
+ then
+ ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+ fi
+ ISOLCPUS=${ISOLCPUS},$CPU
+ LASTISOLCPU=$CPU
+ fi
+ done
+ [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+ [[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
+}
+
+test_fail()
+{
+ TESTNUM=$1
+ TESTTYPE=$2
+ ADDINFO=$3
+ echo "Test $TEST[$TESTNUM] failed $TESTTYPE check!"
+ [[ -n "$ADDINFO" ]] && echo "*** $ADDINFO ***"
+ eval echo \${$TEST[$I]}
+ echo
+ dump_states
+ exit 1
+}
+
+#
# Run cpuset state transition test
# $1 - test matrix name
#
@@ -536,88 +725,83 @@ run_state_test()
{
TEST=$1
CONTROLLER=cpuset
- CPULIST=0-6
I=0
eval CNT="\${#$TEST[@]}"
reset_cgroup_states
- echo $CPULIST > cpuset.cpus
- echo root > cpuset.cpus.partition
console_msg "Running state transition test ..."
while [[ $I -lt $CNT ]]
do
echo "Running test $I ..." > /dev/console
+ [[ $VERBOSE -gt 1 ]] && {
+ echo ""
+ eval echo \${$TEST[$I]}
+ }
eval set -- "\${$TEST[$I]}"
- ROOT=$1
- OLD_A1=$2
- OLD_A2=$3
- OLD_A3=$4
- OLD_B1=$5
- NEW_A1=$6
- NEW_A2=$7
- NEW_A3=$8
- NEW_B1=$9
- RESULT=${10}
- ECPUS=${11}
- STATES=${12}
-
- set_ctrl_state_noerr . $ROOT
+ OLD_A1=$1
+ OLD_A2=$2
+ OLD_A3=$3
+ OLD_B1=$4
+ NEW_A1=$5
+ NEW_A2=$6
+ NEW_A3=$7
+ NEW_B1=$8
+ RESULT=$9
+ ECPUS=${10}
+ STATES=${11}
+ ICPUS=${12}
+
+ set_ctrl_state_noerr B1 $OLD_B1
set_ctrl_state_noerr A1 $OLD_A1
set_ctrl_state_noerr A1/A2 $OLD_A2
set_ctrl_state_noerr A1/A2/A3 $OLD_A3
- set_ctrl_state_noerr B1 $OLD_B1
RETVAL=0
set_ctrl_state A1 $NEW_A1; ((RETVAL += $?))
set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?))
set_ctrl_state A1/A2/A3 $NEW_A3; ((RETVAL += $?))
set_ctrl_state B1 $NEW_B1; ((RETVAL += $?))
- [[ $RETVAL -ne $RESULT ]] && {
- echo "Test $TEST[$I] failed result check!"
- eval echo \"\${$TEST[$I]}\"
- dump_states
- exit 1
- }
+ [[ $RETVAL -ne $RESULT ]] && test_fail $I result
[[ -n "$ECPUS" && "$ECPUS" != . ]] && {
check_effective_cpus $ECPUS
- [[ $? -ne 0 ]] && {
- echo "Test $TEST[$I] failed effective CPU check!"
- eval echo \"\${$TEST[$I]}\"
- echo
- dump_states
- exit 1
- }
+ [[ $? -ne 0 ]] && test_fail $I "effective CPU"
}
- [[ -n "$STATES" ]] && {
+ [[ -n "$STATES" && "$STATES" != . ]] && {
check_cgroup_states $STATES
- [[ $? -ne 0 ]] && {
- echo "FAILED: Test $TEST[$I] failed states check!"
- eval echo \"\${$TEST[$I]}\"
- echo
- dump_states
- exit 1
- }
+ [[ $? -ne 0 ]] && test_fail $I states
}
+ # Compare the expected isolated CPUs with the actual ones,
+ # if available
+ [[ -n "$ICPUS" ]] && {
+ check_isolcpus $ICPUS
+ [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
+ "Expect $ICPUS, get $ISOLCPUS instead"
+ }
reset_cgroup_states
#
# Check to see if effective cpu list changes
#
- pause 0.05
NEWLIST=$(cat cpuset.cpus.effective)
+ RETRY=0
+ while [[ $NEWLIST != $CPULIST && $RETRY -lt 5 ]]
+ do
+ # Wait a bit longer & recheck a few times
+ pause 0.01
+ ((RETRY++))
+ NEWLIST=$(cat cpuset.cpus.effective)
+ done
[[ $NEWLIST != $CPULIST ]] && {
echo "Effective cpus changed to $NEWLIST after test $I!"
exit 1
}
- [[ -n "$VERBOSE" ]] && echo "Test $I done."
+ [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
((I++))
done
echo "All $I tests of $TEST PASSED."
-
- echo member > cpuset.cpus.partition
}
#
@@ -642,6 +826,7 @@ test_inotify()
{
ERR=0
PRS=/tmp/.prs_$$
+ cd $CGROUP2/test
[[ -f $WAIT_INOTIFY ]] || {
echo "wait_inotify not found, inotify test SKIPPED."
return
@@ -655,7 +840,7 @@ test_inotify()
rm -f $PRS
wait_inotify $PWD/cpuset.cpus.partition $PRS &
pause 0.01
- set_ctrl_state . "O1-0"
+ set_ctrl_state . "O1=0"
pause 0.01
check_cgroup_states ".:P-1"
if [[ $? -ne 0 ]]
@@ -689,5 +874,3 @@ run_state_test TEST_MATRIX
test_isolated
test_inotify
echo "All tests PASSED."
-cd ..
-rmdir test
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
new file mode 100644
index 000000000000..f0fefeb4cc24
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+#define ADDR ((void *)(0x0UL))
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+/* mapping 8 MBs == 4 hugepages */
+#define LENGTH (8UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+/* borrowed from mm/hmm-tests.c */
+static long get_hugepage_size(void)
+{
+ int fd;
+ char buf[2048];
+ int len;
+ char *p, *q, *path = "/proc/meminfo", *tag = "Hugepagesize:";
+ long val;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ /* Error opening the file */
+ return -1;
+ }
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0) {
+ /* Error in reading the file */
+ return -1;
+ }
+ if (len == sizeof(buf)) {
+ /* Error file is too large */
+ return -1;
+ }
+ buf[len] = '\0';
+
+ /* Search for a tag if provided */
+ if (tag) {
+ p = strstr(buf, tag);
+ if (!p)
+ return -1; /* looks like the line we want isn't there */
+ p += strlen(tag);
+ } else
+ p = buf;
+
+ val = strtol(p, &q, 0);
+ if (*q != ' ') {
+ /* Error parsing the file */
+ return -1;
+ }
+
+ return val;
+}
+
+static int set_file(const char *path, long value)
+{
+ FILE *file;
+ int ret;
+
+ file = fopen(path, "w");
+ if (!file)
+ return -1;
+ ret = fprintf(file, "%ld\n", value);
+ fclose(file);
+ return ret;
+}
+
+static int set_nr_hugepages(long value)
+{
+ return set_file("/proc/sys/vm/nr_hugepages", value);
+}
+
+static unsigned int check_first(char *addr)
+{
+ return *(unsigned int *)addr;
+}
+
+static void write_data(char *addr)
+{
+ unsigned long i;
+
+ for (i = 0; i < LENGTH; i++)
+ *(addr + i) = (char)i;
+}
+
+static int hugetlb_test_program(const char *cgroup, void *arg)
+{
+ char *test_group = (char *)arg;
+ void *addr;
+ long old_current, expected_current, current;
+ int ret = EXIT_FAILURE;
+
+ old_current = cg_read_long(test_group, "memory.current");
+ set_nr_hugepages(20);
+ current = cg_read_long(test_group, "memory.current");
+ if (current - old_current >= MB(2)) {
+ ksft_print_msg(
+ "setting nr_hugepages should not increase hugepage usage.\n");
+ ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+ return EXIT_FAILURE;
+ }
+
+ addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
+ if (addr == MAP_FAILED) {
+ ksft_print_msg("fail to mmap.\n");
+ return EXIT_FAILURE;
+ }
+ current = cg_read_long(test_group, "memory.current");
+ if (current - old_current >= MB(2)) {
+ ksft_print_msg("mmap should not increase hugepage usage.\n");
+ ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+ goto out_failed_munmap;
+ }
+ old_current = current;
+
+ /* read the first page */
+ check_first(addr);
+ expected_current = old_current + MB(2);
+ current = cg_read_long(test_group, "memory.current");
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should increase by around 2MB.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ goto out_failed_munmap;
+ }
+
+ /* write to the whole range */
+ write_data(addr);
+ current = cg_read_long(test_group, "memory.current");
+ expected_current = old_current + MB(8);
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should increase by around 8MB.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ goto out_failed_munmap;
+ }
+
+ /* unmap the whole range */
+ munmap(addr, LENGTH);
+ current = cg_read_long(test_group, "memory.current");
+ expected_current = old_current;
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should go back down.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ return ret;
+ }
+
+ ret = EXIT_SUCCESS;
+ return ret;
+
+out_failed_munmap:
+ munmap(addr, LENGTH);
+ return ret;
+}
+
+static int test_hugetlb_memcg(char *root)
+{
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ test_group = cg_name(root, "hugetlb_memcg_test");
+ if (!test_group || cg_create(test_group)) {
+ ksft_print_msg("fail to create cgroup.\n");
+ goto out;
+ }
+
+ if (cg_write(test_group, "memory.max", "100M")) {
+ ksft_print_msg("fail to set cgroup memory limit.\n");
+ goto out;
+ }
+
+ /* disable swap */
+ if (cg_write(test_group, "memory.swap.max", "0")) {
+ ksft_print_msg("fail to disable swap.\n");
+ goto out;
+ }
+
+ if (!cg_run(test_group, hugetlb_test_program, (void *)test_group))
+ ret = KSFT_PASS;
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int ret = EXIT_SUCCESS, has_memory_hugetlb_acc;
+
+ has_memory_hugetlb_acc = proc_mount_contains("memory_hugetlb_accounting");
+ if (has_memory_hugetlb_acc < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ else if (!has_memory_hugetlb_acc)
+ ksft_exit_skip("memory hugetlb accounting is disabled\n");
+
+ /* Unit is kB! */
+ if (get_hugepage_size() != 2048) {
+ ksft_print_msg("test_hugetlb_memcg requires 2MB hugepages\n");
+ ksft_test_result_skip("test_hugetlb_memcg\n");
+ return ret;
+ }
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ switch (test_hugetlb_memcg(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("test_hugetlb_memcg\n");
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("test_hugetlb_memcg\n");
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("test_hugetlb_memcg\n");
+ break;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index 49def87a909b..c99d2adaca3f 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -55,6 +55,11 @@ static int get_zswap_written_back_pages(size_t *value)
return read_int("/sys/kernel/debug/zswap/written_back_pages", value);
}
+static long get_zswpout(const char *cgroup)
+{
+ return cg_read_key_long(cgroup, "memory.stat", "zswpout ");
+}
+
static int allocate_bytes(const char *cgroup, void *arg)
{
size_t size = (size_t)arg;
@@ -69,6 +74,48 @@ static int allocate_bytes(const char *cgroup, void *arg)
}
/*
+ * Sanity test to check that pages are written into zswap.
+ */
+static int test_zswap_usage(const char *root)
+{
+ long zswpout_before, zswpout_after;
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ /* Set up */
+ test_group = cg_name(root, "no_shrink_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "1M"))
+ goto out;
+
+ zswpout_before = get_zswpout(test_group);
+ if (zswpout_before < 0) {
+ ksft_print_msg("Failed to get zswpout\n");
+ goto out;
+ }
+
+ /* Allocate more than memory.max to push memory into zswap */
+ if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
+ goto out;
+
+ /* Verify that pages come into zswap */
+ zswpout_after = get_zswpout(test_group);
+ if (zswpout_after <= zswpout_before) {
+ ksft_print_msg("zswpout does not increase after test program\n");
+ goto out;
+ }
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+/*
* When trying to store a memcg page in zswap, if the memcg hits its memory
* limit in zswap, writeback should not be triggered.
*
@@ -235,6 +282,7 @@ struct zswap_test {
int (*fn)(const char *root);
const char *name;
} tests[] = {
+ T(test_zswap_usage),
T(test_no_kmem_bypass),
T(test_no_invasive_cgroup_shrink),
};
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index e60cf4da8fb0..3c9bf0cd82a8 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -7,6 +7,7 @@
#include <inttypes.h>
#include <linux/types.h>
#include <linux/sched.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -103,8 +104,8 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
return 0;
}
-static void test_clone3(uint64_t flags, size_t size, int expected,
- enum test_mode test_mode)
+static bool test_clone3(uint64_t flags, size_t size, int expected,
+ enum test_mode test_mode)
{
int ret;
@@ -114,92 +115,223 @@ static void test_clone3(uint64_t flags, size_t size, int expected,
ret = call_clone3(flags, size, test_mode);
ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
getpid(), ret, expected);
- if (ret != expected)
- ksft_test_result_fail(
+ if (ret != expected) {
+ ksft_print_msg(
"[%d] Result (%d) is different than expected (%d)\n",
getpid(), ret, expected);
- else
- ksft_test_result_pass(
- "[%d] Result (%d) matches expectation (%d)\n",
- getpid(), ret, expected);
-}
-
-int main(int argc, char *argv[])
-{
- uid_t uid = getuid();
-
- ksft_print_header();
- ksft_set_plan(19);
- test_clone3_supported();
-
- /* Just a simple clone3() should return 0.*/
- test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
-
- /* Do a clone3() in a new PID NS.*/
- if (uid == 0)
- test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
-
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
- test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
+ return false;
+ }
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
- test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+ return true;
+}
- /* Do a clone3() with sizeof(struct clone_args) + 8 */
- test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+typedef bool (*filter_function)(void);
+typedef size_t (*size_function)(void);
- /* Do a clone3() with exit_signal having highest 32 bits non-zero */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
+static bool not_root(void)
+{
+ if (getuid() != 0) {
+ ksft_print_msg("Not running as root\n");
+ return true;
+ }
- /* Do a clone3() with negative 32-bit exit_signal */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG);
+ return false;
+}
- /* Do a clone3() with exit_signal not fitting into CSIGNAL mask */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG);
+static bool no_timenamespace(void)
+{
+ if (not_root())
+ return true;
- /* Do a clone3() with NSIG < exit_signal < CSIG */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
+ if (!access("/proc/self/ns/time", F_OK))
+ return false;
- test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+ ksft_print_msg("Time namespaces are not supported\n");
+ return true;
+}
- test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
- CLONE3_ARGS_ALL_0);
+static size_t page_size_plus_8(void)
+{
+ return getpagesize() + 8;
+}
- test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
- CLONE3_ARGS_ALL_0);
+struct test {
+ const char *name;
+ uint64_t flags;
+ size_t size;
+ size_function size_function;
+ int expected;
+ enum test_mode test_mode;
+ filter_function filter;
+};
- /* Do a clone3() with > page size */
- test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
+static const struct test tests[] = {
+ {
+ .name = "simple clone3()",
+ .flags = 0,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "clone3() in a new PID_NS",
+ .flags = CLONE_NEWPID,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0",
+ .flags = 0,
+ .size = CLONE_ARGS_SIZE_VER0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 - 8",
+ .flags = 0,
+ .size = CLONE_ARGS_SIZE_VER0 - 8,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "sizeof(struct clone_args) + 8",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "exit_signal with highest 32 bits non-zero",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG,
+ },
+ {
+ .name = "negative 32-bit exit_signal",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG,
+ },
+ {
+ .name = "exit_signal not fitting into CSIGNAL mask",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG,
+ },
+ {
+ .name = "NSIG < exit_signal < CSIG",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_args) + 8",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_args) + 16",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 16,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_arg) * 2",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 16,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments > page size",
+ .flags = 0,
+ .size_function = page_size_plus_8,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = CLONE_ARGS_SIZE_VER0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = CLONE_ARGS_SIZE_VER0 - 8,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "sizeof(struct clone_args) + 8 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "Arguments > page size in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size_function = page_size_plus_8,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "New time NS",
+ .flags = CLONE_NEWTIME,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = no_timenamespace,
+ },
+ {
+ .name = "exit signal (SIGCHLD) in flags",
+ .flags = SIGCHLD,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+};
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
- if (uid == 0)
- test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
- CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+int main(int argc, char *argv[])
+{
+ size_t size;
+ int i;
- /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
- test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
- CLONE3_ARGS_NO_TEST);
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ test_clone3_supported();
- /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
- if (uid == 0)
- test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
- CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (tests[i].filter && tests[i].filter()) {
+ ksft_test_result_skip("%s\n", tests[i].name);
+ continue;
+ }
- /* Do a clone3() with > page size in a new PID NS */
- test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG,
- CLONE3_ARGS_NO_TEST);
+ if (tests[i].size_function)
+ size = tests[i].size_function();
+ else
+ size = tests[i].size;
- /* Do a clone3() in a new time namespace */
- test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST);
+ ksft_print_msg("Running test '%s'\n", tests[i].name);
- /* Do a clone3() with exit signal (SIGCHLD) in flags */
- test_clone3(SIGCHLD, 0, -EINVAL, CLONE3_ARGS_NO_TEST);
+ ksft_test_result(test_clone3(tests[i].flags, size,
+ tests[i].expected,
+ tests[i].test_mode),
+ "%s\n", tests[i].name);
+ }
ksft_finished();
}
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 52d3f0364bda..31b56d625655 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -27,9 +27,7 @@
#include "../kselftest_harness.h"
#include "clone3_selftests.h"
-#ifndef MAX_PID_NS_LEVEL
#define MAX_PID_NS_LEVEL 32
-#endif
static void child_exit(int ret)
{
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 47a8c0fc3676..54a8b2445be9 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -16,10 +16,6 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-#ifndef CLONE_CLEAR_SIGHAND
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL
-#endif
-
static void nop_handler(int signo)
{
}
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index e81ffaaee02b..3d2663fe50ba 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -15,10 +15,6 @@
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
-#ifndef CLONE_INTO_CGROUP
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
-#endif
-
#ifndef __NR_clone3
#define __NR_clone3 -1
#endif
@@ -32,18 +28,9 @@ struct __clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
-#endif
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
-#ifndef CLONE_ARGS_SIZE_VER1
-#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
-#endif
__aligned_u64 cgroup;
-#ifndef CLONE_ARGS_SIZE_VER2
-#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
-#endif
};
static pid_t sys_clone3(struct __clone_args *args, size_t size)
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 0229e9ebb995..ed785afb6077 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -23,9 +23,7 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-#ifndef MAX_PID_NS_LEVEL
#define MAX_PID_NS_LEVEL 32
-#endif
static int pipe_1[2];
static int pipe_2[2];
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 749239930ca8..534576f06df1 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -16,34 +16,6 @@
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
-#ifndef __NR_close_range
- #if defined __alpha__
- #define __NR_close_range 546
- #elif defined _MIPS_SIM
- #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
- #define __NR_close_range (436 + 4000)
- #endif
- #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
- #define __NR_close_range (436 + 6000)
- #endif
- #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
- #define __NR_close_range (436 + 5000)
- #endif
- #elif defined __ia64__
- #define __NR_close_range (436 + 1024)
- #else
- #define __NR_close_range 436
- #endif
-#endif
-
-#ifndef CLOSE_RANGE_UNSHARE
-#define CLOSE_RANGE_UNSHARE (1U << 1)
-#endif
-
-#ifndef CLOSE_RANGE_CLOEXEC
-#define CLOSE_RANGE_CLOEXEC (1U << 2)
-#endif
-
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
index 902e312bca89..902e312bca89 100644..100755
--- a/tools/testing/selftests/damon/debugfs_attrs.sh
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
index 4a76e37ef16b..4a76e37ef16b 100644..100755
--- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
+++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh
index 87aff8083822..87aff8083822 100644..100755
--- a/tools/testing/selftests/damon/debugfs_empty_targets.sh
+++ b/tools/testing/selftests/damon/debugfs_empty_targets.sh
diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
index 922cadac2950..922cadac2950 100644..100755
--- a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
+++ b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
index f3ffeb1343cf..f3ffeb1343cf 100644..100755
--- a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
+++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh
index 5b39ab44731c..5b39ab44731c 100644..100755
--- a/tools/testing/selftests/damon/debugfs_schemes.sh
+++ b/tools/testing/selftests/damon/debugfs_schemes.sh
diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh
index 49aeabdb0aae..49aeabdb0aae 100644..100755
--- a/tools/testing/selftests/damon/debugfs_target_ids.sh
+++ b/tools/testing/selftests/damon/debugfs_target_ids.sh
diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh
index 61b80197c896..61b80197c896 100644..100755
--- a/tools/testing/selftests/damon/lru_sort.sh
+++ b/tools/testing/selftests/damon/lru_sort.sh
diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh
index 78dbc2334cbe..78dbc2334cbe 100644..100755
--- a/tools/testing/selftests/damon/reclaim.sh
+++ b/tools/testing/selftests/damon/reclaim.sh
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 60a9a305aef0..56f0230a8b92 100644..100755
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -175,6 +175,7 @@ test_scheme()
ensure_dir "$scheme_dir" "exist"
ensure_file "$scheme_dir/action" "exist" "600"
test_access_pattern "$scheme_dir/access_pattern"
+ ensure_file "$scheme_dir/apply_interval_us" "exist" "600"
test_quotas "$scheme_dir/quotas"
test_watermarks "$scheme_dir/watermarks"
test_filters "$scheme_dir/filters"
diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
index ade35576e748..ade35576e748 100644..100755
--- a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
+++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
diff --git a/tools/testing/selftests/dmabuf-heaps/.gitignore b/tools/testing/selftests/dmabuf-heaps/.gitignore
new file mode 100644
index 000000000000..b500e76b9045
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/.gitignore
@@ -0,0 +1 @@
+dmabuf-heap
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 000000000000..fe0343b95e6c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ pci_reset_test
+"
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+pci_reset_test()
+{
+ RET=0
+
+ local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1)
+ local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2)
+
+ if [ $bus != "pci" ]; then
+ check_err 1 "devlink device is not a PCI device"
+ log_test "pci reset"
+ return
+ fi
+
+ if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then
+ check_err 1 "reset is not supported"
+ log_test "pci reset"
+ return
+ fi
+
+ [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]]
+ check_err $? "only \"bus\" reset method should be supported"
+
+ local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+ echo 1 > /sys/bus/pci/devices/$bdf/reset
+ check_err $? "reset failed"
+
+ # Wait for udev to rename newly created netdev.
+ udevadm settle
+
+ local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+ [[ $ifindex_pre != $ifindex_post ]]
+ check_err $? "reset not performed"
+
+ log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/dt/.gitignore b/tools/testing/selftests/dt/.gitignore
new file mode 100644
index 000000000000..f6476c9f2884
--- /dev/null
+++ b/tools/testing/selftests/dt/.gitignore
@@ -0,0 +1 @@
+compatible_list
diff --git a/tools/testing/selftests/dt/Makefile b/tools/testing/selftests/dt/Makefile
new file mode 100644
index 000000000000..62dc00ee4978
--- /dev/null
+++ b/tools/testing/selftests/dt/Makefile
@@ -0,0 +1,21 @@
+PY3 = $(shell which python3 2>/dev/null)
+
+ifneq ($(PY3),)
+
+TEST_PROGS := test_unprobed_devices.sh
+TEST_GEN_FILES := compatible_list
+TEST_FILES := compatible_ignore_list ktap_helpers.sh
+
+include ../lib.mk
+
+$(OUTPUT)/compatible_list:
+ $(top_srcdir)/scripts/dtc/dt-extract-compatibles -d $(top_srcdir) > $@
+
+else
+
+all: no_py3_warning
+
+no_py3_warning:
+ @echo "Missing python3. This test will be skipped."
+
+endif
diff --git a/tools/testing/selftests/dt/compatible_ignore_list b/tools/testing/selftests/dt/compatible_ignore_list
new file mode 100644
index 000000000000..1323903feca9
--- /dev/null
+++ b/tools/testing/selftests/dt/compatible_ignore_list
@@ -0,0 +1 @@
+simple-mfd
diff --git a/tools/testing/selftests/dt/ktap_helpers.sh b/tools/testing/selftests/dt/ktap_helpers.sh
new file mode 100644
index 000000000000..8dfae51bb4e2
--- /dev/null
+++ b/tools/testing/selftests/dt/ktap_helpers.sh
@@ -0,0 +1,70 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Helpers for outputting in KTAP format
+#
+KTAP_TESTNO=1
+KTAP_CNT_PASS=0
+KTAP_CNT_FAIL=0
+KTAP_CNT_SKIP=0
+
+ktap_print_header() {
+ echo "TAP version 13"
+}
+
+ktap_set_plan() {
+ num_tests="$1"
+
+ echo "1..$num_tests"
+}
+
+ktap_skip_all() {
+ echo -n "1..0 # SKIP "
+ echo $@
+}
+
+__ktap_test() {
+ result="$1"
+ description="$2"
+ directive="$3" # optional
+
+ local directive_str=
+ [[ ! -z "$directive" ]] && directive_str="# $directive"
+
+ echo $result $KTAP_TESTNO $description $directive_str
+
+ KTAP_TESTNO=$((KTAP_TESTNO+1))
+}
+
+ktap_test_pass() {
+ description="$1"
+
+ result="ok"
+ __ktap_test "$result" "$description"
+
+ KTAP_CNT_PASS=$((KTAP_CNT_PASS+1))
+}
+
+ktap_test_skip() {
+ description="$1"
+
+ result="ok"
+ directive="SKIP"
+ __ktap_test "$result" "$description" "$directive"
+
+ KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1))
+}
+
+ktap_test_fail() {
+ description="$1"
+
+ result="not ok"
+ __ktap_test "$result" "$description"
+
+ KTAP_CNT_FAIL=$((KTAP_CNT_FAIL+1))
+}
+
+ktap_print_totals() {
+ echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0"
+}
diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh
new file mode 100755
index 000000000000..b07af2a4c4de
--- /dev/null
+++ b/tools/testing/selftests/dt/test_unprobed_devices.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Based on Frank Rowand's dt_stat script.
+#
+# This script tests for devices that were declared on the Devicetree and are
+# expected to bind to a driver, but didn't.
+#
+# To achieve this, two lists are used:
+# * a list of the compatibles that can be matched by a Devicetree node
+# * a list of compatibles that should be ignored
+#
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+source "${DIR}"/ktap_helpers.sh
+
+PDT=/proc/device-tree/
+COMPAT_LIST="${DIR}"/compatible_list
+IGNORE_LIST="${DIR}"/compatible_ignore_list
+
+KSFT_PASS=0
+KSFT_FAIL=1
+KSFT_SKIP=4
+
+ktap_print_header
+
+if [[ ! -d "${PDT}" ]]; then
+ ktap_skip_all "${PDT} doesn't exist."
+ exit "${KSFT_SKIP}"
+fi
+
+nodes_compatible=$(
+ for node_compat in $(find ${PDT} -name compatible); do
+ node=$(dirname "${node_compat}")
+ # Check if node is available
+ if [[ -e "${node}"/status ]]; then
+ status=$(tr -d '\000' < "${node}"/status)
+ [[ "${status}" != "okay" && "${status}" != "ok" ]] && continue
+ fi
+ echo "${node}" | sed -e 's|\/proc\/device-tree||'
+ done | sort
+ )
+
+nodes_dev_bound=$(
+ IFS=$'\n'
+ for uevent in $(find /sys/devices -name uevent); do
+ if [[ -d "$(dirname "${uevent}")"/driver ]]; then
+ grep '^OF_FULLNAME=' "${uevent}" | sed -e 's|OF_FULLNAME=||'
+ fi
+ done
+ )
+
+num_tests=$(echo ${nodes_compatible} | wc -w)
+ktap_set_plan "${num_tests}"
+
+retval="${KSFT_PASS}"
+for node in ${nodes_compatible}; do
+ if ! echo "${nodes_dev_bound}" | grep -E -q "(^| )${node}( |\$)"; then
+ compatibles=$(tr '\000' '\n' < "${PDT}"/"${node}"/compatible)
+
+ for compatible in ${compatibles}; do
+ if grep -x -q "${compatible}" "${IGNORE_LIST}"; then
+ continue
+ fi
+
+ if grep -x -q "${compatible}" "${COMPAT_LIST}"; then
+ ktap_test_fail "${node}"
+ retval="${KSFT_FAIL}"
+ continue 2
+ fi
+ done
+ ktap_test_skip "${node}"
+ else
+ ktap_test_pass "${node}"
+ fi
+
+done
+
+ktap_print_totals
+exit "${retval}"
diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c
index 9674a19396a3..7bc7af4eb2c1 100644
--- a/tools/testing/selftests/efivarfs/create-read.c
+++ b/tools/testing/selftests/efivarfs/create-read.c
@@ -32,8 +32,10 @@ int main(int argc, char **argv)
rc = read(fd, buf, sizeof(buf));
if (rc != 0) {
fprintf(stderr, "Reading a new var should return EOF\n");
+ close(fd);
return EXIT_FAILURE;
}
+ close(fd);
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67bf7254a48f..bf79d664c8e6 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -23,6 +23,9 @@
#include "../kselftest.h"
+#define TESTS_EXPECTED 51
+#define TEST_NAME_LEN (PATH_MAX * 4)
+
static char longpath[2 * PATH_MAX] = "";
static char *envp[] = { "IN_TEST=yes", NULL, NULL };
static char *argv[] = { "execveat", "99", NULL };
@@ -43,71 +46,85 @@ static int execveat_(int fd, const char *path, char **argv, char **envp,
static int _check_execveat_fail(int fd, const char *path, int flags,
int expected_errno, const char *errno_str)
{
+ char test_name[TEST_NAME_LEN];
int rc;
errno = 0;
- printf("Check failure of execveat(%d, '%s', %d) with %s... ",
- fd, path?:"(null)", flags, errno_str);
+ snprintf(test_name, sizeof(test_name),
+ "Check failure of execveat(%d, '%s', %d) with %s",
+ fd, path?:"(null)", flags, errno_str);
rc = execveat_(fd, path, argv, envp, flags);
if (rc > 0) {
- printf("[FAIL] (unexpected success from execveat(2))\n");
+ ksft_print_msg("unexpected success from execveat(2)\n");
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (errno != expected_errno) {
- printf("[FAIL] (expected errno %d (%s) not %d (%s)\n",
- expected_errno, strerror(expected_errno),
- errno, strerror(errno));
+ ksft_print_msg("expected errno %d (%s) not %d (%s)\n",
+ expected_errno, strerror(expected_errno),
+ errno, strerror(errno));
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%s\n", test_name);
return 0;
}
static int check_execveat_invoked_rc(int fd, const char *path, int flags,
int expected_rc, int expected_rc2)
{
+ char test_name[TEST_NAME_LEN];
int status;
int rc;
pid_t child;
int pathlen = path ? strlen(path) : 0;
if (pathlen > 40)
- printf("Check success of execveat(%d, '%.20s...%s', %d)... ",
- fd, path, (path + pathlen - 20), flags);
+ snprintf(test_name, sizeof(test_name),
+ "Check success of execveat(%d, '%.20s...%s', %d)... ",
+ fd, path, (path + pathlen - 20), flags);
else
- printf("Check success of execveat(%d, '%s', %d)... ",
- fd, path?:"(null)", flags);
+ snprintf(test_name, sizeof(test_name),
+ "Check success of execveat(%d, '%s', %d)... ",
+ fd, path?:"(null)", flags);
+
child = fork();
if (child < 0) {
- printf("[FAIL] (fork() failed)\n");
+ ksft_perror("fork() failed");
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (child == 0) {
/* Child: do execveat(). */
rc = execveat_(fd, path, argv, envp, flags);
- printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n",
- rc, errno, strerror(errno));
+ ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n",
+ rc, errno, strerror(errno));
+ ksft_test_result_fail("%s\n", test_name);
exit(1); /* should not reach here */
}
/* Parent: wait for & check child's exit status. */
rc = waitpid(child, &status, 0);
if (rc != child) {
- printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc);
+ ksft_print_msg("waitpid(%d,...) returned %d\n", child, rc);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (!WIFEXITED(status)) {
- printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n",
- child, status);
+ ksft_print_msg("child %d did not exit cleanly, status=%08x\n",
+ child, status);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if ((WEXITSTATUS(status) != expected_rc) &&
(WEXITSTATUS(status) != expected_rc2)) {
- printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
- child, WEXITSTATUS(status), expected_rc, expected_rc2);
+ ksft_print_msg("child %d exited with %d not %d nor %d\n",
+ child, WEXITSTATUS(status), expected_rc,
+ expected_rc2);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%s\n", test_name);
return 0;
}
@@ -129,11 +146,9 @@ static int open_or_die(const char *filename, int flags)
{
int fd = open(filename, flags);
- if (fd < 0) {
- printf("Failed to open '%s'; "
+ if (fd < 0)
+ ksft_exit_fail_msg("Failed to open '%s'; "
"check prerequisites are available\n", filename);
- exit(1);
- }
return fd;
}
@@ -162,8 +177,7 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
char *cwd = getcwd(NULL, 0);
if (!cwd) {
- printf("Failed to getcwd(), errno=%d (%s)\n",
- errno, strerror(errno));
+ ksft_perror("Failed to getcwd()");
return 2;
}
strcpy(longpath, cwd);
@@ -193,12 +207,12 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
*/
fd = open(longpath, O_RDONLY);
if (fd > 0) {
- printf("Invoke copy of '%s' via filename of length %zu:\n",
- src, strlen(longpath));
+ ksft_print_msg("Invoke copy of '%s' via filename of length %zu:\n",
+ src, strlen(longpath));
fail += check_execveat(fd, "", AT_EMPTY_PATH);
} else {
- printf("Failed to open length %zu filename, errno=%d (%s)\n",
- strlen(longpath), errno, strerror(errno));
+ ksft_print_msg("Failed to open length %zu filename, errno=%d (%s)\n",
+ strlen(longpath), errno, strerror(errno));
fail++;
}
@@ -405,28 +419,31 @@ int main(int argc, char **argv)
const char *in_test = getenv("IN_TEST");
if (verbose) {
- printf(" invoked with:");
+ ksft_print_msg("invoked with:\n");
for (ii = 0; ii < argc; ii++)
- printf(" [%d]='%s'", ii, argv[ii]);
- printf("\n");
+ ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]);
}
/* Check expected environment transferred. */
if (!in_test || strcmp(in_test, "yes") != 0) {
- printf("[FAIL] (no IN_TEST=yes in env)\n");
+ ksft_print_msg("no IN_TEST=yes in env\n");
return 1;
}
/* Use the final argument as an exit code. */
rc = atoi(argv[argc - 1]);
- fflush(stdout);
+ exit(rc);
} else {
+ ksft_print_header();
+ ksft_set_plan(TESTS_EXPECTED);
prerequisites();
if (verbose)
envp[1] = "VERBOSE=1";
rc = run_tests();
if (rc > 0)
printf("%d tests failed\n", rc);
+ ksft_finished();
}
+
return rc;
}
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 4c6f0cd83c5b..04757dc7e546 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -17,10 +17,6 @@
#include <sys/wait.h>
#include <unistd.h>
-#ifndef CLONE_NEWNS
-# define CLONE_NEWNS 0x00020000
-#endif
-
static char *fw_path = NULL;
static void die(char *fmt, ...)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
index ff7499eb98d6..e21c9c27ece4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
@@ -28,13 +28,21 @@ s390*)
mips*)
ARG1=%r4
;;
+loongarch*)
+ ARG1=%r4
+;;
+riscv*)
+ ARG1=%a0
+;;
*)
echo "Please implement other architecture here"
exit_untested
esac
: "Test get argument (1)"
-if grep -q eventfs_add_dir available_filter_functions; then
+if grep -q eventfs_create_dir available_filter_functions; then
+ DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
DIR_NAME="eventfs_add_dir"
else
DIR_NAME="tracefs_create_dir"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index a202b2ea4baf..93217d459556 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -31,13 +31,18 @@ mips*)
loongarch*)
ARG1=%r4
;;
+riscv*)
+ ARG1=%a0
+;;
*)
echo "Please implement other architecture here"
exit_untested
esac
: "Test get argument (1)"
-if grep -q eventfs_add_dir available_filter_functions; then
+if grep -q eventfs_create_dir available_filter_functions; then
+ DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
DIR_NAME="eventfs_add_dir"
else
DIR_NAME="tracefs_create_dir"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 1df61e13a812..8f1292ad80ff 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -44,6 +44,10 @@ loongarch*)
GOODREG=%r4
BADREG=%r12
;;
+riscv*)
+ GOODREG=%a0
+ BADREG=%a8
+;;
*)
echo "Please implement other architecture here"
exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
new file mode 100644
index 000000000000..bc9514428dba
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test failure of registering kprobe on non unique symbol
+# requires: kprobe_events
+
+SYMBOL='name_show'
+
+# We skip this test on kernel where SYMBOL is unique or does not exist.
+if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then
+ exit_unsupported
+fi
+
+! echo "p:test_non_unique ${SYMBOL}" > kprobe_events
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
index 2e986cbf1a46..2b5ea18bde38 100644
--- a/tools/testing/selftests/hid/Makefile
+++ b/tools/testing/selftests/hid/Makefile
@@ -21,7 +21,9 @@ CXX ?= $(CROSS_COMPILE)g++
HOSTPKG_CONFIG := pkg-config
-CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(KHDR_INCLUDES) -I$(OUTPUT)
+CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(OUTPUT)
+CFLAGS += -I$(OUTPUT)/tools/include
+
LDLIBS += -lelf -lz -lrt -lpthread
# Silence some warnings when compiled with clang
@@ -65,7 +67,6 @@ BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
INCLUDE_DIR := $(SCRATCH_DIR)/include
-KHDR_INCLUDES := $(SCRATCH_DIR)/uapi/include
BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
ifneq ($(CROSS_COMPILE),)
HOST_BUILD_DIR := $(BUILD_DIR)/host
@@ -151,9 +152,6 @@ else
$(Q)cp "$(VMLINUX_H)" $@
endif
-$(KHDR_INCLUDES)/linux/hid.h: $(top_srcdir)/include/uapi/linux/hid.h
- $(MAKE) -C $(top_srcdir) INSTALL_HDR_PATH=$(SCRATCH_DIR)/uapi headers_install
-
$(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
$(TOOLSDIR)/bpf/resolve_btfids/main.c \
$(TOOLSDIR)/lib/rbtree.c \
@@ -231,7 +229,7 @@ $(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT)
$(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $<
$(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@
-$(OUTPUT)/%.o: %.c $(BPF_SKELS) $(KHDR_INCLUDES)/linux/hid.h
+$(OUTPUT)/%.o: %.c $(BPF_SKELS)
$(call msg,CC,,$@)
$(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c
index 88c593f753b5..1e558826b809 100644
--- a/tools/testing/selftests/hid/progs/hid.c
+++ b/tools/testing/selftests/hid/progs/hid.c
@@ -1,8 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Red hat */
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
#include "hid_bpf_helpers.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
index 4fff31dbe0e7..65e657ac1198 100644
--- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -5,6 +5,83 @@
#ifndef __HID_BPF_HELPERS_H
#define __HID_BPF_HELPERS_H
+/* "undefine" structs and enums in vmlinux.h, because we "override" them below */
+#define hid_bpf_ctx hid_bpf_ctx___not_used
+#define hid_report_type hid_report_type___not_used
+#define hid_class_request hid_class_request___not_used
+#define hid_bpf_attach_flags hid_bpf_attach_flags___not_used
+#define HID_INPUT_REPORT HID_INPUT_REPORT___not_used
+#define HID_OUTPUT_REPORT HID_OUTPUT_REPORT___not_used
+#define HID_FEATURE_REPORT HID_FEATURE_REPORT___not_used
+#define HID_REPORT_TYPES HID_REPORT_TYPES___not_used
+#define HID_REQ_GET_REPORT HID_REQ_GET_REPORT___not_used
+#define HID_REQ_GET_IDLE HID_REQ_GET_IDLE___not_used
+#define HID_REQ_GET_PROTOCOL HID_REQ_GET_PROTOCOL___not_used
+#define HID_REQ_SET_REPORT HID_REQ_SET_REPORT___not_used
+#define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used
+#define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used
+#define HID_BPF_FLAG_NONE HID_BPF_FLAG_NONE___not_used
+#define HID_BPF_FLAG_INSERT_HEAD HID_BPF_FLAG_INSERT_HEAD___not_used
+#define HID_BPF_FLAG_MAX HID_BPF_FLAG_MAX___not_used
+
+#include "vmlinux.h"
+
+#undef hid_bpf_ctx
+#undef hid_report_type
+#undef hid_class_request
+#undef hid_bpf_attach_flags
+#undef HID_INPUT_REPORT
+#undef HID_OUTPUT_REPORT
+#undef HID_FEATURE_REPORT
+#undef HID_REPORT_TYPES
+#undef HID_REQ_GET_REPORT
+#undef HID_REQ_GET_IDLE
+#undef HID_REQ_GET_PROTOCOL
+#undef HID_REQ_SET_REPORT
+#undef HID_REQ_SET_IDLE
+#undef HID_REQ_SET_PROTOCOL
+#undef HID_BPF_FLAG_NONE
+#undef HID_BPF_FLAG_INSERT_HEAD
+#undef HID_BPF_FLAG_MAX
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/const.h>
+
+enum hid_report_type {
+ HID_INPUT_REPORT = 0,
+ HID_OUTPUT_REPORT = 1,
+ HID_FEATURE_REPORT = 2,
+
+ HID_REPORT_TYPES,
+};
+
+struct hid_bpf_ctx {
+ __u32 index;
+ const struct hid_device *hid;
+ __u32 allocated_size;
+ enum hid_report_type report_type;
+ union {
+ __s32 retval;
+ __s32 size;
+ };
+} __attribute__((preserve_access_index));
+
+enum hid_class_request {
+ HID_REQ_GET_REPORT = 0x01,
+ HID_REQ_GET_IDLE = 0x02,
+ HID_REQ_GET_PROTOCOL = 0x03,
+ HID_REQ_SET_REPORT = 0x09,
+ HID_REQ_SET_IDLE = 0x0A,
+ HID_REQ_SET_PROTOCOL = 0x0B,
+};
+
+enum hid_bpf_attach_flags {
+ HID_BPF_FLAG_NONE = 0,
+ HID_BPF_FLAG_INSERT_HEAD = _BITUL(0),
+ HID_BPF_FLAG_MAX,
+};
+
/* following are kfuncs exported by HID for HID-BPF */
extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx,
unsigned int offset,
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 33d08600be13..6ed328c863c4 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -86,12 +86,13 @@ TEST_F(iommufd, cmd_fail)
TEST_F(iommufd, cmd_length)
{
-#define TEST_LENGTH(_struct, _ioctl) \
+#define TEST_LENGTH(_struct, _ioctl, _last) \
{ \
+ size_t min_size = offsetofend(struct _struct, _last); \
struct { \
struct _struct cmd; \
uint8_t extra; \
- } cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \
+ } cmd = { .cmd = { .size = min_size - 1 }, \
.extra = UINT8_MAX }; \
int old_errno; \
int rc; \
@@ -112,16 +113,19 @@ TEST_F(iommufd, cmd_length)
} \
}
- TEST_LENGTH(iommu_destroy, IOMMU_DESTROY);
- TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO);
- TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC);
- TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES);
- TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS);
- TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP);
- TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY);
- TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP);
- TEST_LENGTH(iommu_option, IOMMU_OPTION);
- TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS);
+ TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id);
+ TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved);
+ TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved);
+ TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id);
+ TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES,
+ out_iova_alignment);
+ TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS,
+ allowed_iovas);
+ TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP, iova);
+ TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY, src_iova);
+ TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length);
+ TEST_LENGTH(iommu_option, IOMMU_OPTION, val64);
+ TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved);
#undef TEST_LENGTH
}
@@ -260,6 +264,121 @@ TEST_F(iommufd_ioas, ioas_destroy)
}
}
+TEST_F(iommufd_ioas, alloc_hwpt_nested)
+{
+ const uint32_t min_data_len =
+ offsetofend(struct iommu_hwpt_selftest, iotlb);
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ uint32_t nested_hwpt_id[2] = {};
+ uint32_t parent_hwpt_id = 0;
+ uint32_t parent_hwpt_id_not_work = 0;
+ uint32_t test_hwpt_id = 0;
+
+ if (self->device_id) {
+ /* Negative tests */
+ test_err_hwpt_alloc(ENOENT, self->ioas_id, self->device_id, 0,
+ &test_hwpt_id);
+ test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0,
+ &test_hwpt_id);
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ &parent_hwpt_id);
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0,
+ &parent_hwpt_id_not_work);
+
+ /* Negative nested tests */
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_NONE, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(EOPNOTSUPP, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST + 1, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ min_data_len - 1);
+ test_err_hwpt_alloc_nested(EFAULT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, NULL,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(
+ EOPNOTSUPP, self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT, &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id_not_work, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ /* Allocate two nested hwpts sharing one common parent hwpt */
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ /* Negative test: a nested hwpt on top of a nested hwpt */
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ nested_hwpt_id[0], 0, &test_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ /* Negative test: parent hwpt now cannot be freed */
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, parent_hwpt_id));
+
+ /* Attach device to nested_hwpt_id[0] that then will be busy */
+ test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[0]);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, nested_hwpt_id[0]));
+
+ /* Switch from nested_hwpt_id[0] to nested_hwpt_id[1] */
+ test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[1]);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, nested_hwpt_id[1]));
+ test_ioctl_destroy(nested_hwpt_id[0]);
+
+ /* Detach from nested_hwpt_id[1] and destroy it */
+ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
+ test_ioctl_destroy(nested_hwpt_id[1]);
+
+ /* Detach from the parent hw_pagetable and destroy it */
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(parent_hwpt_id);
+ test_ioctl_destroy(parent_hwpt_id_not_work);
+ } else {
+ test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0,
+ &parent_hwpt_id);
+ test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ nested_hwpt_id[0]);
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ nested_hwpt_id[1]);
+ }
+}
+
TEST_F(iommufd_ioas, hwpt_attach)
{
/* Create a device attached directly to a hwpt */
@@ -1404,16 +1523,242 @@ TEST_F(iommufd_mock_domain, alloc_hwpt)
int i;
for (i = 0; i != variant->mock_domains; i++) {
+ uint32_t hwpt_id[2];
uint32_t stddev_id;
- uint32_t hwpt_id;
- test_cmd_hwpt_alloc(self->idev_ids[0], self->ioas_id, &hwpt_id);
- test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_err_hwpt_alloc(EOPNOTSUPP,
+ self->idev_ids[i], self->ioas_id,
+ ~IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[0]);
+ test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+ 0, &hwpt_id[0]);
+ test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[1]);
+
+ /* Do a hw_pagetable rotation test */
+ test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[0]);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[0]));
+ test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[1]);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[1]));
+ test_cmd_mock_domain_replace(self->stdev_ids[i], self->ioas_id);
+ test_ioctl_destroy(hwpt_id[1]);
+
+ test_cmd_mock_domain(hwpt_id[0], &stddev_id, NULL, NULL);
test_ioctl_destroy(stddev_id);
- test_ioctl_destroy(hwpt_id);
+ test_ioctl_destroy(hwpt_id[0]);
}
}
+FIXTURE(iommufd_dirty_tracking)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t stdev_id;
+ uint32_t idev_id;
+ unsigned long page_size;
+ unsigned long bitmap_size;
+ void *bitmap;
+ void *buffer;
+};
+
+FIXTURE_VARIANT(iommufd_dirty_tracking)
+{
+ unsigned long buffer_size;
+};
+
+FIXTURE_SETUP(iommufd_dirty_tracking)
+{
+ void *vrc;
+ int rc;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+
+ rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size);
+ if (rc || !self->buffer) {
+ SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+ variant->buffer_size, rc);
+ }
+
+ assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
+ vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ assert(vrc == self->buffer);
+
+ self->page_size = MOCK_PAGE_SIZE;
+ self->bitmap_size =
+ variant->buffer_size / self->page_size / BITS_PER_BYTE;
+
+ /* Provision with an extra (MOCK_PAGE_SIZE) for the unaligned case */
+ rc = posix_memalign(&self->bitmap, PAGE_SIZE,
+ self->bitmap_size + MOCK_PAGE_SIZE);
+ assert(!rc);
+ assert(self->bitmap);
+ assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
+
+ test_ioctl_ioas_alloc(&self->ioas_id);
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id,
+ &self->idev_id);
+}
+
+FIXTURE_TEARDOWN(iommufd_dirty_tracking)
+{
+ munmap(self->buffer, variant->buffer_size);
+ munmap(self->bitmap, self->bitmap_size);
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
+{
+ /* one u32 index bitmap */
+ .buffer_size = 128UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k)
+{
+ /* one u64 index bitmap */
+ .buffer_size = 256UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k)
+{
+ /* two u64 index and trailing end bitmap */
+ .buffer_size = 640UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
+{
+ /* 4K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
+{
+ /* 8K bitmap (256M IOVA range) */
+ .buffer_size = 256UL * 1024UL * 1024UL,
+};
+
+TEST_F(iommufd_dirty_tracking, enforce_dirty)
+{
+ uint32_t ioas_id, stddev_id, idev_id;
+ uint32_t hwpt_id, _hwpt_id;
+ uint32_t dev_flags;
+
+ /* Regular case */
+ dev_flags = MOCK_FLAGS_DEVICE_NO_DIRTY;
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_err_mock_domain_flags(EINVAL, hwpt_id, dev_flags, &stddev_id,
+ NULL);
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+
+ /* IOMMU device does not support dirty tracking */
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_cmd_mock_domain_flags(ioas_id, dev_flags, &stddev_id, &_hwpt_id,
+ &idev_id);
+ test_err_hwpt_alloc(EOPNOTSUPP, idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_ioctl_destroy(stddev_id);
+}
+
+TEST_F(iommufd_dirty_tracking, set_dirty_tracking)
+{
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+ test_cmd_set_dirty_tracking(hwpt_id, false);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, device_dirty_capability)
+{
+ uint32_t caps = 0;
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_cmd_get_hw_capabilities(self->idev_id, caps,
+ IOMMU_HW_CAP_DIRTY_TRACKING);
+ ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING,
+ caps & IOMMU_HW_CAP_DIRTY_TRACKING);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
+{
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
+
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+ variant->buffer_size, MOCK_APERTURE_START);
+
+ test_cmd_hwpt_alloc(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size,
+ self->bitmap, self->bitmap_size, 0, _metadata);
+
+ /* PAGE_SIZE unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size,
+ self->bitmap + MOCK_PAGE_SIZE,
+ self->bitmap_size, 0, _metadata);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
+{
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
+
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+ variant->buffer_size, MOCK_APERTURE_START);
+
+ test_cmd_hwpt_alloc(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size,
+ self->bitmap, self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ /* Unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size,
+ self->bitmap + MOCK_PAGE_SIZE,
+ self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
/* VFIO compatibility IOCTLs */
TEST_F(iommufd, simple_ioctls)
@@ -1729,7 +2074,7 @@ TEST_F(vfio_compat_mock_domain, map)
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
- /* UNMAP_FLAG_ALL requres 0 iova/size */
+ /* UNMAP_FLAG_ALL requires 0 iova/size */
ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL;
EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index a220ca2a689d..f590417cd67a 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -105,7 +105,7 @@ static bool fail_nth_next(struct __test_metadata *_metadata,
/*
* This is just an arbitrary limit based on the current kernel
- * situation. Changes in the kernel can dramtically change the number of
+ * situation. Changes in the kernel can dramatically change the number of
* required fault injection sites, so if this hits it doesn't
* necessarily mean a test failure, just that the limit has to be made
* bigger.
@@ -612,10 +612,11 @@ TEST_FAIL_NTH(basic_fail_nth, device)
&idev_id))
return -1;
- if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info)))
+ if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
return -1;
- if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, &hwpt_id))
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id,
+ IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL))
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index e0753d03ecaa..050e9751321c 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -16,6 +16,25 @@
/* Hack to make assertions more readable */
#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD
+/* Imported from include/asm-generic/bitops/generic-non-atomic.h */
+#define BITS_PER_BYTE 8
+#define BITS_PER_LONG __BITS_PER_LONG
+#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
+
+static inline void set_bit(unsigned int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ *p |= mask;
+}
+
+static inline bool test_bit(unsigned int nr, unsigned long *addr)
+{
+ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1)));
+}
+
static void *buffer;
static unsigned long BUFFER_SIZE;
@@ -74,6 +93,38 @@ static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id,
EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \
stdev_id, hwpt_id, NULL))
+static int _test_cmd_mock_domain_flags(int fd, unsigned int ioas_id,
+ __u32 stdev_flags, __u32 *stdev_id,
+ __u32 *hwpt_id, __u32 *idev_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS,
+ .id = ioas_id,
+ .mock_domain_flags = { .dev_flags = stdev_flags },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ if (stdev_id)
+ *stdev_id = cmd.mock_domain_flags.out_stdev_id;
+ assert(cmd.id != 0);
+ if (hwpt_id)
+ *hwpt_id = cmd.mock_domain_flags.out_hwpt_id;
+ if (idev_id)
+ *idev_id = cmd.mock_domain_flags.out_idev_id;
+ return 0;
+}
+#define test_cmd_mock_domain_flags(ioas_id, flags, stdev_id, hwpt_id, idev_id) \
+ ASSERT_EQ(0, _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \
+ stdev_id, hwpt_id, idev_id))
+#define test_err_mock_domain_flags(_errno, ioas_id, flags, stdev_id, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \
+ stdev_id, hwpt_id, NULL))
+
static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
__u32 *hwpt_id)
{
@@ -103,12 +154,17 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
pt_id, NULL))
static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
- __u32 *hwpt_id)
+ __u32 flags, __u32 *hwpt_id, __u32 data_type,
+ void *data, size_t data_len)
{
struct iommu_hwpt_alloc cmd = {
.size = sizeof(cmd),
+ .flags = flags,
.dev_id = device_id,
.pt_id = pt_id,
+ .data_type = data_type,
+ .data_len = data_len,
+ .data_uptr = (uint64_t)data,
};
int ret;
@@ -120,8 +176,24 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
return 0;
}
-#define test_cmd_hwpt_alloc(device_id, pt_id, hwpt_id) \
- ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, hwpt_id))
+#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
+ 0))
+#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
+ self->fd, device_id, pt_id, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0))
+
+#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ hwpt_id, data_type, data, data_len))
+#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ hwpt_id, data_type, data, data_len))
static int _test_cmd_access_replace_ioas(int fd, __u32 access_id,
unsigned int ioas_id)
@@ -142,6 +214,126 @@ static int _test_cmd_access_replace_ioas(int fd, __u32 access_id,
#define test_cmd_access_replace_ioas(access_id, ioas_id) \
ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id))
+static int _test_cmd_set_dirty_tracking(int fd, __u32 hwpt_id, bool enabled)
+{
+ struct iommu_hwpt_set_dirty_tracking cmd = {
+ .size = sizeof(cmd),
+ .flags = enabled ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
+ .hwpt_id = hwpt_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &cmd);
+ if (ret)
+ return -errno;
+ return 0;
+}
+#define test_cmd_set_dirty_tracking(hwpt_id, enabled) \
+ ASSERT_EQ(0, _test_cmd_set_dirty_tracking(self->fd, hwpt_id, enabled))
+
+static int _test_cmd_get_dirty_bitmap(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ __u64 *bitmap, __u32 flags)
+{
+ struct iommu_hwpt_get_dirty_bitmap cmd = {
+ .size = sizeof(cmd),
+ .hwpt_id = hwpt_id,
+ .flags = flags,
+ .iova = iova,
+ .length = length,
+ .page_size = page_size,
+ .data = (uintptr_t)bitmap,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &cmd);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+#define test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, \
+ bitmap, flags) \
+ ASSERT_EQ(0, _test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, \
+ page_size, bitmap, flags))
+
+static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ __u64 *bitmap, __u64 *dirty)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DIRTY,
+ .id = hwpt_id,
+ .dirty = {
+ .iova = iova,
+ .length = length,
+ .page_size = page_size,
+ .uptr = (uintptr_t)bitmap,
+ }
+ };
+ int ret;
+
+ ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_DIRTY), &cmd);
+ if (ret)
+ return -ret;
+ if (dirty)
+ *dirty = cmd.dirty.out_nr_dirty;
+ return 0;
+}
+
+#define test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, \
+ bitmap, nr) \
+ ASSERT_EQ(0, \
+ _test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, \
+ page_size, bitmap, nr))
+
+static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size, __u64 *bitmap,
+ __u64 bitmap_size, __u32 flags,
+ struct __test_metadata *_metadata)
+{
+ unsigned long i, count, nbits = bitmap_size * BITS_PER_BYTE;
+ unsigned long nr = nbits / 2;
+ __u64 out_dirty = 0;
+
+ /* Mark all even bits as dirty in the mock domain */
+ for (count = 0, i = 0; i < nbits; count += !(i % 2), i++)
+ if (!(i % 2))
+ set_bit(i, (unsigned long *)bitmap);
+ ASSERT_EQ(nr, count);
+
+ test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size,
+ bitmap, &out_dirty);
+ ASSERT_EQ(nr, out_dirty);
+
+ /* Expect all even bits as dirty in the user bitmap */
+ memset(bitmap, 0, bitmap_size);
+ test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+ flags);
+ for (count = 0, i = 0; i < nbits; count += !(i % 2), i++)
+ ASSERT_EQ(!(i % 2), test_bit(i, (unsigned long *)bitmap));
+ ASSERT_EQ(count, out_dirty);
+
+ memset(bitmap, 0, bitmap_size);
+ test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+ flags);
+
+ /* It as read already -- expect all zeroes */
+ for (i = 0; i < nbits; i++) {
+ ASSERT_EQ(!(i % 2) && (flags &
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
+ test_bit(i, (unsigned long *)bitmap));
+ }
+
+ return 0;
+}
+#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, bitmap, \
+ bitmap_size, flags, _metadata) \
+ ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \
+ page_size, bitmap, bitmap_size, \
+ flags, _metadata))
+
static int _test_cmd_create_access(int fd, unsigned int ioas_id,
__u32 *access_id, unsigned int flags)
{
@@ -266,6 +458,17 @@ static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer,
IOMMU_IOAS_MAP_READABLE)); \
})
+#define test_ioctl_ioas_map_fixed_id(ioas_id, buffer, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, \
+ _test_ioctl_ioas_map( \
+ self->fd, ioas_id, buffer, length, &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \
({ \
__u64 __iova = iova; \
@@ -354,8 +557,8 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
#endif
/* @data can be NULL */
-static int _test_cmd_get_hw_info(int fd, __u32 device_id,
- void *data, size_t data_len)
+static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
+ size_t data_len, uint32_t *capabilities)
{
struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data;
struct iommu_hw_info cmd = {
@@ -363,6 +566,7 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id,
.dev_id = device_id,
.data_len = data_len,
.data_uptr = (uint64_t)data,
+ .out_capabilities = 0,
};
int ret;
@@ -399,14 +603,19 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id,
assert(!info->flags);
}
+ if (capabilities)
+ *capabilities = cmd.out_capabilities;
+
return 0;
}
-#define test_cmd_get_hw_info(device_id, data, data_len) \
- ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \
- data, data_len))
+#define test_cmd_get_hw_info(device_id, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \
+ data_len, NULL))
+
+#define test_err_get_hw_info(_errno, device_id, data, data_len) \
+ EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \
+ data_len, NULL))
-#define test_err_get_hw_info(_errno, device_id, data, data_len) \
- EXPECT_ERRNO(_errno, \
- _test_cmd_get_hw_info(self->fd, device_id, \
- data, data_len))
+#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 529d29a35900..a781e6311810 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -48,6 +48,7 @@
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
+#include <string.h>
#include <stdio.h>
#endif
@@ -77,6 +78,8 @@
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
/* counters */
struct ksft_count {
unsigned int ksft_pass;
@@ -129,7 +132,7 @@ static inline void ksft_print_header(void)
static inline void ksft_set_plan(unsigned int plan)
{
ksft_plan = plan;
- printf("1..%d\n", ksft_plan);
+ printf("1..%u\n", ksft_plan);
}
static inline void ksft_print_cnts(void)
@@ -137,13 +140,13 @@ static inline void ksft_print_cnts(void)
if (ksft_plan != ksft_test_num())
printf("# Planned tests != run tests (%u != %u)\n",
ksft_plan, ksft_test_num());
- printf("# Totals: pass:%d fail:%d xfail:%d xpass:%d skip:%d error:%d\n",
+ printf("# Totals: pass:%u fail:%u xfail:%u xpass:%u skip:%u error:%u\n",
ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
}
-static inline void ksft_print_msg(const char *msg, ...)
+static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -155,7 +158,20 @@ static inline void ksft_print_msg(const char *msg, ...)
va_end(args);
}
-static inline void ksft_test_result_pass(const char *msg, ...)
+static inline void ksft_perror(const char *msg)
+{
+#ifndef NOLIBC
+ ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
+#else
+ /*
+ * nolibc doesn't provide strerror() and it seems
+ * inappropriate to add one, just print the errno.
+ */
+ ksft_print_msg("%s: %d)\n", msg, errno);
+#endif
+}
+
+static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -163,13 +179,13 @@ static inline void ksft_test_result_pass(const char *msg, ...)
ksft_cnt.ksft_pass++;
va_start(args, msg);
- printf("ok %d ", ksft_test_num());
+ printf("ok %u ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline void ksft_test_result_fail(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_fail(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -177,7 +193,7 @@ static inline void ksft_test_result_fail(const char *msg, ...)
ksft_cnt.ksft_fail++;
va_start(args, msg);
- printf("not ok %d ", ksft_test_num());
+ printf("not ok %u ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
@@ -195,7 +211,7 @@ static inline void ksft_test_result_fail(const char *msg, ...)
ksft_test_result_fail(fmt, ##__VA_ARGS__);\
} while (0)
-static inline void ksft_test_result_xfail(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_xfail(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -203,13 +219,13 @@ static inline void ksft_test_result_xfail(const char *msg, ...)
ksft_cnt.ksft_xfail++;
va_start(args, msg);
- printf("ok %d # XFAIL ", ksft_test_num());
+ printf("ok %u # XFAIL ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline void ksft_test_result_skip(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -217,14 +233,14 @@ static inline void ksft_test_result_skip(const char *msg, ...)
ksft_cnt.ksft_xskip++;
va_start(args, msg);
- printf("ok %d # SKIP ", ksft_test_num());
+ printf("ok %u # SKIP ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
/* TODO: how does "error" differ from "fail" or "skip"? */
-static inline void ksft_test_result_error(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -232,7 +248,7 @@ static inline void ksft_test_result_error(const char *msg, ...)
ksft_cnt.ksft_error++;
va_start(args, msg);
- printf("not ok %d # error ", ksft_test_num());
+ printf("not ok %u # error ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
@@ -271,7 +287,7 @@ static inline int ksft_exit_fail(void)
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline int ksft_exit_fail_msg(const char *msg, ...)
+static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -298,7 +314,7 @@ static inline int ksft_exit_xpass(void)
exit(KSFT_XPASS);
}
-static inline int ksft_exit_skip(const char *msg, ...)
+static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a3bb36fb3cfc..a5963ab9215b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -17,6 +17,15 @@ else
ARCH_DIR := $(ARCH)
endif
+ifeq ($(ARCH),arm64)
+arm64_tools_dir := $(top_srcdir)/tools/arch/arm64/tools/
+GEN_HDRS := $(top_srcdir)/tools/arch/arm64/include/generated/
+CFLAGS += -I$(GEN_HDRS)
+
+$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
+ $(MAKE) -C $(arm64_tools_dir)
+endif
+
LIBKVM += lib/assert.c
LIBKVM += lib/elf.c
LIBKVM += lib/guest_modes.c
@@ -66,6 +75,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
+TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
@@ -145,10 +155,12 @@ TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
TEST_GEN_PROGS_aarch64 += aarch64/psci_test
+TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
+TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
@@ -256,13 +268,18 @@ $(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS)
$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
-EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.*
+EXTRA_CLEAN += $(GEN_HDRS) \
+ $(LIBKVM_OBJS) \
+ $(SPLIT_TESTS_OBJS) \
+ $(TEST_DEP_FILES) \
+ $(TEST_GEN_OBJ) \
+ cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
-$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
# Compile the string overrides as freestanding to prevent the compiler from
@@ -272,8 +289,10 @@ $(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+$(SPLIT_TESTS_OBJS): $(GEN_HDRS)
$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
+$(TEST_GEN_OBJ): $(GEN_HDRS)
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
cscope:
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
index b90580840b22..8e5bd07a3727 100644
--- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
@@ -146,8 +146,8 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
- el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val);
- return el0 == ID_AA64PFR0_ELx_64BIT_ONLY;
+ el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
}
int main(void)
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index f5b6cb3a0019..866002917441 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -116,12 +116,12 @@ static void reset_debug_state(void)
/* Reset all bcr/bvr/wcr/wvr registers */
dfr0 = read_sysreg(id_aa64dfr0_el1);
- brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0);
+ brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
for (i = 0; i <= brps; i++) {
write_dbgbcr(i, 0);
write_dbgbvr(i, 0);
}
- wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0);
+ wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
for (i = 0; i <= wrps; i++) {
write_dbgwcr(i, 0);
write_dbgwvr(i, 0);
@@ -418,7 +418,7 @@ static void guest_code_ss(int test_cnt)
static int debug_version(uint64_t id_aa64dfr0)
{
- return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0);
+ return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
}
static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
@@ -539,14 +539,14 @@ void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
int b, w, c;
/* Number of breakpoints */
- brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1;
+ brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
/* Number of watchpoints */
- wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1;
+ wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
/* Number of context aware breakpoints */
- ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1;
+ ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
brp_num, wrp_num, ctx_brp_num);
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 47bb914ab2fa..eb4217b7c768 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -96,14 +96,14 @@ static bool guest_check_lse(void)
uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
uint64_t atomic;
- atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0);
+ atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
return atomic >= 2;
}
static bool guest_check_dc_zva(void)
{
uint64_t dczid = read_sysreg(dczid_el0);
- uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid);
+ uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
return dzp == 0;
}
@@ -135,8 +135,8 @@ static void guest_at(void)
uint64_t par;
asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
- par = read_sysreg(par_el1);
isb();
+ par = read_sysreg(par_el1);
/* Bit 1 indicates whether the AT was successful */
GUEST_ASSERT_EQ(par & 1, 0);
@@ -196,7 +196,7 @@ static bool guest_set_ha(void)
uint64_t hadbs, tcr;
/* Skip if HA is not supported. */
- hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1);
+ hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
if (hadbs == 0)
return false;
@@ -842,6 +842,7 @@ static void help(char *name)
.name = SCAT2(ro_memslot_no_syndrome, _access), \
.data_memslot_flags = KVM_MEM_READONLY, \
.pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
.expected_events = { .fail_vcpu_runs = 1 }, \
@@ -865,6 +866,7 @@ static void help(char *name)
.name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
.data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
.pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.guest_test_check = { _test_check }, \
.fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
@@ -894,6 +896,7 @@ static void help(char *name)
.data_memslot_flags = KVM_MEM_READONLY, \
.pt_memslot_flags = KVM_MEM_READONLY, \
.mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
.guest_test = _access, \
.uffd_data_handler = _uffd_data_handler, \
.uffd_pt_handler = uffd_pt_handler, \
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
new file mode 100644
index 000000000000..bac05210b539
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * set_id_regs - Test for setting ID register from usersapce.
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ *
+ * Test that KVM supports setting ID registers from userspace and handles the
+ * feature set correctly.
+ */
+
+#include <stdint.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+enum ftr_type {
+ FTR_EXACT, /* Use a predefined safe value */
+ FTR_LOWER_SAFE, /* Smaller value is safe */
+ FTR_HIGHER_SAFE, /* Bigger value is safe */
+ FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */
+ FTR_END, /* Mark the last ftr bits */
+};
+
+#define FTR_SIGNED true /* Value should be treated as signed */
+#define FTR_UNSIGNED false /* Value should be treated as unsigned */
+
+struct reg_ftr_bits {
+ char *name;
+ bool sign;
+ enum ftr_type type;
+ uint8_t shift;
+ uint64_t mask;
+ int64_t safe_val;
+};
+
+struct test_feature_reg {
+ uint32_t reg;
+ const struct reg_ftr_bits *ftr_bits;
+};
+
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \
+ { \
+ .name = #NAME, \
+ .sign = SIGNED, \
+ .type = TYPE, \
+ .shift = SHIFT, \
+ .mask = MASK, \
+ .safe_val = SAFE_VAL, \
+ }
+
+#define REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define S_REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define REG_FTR_END \
+ { \
+ .type = FTR_END, \
+ }
+
+static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+ REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
+ REG_FTR_END,
+};
+
+#define TEST_REG(id, table) \
+ { \
+ .reg = id, \
+ .ftr_bits = &((table)[0]), \
+ }
+
+static struct test_feature_reg test_regs[] = {
+ TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
+ TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
+ TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
+ TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
+ TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+ TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+ TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
+ TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
+ TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
+};
+
+#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
+
+static void guest_code(void)
+{
+ GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+
+ GUEST_DONE();
+}
+
+/* Return a safe value to a given ftr_bits an ftr value */
+uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ if (ftr_bits->type == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > 0)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == ftr_max)
+ ftr = 0;
+ else if (ftr != 0)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > 0)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max - 1)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr != 0 && ftr != ftr_max - 1)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return ftr;
+}
+
+/* Return an invalid value to a given ftr_bits an ftr value */
+uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ if (ftr_bits->type == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max - 1;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else {
+ ftr = 0;
+ }
+
+ return ftr;
+}
+
+static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, new_val, ftr;
+
+ vcpu_get_reg(vcpu, reg, &val);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_safe_value(ftr_bits, ftr);
+
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ vcpu_set_reg(vcpu, reg, val);
+ vcpu_get_reg(vcpu, reg, &new_val);
+ TEST_ASSERT_EQ(new_val, val);
+}
+
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, old_val, ftr;
+ int r;
+
+ vcpu_get_reg(vcpu, reg, &val);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_invalid_value(ftr_bits, ftr);
+
+ old_val = val;
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ r = __vcpu_set_reg(vcpu, reg, val);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ vcpu_get_reg(vcpu, reg, &val);
+ TEST_ASSERT_EQ(val, old_val);
+}
+
+static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ int ret;
+
+ /* KVM should return error when reserved field is not zero */
+ range.reserved[0] = 1;
+ ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+ TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+ const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
+ uint32_t reg_id = test_regs[i].reg;
+ uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+ int idx;
+
+ /* Get the index to masks array for the idreg */
+ idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id),
+ sys_reg_CRn(reg_id), sys_reg_CRm(reg_id),
+ sys_reg_Op2(reg_id));
+
+ for (int j = 0; ftr_bits[j].type != FTR_END; j++) {
+ /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */
+ if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
+ ksft_test_result_skip("%s on AARCH64 only system\n",
+ ftr_bits[j].name);
+ continue;
+ }
+
+ /* Make sure the feature field is writable */
+ TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
+
+ test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
+ test_reg_set_success(vcpu, reg, &ftr_bits[j]);
+
+ ksft_test_result_pass("%s\n", ftr_bits[j].name);
+ }
+ }
+}
+
+static void test_guest_reg_read(struct kvm_vcpu *vcpu)
+{
+ bool done = false;
+ struct ucall uc;
+ uint64_t val;
+
+ while (!done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_SYNC:
+ /* Make sure the written values are seen by guest */
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val);
+ TEST_ASSERT_EQ(val, uc.args[3]);
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool aarch64_only;
+ uint64_t val, el0;
+ int ftr_cnt;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Check for AARCH64 only system */
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+ el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+
+ ksft_print_header();
+
+ ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
+ ARRAY_SIZE(test_regs);
+
+ ksft_set_plan(ftr_cnt);
+
+ test_user_set_reg(vcpu, aarch64_only);
+ test_guest_reg_read(vcpu);
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
new file mode 100644
index 000000000000..5ea78986e665
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -0,0 +1,670 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vpmu_counter_access - Test vPMU event counter access
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * This test checks if the guest can see the same number of the PMU event
+ * counters (PMCR_EL0.N) that userspace sets, if the guest can access
+ * those counters, and if the guest is prevented from accessing any
+ * other counters.
+ * It also checks if the userspace accesses to the PMU regsisters honor the
+ * PMCR.N value that's set for the guest.
+ * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include <vgic.h>
+#include <perf/arm_pmuv3.h>
+#include <linux/bitfield.h>
+
+/* The max number of the PMU event counters (excluding the cycle counter) */
+#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/* The cycle counter bit position that's common among the PMU registers */
+#define ARMV8_PMU_CYCLE_IDX 31
+
+struct vpmu_vm {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ int gic_fd;
+};
+
+static struct vpmu_vm vpmu_vm;
+
+struct pmreg_sets {
+ uint64_t set_reg_id;
+ uint64_t clr_reg_id;
+};
+
+#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
+
+static uint64_t get_pmcr_n(uint64_t pmcr)
+{
+ return (pmcr >> ARMV8_PMU_PMCR_N_SHIFT) & ARMV8_PMU_PMCR_N_MASK;
+}
+
+static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
+{
+ *pmcr = *pmcr & ~(ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
+ *pmcr |= (pmcr_n << ARMV8_PMU_PMCR_N_SHIFT);
+}
+
+static uint64_t get_counters_mask(uint64_t n)
+{
+ uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+ if (n)
+ mask |= GENMASK(n - 1, 0);
+ return mask;
+}
+
+/* Read PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline unsigned long read_sel_evcntr(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevcntr_el0);
+}
+
+/* Write PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline void write_sel_evcntr(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevcntr_el0);
+ isb();
+}
+
+/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline unsigned long read_sel_evtyper(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevtyper_el0);
+}
+
+/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline void write_sel_evtyper(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevtyper_el0);
+ isb();
+}
+
+static inline void enable_counter(int idx)
+{
+ uint64_t v = read_sysreg(pmcntenset_el0);
+
+ write_sysreg(BIT(idx) | v, pmcntenset_el0);
+ isb();
+}
+
+static inline void disable_counter(int idx)
+{
+ uint64_t v = read_sysreg(pmcntenset_el0);
+
+ write_sysreg(BIT(idx) | v, pmcntenclr_el0);
+ isb();
+}
+
+static void pmu_disable_reset(void)
+{
+ uint64_t pmcr = read_sysreg(pmcr_el0);
+
+ /* Reset all counters, disabling them */
+ pmcr &= ~ARMV8_PMU_PMCR_E;
+ write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
+ isb();
+}
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+ isb();
+}
+
+#define READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, READ_PMEVTYPERN);
+ return 0;
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+ isb();
+}
+
+/*
+ * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
+ * accessors that test cases will use. Each of the accessors will
+ * either directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
+ * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
+ * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
+ *
+ * This is used to test that combinations of those accessors provide
+ * the consistent behavior.
+ */
+struct pmc_accessor {
+ /* A function to be used to read PMEVTCNTR<n>_EL0 */
+ unsigned long (*read_cntr)(int idx);
+ /* A function to be used to write PMEVTCNTR<n>_EL0 */
+ void (*write_cntr)(int idx, unsigned long val);
+ /* A function to be used to read PMEVTYPER<n>_EL0 */
+ unsigned long (*read_typer)(int idx);
+ /* A function to be used to write PMEVTYPER<n>_EL0 */
+ void (*write_typer)(int idx, unsigned long val);
+};
+
+struct pmc_accessor pmc_accessors[] = {
+ /* test with all direct accesses */
+ { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
+ /* test with all indirect accesses */
+ { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
+ /* read with direct accesses, and write with indirect accesses */
+ { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
+ /* read with indirect accesses, and write with direct accesses */
+ { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
+};
+
+/*
+ * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
+ * assuming that the pointer is one of the entries in pmc_accessors[].
+ */
+#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0])
+
+#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \
+{ \
+ uint64_t _tval = read_sysreg(regname); \
+ \
+ if (set_expected) \
+ __GUEST_ASSERT((_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+ _tval, mask, set_expected); \
+ else \
+ __GUEST_ASSERT(!(_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: 0x%lx", \
+ _tval, mask, set_expected); \
+}
+
+/*
+ * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
+ * are set or cleared as specified in @set_expected.
+ */
+static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+{
+ GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
+}
+
+/*
+ * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
+ * to the specified counter (@pmc_idx) can be read/written as expected.
+ * When @set_op is true, it tries to set the bit for the counter in
+ * those registers by writing the SET registers (the bit won't be set
+ * if the counter is not implemented though).
+ * Otherwise, it tries to clear the bits in the registers by writing
+ * the CLR registers.
+ * Then, it checks if the values indicated in the registers are as expected.
+ */
+static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
+{
+ uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+ bool set_expected = false;
+
+ if (set_op) {
+ write_sysreg(test_bit, pmcntenset_el0);
+ write_sysreg(test_bit, pmintenset_el1);
+ write_sysreg(test_bit, pmovsset_el0);
+
+ /* The bit will be set only if the counter is implemented */
+ pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
+ set_expected = (pmc_idx < pmcr_n) ? true : false;
+ } else {
+ write_sysreg(test_bit, pmcntenclr_el0);
+ write_sysreg(test_bit, pmintenclr_el1);
+ write_sysreg(test_bit, pmovsclr_el0);
+ }
+ check_bitmap_pmu_regs(test_bit, set_expected);
+}
+
+/*
+ * Tests for reading/writing registers for the (implemented) event counter
+ * specified by @pmc_idx.
+ */
+static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ uint64_t write_data, read_data;
+
+ /* Disable all PMCs and reset all PMCs to zero. */
+ pmu_disable_reset();
+
+ /*
+ * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
+ */
+
+ /* Make sure that the bit in those registers are set to 0 */
+ test_bitmap_pmu_regs(pmc_idx, false);
+ /* Test if setting the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, true);
+ /* Test if clearing the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, false);
+
+ /*
+ * Tests for reading/writing the event type register.
+ */
+
+ /*
+ * Set the event type register to an arbitrary value just for testing
+ * of reading/writing the register.
+ * Arm ARM says that for the event from 0x0000 to 0x003F,
+ * the value indicated in the PMEVTYPER<n>_EL0.evtCount field is
+ * the value written to the field even when the specified event
+ * is not supported.
+ */
+ write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
+ acc->write_typer(pmc_idx, write_data);
+ read_data = acc->read_typer(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+
+ /*
+ * Tests for reading/writing the event count register.
+ */
+
+ read_data = acc->read_cntr(pmc_idx);
+
+ /* The count value must be 0, as it is disabled and reset */
+ __GUEST_ASSERT(read_data == 0,
+ "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
+
+ write_data = read_data + pmc_idx + 0x12345;
+ acc->write_cntr(pmc_idx, write_data);
+ read_data = acc->read_cntr(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%lx; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+}
+
+#define INVALID_EC (-1ul)
+uint64_t expected_ec = INVALID_EC;
+
+static void guest_sync_handler(struct ex_regs *regs)
+{
+ uint64_t esr, ec;
+
+ esr = read_sysreg(esr_el1);
+ ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK;
+
+ __GUEST_ASSERT(expected_ec == ec,
+ "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
+ regs->pc, esr, ec, expected_ec);
+
+ /* skip the trapping instruction */
+ regs->pc += 4;
+
+ /* Use INVALID_EC to indicate an exception occurred */
+ expected_ec = INVALID_EC;
+}
+
+/*
+ * Run the given operation that should trigger an exception with the
+ * given exception class. The exception handler (guest_sync_handler)
+ * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip
+ * the instruction that trapped.
+ */
+#define TEST_EXCEPTION(ec, ops) \
+({ \
+ GUEST_ASSERT(ec != INVALID_EC); \
+ WRITE_ONCE(expected_ec, ec); \
+ dsb(ish); \
+ ops; \
+ GUEST_ASSERT(expected_ec == INVALID_EC); \
+})
+
+/*
+ * Tests for reading/writing registers for the unimplemented event counter
+ * specified by @pmc_idx (>= PMCR_EL0.N).
+ */
+static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ /*
+ * Reading/writing the event count/type registers should cause
+ * an UNDEFINED exception.
+ */
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx));
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx));
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
+ /*
+ * The bit corresponding to the (unimplemented) counter in
+ * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
+ */
+ test_bitmap_pmu_regs(pmc_idx, 1);
+ test_bitmap_pmu_regs(pmc_idx, 0);
+}
+
+/*
+ * The guest is configured with PMUv3 with @expected_pmcr_n number of
+ * event counters.
+ * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
+ * if reading/writing PMU registers for implemented or unimplemented
+ * counters works as expected.
+ */
+static void guest_code(uint64_t expected_pmcr_n)
+{
+ uint64_t pmcr, pmcr_n, unimp_mask;
+ int i, pmc;
+
+ __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
+ "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%lx",
+ expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
+
+ pmcr = read_sysreg(pmcr_el0);
+ pmcr_n = get_pmcr_n(pmcr);
+
+ /* Make sure that PMCR_EL0.N indicates the value userspace set */
+ __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
+ "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
+ expected_pmcr_n, pmcr_n);
+
+ /*
+ * Make sure that (RAZ) bits corresponding to unimplemented event
+ * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
+ * to zero.
+ * (NOTE: bits for implemented event counters are reset to UNKNOWN)
+ */
+ unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
+ check_bitmap_pmu_regs(unimp_mask, false);
+
+ /*
+ * Tests for reading/writing PMU registers for implemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = 0; pmc < pmcr_n; pmc++)
+ test_access_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ /*
+ * Tests for reading/writing PMU registers for unimplemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
+ test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ GUEST_DONE();
+}
+
+#define GICD_BASE_GPA 0x8000000ULL
+#define GICR_BASE_GPA 0x80A0000ULL
+
+/* Create a VM that has one vCPU with PMUv3 configured. */
+static void create_vpmu_vm(void *guest_code)
+{
+ struct kvm_vcpu_init init;
+ uint8_t pmuver, ec;
+ uint64_t dfr0, irq = 23;
+ struct kvm_device_attr irq_attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+ .addr = (uint64_t)&irq,
+ };
+ struct kvm_device_attr init_attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_INIT,
+ };
+
+ /* The test creates the vpmu_vm multiple times. Ensure a clean state */
+ memset(&vpmu_vm, 0, sizeof(vpmu_vm));
+
+ vpmu_vm.vm = vm_create(1);
+ vm_init_descriptor_tables(vpmu_vm.vm);
+ for (ec = 0; ec < ESR_EC_NUM; ec++) {
+ vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
+ guest_sync_handler);
+ }
+
+ /* Create vCPU with PMUv3 */
+ vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
+ vcpu_init_descriptor_tables(vpmu_vm.vcpu);
+ vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64,
+ GICD_BASE_GPA, GICR_BASE_GPA);
+ __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
+ "Failed to create vgic-v3, skipping");
+
+ /* Make sure that PMUv3 support is indicated in the ID register */
+ vcpu_get_reg(vpmu_vm.vcpu,
+ KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0);
+ pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+ TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
+ pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
+ "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
+
+ /* Initialize vPMU */
+ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
+ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
+}
+
+static void destroy_vpmu_vm(void)
+{
+ close(vpmu_vm.gic_fd);
+ kvm_vm_free(vpmu_vm.vm);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+{
+ struct ucall uc;
+
+ vcpu_args_set(vcpu, 1, pmcr_n);
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t pmcr, pmcr_orig;
+
+ create_vpmu_vm(guest_code);
+ vcpu = vpmu_vm.vcpu;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig);
+ pmcr = pmcr_orig;
+
+ /*
+ * Setting a larger value of PMCR.N should not modify the field, and
+ * return a success.
+ */
+ set_pmcr_n(&pmcr, pmcr_n);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+
+ if (expect_fail)
+ TEST_ASSERT(pmcr_orig == pmcr,
+ "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx\n",
+ pmcr, pmcr_n);
+ else
+ TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
+ "Failed to update PMCR.N to %lu (received: %lu)\n",
+ pmcr_n, get_pmcr_n(pmcr));
+}
+
+/*
+ * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
+ * and run the test.
+ */
+static void run_access_test(uint64_t pmcr_n)
+{
+ uint64_t sp;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_init init;
+
+ pr_debug("Test with pmcr_n %lu\n", pmcr_n);
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+ /* Save the initial sp to restore them later to run the guest again */
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp);
+
+ run_vcpu(vcpu, pmcr_n);
+
+ /*
+ * Reset and re-initialize the vCPU, and run the guest code again to
+ * check if PMCR_EL0.N is preserved.
+ */
+ vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ aarch64_vcpu_setup(vcpu, &init);
+ vcpu_init_descriptor_tables(vcpu);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+ run_vcpu(vcpu, pmcr_n);
+
+ destroy_vpmu_vm();
+}
+
+static struct pmreg_sets validity_check_reg_sets[] = {
+ PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
+ PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
+ PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
+};
+
+/*
+ * Create a VM, and check if KVM handles the userspace accesses of
+ * the PMU register sets in @validity_check_reg_sets[] correctly.
+ */
+static void run_pmregs_validity_test(uint64_t pmcr_n)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+ uint64_t set_reg_id, clr_reg_id, reg_val;
+ uint64_t valid_counters_mask, max_counters_mask;
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+ valid_counters_mask = get_counters_mask(pmcr_n);
+ max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
+
+ for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
+ set_reg_id = validity_check_reg_sets[i].set_reg_id;
+ clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
+
+ /*
+ * Test if the 'set' and 'clr' variants of the registers
+ * are initialized based on the number of valid counters.
+ */
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+
+ /*
+ * Using the 'set' variant, force-set the register to the
+ * max number of possible counters and test if KVM discards
+ * the bits for unimplemented counters as it should.
+ */
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx\n",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+ }
+
+ destroy_vpmu_vm();
+}
+
+/*
+ * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
+ * the vCPU to @pmcr_n, which is larger than the host value.
+ * The attempt should fail as @pmcr_n is too big to set for the vCPU.
+ */
+static void run_error_test(uint64_t pmcr_n)
+{
+ pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+ destroy_vpmu_vm();
+}
+
+/*
+ * Return the default number of implemented PMU event counters excluding
+ * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */
+static uint64_t get_pmcr_n_limit(void)
+{
+ uint64_t pmcr;
+
+ create_vpmu_vm(guest_code);
+ vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+ destroy_vpmu_vm();
+ return get_pmcr_n(pmcr);
+}
+
+int main(void)
+{
+ uint64_t i, pmcr_n;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+
+ pmcr_n = get_pmcr_n_limit();
+ for (i = 0; i <= pmcr_n; i++) {
+ run_access_test(i);
+ run_pmregs_validity_test(i);
+ }
+
+ for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ run_error_test(i);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index cb537253a6b9..c42d683102c7 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -104,6 +104,7 @@ enum {
#define ESR_EC_SHIFT 26
#define ESR_EC_MASK (ESR_EC_NUM - 1)
+#define ESR_EC_UNKNOWN 0x0
#define ESR_EC_SVC64 0x15
#define ESR_EC_IABT 0x21
#define ESR_EC_DABT 0x25
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 7e614adc6cf4..8e5f413a593d 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -33,7 +33,7 @@ static inline int _no_printf(const char *format, ...) { return 0; }
#define pr_info(...) _no_printf(__VA_ARGS__)
#endif
-void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+void __printf(1, 2) print_skip(const char *fmt, ...);
#define __TEST_REQUIRE(f, fmt, ...) \
do { \
if (!(f)) \
@@ -46,9 +46,9 @@ ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
int test_seq_read(const char *path, char **bufp, size_t *sizep);
-void test_assert(bool exp, const char *exp_str,
- const char *file, unsigned int line, const char *fmt, ...)
- __attribute__((format(printf, 5, 6)));
+void __printf(5, 6) test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line,
+ const char *fmt, ...);
#define TEST_ASSERT(e, fmt, ...) \
test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 112bc1da732a..ce33d306c2cb 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/kvm_util.h
- *
* Copyright (C) 2018, Google LLC.
*/
#ifndef SELFTEST_KVM_UCALL_COMMON_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 4fd042112526..25bc61dac5fb 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -68,6 +68,12 @@ struct xstate {
#define XFEATURE_MASK_OPMASK BIT_ULL(5)
#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
+#define XFEATURE_MASK_PT BIT_ULL(8)
+#define XFEATURE_MASK_PKRU BIT_ULL(9)
+#define XFEATURE_MASK_PASID BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
+#define XFEATURE_MASK_LBR BIT_ULL(15)
#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
@@ -147,6 +153,7 @@ struct kvm_x86_cpu_feature {
#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
@@ -553,6 +560,13 @@ static inline void xsetbv(u32 index, u64 value)
__asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}
+static inline void wrpkru(u32 pkru)
+{
+ /* Note, ECX and EDX are architecturally required to be '0'. */
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (pkru), "c"(0), "d"(0));
+}
+
static inline struct desc_ptr get_gdt(void)
{
struct desc_ptr gdt;
@@ -908,6 +922,15 @@ static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
!kvm_cpu_has(feature.anti_feature);
}
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
static inline size_t kvm_cpuid2_size(int nr_entries)
{
return sizeof(struct kvm_cpuid2) +
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 3a0259e25335..6fe12e985ba5 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -518,9 +518,9 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
- *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf;
- *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0;
- *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0;
+ *ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val) != 0xf;
+ *ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val) == 0;
+ *ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val) != 0;
close(vcpu_fd);
close(vm_fd);
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
index c4a69d8aeb68..74627514c4d4 100644
--- a/tools/testing/selftests/kvm/lib/guest_sprintf.c
+++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c
@@ -200,6 +200,13 @@ repeat:
++fmt;
}
+ /*
+ * Play nice with %llu, %llx, etc. KVM selftests only support
+ * 64-bit builds, so just treat %ll* the same as %l*.
+ */
+ if (qualifier == 'l' && *fmt == 'l')
+ ++fmt;
+
/* default base */
base = 10;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
index 7168e25c194e..89153a333e83 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/apic.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/lib/x86_64/processor.c
- *
* Copyright (C) 2021, Google LLC.
*/
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 20eb2e730800..8698d1ab60d0 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -1033,9 +1033,8 @@ static bool test_loop(const struct test_data *data,
struct test_result *rbestruntime)
{
uint64_t maxslots;
- struct test_result result;
+ struct test_result result = {};
- result.nloops = 0;
if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
&result.nloops,
&result.slot_runtime, &result.guest_runtime)) {
@@ -1089,7 +1088,7 @@ int main(int argc, char *argv[])
.seconds = 5,
.runs = 1,
};
- struct test_result rbestslottime;
+ struct test_result rbestslottime = {};
int tctr;
if (!check_memory_sizes())
@@ -1098,11 +1097,10 @@ int main(int argc, char *argv[])
if (!parse_args(argc, argv, &targs))
return -1;
- rbestslottime.slottimens = 0;
for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
const struct test_data *data = &tests[tctr];
unsigned int runctr;
- struct test_result rbestruntime;
+ struct test_result rbestruntime = {};
if (tctr > targs.tfirst)
pr_info("\n");
@@ -1110,7 +1108,6 @@ int main(int argc, char *argv[])
pr_info("Testing %s performance with %i runs, %d seconds each\n",
data->name, targs.runs, targs.seconds);
- rbestruntime.runtimens = 0;
for (runctr = 0; runctr < targs.runs; runctr++)
if (!test_loop(data, &targs,
&rbestslottime, &rbestruntime))
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index 9f99ea42f45f..6bedaea95395 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -25,6 +25,8 @@ bool filter_reg(__u64 reg)
* the visibility of the ISA_EXT register itself.
*
* Based on above, we should filter-out all ISA_EXT registers.
+ *
+ * Note: The below list is alphabetically sorted.
*/
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C:
@@ -33,21 +35,23 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M:
- case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL:
- case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
- case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM:
- case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ:
- case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
- case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
- case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM:
return true;
/* AIA registers are always available when Ssaia can't be disabled */
@@ -112,11 +116,13 @@ void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
}
}
-static const char *config_id_to_str(__u64 id)
+static const char *config_id_to_str(const char *prefix, __u64 id)
{
/* reg_off is the offset into struct kvm_riscv_config */
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG);
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG);
+
switch (reg_off) {
case KVM_REG_RISCV_CONFIG_REG(isa):
return "KVM_REG_RISCV_CONFIG_REG(isa)";
@@ -134,11 +140,7 @@ static const char *config_id_to_str(__u64 id)
return "KVM_REG_RISCV_CONFIG_REG(satp_mode)";
}
- /*
- * Config regs would grow regularly with new pseudo reg added, so
- * just show raw id to indicate a new pseudo config reg.
- */
- return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", reg_off);
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
static const char *core_id_to_str(const char *prefix, __u64 id)
@@ -146,6 +148,8 @@ static const char *core_id_to_str(const char *prefix, __u64 id)
/* reg_off is the offset into struct kvm_riscv_core */
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE);
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE);
+
switch (reg_off) {
case KVM_REG_RISCV_CORE_REG(regs.pc):
return "KVM_REG_RISCV_CORE_REG(regs.pc)";
@@ -176,14 +180,15 @@ static const char *core_id_to_str(const char *prefix, __u64 id)
return "KVM_REG_RISCV_CORE_REG(mode)";
}
- TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
- return NULL;
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
#define RISCV_CSR_GENERAL(csr) \
"KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")"
#define RISCV_CSR_AIA(csr) \
"KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_SMSTATEEN(csr) \
+ "KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_REG(" #csr ")"
static const char *general_csr_id_to_str(__u64 reg_off)
{
@@ -209,10 +214,11 @@ static const char *general_csr_id_to_str(__u64 reg_off)
return RISCV_CSR_GENERAL(satp);
case KVM_REG_RISCV_CSR_REG(scounteren):
return RISCV_CSR_GENERAL(scounteren);
+ case KVM_REG_RISCV_CSR_REG(senvcfg):
+ return RISCV_CSR_GENERAL(senvcfg);
}
- TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off);
- return NULL;
+ return strdup_printf("KVM_REG_RISCV_CSR_GENERAL | %lld /* UNKNOWN */", reg_off);
}
static const char *aia_csr_id_to_str(__u64 reg_off)
@@ -235,7 +241,18 @@ static const char *aia_csr_id_to_str(__u64 reg_off)
return RISCV_CSR_AIA(iprio2h);
}
- TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off);
+ return strdup_printf("KVM_REG_RISCV_CSR_AIA | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *smstateen_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_smstateen_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0):
+ return RISCV_CSR_SMSTATEEN(sstateen0);
+ }
+
+ TEST_FAIL("Unknown smstateen csr reg: 0x%llx", reg_off);
return NULL;
}
@@ -244,6 +261,8 @@ static const char *csr_id_to_str(const char *prefix, __u64 id)
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR);
__u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR);
+
reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
switch (reg_subtype) {
@@ -251,10 +270,11 @@ static const char *csr_id_to_str(const char *prefix, __u64 id)
return general_csr_id_to_str(reg_off);
case KVM_REG_RISCV_CSR_AIA:
return aia_csr_id_to_str(reg_off);
+ case KVM_REG_RISCV_CSR_SMSTATEEN:
+ return smstateen_csr_id_to_str(reg_off);
}
- TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, reg_subtype);
- return NULL;
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
}
static const char *timer_id_to_str(const char *prefix, __u64 id)
@@ -262,6 +282,8 @@ static const char *timer_id_to_str(const char *prefix, __u64 id)
/* reg_off is the offset into struct kvm_riscv_timer */
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER);
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER);
+
switch (reg_off) {
case KVM_REG_RISCV_TIMER_REG(frequency):
return "KVM_REG_RISCV_TIMER_REG(frequency)";
@@ -273,8 +295,7 @@ static const char *timer_id_to_str(const char *prefix, __u64 id)
return "KVM_REG_RISCV_TIMER_REG(state)";
}
- TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id);
- return NULL;
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
static const char *fp_f_id_to_str(const char *prefix, __u64 id)
@@ -282,6 +303,8 @@ static const char *fp_f_id_to_str(const char *prefix, __u64 id)
/* reg_off is the offset into struct __riscv_f_ext_state */
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F);
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F);
+
switch (reg_off) {
case KVM_REG_RISCV_FP_F_REG(f[0]) ...
KVM_REG_RISCV_FP_F_REG(f[31]):
@@ -290,8 +313,7 @@ static const char *fp_f_id_to_str(const char *prefix, __u64 id)
return "KVM_REG_RISCV_FP_F_REG(fcsr)";
}
- TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id);
- return NULL;
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
static const char *fp_d_id_to_str(const char *prefix, __u64 id)
@@ -299,6 +321,8 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
/* reg_off is the offset into struct __riscv_d_ext_state */
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D);
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D);
+
switch (reg_off) {
case KVM_REG_RISCV_FP_D_REG(f[0]) ...
KVM_REG_RISCV_FP_D_REG(f[31]):
@@ -307,96 +331,93 @@ static const char *fp_d_id_to_str(const char *prefix, __u64 id)
return "KVM_REG_RISCV_FP_D_REG(fcsr)";
}
- TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id);
- return NULL;
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
}
-static const char *isa_ext_id_to_str(__u64 id)
+#define KVM_ISA_EXT_ARR(ext) \
+[KVM_RISCV_ISA_EXT_##ext] = "KVM_RISCV_ISA_EXT_" #ext
+
+static const char *isa_ext_id_to_str(const char *prefix, __u64 id)
{
/* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT);
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT);
+
static const char * const kvm_isa_ext_reg_name[] = {
- "KVM_RISCV_ISA_EXT_A",
- "KVM_RISCV_ISA_EXT_C",
- "KVM_RISCV_ISA_EXT_D",
- "KVM_RISCV_ISA_EXT_F",
- "KVM_RISCV_ISA_EXT_H",
- "KVM_RISCV_ISA_EXT_I",
- "KVM_RISCV_ISA_EXT_M",
- "KVM_RISCV_ISA_EXT_SVPBMT",
- "KVM_RISCV_ISA_EXT_SSTC",
- "KVM_RISCV_ISA_EXT_SVINVAL",
- "KVM_RISCV_ISA_EXT_ZIHINTPAUSE",
- "KVM_RISCV_ISA_EXT_ZICBOM",
- "KVM_RISCV_ISA_EXT_ZICBOZ",
- "KVM_RISCV_ISA_EXT_ZBB",
- "KVM_RISCV_ISA_EXT_SSAIA",
- "KVM_RISCV_ISA_EXT_V",
- "KVM_RISCV_ISA_EXT_SVNAPOT",
- "KVM_RISCV_ISA_EXT_ZBA",
- "KVM_RISCV_ISA_EXT_ZBS",
- "KVM_RISCV_ISA_EXT_ZICNTR",
- "KVM_RISCV_ISA_EXT_ZICSR",
- "KVM_RISCV_ISA_EXT_ZIFENCEI",
- "KVM_RISCV_ISA_EXT_ZIHPM",
+ KVM_ISA_EXT_ARR(A),
+ KVM_ISA_EXT_ARR(C),
+ KVM_ISA_EXT_ARR(D),
+ KVM_ISA_EXT_ARR(F),
+ KVM_ISA_EXT_ARR(H),
+ KVM_ISA_EXT_ARR(I),
+ KVM_ISA_EXT_ARR(M),
+ KVM_ISA_EXT_ARR(V),
+ KVM_ISA_EXT_ARR(SMSTATEEN),
+ KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSTC),
+ KVM_ISA_EXT_ARR(SVINVAL),
+ KVM_ISA_EXT_ARR(SVNAPOT),
+ KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(ZBA),
+ KVM_ISA_EXT_ARR(ZBB),
+ KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOZ),
+ KVM_ISA_EXT_ARR(ZICNTR),
+ KVM_ISA_EXT_ARR(ZICOND),
+ KVM_ISA_EXT_ARR(ZICSR),
+ KVM_ISA_EXT_ARR(ZIFENCEI),
+ KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+ KVM_ISA_EXT_ARR(ZIHPM),
};
- if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) {
- /*
- * isa_ext regs would grow regularly with new isa extension added, so
- * just show "reg" to indicate a new extension.
- */
+ if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name))
return strdup_printf("%lld /* UNKNOWN */", reg_off);
- }
return kvm_isa_ext_reg_name[reg_off];
}
+#define KVM_SBI_EXT_ARR(ext) \
+[ext] = "KVM_REG_RISCV_SBI_SINGLE | " #ext
+
static const char *sbi_ext_single_id_to_str(__u64 reg_off)
{
/* reg_off is KVM_RISCV_SBI_EXT_ID */
static const char * const kvm_sbi_ext_reg_name[] = {
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL",
- "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR",
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_V01),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_TIME),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_IPI),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_RFENCE),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
};
- if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) {
- /*
- * sbi_ext regs would grow regularly with new sbi extension added, so
- * just show "reg" to indicate a new extension.
- */
+ if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name))
return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off);
- }
return kvm_sbi_ext_reg_name[reg_off];
}
static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
{
- if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) {
- /*
- * sbi_ext regs would grow regularly with new sbi extension added, so
- * just show "reg" to indicate a new extension.
- */
- return strdup_printf("%lld /* UNKNOWN */", reg_off);
- }
+ const char *unknown = "";
+
+ if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
+ unknown = " /* UNKNOWN */";
switch (reg_subtype) {
case KVM_REG_RISCV_SBI_MULTI_EN:
- return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld", reg_off);
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld%s", reg_off, unknown);
case KVM_REG_RISCV_SBI_MULTI_DIS:
- return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld", reg_off);
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld%s", reg_off, unknown);
}
- return NULL;
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
}
static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
@@ -404,6 +425,8 @@ static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
__u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT);
__u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_EXT);
+
reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
switch (reg_subtype) {
@@ -414,8 +437,7 @@ static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
return sbi_ext_multi_id_to_str(reg_subtype, reg_off);
}
- TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, reg_subtype);
- return NULL;
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
}
void print_reg(const char *prefix, __u64 id)
@@ -436,14 +458,14 @@ void print_reg(const char *prefix, __u64 id)
reg_size = "KVM_REG_SIZE_U128";
break;
default:
- TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
- prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,",
+ (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & REG_MASK);
}
switch (id & KVM_REG_RISCV_TYPE_MASK) {
case KVM_REG_RISCV_CONFIG:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n",
- reg_size, config_id_to_str(id));
+ reg_size, config_id_to_str(prefix, id));
break;
case KVM_REG_RISCV_CORE:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n",
@@ -467,15 +489,15 @@ void print_reg(const char *prefix, __u64 id)
break;
case KVM_REG_RISCV_ISA_EXT:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
- reg_size, isa_ext_id_to_str(id));
+ reg_size, isa_ext_id_to_str(prefix, id));
break;
case KVM_REG_RISCV_SBI_EXT:
printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n",
reg_size, sbi_ext_id_to_str(prefix, id));
break;
default:
- TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix,
- (id & KVM_REG_RISCV_TYPE_MASK) >> KVM_REG_RISCV_TYPE_SHIFT, id);
+ printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,",
+ reg_size, id & REG_MASK);
}
}
@@ -532,6 +554,7 @@ static __u64 base_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp),
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(senvcfg),
KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency),
KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
@@ -545,6 +568,7 @@ static __u64 base_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0,
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0,
};
@@ -603,6 +627,10 @@ static __u64 zicntr_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR,
};
+static __u64 zicond_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICOND,
+};
+
static __u64 zicsr_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR,
};
@@ -626,6 +654,11 @@ static __u64 aia_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA,
};
+static __u64 smstateen_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SMSTATEEN,
+};
+
static __u64 fp_f_regs[] = {
KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]),
KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]),
@@ -725,6 +758,8 @@ static __u64 fp_d_regs[] = {
{"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),}
#define ZICNTR_REGS_SUBLIST \
{"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),}
+#define ZICOND_REGS_SUBLIST \
+ {"zicond", .feature = KVM_RISCV_ISA_EXT_ZICOND, .regs = zicond_regs, .regs_n = ARRAY_SIZE(zicond_regs),}
#define ZICSR_REGS_SUBLIST \
{"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),}
#define ZIFENCEI_REGS_SUBLIST \
@@ -733,6 +768,8 @@ static __u64 fp_d_regs[] = {
{"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),}
#define AIA_REGS_SUBLIST \
{"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
+#define SMSTATEEN_REGS_SUBLIST \
+ {"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),}
#define FP_F_REGS_SUBLIST \
{"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
.regs_n = ARRAY_SIZE(fp_f_regs),}
@@ -828,6 +865,14 @@ static struct vcpu_reg_list zicntr_config = {
},
};
+static struct vcpu_reg_list zicond_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZICOND_REGS_SUBLIST,
+ {0},
+ },
+};
+
static struct vcpu_reg_list zicsr_config = {
.sublists = {
BASE_SUBLIST,
@@ -860,6 +905,14 @@ static struct vcpu_reg_list aia_config = {
},
};
+static struct vcpu_reg_list smstateen_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SMSTATEEN_REGS_SUBLIST,
+ {0},
+ },
+};
+
static struct vcpu_reg_list fp_f_config = {
.sublists = {
BASE_SUBLIST,
@@ -888,10 +941,12 @@ struct vcpu_reg_list *vcpu_configs[] = {
&zbb_config,
&zbs_config,
&zicntr_config,
+ &zicond_config,
&zicsr_config,
&zifencei_config,
&zihpm_config,
&aia_config,
+ &smstateen_config,
&fp_f_config,
&fp_d_config,
};
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
new file mode 100644
index 000000000000..df351ae17029
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
+ const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+ const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+ const uint64_t legal = ignored | valid;
+ uint64_t val = BIT_ULL(bit);
+ uint64_t actual;
+ int r;
+
+ r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+ TEST_ASSERT(val & ~legal ? !r : r == 1,
+ "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+ val, val & ~legal ? "fail" : "succeed");
+
+ actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+ TEST_ASSERT(actual == (val & valid),
+ "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+ bit, actual, (val & valid));
+
+ vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ unsigned int bit;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ for (bit = 0; bit < BITS_PER_LONG; bit++)
+ test_hwcr_bit(vcpu, bit);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index e446d76d1c0c..6c1278562090 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * KVM_GET/SET_* tests
- *
* Copyright (C) 2022, Red Hat, Inc.
*
* Tests for Hyper-V extensions to SVM.
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 7f36c32fa760..18ac5c1952a3 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * tools/testing/selftests/kvm/nx_huge_page_test.c
- *
* Usage: to be run via nx_huge_page_test.sh, which does the necessary
* environment setup and teardown
*
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
index 0560149e66ed..7cbb409801ee 100755
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -4,7 +4,6 @@
# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
# Makes use of root privileges to set up huge pages and KVM module parameters.
#
-# tools/testing/selftests/kvm/nx_huge_page_test.sh
# Copyright (C) 2022, Google LLC.
set -e
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 4c4925a8ab45..88b58aab7207 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -139,6 +139,83 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
+
+ if (this_cpu_has(X86_FEATURE_XSAVE)) {
+ uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+ uint8_t buffer[4096];
+
+ memset(buffer, 0xcc, sizeof(buffer));
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ xsetbv(0, xgetbv(0) | supported_xcr0);
+
+ /*
+ * Modify state for all supported xfeatures to take them out of
+ * their "init" state, i.e. to make them show up in XSTATE_BV.
+ *
+ * Note off-by-default features, e.g. AMX, are out of scope for
+ * this particular testcase as they have a different ABI.
+ */
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+ asm volatile ("fincstp");
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+ asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_YMM)
+ asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+ asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+ asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+ asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+ }
+
+ if (this_cpu_has(X86_FEATURE_MPX)) {
+ uint64_t bounds[2] = { 10, 0xffffffffull };
+ uint64_t output[2] = { };
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+ /*
+ * Don't bother trying to get BNDCSR into the INUSE
+ * state. MSR_IA32_BNDCFGS doesn't count as it isn't
+ * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+ * modified by XRSTOR. Stuffing XSTATE_BV in the host
+ * is simpler than doing XRSTOR here in the guest.
+ *
+ * However, temporarily enable MPX in BNDCFGS so that
+ * BNDMOV actually loads BND1. If MPX isn't *fully*
+ * enabled, all MPX instructions are treated as NOPs.
+ *
+ * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+ * mnemonics/registers has been removed from gcc and
+ * clang (and was never fully supported by clang).
+ */
+ wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+ asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+ /*
+ * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+ * that BND1 actually got loaded.
+ */
+ asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+ wrmsr(MSR_IA32_BNDCFGS, 0);
+
+ GUEST_ASSERT_EQ(bounds[0], output[0]);
+ GUEST_ASSERT_EQ(bounds[1], output[1]);
+ }
+ if (this_cpu_has(X86_FEATURE_PKU)) {
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+ set_cr4(get_cr4() | X86_CR4_PKE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+ wrpkru(-1u);
+ }
+ }
+
GUEST_SYNC(2);
if (arg) {
@@ -153,10 +230,11 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
int main(int argc, char *argv[])
{
+ uint64_t *xstate_bv, saved_xstate_bv;
vm_vaddr_t nested_gva = 0;
-
+ struct kvm_cpuid2 empty_cpuid = {};
struct kvm_regs regs1, regs2;
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu, *vcpuN;
struct kvm_vm *vm;
struct kvm_x86_state *state;
struct ucall uc;
@@ -209,6 +287,34 @@ int main(int argc, char *argv[])
/* Restore state in a new VM. */
vcpu = vm_recreate_with_one_vcpu(vm);
vcpu_load_state(vcpu, state);
+
+ /*
+ * Restore XSAVE state in a dummy vCPU, first without doing
+ * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
+ * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+ * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
+ * load only XSAVE state, MSRs in particular have a much more
+ * convoluted ABI.
+ *
+ * Load two versions of XSAVE state: one with the actual guest
+ * XSAVE state, and one with all supported features forced "on"
+ * in xstate_bv, e.g. to ensure that KVM allows loading all
+ * supported features, even if something goes awry in saving
+ * the original snapshot.
+ */
+ xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+ saved_xstate_bv = *xstate_bv;
+
+ vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = kvm_cpu_supported_xcr0();
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ vcpu_init_cpuid(vcpuN, &empty_cpuid);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = saved_xstate_bv;
+ vcpu_xsave_set(vcpuN, state->xsave);
+
kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
index 5b669818e39a..59c7304f805e 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -1,10 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * svm_vmcall_test
- *
* Copyright © 2021 Amazon.com, Inc. or its affiliates.
- *
- * Xen shared_info / pvclock testing
*/
#include "test_util.h"
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 05898ad9f4d9..9ec9ab60b63e 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -1,10 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * svm_vmcall_test
- *
* Copyright © 2021 Amazon.com, Inc. or its affiliates.
- *
- * Xen shared_info / pvclock testing
*/
#include "test_util.h"
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index 792c3f0a59b4..646f778dfb1e 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -75,7 +75,7 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(3, landlock_create_ruleset(NULL, 0,
+ ASSERT_EQ(4, landlock_create_ruleset(NULL, 0,
LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
index d7987ae8d7fc..5b79758cae62 100644
--- a/tools/testing/selftests/landlock/common.h
+++ b/tools/testing/selftests/landlock/common.h
@@ -112,10 +112,13 @@ static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
cap_t cap_p;
/* Only these three capabilities are useful for the tests. */
const cap_value_t caps[] = {
+ /* clang-format off */
CAP_DAC_OVERRIDE,
CAP_MKNOD,
CAP_SYS_ADMIN,
CAP_SYS_CHROOT,
+ CAP_NET_BIND_SERVICE,
+ /* clang-format on */
};
cap_p = cap_get_proc();
@@ -256,3 +259,13 @@ static int __maybe_unused send_fd(int usock, int fd_tx)
return -errno;
return 0;
}
+
+static void __maybe_unused
+enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
+{
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+ {
+ TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
+ }
+}
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 3dc9e438eab1..0086efaa7b68 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -1,5 +1,9 @@
CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
+CONFIG_INET=y
+CONFIG_IPV6=y
+CONFIG_NET=y
+CONFIG_NET_NS=y
CONFIG_OVERLAY_FS=y
CONFIG_PROC_FS=y
CONFIG_SECURITY=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 251594306d40..18e1f86a6234 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -677,16 +677,6 @@ static int create_ruleset(struct __test_metadata *const _metadata,
return ruleset_fd;
}
-static void enforce_ruleset(struct __test_metadata *const _metadata,
- const int ruleset_fd)
-{
- ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
- ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
- {
- TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
- }
-}
-
TEST_F_FORK(layout0, proc_nsfs)
{
const struct rule rules[] = {
@@ -1635,6 +1625,65 @@ TEST_F_FORK(layout1, move_mount)
clear_cap(_metadata, CAP_SYS_ADMIN);
}
+TEST_F_FORK(layout1, topology_changes_with_net_only)
+{
+ const struct landlock_ruleset_attr ruleset_net = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ int ruleset_fd;
+
+ /* Add network restrictions. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_net, sizeof(ruleset_net), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Mount, remount, move_mount, umount, and pivot_root checks. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s1d2));
+ ASSERT_EQ(0, mount(NULL, dir_s1d2, NULL, MS_PRIVATE | MS_REC, NULL));
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+ dir_s2d2, 0));
+ ASSERT_EQ(0, umount(dir_s2d2));
+ ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(0, chdir("/"));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, topology_changes_with_net_and_fs)
+{
+ const struct landlock_ruleset_attr ruleset_net_fs = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ int ruleset_fd;
+
+ /* Add network and filesystem restrictions. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_net_fs,
+ sizeof(ruleset_net_fs), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Mount, remount, move_mount, umount, and pivot_root checks. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, mount_opt(&mnt_tmp, dir_s1d2));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_PRIVATE | MS_REC, NULL));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s2d2, 0));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, umount(dir_s3d2));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
TEST_F_FORK(layout1, release_inodes)
{
const struct rule rules[] = {
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
new file mode 100644
index 000000000000..929e21c4db05
--- /dev/null
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -0,0 +1,1738 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock tests - Network
+ *
+ * Copyright © 2022-2023 Huawei Tech. Co., Ltd.
+ * Copyright © 2023 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/in.h>
+#include <sched.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "common.h"
+
+const short sock_port_start = (1 << 10);
+
+static const char loopback_ipv4[] = "127.0.0.1";
+static const char loopback_ipv6[] = "::1";
+
+/* Number pending connections queue to be hold. */
+const short backlog = 10;
+
+enum sandbox_type {
+ NO_SANDBOX,
+ /* This may be used to test rules that allow *and* deny accesses. */
+ TCP_SANDBOX,
+};
+
+struct protocol_variant {
+ int domain;
+ int type;
+};
+
+struct service_fixture {
+ struct protocol_variant protocol;
+ /* port is also stored in ipv4_addr.sin_port or ipv6_addr.sin6_port */
+ unsigned short port;
+ union {
+ struct sockaddr_in ipv4_addr;
+ struct sockaddr_in6 ipv6_addr;
+ struct {
+ struct sockaddr_un unix_addr;
+ socklen_t unix_addr_len;
+ };
+ };
+};
+
+static int set_service(struct service_fixture *const srv,
+ const struct protocol_variant prot,
+ const unsigned short index)
+{
+ memset(srv, 0, sizeof(*srv));
+
+ /*
+ * Copies all protocol properties in case of the variant only contains
+ * a subset of them.
+ */
+ srv->protocol = prot;
+
+ /* Checks for port overflow. */
+ if (index > 2)
+ return 1;
+ srv->port = sock_port_start << (2 * index);
+
+ switch (prot.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ srv->ipv4_addr.sin_family = prot.domain;
+ srv->ipv4_addr.sin_port = htons(srv->port);
+ srv->ipv4_addr.sin_addr.s_addr = inet_addr(loopback_ipv4);
+ return 0;
+
+ case AF_INET6:
+ srv->ipv6_addr.sin6_family = prot.domain;
+ srv->ipv6_addr.sin6_port = htons(srv->port);
+ inet_pton(AF_INET6, loopback_ipv6, &srv->ipv6_addr.sin6_addr);
+ return 0;
+
+ case AF_UNIX:
+ srv->unix_addr.sun_family = prot.domain;
+ sprintf(srv->unix_addr.sun_path,
+ "_selftests-landlock-net-tid%d-index%d", gettid(),
+ index);
+ srv->unix_addr_len = SUN_LEN(&srv->unix_addr);
+ srv->unix_addr.sun_path[0] = '\0';
+ return 0;
+ }
+ return 1;
+}
+
+static void setup_loopback(struct __test_metadata *const _metadata)
+{
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, unshare(CLONE_NEWNET));
+ ASSERT_EQ(0, system("ip link set dev lo up"));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+static bool is_restricted(const struct protocol_variant *const prot,
+ const enum sandbox_type sandbox)
+{
+ switch (prot->domain) {
+ case AF_INET:
+ case AF_INET6:
+ switch (prot->type) {
+ case SOCK_STREAM:
+ return sandbox == TCP_SANDBOX;
+ }
+ break;
+ }
+ return false;
+}
+
+static int socket_variant(const struct service_fixture *const srv)
+{
+ int ret;
+
+ ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
+ 0);
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+#ifndef SIN6_LEN_RFC2133
+#define SIN6_LEN_RFC2133 24
+#endif
+
+static socklen_t get_addrlen(const struct service_fixture *const srv,
+ const bool minimal)
+{
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ return sizeof(srv->ipv4_addr);
+
+ case AF_INET6:
+ if (minimal)
+ return SIN6_LEN_RFC2133;
+ return sizeof(srv->ipv6_addr);
+
+ case AF_UNIX:
+ if (minimal)
+ return sizeof(srv->unix_addr) -
+ sizeof(srv->unix_addr.sun_path);
+ return srv->unix_addr_len;
+
+ default:
+ return 0;
+ }
+}
+
+static void set_port(struct service_fixture *const srv, uint16_t port)
+{
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ srv->ipv4_addr.sin_port = htons(port);
+ return;
+
+ case AF_INET6:
+ srv->ipv6_addr.sin6_port = htons(port);
+ return;
+
+ default:
+ return;
+ }
+}
+
+static uint16_t get_binded_port(int socket_fd,
+ const struct protocol_variant *const prot)
+{
+ struct sockaddr_in ipv4_addr;
+ struct sockaddr_in6 ipv6_addr;
+ socklen_t ipv4_addr_len, ipv6_addr_len;
+
+ /* Gets binded port. */
+ switch (prot->domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ipv4_addr_len = sizeof(ipv4_addr);
+ getsockname(socket_fd, &ipv4_addr, &ipv4_addr_len);
+ return ntohs(ipv4_addr.sin_port);
+
+ case AF_INET6:
+ ipv6_addr_len = sizeof(ipv6_addr);
+ getsockname(socket_fd, &ipv6_addr, &ipv6_addr_len);
+ return ntohs(ipv6_addr.sin6_port);
+
+ default:
+ return 0;
+ }
+}
+
+static int bind_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen)
+{
+ int ret;
+
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ret = bind(sock_fd, &srv->ipv4_addr, addrlen);
+ break;
+
+ case AF_INET6:
+ ret = bind(sock_fd, &srv->ipv6_addr, addrlen);
+ break;
+
+ case AF_UNIX:
+ ret = bind(sock_fd, &srv->unix_addr, addrlen);
+ break;
+
+ default:
+ errno = EAFNOSUPPORT;
+ return -errno;
+ }
+
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+static int bind_variant(const int sock_fd,
+ const struct service_fixture *const srv)
+{
+ return bind_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+static int connect_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen)
+{
+ int ret;
+
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ret = connect(sock_fd, &srv->ipv4_addr, addrlen);
+ break;
+
+ case AF_INET6:
+ ret = connect(sock_fd, &srv->ipv6_addr, addrlen);
+ break;
+
+ case AF_UNIX:
+ ret = connect(sock_fd, &srv->unix_addr, addrlen);
+ break;
+
+ default:
+ errno = -EAFNOSUPPORT;
+ return -errno;
+ }
+
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+static int connect_variant(const int sock_fd,
+ const struct service_fixture *const srv)
+{
+ return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+FIXTURE(protocol)
+{
+ struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0;
+};
+
+FIXTURE_VARIANT(protocol)
+{
+ const enum sandbox_type sandbox;
+ const struct protocol_variant prot;
+};
+
+FIXTURE_SETUP(protocol)
+{
+ const struct protocol_variant prot_unspec = {
+ .domain = AF_UNSPEC,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1));
+ ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2));
+
+ ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
+
+ ASSERT_EQ(0, set_service(&self->unspec_any0, prot_unspec, 0));
+ self->unspec_any0.ipv4_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(protocol)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
+static void test_bind_and_connect(struct __test_metadata *const _metadata,
+ const struct service_fixture *const srv,
+ const bool deny_bind, const bool deny_connect)
+{
+ char buf = '\0';
+ int inval_fd, bind_fd, client_fd, status, ret;
+ pid_t child;
+
+ /* Starts invalid addrlen tests with bind. */
+ inval_fd = socket_variant(srv);
+ ASSERT_LE(0, inval_fd)
+ {
+ TH_LOG("Failed to create socket: %s", strerror(errno));
+ }
+
+ /* Tries to bind with zero as addrlen. */
+ EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv, 0));
+
+ /* Tries to bind with too small addrlen. */
+ EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv,
+ get_addrlen(srv, true) - 1));
+
+ /* Tries to bind with minimal addrlen. */
+ ret = bind_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+ if (deny_bind) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to bind to socket: %s", strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(inval_fd));
+
+ /* Starts invalid addrlen tests with connect. */
+ inval_fd = socket_variant(srv);
+ ASSERT_LE(0, inval_fd);
+
+ /* Tries to connect with zero as addrlen. */
+ EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv, 0));
+
+ /* Tries to connect with too small addrlen. */
+ EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv,
+ get_addrlen(srv, true) - 1));
+
+ /* Tries to connect with minimal addrlen. */
+ ret = connect_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+ if (srv->protocol.domain == AF_UNIX) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else if (deny_connect) {
+ EXPECT_EQ(-EACCES, ret);
+ } else if (srv->protocol.type == SOCK_STREAM) {
+ /* No listening server, whatever the value of deny_bind. */
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to connect to socket: %s",
+ strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(inval_fd));
+
+ /* Starts connection tests. */
+ bind_fd = socket_variant(srv);
+ ASSERT_LE(0, bind_fd);
+
+ ret = bind_variant(bind_fd, srv);
+ if (deny_bind) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+
+ /* Creates a listening socket. */
+ if (srv->protocol.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int connect_fd, ret;
+
+ /* Closes listening socket for the child. */
+ EXPECT_EQ(0, close(bind_fd));
+
+ /* Starts connection tests. */
+ connect_fd = socket_variant(srv);
+ ASSERT_LE(0, connect_fd);
+ ret = connect_variant(connect_fd, srv);
+ if (deny_connect) {
+ EXPECT_EQ(-EACCES, ret);
+ } else if (deny_bind) {
+ /* No listening server. */
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, write(connect_fd, ".", 1));
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ return;
+ }
+
+ /* Accepts connection from the child. */
+ client_fd = bind_fd;
+ if (!deny_bind && !deny_connect) {
+ if (srv->protocol.type == SOCK_STREAM) {
+ client_fd = accept(bind_fd, NULL, 0);
+ ASSERT_LE(0, client_fd);
+ }
+
+ EXPECT_EQ(1, read(client_fd, &buf, 1));
+ EXPECT_EQ('.', buf);
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Closes connection, if any. */
+ if (client_fd != bind_fd)
+ EXPECT_LE(0, close(client_fd));
+
+ /* Closes listening socket. */
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, bind)
+{
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_connect_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for the first port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ /* Allows connect and denies bind for the second port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_connect_p1, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Binds a socket to the first port. */
+ test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+ /* Binds a socket to the second port. */
+ test_bind_and_connect(_metadata, &self->srv1,
+ is_restricted(&variant->prot, variant->sandbox),
+ false);
+
+ /* Binds a socket to the third port. */
+ test_bind_and_connect(_metadata, &self->srv2,
+ is_restricted(&variant->prot, variant->sandbox),
+ is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, connect)
+{
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_bind_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for the first port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ /* Allows bind and denies connect for the second port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p1, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+ test_bind_and_connect(_metadata, &self->srv1, false,
+ is_restricted(&variant->prot, variant->sandbox));
+
+ test_bind_and_connect(_metadata, &self->srv2,
+ is_restricted(&variant->prot, variant->sandbox),
+ is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, bind_unspec)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int bind_fd, ret;
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ /* Allowed bind on AF_UNSPEC/INADDR_ANY. */
+ ret = bind_variant(bind_fd, &self->unspec_any0);
+ if (variant->prot.domain == AF_INET) {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to bind to unspec/any socket: %s",
+ strerror(errno));
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret);
+ }
+ EXPECT_EQ(0, close(bind_fd));
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Denies bind. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ /* Denied bind on AF_UNSPEC/INADDR_ANY. */
+ ret = bind_variant(bind_fd, &self->unspec_any0);
+ if (variant->prot.domain == AF_INET) {
+ if (is_restricted(&variant->prot, variant->sandbox)) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret);
+ }
+ EXPECT_EQ(0, close(bind_fd));
+
+ /* Checks bind with AF_UNSPEC and the loopback address. */
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+ ret = bind_variant(bind_fd, &self->unspec_srv0);
+ if (variant->prot.domain == AF_INET) {
+ EXPECT_EQ(-EAFNOSUPPORT, ret);
+ } else {
+ EXPECT_EQ(-EINVAL, ret)
+ {
+ TH_LOG("Wrong bind error: %s", strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, connect_unspec)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ int bind_fd, client_fd, status;
+ pid_t child;
+
+ /* Specific connection tests. */
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+ if (self->srv0.protocol.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int connect_fd, ret;
+
+ /* Closes listening socket for the child. */
+ EXPECT_EQ(0, close(bind_fd));
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+ EXPECT_EQ(0, connect_variant(connect_fd, &self->srv0));
+
+ /* Tries to connect again, or set peer. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EISCONN, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &tcp_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Disconnects already connected socket, or set peer. */
+ ret = connect_variant(connect_fd, &self->unspec_any0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ /* Tries to reconnect, or set peer. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EISCONN, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Denies connect. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ ret = connect_variant(connect_fd, &self->unspec_any0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else {
+ /* Always allowed to disconnect. */
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ return;
+ }
+
+ client_fd = bind_fd;
+ if (self->srv0.protocol.type == SOCK_STREAM) {
+ client_fd = accept(bind_fd, NULL, 0);
+ ASSERT_LE(0, client_fd);
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Closes connection, if any. */
+ if (client_fd != bind_fd)
+ EXPECT_LE(0, close(client_fd));
+
+ /* Closes listening socket. */
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+FIXTURE(ipv4)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(ipv4)
+{
+ const enum sandbox_type sandbox;
+ const int type;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_SETUP(ipv4)
+{
+ const struct protocol_variant prot = {
+ .domain = AF_INET,
+ .type = variant->type,
+ };
+
+ disable_caps(_metadata);
+
+ set_service(&self->srv0, prot, 0);
+ set_service(&self->srv1, prot, 1);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4)
+{
+}
+
+TEST_F(ipv4, from_unix_to_inet)
+{
+ int unix_stream_fd, unix_dgram_fd;
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ /* Denies connect and bind to check errno value. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for srv0. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ unix_stream_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, unix_stream_fd);
+
+ unix_dgram_fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, unix_dgram_fd);
+
+ /* Checks unix stream bind and connect for srv0. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv0));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv0));
+
+ /* Checks unix stream bind and connect for srv1. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv1))
+ {
+ TH_LOG("Wrong bind error: %s", strerror(errno));
+ }
+ EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv1));
+
+ /* Checks unix datagram bind and connect for srv0. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv0));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv0));
+
+ /* Checks unix datagram bind and connect for srv1. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv1));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv1));
+}
+
+FIXTURE(tcp_layers)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(tcp_layers)
+{
+ const size_t num_layers;
+ const int domain;
+};
+
+FIXTURE_SETUP(tcp_layers)
+{
+ const struct protocol_variant prot = {
+ .domain = variant->domain,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, prot, 1));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(tcp_layers)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 3,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 3,
+};
+
+TEST_F(tcp_layers, ruleset_overlap)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+
+ if (variant->num_layers >= 1) {
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ /* Also allows bind, but allows connect too. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 2) {
+ int ruleset_fd;
+
+ /* Creates another ruleset layer. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Only allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 3) {
+ int ruleset_fd;
+
+ /* Creates another ruleset layer. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Try to allow bind and connect. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /*
+ * Forbids to connect to the socket because only one ruleset layer
+ * allows connect.
+ */
+ test_bind_and_connect(_metadata, &self->srv0, false,
+ variant->num_layers >= 2);
+}
+
+TEST_F(tcp_layers, ruleset_expand)
+{
+ if (variant->num_layers >= 1) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ /* Allows bind for srv0. */
+ const struct landlock_net_port_attr bind_srv0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_srv0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 2) {
+ /* Expands network mask with connect action. */
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ /* Allows bind for srv0 and connect to srv0. */
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ /* Try to allow bind for srv1. */
+ const struct landlock_net_port_attr tcp_bind_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p1, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 3) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ /* Allows connect to srv0, without bind rule. */
+ const struct landlock_net_port_attr tcp_bind_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ test_bind_and_connect(_metadata, &self->srv0, false,
+ variant->num_layers >= 3);
+
+ test_bind_and_connect(_metadata, &self->srv1, variant->num_layers >= 1,
+ variant->num_layers >= 2);
+}
+
+/* clang-format off */
+FIXTURE(mini) {};
+/* clang-format on */
+
+FIXTURE_SETUP(mini)
+{
+ disable_caps(_metadata);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(mini)
+{
+}
+
+/* clang-format off */
+
+#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_TCP
+
+#define ACCESS_ALL ( \
+ LANDLOCK_ACCESS_NET_BIND_TCP | \
+ LANDLOCK_ACCESS_NET_CONNECT_TCP)
+
+/* clang-format on */
+
+TEST_F(mini, network_access_rights)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = ACCESS_ALL,
+ };
+ struct landlock_net_port_attr net_port = {
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+ net_port.allowed_access = access;
+ EXPECT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0))
+ {
+ TH_LOG("Failed to add rule with access 0x%llx: %s",
+ access, strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+/* Checks invalid attribute, out of landlock network access range. */
+TEST_F(mini, unknown_access_rights)
+{
+ __u64 access_mask;
+
+ for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+ access_mask >>= 1) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = access_mask,
+ };
+
+ EXPECT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0));
+ EXPECT_EQ(EINVAL, errno);
+ }
+}
+
+TEST_F(mini, inval)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP
+ };
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = sock_port_start,
+ };
+ const struct landlock_net_port_attr tcp_denied = {
+ .allowed_access = 0,
+ .port = sock_port_start,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks unhandled allowed_access. */
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Checks zero access value. */
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_denied, 0));
+ EXPECT_EQ(ENOMSG, errno);
+
+ /* Adds with legitimate values. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+}
+
+TEST_F(mini, tcp_port_overflow)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr port_max_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX,
+ };
+ const struct landlock_net_port_attr port_max_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = UINT16_MAX,
+ };
+ const struct landlock_net_port_attr port_overflow1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX + 1,
+ };
+ const struct landlock_net_port_attr port_overflow2 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX + 2,
+ };
+ const struct landlock_net_port_attr port_overflow3 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT32_MAX + 1UL,
+ };
+ const struct landlock_net_port_attr port_overflow4 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT32_MAX + 2UL,
+ };
+ const struct protocol_variant ipv4_tcp = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ };
+ struct service_fixture srv_denied, srv_max_allowed;
+ int ruleset_fd;
+
+ ASSERT_EQ(0, set_service(&srv_denied, ipv4_tcp, 0));
+
+ /* Be careful to avoid port inconsistencies. */
+ srv_max_allowed = srv_denied;
+ srv_max_allowed.port = port_max_bind.port;
+ srv_max_allowed.ipv4_addr.sin_port = htons(port_max_bind.port);
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_max_bind, 0));
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow1, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow2, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow3, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Interleaves with invalid rule additions. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_max_connect, 0));
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow4, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ test_bind_and_connect(_metadata, &srv_denied, true, true);
+ test_bind_and_connect(_metadata, &srv_max_allowed, false, false);
+}
+
+FIXTURE(ipv4_tcp)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_SETUP(ipv4_tcp)
+{
+ const struct protocol_variant ipv4_tcp = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, ipv4_tcp, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, ipv4_tcp, 1));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4_tcp)
+{
+}
+
+TEST_F(ipv4_tcp, port_endianness)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr bind_host_endian_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ /* Host port format. */
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr connect_big_endian_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ /* Big endian port format. */
+ .port = htons(self->srv0.port),
+ };
+ const struct landlock_net_port_attr bind_connect_host_endian_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ /* Host port format. */
+ .port = self->srv1.port,
+ };
+ const unsigned int one = 1;
+ const char little_endian = *(const char *)&one;
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_host_endian_p0, 0));
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &connect_big_endian_p0, 0));
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_connect_host_endian_p1, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* No restriction for big endinan CPU. */
+ test_bind_and_connect(_metadata, &self->srv0, false, little_endian);
+
+ /* No restriction for any CPU. */
+ test_bind_and_connect(_metadata, &self->srv1, false, false);
+}
+
+TEST_F(ipv4_tcp, with_fs)
+{
+ const struct landlock_ruleset_attr ruleset_attr_fs_net = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+ .parent_fd = -1,
+ };
+ struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd, bind_fd, dir_fd;
+
+ /* Creates ruleset both for filesystem and network access. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr_fs_net,
+ sizeof(ruleset_attr_fs_net), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Adds a filesystem rule. */
+ path_beneath.parent_fd = open("/dev", O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ EXPECT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Adds a network rule. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Tests file access. */
+ dir_fd = open("/dev", O_RDONLY);
+ EXPECT_LE(0, dir_fd);
+ EXPECT_EQ(0, close(dir_fd));
+
+ dir_fd = open("/", O_RDONLY);
+ EXPECT_EQ(-1, dir_fd);
+ EXPECT_EQ(EACCES, errno);
+
+ /* Tests port binding. */
+ bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+ EXPECT_EQ(0, close(bind_fd));
+
+ bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(-EACCES, bind_variant(bind_fd, &self->srv1));
+}
+
+FIXTURE(port_specific)
+{
+ struct service_fixture srv0;
+};
+
+FIXTURE_VARIANT(port_specific)
+{
+ const enum sandbox_type sandbox;
+ const struct protocol_variant prot;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+FIXTURE_SETUP(port_specific)
+{
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(port_specific)
+{
+}
+
+TEST_F(port_specific, bind_connect_zero)
+{
+ int bind_fd, connect_fd, ret;
+ uint16_t port;
+
+ /* Adds a rule layer with bind and connect actions. */
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_zero = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 0,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks zero port value on bind and connect actions. */
+ EXPECT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_zero, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 0 for both protocol families. */
+ set_port(&self->srv0, 0);
+ /*
+ * Binds on port 0, which selects a random port within
+ * ip_local_port_range.
+ */
+ ret = bind_variant(bind_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on port 0. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(-ECONNREFUSED, ret);
+
+ /* Sets binded port for both protocol families. */
+ port = get_binded_port(bind_fd, &variant->prot);
+ EXPECT_NE(0, port);
+ set_port(&self->srv0, port);
+ /* Connects on the binded port. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (is_restricted(&variant->prot, variant->sandbox)) {
+ /* Denied by Landlock. */
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(port_specific, bind_connect_1023)
+{
+ int bind_fd, connect_fd, ret;
+
+ /* Adds a rule layer with bind and connect actions. */
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP
+ };
+ /* A rule with port value less than 1024. */
+ const struct landlock_net_port_attr tcp_bind_connect_low_range = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 1023,
+ };
+ /* A rule with 1024 port. */
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 1024,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_low_range, 0));
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 1023 for both protocol families. */
+ set_port(&self->srv0, 1023);
+ /* Binds on port 1023. */
+ ret = bind_variant(bind_fd, &self->srv0);
+ /* Denied by the system. */
+ EXPECT_EQ(-EACCES, ret);
+
+ /* Binds on port 1023. */
+ set_cap(_metadata, CAP_NET_BIND_SERVICE);
+ ret = bind_variant(bind_fd, &self->srv0);
+ clear_cap(_metadata, CAP_NET_BIND_SERVICE);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on the binded port 1023. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 1024 for both protocol families. */
+ set_port(&self->srv0, 1024);
+ /* Binds on port 1024. */
+ ret = bind_variant(bind_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on the binded port 1024. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 5d52f64dfb43..7afe05e8c4d7 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -9,7 +9,6 @@ CONFIG_INIT_ON_FREE_DEFAULT_ON=y
CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
CONFIG_UBSAN=y
CONFIG_UBSAN_BOUNDS=y
-CONFIG_UBSAN_TRAP=y
CONFIG_STACKPROTECTOR_STRONG=y
CONFIG_SLUB_DEBUG=y
CONFIG_SLUB_DEBUG_ON=y
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 607b8d7e3ea3..368973f05250 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -1,4 +1,5 @@
#PANIC
+#PANIC_STOP_IRQOFF Crashes entire system
BUG kernel BUG at
WARNING WARNING:
WARNING_MESSAGE message trigger
@@ -7,7 +8,7 @@ EXCEPTION
#EXHAUST_STACK Corrupts memory on failure
#CORRUPT_STACK Crashes entire system on success
#CORRUPT_STACK_STRONG Crashes entire system on success
-ARRAY_BOUNDS
+ARRAY_BOUNDS call trace:|UBSAN: array-index-out-of-bounds
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
STACK_GUARD_PAGE_LEADING
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index cdc9ce4426b9..4ff10ea61461 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -18,6 +18,8 @@ mremap_dontunmap
mremap_test
on-fault-limit
transhuge-stress
+pagemap_ioctl
+*.tmp*
protection_keys
protection_keys_32
protection_keys_64
@@ -43,3 +45,4 @@ mdwe_test
gup_longterm
mkdirty
va_high_addr_switch
+hugetlb_fault_after_madv
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 6a9fc5693145..78dfec8bc676 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -33,7 +33,7 @@ endif
MAKEFLAGS += --no-builtin-rules
CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
-LDLIBS = -lrt -lpthread
+LDLIBS = -lrt -lpthread -lm
TEST_GEN_FILES = cow
TEST_GEN_FILES += compaction_test
@@ -60,6 +60,7 @@ TEST_GEN_FILES += mrelease_test
TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += mremap_test
TEST_GEN_FILES += on-fault-limit
+TEST_GEN_PROGS += pagemap_ioctl
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += uffd-stress
@@ -68,6 +69,7 @@ TEST_GEN_FILES += split_huge_page_test
TEST_GEN_FILES += ksm_tests
TEST_GEN_FILES += ksm_functional_tests
TEST_GEN_FILES += mdwe_test
+TEST_GEN_FILES += hugetlb_fault_after_madv
ifneq ($(ARCH),arm64)
TEST_GEN_PROGS += soft-dirty
diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config
index be087c4bc396..4309916f629e 100644
--- a/tools/testing/selftests/mm/config
+++ b/tools/testing/selftests/mm/config
@@ -1,5 +1,6 @@
CONFIG_SYSVIPC=y
CONFIG_USERFAULTFD=y
+CONFIG_PTE_MARKER_UFFD_WP=y
CONFIG_TEST_VMALLOC=m
CONFIG_DEVICE_PRIVATE=y
CONFIG_TEST_HMM=m
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index d33d3e68ffab..ad168d35b23b 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -265,10 +265,11 @@ static void run_with_tmpfile(test_fn fn, const char *desc)
fd = fileno(file);
if (fd < 0) {
ksft_test_result_fail("fileno() failed\n");
- return;
+ goto close;
}
fn(fd, pagesize);
+close:
fclose(file);
}
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
index d55322df4b73..f32d99565c5e 100644
--- a/tools/testing/selftests/mm/hugetlb-madvise.c
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -36,25 +36,6 @@
unsigned long huge_page_size;
unsigned long base_page_size;
-unsigned long get_free_hugepages(void)
-{
- unsigned long fhp = 0;
- char *line = NULL;
- size_t linelen = 0;
- FILE *f = fopen("/proc/meminfo", "r");
-
- if (!f)
- return fhp;
- while (getline(&line, &linelen, f) > 0) {
- if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
- break;
- }
-
- free(line);
- fclose(f);
- return fhp;
-}
-
void write_fault_pages(void *addr, unsigned long nr_pages)
{
unsigned long i;
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
new file mode 100644
index 000000000000..73b81c632366
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vm_util.h"
+#include "../kselftest.h"
+
+#define MMAP_SIZE (1 << 21)
+#define INLOOP_ITER 100
+
+char *huge_ptr;
+
+/* Touch the memory while it is being madvised() */
+void *touch(void *unused)
+{
+ char *ptr = (char *)huge_ptr;
+
+ for (int i = 0; i < INLOOP_ITER; i++)
+ ptr[0] = '.';
+
+ return NULL;
+}
+
+void *madv(void *unused)
+{
+ usleep(rand() % 10);
+
+ for (int i = 0; i < INLOOP_ITER; i++)
+ madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+
+ return NULL;
+}
+
+int main(void)
+{
+ unsigned long free_hugepages;
+ pthread_t thread1, thread2;
+ /*
+ * On kernel 6.4, we are able to reproduce the problem with ~1000
+ * interactions
+ */
+ int max = 10000;
+
+ srand(getpid());
+
+ free_hugepages = get_free_hugepages();
+ if (free_hugepages != 1) {
+ ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
+ free_hugepages);
+ }
+
+ while (max--) {
+ huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+
+ if ((unsigned long)huge_ptr == -1)
+ ksft_exit_skip("Failed to allocated huge page\n");
+
+ pthread_create(&thread1, NULL, madv, NULL);
+ pthread_create(&thread2, NULL, touch, NULL);
+
+ pthread_join(thread1, NULL);
+ pthread_join(thread2, NULL);
+ munmap(huge_ptr, MMAP_SIZE);
+ }
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index 901e950f9138..fbff0dd09191 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -26,6 +26,7 @@
#define KiB 1024u
#define MiB (1024 * KiB)
+#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
static int mem_fd;
static int ksm_fd;
@@ -479,6 +480,64 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
+static int ksm_fork_exec_child(void)
+{
+ /* Test if KSM is enabled for the process. */
+ return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1;
+}
+
+static void test_prctl_fork_exec(void)
+{
+ int ret, status;
+ pid_t child_pid;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ return;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ return;
+ }
+
+ child_pid = fork();
+ if (child_pid == -1) {
+ ksft_test_result_skip("fork() failed\n");
+ return;
+ } else if (child_pid == 0) {
+ char *prg_name = "./ksm_functional_tests";
+ char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME };
+
+ execv(prg_name, argv_for_program);
+ return;
+ }
+
+ if (waitpid(child_pid, &status, 0) > 0) {
+ if (WIFEXITED(status)) {
+ status = WEXITSTATUS(status);
+ if (status) {
+ ksft_test_result_fail("KSM not enabled\n");
+ return;
+ }
+ } else {
+ ksft_test_result_fail("program didn't terminate normally\n");
+ return;
+ }
+ } else {
+ ksft_test_result_fail("waitpid() failed\n");
+ return;
+ }
+
+ if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ return;
+ }
+
+ ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
+}
+
static void test_prctl_unmerge(void)
{
const unsigned int size = 2 * MiB;
@@ -536,9 +595,13 @@ unmap:
int main(int argc, char **argv)
{
- unsigned int tests = 7;
+ unsigned int tests = 8;
int err;
+ if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
+ exit(ksm_fork_exec_child() == 1 ? 0 : 1);
+ }
+
#ifdef __NR_userfaultfd
tests++;
#endif
@@ -576,6 +639,7 @@ int main(int argc, char **argv)
test_prctl();
test_prctl_fork();
+ test_prctl_fork_exec();
test_prctl_unmerge();
err = ksft_get_fail_cnt();
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
index bc91bef5d254..200bedcdc32e 100644
--- a/tools/testing/selftests/mm/mdwe_test.c
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -22,15 +22,104 @@
TEST(prctl_flags)
{
+ EXPECT_LT(prctl(PR_SET_MDWE, PR_MDWE_NO_INHERIT, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+
EXPECT_LT(prctl(PR_SET_MDWE, 7L, 0L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
EXPECT_LT(prctl(PR_SET_MDWE, 0L, 7L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 7L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
EXPECT_LT(prctl(PR_GET_MDWE, 7L, 0L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
EXPECT_LT(prctl(PR_GET_MDWE, 0L, 7L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 7L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+FIXTURE(consecutive_prctl_flags) {};
+FIXTURE_SETUP(consecutive_prctl_flags) {}
+FIXTURE_TEARDOWN(consecutive_prctl_flags) {}
+
+FIXTURE_VARIANT(consecutive_prctl_flags)
+{
+ unsigned long first_flags;
+ unsigned long second_flags;
+ bool should_work;
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_no_flags)
+{
+ .first_flags = 0,
+ .second_flags = 0,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_exec_gain)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_both_flags)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = 0,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = 0,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_enable_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .should_work = false,
+};
+
+TEST_F(consecutive_prctl_flags, two_prctls)
+{
+ int ret;
+
+ EXPECT_EQ(prctl(PR_SET_MDWE, variant->first_flags, 0L, 0L, 0L), 0);
+
+ ret = prctl(PR_SET_MDWE, variant->second_flags, 0L, 0L, 0L);
+ if (variant->should_work) {
+ EXPECT_EQ(ret, 0);
+
+ ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
+ ASSERT_EQ(ret, variant->second_flags);
+ } else {
+ EXPECT_NE(ret, 0);
+ ASSERT_EQ(errno, EPERM);
+ }
}
FIXTURE(mdwe)
@@ -45,28 +134,45 @@ FIXTURE_VARIANT(mdwe)
{
bool enabled;
bool forked;
+ bool inherit;
};
FIXTURE_VARIANT_ADD(mdwe, stock)
{
- .enabled = false,
+ .enabled = false,
.forked = false,
+ .inherit = false,
};
FIXTURE_VARIANT_ADD(mdwe, enabled)
{
- .enabled = true,
+ .enabled = true,
.forked = false,
+ .inherit = true,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, inherited)
+{
+ .enabled = true,
+ .forked = true,
+ .inherit = true,
};
-FIXTURE_VARIANT_ADD(mdwe, forked)
+FIXTURE_VARIANT_ADD(mdwe, not_inherited)
{
- .enabled = true,
+ .enabled = true,
.forked = true,
+ .inherit = false,
};
+static bool executable_map_should_fail(const FIXTURE_VARIANT(mdwe) *variant)
+{
+ return variant->enabled && (!variant->forked || variant->inherit);
+}
+
FIXTURE_SETUP(mdwe)
{
+ unsigned long mdwe_flags;
int ret, status;
self->p = NULL;
@@ -76,13 +182,17 @@ FIXTURE_SETUP(mdwe)
if (!variant->enabled)
return;
- ret = prctl(PR_SET_MDWE, PR_MDWE_REFUSE_EXEC_GAIN, 0L, 0L, 0L);
+ mdwe_flags = PR_MDWE_REFUSE_EXEC_GAIN;
+ if (!variant->inherit)
+ mdwe_flags |= PR_MDWE_NO_INHERIT;
+
+ ret = prctl(PR_SET_MDWE, mdwe_flags, 0L, 0L, 0L);
ASSERT_EQ(ret, 0) {
TH_LOG("PR_SET_MDWE failed or unsupported");
}
ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
- ASSERT_EQ(ret, 1);
+ ASSERT_EQ(ret, mdwe_flags);
if (variant->forked) {
self->pid = fork();
@@ -113,7 +223,7 @@ TEST_F(mdwe, mmap_READ_EXEC)
TEST_F(mdwe, mmap_WRITE_EXEC)
{
self->p = mmap(NULL, self->size, PROT_WRITE | PROT_EXEC, self->flags, 0, 0);
- if (variant->enabled) {
+ if (executable_map_should_fail(variant)) {
EXPECT_EQ(self->p, MAP_FAILED);
} else {
EXPECT_NE(self->p, MAP_FAILED);
@@ -139,7 +249,7 @@ TEST_F(mdwe, mprotect_add_EXEC)
ASSERT_NE(self->p, MAP_FAILED);
ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC);
- if (variant->enabled) {
+ if (executable_map_should_fail(variant)) {
EXPECT_LT(ret, 0);
} else {
EXPECT_EQ(ret, 0);
@@ -154,7 +264,7 @@ TEST_F(mdwe, mprotect_WRITE_EXEC)
ASSERT_NE(self->p, MAP_FAILED);
ret = mprotect(self->p, self->size, PROT_WRITE | PROT_EXEC);
- if (variant->enabled) {
+ if (executable_map_should_fail(variant)) {
EXPECT_LT(ret, 0);
} else {
EXPECT_EQ(ret, 0);
@@ -168,13 +278,10 @@ TEST_F(mdwe, mmap_FIXED)
self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0);
ASSERT_NE(self->p, MAP_FAILED);
- p = mmap(self->p + self->size, self->size, PROT_READ | PROT_EXEC,
+ /* MAP_FIXED unmaps the existing page before mapping which is allowed */
+ p = mmap(self->p, self->size, PROT_READ | PROT_EXEC,
self->flags | MAP_FIXED, 0, 0);
- if (variant->enabled) {
- EXPECT_EQ(p, MAP_FAILED);
- } else {
- EXPECT_EQ(p, self->p);
- }
+ EXPECT_EQ(p, self->p);
}
TEST_F(mdwe, arm64_BTI)
diff --git a/tools/testing/selftests/mm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c
index ca2359835e75..a06e73ec8568 100644
--- a/tools/testing/selftests/mm/mremap_dontunmap.c
+++ b/tools/testing/selftests/mm/mremap_dontunmap.c
@@ -7,6 +7,7 @@
*/
#define _GNU_SOURCE
#include <sys/mman.h>
+#include <linux/mman.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 5c3773de9f0f..1d4c1589c305 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -23,12 +23,15 @@
#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
+#define SIZE_KB(k) ((size_t)k * 1024)
struct config {
unsigned long long src_alignment;
unsigned long long dest_alignment;
unsigned long long region_size;
int overlapping;
+ int dest_preamble_size;
};
struct test {
@@ -44,6 +47,7 @@ enum {
_1MB = 1ULL << 20,
_2MB = 2ULL << 20,
_4MB = 4ULL << 20,
+ _5MB = 5ULL << 20,
_1GB = 1ULL << 30,
_2GB = 2ULL << 30,
PMD = _2MB,
@@ -146,6 +150,60 @@ static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
}
/*
+ * Returns the start address of the mapping on success, else returns
+ * NULL on failure.
+ */
+static void *get_source_mapping(struct config c)
+{
+ unsigned long long addr = 0ULL;
+ void *src_addr = NULL;
+ unsigned long long mmap_min_addr;
+
+ mmap_min_addr = get_mmap_min_addr();
+ /*
+ * For some tests, we need to not have any mappings below the
+ * source mapping. Add some headroom to mmap_min_addr for this.
+ */
+ mmap_min_addr += 10 * _4MB;
+
+retry:
+ addr += c.src_alignment;
+ if (addr < mmap_min_addr)
+ goto retry;
+
+ src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+ if (src_addr == MAP_FAILED) {
+ if (errno == EPERM || errno == EEXIST)
+ goto retry;
+ goto error;
+ }
+ /*
+ * Check that the address is aligned to the specified alignment.
+ * Addresses which have alignments that are multiples of that
+ * specified are not considered valid. For instance, 1GB address is
+ * 2MB-aligned, however it will not be considered valid for a
+ * requested alignment of 2MB. This is done to reduce coincidental
+ * alignment in the tests.
+ */
+ if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
+ !((unsigned long long) src_addr & c.src_alignment)) {
+ munmap(src_addr, c.region_size);
+ goto retry;
+ }
+
+ if (!src_addr)
+ goto error;
+
+ return src_addr;
+error:
+ ksft_print_msg("Failed to map source region: %s\n",
+ strerror(errno));
+ return NULL;
+}
+
+/*
* This test validates that merge is called when expanding a mapping.
* Mapping containing three pages is created, middle page is unmapped
* and then the mapping containing the first page is expanded so that
@@ -225,59 +283,83 @@ out:
}
/*
- * Returns the start address of the mapping on success, else returns
- * NULL on failure.
+ * Verify that an mremap within a range does not cause corruption
+ * of unrelated part of range.
+ *
+ * Consider the following range which is 2MB aligned and is
+ * a part of a larger 20MB range which is not shown. Each
+ * character is 256KB below making the source and destination
+ * 2MB each. The lower case letters are moved (s to d) and the
+ * upper case letters are not moved. The below test verifies
+ * that the upper case S letters are not corrupted by the
+ * adjacent mremap.
+ *
+ * |DDDDddddSSSSssss|
*/
-static void *get_source_mapping(struct config c)
+static void mremap_move_within_range(char pattern_seed)
{
- unsigned long long addr = 0ULL;
- void *src_addr = NULL;
- unsigned long long mmap_min_addr;
+ char *test_name = "mremap mremap move within range";
+ void *src, *dest;
+ int i, success = 1;
+
+ size_t size = SIZE_MB(20);
+ void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = 0;
+ goto out;
+ }
+ memset(ptr, 0, size);
- mmap_min_addr = get_mmap_min_addr();
+ src = ptr + SIZE_MB(6);
+ src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1));
-retry:
- addr += c.src_alignment;
- if (addr < mmap_min_addr)
- goto retry;
+ /* Set byte pattern for source block. */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(2); i++) {
+ ((char *)src)[i] = (char) rand();
+ }
- src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
- MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
- -1, 0);
- if (src_addr == MAP_FAILED) {
- if (errno == EPERM || errno == EEXIST)
- goto retry;
- goto error;
+ dest = src - SIZE_MB(2);
+
+ void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+ MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+ if (new_ptr == MAP_FAILED) {
+ perror("mremap");
+ success = 0;
+ goto out;
}
- /*
- * Check that the address is aligned to the specified alignment.
- * Addresses which have alignments that are multiples of that
- * specified are not considered valid. For instance, 1GB address is
- * 2MB-aligned, however it will not be considered valid for a
- * requested alignment of 2MB. This is done to reduce coincidental
- * alignment in the tests.
- */
- if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
- !((unsigned long long) src_addr & c.src_alignment)) {
- munmap(src_addr, c.region_size);
- goto retry;
+
+ /* Verify byte pattern after remapping */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(1); i++) {
+ char c = (char) rand();
+
+ if (((char *)src)[i] != c) {
+ ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+ i);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) src)[i] & 0xff);
+ success = 0;
+ }
}
- if (!src_addr)
- goto error;
+out:
+ if (munmap(ptr, size) == -1)
+ perror("munmap");
- return src_addr;
-error:
- ksft_print_msg("Failed to map source region: %s\n",
- strerror(errno));
- return NULL;
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
}
/* Returns the time taken for the remap on success else returns -1. */
static long long remap_region(struct config c, unsigned int threshold_mb,
char pattern_seed)
{
- void *addr, *src_addr, *dest_addr;
+ void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
unsigned long long i;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
@@ -294,7 +376,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
goto out;
}
- /* Set byte pattern */
+ /* Set byte pattern for source block. */
srand(pattern_seed);
for (i = 0; i < threshold; i++)
memset((char *) src_addr + i, (char) rand(), 1);
@@ -306,6 +388,9 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
addr = (void *) (((unsigned long long) src_addr + c.region_size
+ offset) & align_mask);
+ /* Remap after the destination block preamble. */
+ addr += c.dest_preamble_size;
+
/* See comment in get_source_mapping() */
if (!((unsigned long long) addr & c.dest_alignment))
addr = (void *) ((unsigned long long) addr | c.dest_alignment);
@@ -316,11 +401,29 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
if (addr + c.dest_alignment < addr) {
ksft_print_msg("Couldn't find a valid region to remap to\n");
ret = -1;
- goto out;
+ goto clean_up_src;
}
addr += c.dest_alignment;
}
+ if (c.dest_preamble_size) {
+ dest_preamble_addr = mmap((void *) addr - c.dest_preamble_size, c.dest_preamble_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+ if (dest_preamble_addr == MAP_FAILED) {
+ ksft_print_msg("Failed to map dest preamble region: %s\n",
+ strerror(errno));
+ ret = -1;
+ goto clean_up_src;
+ }
+
+ /* Set byte pattern for the dest preamble block. */
+ srand(pattern_seed);
+ for (i = 0; i < c.dest_preamble_size; i++)
+ memset((char *) dest_preamble_addr + i, (char) rand(), 1);
+ }
+
clock_gettime(CLOCK_MONOTONIC, &t_start);
dest_addr = mremap(src_addr, c.region_size, c.region_size,
MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
@@ -329,7 +432,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
if (dest_addr == MAP_FAILED) {
ksft_print_msg("mremap failed: %s\n", strerror(errno));
ret = -1;
- goto clean_up_src;
+ goto clean_up_dest_preamble;
}
/* Verify byte pattern after remapping */
@@ -338,7 +441,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
char c = (char) rand();
if (((char *) dest_addr)[i] != c) {
- ksft_print_msg("Data after remap doesn't match at offset %d\n",
+ ksft_print_msg("Data after remap doesn't match at offset %llu\n",
i);
ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
((char *) dest_addr)[i] & 0xff);
@@ -347,6 +450,23 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
}
+ /* Verify the dest preamble byte pattern after remapping */
+ if (c.dest_preamble_size) {
+ srand(pattern_seed);
+ for (i = 0; i < c.dest_preamble_size; i++) {
+ char c = (char) rand();
+
+ if (((char *) dest_preamble_addr)[i] != c) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
+ i);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) dest_preamble_addr)[i] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+ }
+
start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
ret = end_ns - start_ns;
@@ -359,12 +479,92 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
*/
clean_up_dest:
munmap(dest_addr, c.region_size);
+clean_up_dest_preamble:
+ if (c.dest_preamble_size && dest_preamble_addr)
+ munmap(dest_preamble_addr, c.dest_preamble_size);
clean_up_src:
munmap(src_addr, c.region_size);
out:
return ret;
}
+/*
+ * Verify that an mremap aligning down does not destroy
+ * the beginning of the mapping just because the aligned
+ * down address landed on a mapping that maybe does not exist.
+ */
+static void mremap_move_1mb_from_start(char pattern_seed)
+{
+ char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
+ void *src = NULL, *dest = NULL;
+ int i, success = 1;
+
+ /* Config to reuse get_source_mapping() to do an aligned mmap. */
+ struct config c = {
+ .src_alignment = SIZE_MB(1) + SIZE_KB(256),
+ .region_size = SIZE_MB(6)
+ };
+
+ src = get_source_mapping(c);
+ if (!src) {
+ success = 0;
+ goto out;
+ }
+
+ c.src_alignment = SIZE_MB(1) + SIZE_KB(256);
+ dest = get_source_mapping(c);
+ if (!dest) {
+ success = 0;
+ goto out;
+ }
+
+ /* Set byte pattern for source block. */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(2); i++) {
+ ((char *)src)[i] = (char) rand();
+ }
+
+ /*
+ * Unmap the beginning of dest so that the aligned address
+ * falls on no mapping.
+ */
+ munmap(dest, SIZE_MB(1));
+
+ void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+ MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+ if (new_ptr == MAP_FAILED) {
+ perror("mremap");
+ success = 0;
+ goto out;
+ }
+
+ /* Verify byte pattern after remapping */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(1); i++) {
+ char c = (char) rand();
+
+ if (((char *)src)[i] != c) {
+ ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+ i);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) src)[i] & 0xff);
+ success = 0;
+ }
+ }
+
+out:
+ if (src && munmap(src, c.region_size) == -1)
+ perror("munmap src");
+
+ if (dest && munmap(dest, c.region_size) == -1)
+ perror("munmap dest");
+
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
static void run_mremap_test_case(struct test test_case, int *failures,
unsigned int threshold_mb,
unsigned int pattern_seed)
@@ -434,7 +634,7 @@ static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
return 0;
}
-#define MAX_TEST 13
+#define MAX_TEST 15
#define MAX_PERF_TEST 3
int main(int argc, char **argv)
{
@@ -443,7 +643,8 @@ int main(int argc, char **argv)
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
unsigned int pattern_seed;
int num_expand_tests = 2;
- struct test test_cases[MAX_TEST];
+ int num_misc_tests = 2;
+ struct test test_cases[MAX_TEST] = {};
struct test perf_test_cases[MAX_PERF_TEST];
int page_size;
time_t t;
@@ -500,6 +701,15 @@ int main(int argc, char **argv)
test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
"2GB mremap - Source PUD-aligned, Destination PUD-aligned");
+ /* Src and Dest addr 1MB aligned. 5MB mremap. */
+ test_cases[13] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "5MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+ /* Src and Dest addr 1MB aligned. 5MB mremap. */
+ test_cases[14] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble");
+ test_cases[14].config.dest_preamble_size = 10 * _4MB;
+
perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
"1GB mremap - Source PTE-aligned, Destination PTE-aligned");
/*
@@ -515,7 +725,7 @@ int main(int argc, char **argv)
(threshold_mb * _1MB >= _1GB);
ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
- ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests);
+ ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests + num_misc_tests);
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
@@ -533,6 +743,9 @@ int main(int argc, char **argv)
fclose(maps_fp);
+ mremap_move_within_range(pattern_seed);
+ mremap_move_1mb_from_start(pattern_seed);
+
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
new file mode 100644
index 000000000000..befab43719ba
--- /dev/null
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -0,0 +1,1661 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <malloc.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+#include <linux/types.h>
+#include <linux/memfd.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <math.h>
+#include <asm/unistd.h>
+#include <pthread.h>
+#include <sys/resource.h>
+#include <assert.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+#define PAGEMAP_BITS_ALL (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
+ PAGE_IS_FILE | PAGE_IS_PRESENT | \
+ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
+ PAGE_IS_HUGE)
+#define PAGEMAP_NON_WRITTEN_BITS (PAGE_IS_WPALLOWED | PAGE_IS_FILE | \
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED | \
+ PAGE_IS_PFNZERO | PAGE_IS_HUGE)
+
+#define TEST_ITERATIONS 100
+#define PAGEMAP "/proc/self/pagemap"
+int pagemap_fd;
+int uffd;
+int page_size;
+int hpage_size;
+
+#define LEN(region) ((region.end - region.start)/page_size)
+
+static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag,
+ int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+ long return_mask)
+{
+ struct pm_scan_arg arg;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + len);
+ arg.vec = (uintptr_t)vec;
+ arg.vec_len = vec_len;
+ arg.flags = flag;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = max_pages;
+ arg.category_mask = required_mask;
+ arg.category_anyof_mask = anyof_mask;
+ arg.category_inverted = excluded_mask;
+ arg.return_mask = return_mask;
+
+ return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+static long pagemap_ioc(void *start, int len, void *vec, int vec_len, int flag,
+ int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+ long return_mask, long *walk_end)
+{
+ struct pm_scan_arg arg;
+ int ret;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + len);
+ arg.vec = (uintptr_t)vec;
+ arg.vec_len = vec_len;
+ arg.flags = flag;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = max_pages;
+ arg.category_mask = required_mask;
+ arg.category_anyof_mask = anyof_mask;
+ arg.category_inverted = excluded_mask;
+ arg.return_mask = return_mask;
+
+ ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+
+ if (walk_end)
+ *walk_end = arg.walk_end;
+
+ return ret;
+}
+
+
+int init_uffd(void)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+ if (uffd == -1)
+ return uffd;
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+ return -1;
+
+ if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
+ return -1;
+
+ return 0;
+}
+
+int wp_init(void *lpBaseAddress, int dwRegionSize)
+{
+ struct uffdio_register uffdio_register;
+ struct uffdio_writeprotect wp;
+
+ uffdio_register.range.start = (unsigned long)lpBaseAddress;
+ uffdio_register.range.len = dwRegionSize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ ksft_exit_fail_msg("ioctl(UFFDIO_REGISTER) %d %s\n", errno, strerror(errno));
+
+ if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT))
+ ksft_exit_fail_msg("ioctl set is incorrect\n");
+
+ wp.range.start = (unsigned long)lpBaseAddress;
+ wp.range.len = dwRegionSize;
+ wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+
+ if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
+ ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n");
+
+ return 0;
+}
+
+int wp_free(void *lpBaseAddress, int dwRegionSize)
+{
+ struct uffdio_register uffdio_register;
+
+ uffdio_register.range.start = (unsigned long)lpBaseAddress;
+ uffdio_register.range.len = dwRegionSize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
+ ksft_exit_fail_msg("ioctl unregister failure\n");
+ return 0;
+}
+
+int wp_addr_range(void *lpBaseAddress, int dwRegionSize)
+{
+ if (pagemap_ioctl(lpBaseAddress, dwRegionSize, NULL, 0,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", 1, errno, strerror(errno));
+
+ return 0;
+}
+
+void *gethugetlb_mem(int size, int *shmid)
+{
+ char *mem;
+
+ if (shmid) {
+ *shmid = shmget(2, size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+ if (*shmid < 0)
+ return NULL;
+
+ mem = shmat(*shmid, 0, 0);
+ if (mem == (char *)-1) {
+ shmctl(*shmid, IPC_RMID, NULL);
+ ksft_exit_fail_msg("Shared memory attach failure\n");
+ }
+ } else {
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_HUGETLB | MAP_PRIVATE, -1, 0);
+ if (mem == MAP_FAILED)
+ return NULL;
+ }
+
+ return mem;
+}
+
+int userfaultfd_tests(void)
+{
+ int mem_size, vec_size, written, num_pages = 16;
+ char *mem, *vec;
+
+ mem_size = num_pages * page_size;
+ mem = mmap(NULL, mem_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+
+ /* Change protection of pages differently */
+ mprotect(mem, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 1 * mem_size/8, mem_size/8, PROT_READ);
+ mprotect(mem + 2 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 3 * mem_size/8, mem_size/8, PROT_READ);
+ mprotect(mem + 4 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 5 * mem_size/8, mem_size/8, PROT_NONE);
+ mprotect(mem + 6 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 7 * mem_size/8, mem_size/8, PROT_READ);
+
+ wp_addr_range(mem + (mem_size/16), mem_size - 2 * (mem_size/8));
+ wp_addr_range(mem, mem_size);
+
+ vec_size = mem_size/page_size;
+ vec = malloc(sizeof(struct page_region) * vec_size);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+ free(vec);
+ return 0;
+}
+
+int get_reads(struct page_region *vec, int vec_size)
+{
+ int i, sum = 0;
+
+ for (i = 0; i < vec_size; i++)
+ sum += LEN(vec[i]);
+
+ return sum;
+}
+
+int sanity_tests_sd(void)
+{
+ int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0;
+ int total_writes, total_reads, reads, count;
+ struct page_region *vec, *vec2;
+ char *mem, *m[2];
+ long walk_end;
+
+ vec_size = num_pages/2;
+ mem_size = num_pages * page_size;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ vec2 = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec2)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ /* 1. wrong operation */
+ ksft_test_result(pagemap_ioctl(mem, 0, vec, vec_size, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s Zero range size is valid\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, NULL, vec_size, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) < 0,
+ "%s output buffer must be specified with size\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s output buffer can be 0\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, 0, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s output buffer can be 0\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, -1,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+ "%s wrong flag specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC | 0xFF,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+ "%s flag has extra bits specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, 0, 0, 0, PAGE_IS_WRITTEN) >= 0,
+ "%s no selection mask is specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, PAGE_IS_WRITTEN, 0, 0) == 0,
+ "%s no return mask is specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, 0x1000) < 0,
+ "%s wrong return mask specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, 0xFFF, PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN) < 0,
+ "%s mixture of correct and wrong flag\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, 0, 0, PAGEMAP_BITS_ALL, PAGE_IS_WRITTEN) >= 0,
+ "%s PAGEMAP_BITS_ALL can be specified with PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n",
+ __func__);
+
+ /* 2. Clear area with larger vec size */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ ksft_test_result(ret >= 0, "%s Clear area with larger vec size\n", __func__);
+
+ /* 3. Repeated pattern of written and non-written pages */
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0,
+ 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == mem_size/(page_size * 2),
+ "%s Repeated pattern of written and non-written pages\n", __func__);
+
+ /* 4. Repeated pattern of written and non-written pages in parts */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret3 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno));
+
+ ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2,
+ "%s Repeated pattern of written and non-written pages in parts %d %d %d\n",
+ __func__, ret, ret3, ret2);
+
+ /* 5. Repeated pattern of written and non-written pages max_pages */
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+ mem[(mem_size/page_size - 1) * page_size]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret == num_pages/2 && ret2 == 1,
+ "%s Repeated pattern of written and non-written pages max_pages\n",
+ __func__);
+
+ /* 6. only get 2 dirty pages and clear them as well */
+ vec_size = mem_size/page_size;
+ memset(mem, -1, mem_size);
+
+ /* get and clear second and third pages */
+ ret = pagemap_ioctl(mem + page_size, 2 * page_size, vec, 1,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == 2 &&
+ vec[0].start == (uintptr_t)(mem + page_size) &&
+ ret2 == 2 && LEN(vec2[0]) == 1 && vec2[0].start == (uintptr_t)mem &&
+ LEN(vec2[1]) == vec_size - 3 &&
+ vec2[1].start == (uintptr_t)(mem + 3 * page_size),
+ "%s only get 2 written pages and clear them as well\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 7. Two regions */
+ m[0] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (m[0] == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ m[1] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (m[1] == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(m[0], mem_size);
+ wp_init(m[1], mem_size);
+ wp_addr_range(m[0], mem_size);
+ wp_addr_range(m[1], mem_size);
+
+ memset(m[0], 'a', mem_size);
+ memset(m[1], 'b', mem_size);
+
+ wp_addr_range(m[0], mem_size);
+
+ ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size,
+ "%s Two regions\n", __func__);
+
+ wp_free(m[0], mem_size);
+ wp_free(m[1], mem_size);
+ munmap(m[0], mem_size);
+ munmap(m[1], mem_size);
+
+ free(vec);
+ free(vec2);
+
+ /* 8. Smaller vec */
+ mem_size = 1050 * page_size;
+ vec_size = mem_size/(page_size*2);
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ for (i = 0; i < mem_size/page_size; i += 2)
+ mem[i * page_size]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ksft_test_result(total_pages == mem_size/(page_size*2), "%s Smaller max_pages\n", __func__);
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+ total_pages = 0;
+
+ /* 9. Smaller vec */
+ mem_size = 10000 * page_size;
+ vec_size = 50;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ for (count = 0; count < TEST_ITERATIONS; count++) {
+ total_writes = total_reads = 0;
+ walk_end = (long)mem;
+
+ for (i = 0; i < mem_size; i += page_size) {
+ if (rand() % 2) {
+ mem[i]++;
+ total_writes++;
+ }
+ }
+
+ while (total_reads < total_writes) {
+ ret = pagemap_ioc((void *)walk_end, mem_size-(walk_end - (long)mem), vec,
+ vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ if (ret > vec_size)
+ break;
+
+ reads = get_reads(vec, ret);
+ total_reads += reads;
+ }
+
+ if (total_reads != total_writes)
+ break;
+ }
+
+ ksft_test_result(count == TEST_ITERATIONS, "Smaller vec\n");
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 10. Walk_end tester */
+ vec_size = 1000;
+ mem_size = vec_size * page_size;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioc(mem, 0, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end address\n");
+
+ ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end with WP\n");
+
+ ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end with 0 output buffer\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: Big vec\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: vec of minimum length\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2),
+ "Walk_end: Half max pages\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size),
+ "Walk_end: 1 max page\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: max pages\n");
+
+ wp_addr_range(mem, mem_size);
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Big vec\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end sparse: vec of minimum length\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_endsparse : Half max pages\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end: 1 max page\n");
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ return 0;
+}
+
+int base_tests(char *prefix, char *mem, int mem_size, int skip)
+{
+ int vec_size, written;
+ struct page_region *vec, *vec2;
+
+ if (skip) {
+ ksft_test_result_skip("%s all new pages must not be written (dirty)\n", prefix);
+ ksft_test_result_skip("%s all pages must be written (dirty)\n", prefix);
+ ksft_test_result_skip("%s all pages dirty other than first and the last one\n",
+ prefix);
+ ksft_test_result_skip("%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+ ksft_test_result_skip("%s only middle page dirty\n", prefix);
+ ksft_test_result_skip("%s only two middle pages dirty\n", prefix);
+ return 0;
+ }
+
+ vec_size = mem_size/page_size;
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ vec2 = malloc(sizeof(struct page_region) * vec_size);
+
+ /* 1. all new pages must be not be written (dirty) */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", prefix);
+
+ /* 2. all pages must be written */
+ memset(mem, -1, mem_size);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) == mem_size/page_size,
+ "%s all pages must be written (dirty)\n", prefix);
+
+ /* 3. all pages dirty other than first and the last one */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ memset(mem + page_size, 0, mem_size - (2 * page_size));
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) >= vec_size - 2 && LEN(vec[0]) <= vec_size,
+ "%s all pages dirty other than first and the last one\n", prefix);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0,
+ "%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+
+ /* 4. only middle page dirty */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ mem[vec_size/2 * page_size]++;
+
+ written = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) >= 1,
+ "%s only middle page dirty\n", prefix);
+
+ /* 5. only two middle pages dirty and walk over only middle pages */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ mem[vec_size/2 * page_size]++;
+ mem[(vec_size/2 + 1) * page_size]++;
+
+ written = pagemap_ioctl(&mem[vec_size/2 * page_size], 2 * page_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && vec[0].start == (uintptr_t)(&mem[vec_size/2 * page_size])
+ && LEN(vec[0]) == 2,
+ "%s only two middle pages dirty\n", prefix);
+
+ free(vec);
+ free(vec2);
+ return 0;
+}
+
+void *gethugepage(int map_size)
+{
+ int ret;
+ char *map;
+
+ map = memalign(hpage_size, map_size);
+ if (!map)
+ ksft_exit_fail_msg("memalign failed %d %s\n", errno, strerror(errno));
+
+ ret = madvise(map, map_size, MADV_HUGEPAGE);
+ if (ret)
+ return NULL;
+
+ memset(map, 0, map_size);
+
+ return map;
+}
+
+int hpage_unit_tests(void)
+{
+ char *map;
+ int ret, ret2;
+ size_t num_pages = 10;
+ int map_size = hpage_size * num_pages;
+ int vec_size = map_size/page_size;
+ struct page_region *vec, *vec2;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ vec2 = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec || !vec2)
+ ksft_exit_fail_msg("malloc failed\n");
+
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ /* 1. all new huge page must not be written (dirty) */
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 0, "%s all new huge page must not be written (dirty)\n",
+ __func__);
+
+ /* 2. all the huge page must not be written */
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 0, "%s all the huge page must not be written\n", __func__);
+
+ /* 3. all the huge page must be written and clear dirty as well */
+ memset(map, -1, map_size);
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && vec[0].start == (uintptr_t)map &&
+ LEN(vec[0]) == vec_size && vec[0].categories == PAGE_IS_WRITTEN,
+ "%s all the huge page must be written and clear\n", __func__);
+
+ /* 4. only middle page written */
+ wp_free(map, map_size);
+ free(map);
+ map = gethugepage(map_size);
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+ map[vec_size/2 * page_size]++;
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) > 0,
+ "%s only middle page written\n", __func__);
+
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s all new huge page must be written\n", __func__);
+ ksft_test_result_skip("%s all the huge page must not be written\n", __func__);
+ ksft_test_result_skip("%s all the huge page must be written and clear\n", __func__);
+ ksft_test_result_skip("%s only middle page written\n", __func__);
+ }
+
+ /* 5. clear first half of huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, 0, map_size);
+
+ wp_addr_range(map, map_size/2);
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+ vec[0].start == (uintptr_t)(map + map_size/2),
+ "%s clear first half of huge page\n", __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear first half of huge page\n", __func__);
+ }
+
+ /* 6. clear first half of huge page with limited buffer */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, 0, map_size);
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+ vec[0].start == (uintptr_t)(map + map_size/2),
+ "%s clear first half of huge page with limited buffer\n",
+ __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear first half of huge page with limited buffer\n",
+ __func__);
+ }
+
+ /* 7. clear second half of huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, -1, map_size);
+
+ ret = pagemap_ioctl(map + map_size/2, map_size/2, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size/2,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2,
+ "%s clear second half huge page\n", __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear second half huge page\n", __func__);
+ }
+
+ /* 8. get half huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, -1, map_size);
+ usleep(100);
+
+ ret = pagemap_ioctl(map, map_size, vec, 1,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ hpage_size/(2*page_size), PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == hpage_size/(2*page_size),
+ "%s get half huge page\n", __func__);
+
+ ret2 = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret2 == 1 && LEN(vec[0]) == (map_size - hpage_size/2)/page_size,
+ "%s get half huge page\n", __func__);
+
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s get half huge page\n", __func__);
+ ksft_test_result_skip("%s get half huge page\n", __func__);
+ }
+
+ free(vec);
+ free(vec2);
+ return 0;
+}
+
+int unmapped_region_tests(void)
+{
+ void *start = (void *)0x10000000;
+ int written, len = 0x00040000;
+ int vec_size = len / page_size;
+ struct page_region *vec = malloc(sizeof(struct page_region) * vec_size);
+
+ /* 1. Get written pages */
+ written = pagemap_ioctl(start, len, vec, vec_size, 0, 0,
+ PAGEMAP_NON_WRITTEN_BITS, 0, 0, PAGEMAP_NON_WRITTEN_BITS);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written >= 0, "%s Get status of pages\n", __func__);
+
+ free(vec);
+ return 0;
+}
+
+static void test_simple(void)
+{
+ int i;
+ char *map;
+ struct page_region vec;
+
+ map = aligned_alloc(page_size, page_size);
+ if (!map)
+ ksft_exit_fail_msg("aligned_alloc failed\n");
+
+ wp_init(map, page_size);
+ wp_addr_range(map, page_size);
+
+ for (i = 0 ; i < TEST_ITERATIONS; i++) {
+ if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 1) {
+ ksft_print_msg("written bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ wp_addr_range(map, page_size);
+ /* Write something to the page to get the written bit enabled on the page */
+ map[0]++;
+
+ if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0) {
+ ksft_print_msg("written bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+
+ wp_addr_range(map, page_size);
+ }
+ wp_free(map, page_size);
+ free(map);
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+int sanity_tests(void)
+{
+ int mem_size, vec_size, ret, fd, i, buf_size;
+ struct page_region *vec;
+ char *mem, *fmem;
+ struct stat sbuf;
+ char *tmp_buf;
+
+ /* 1. wrong operation */
+ mem_size = 10 * page_size;
+ vec_size = mem_size / page_size;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED || vec == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s WP op can be specified with !PAGE_IS_WRITTEN\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s required_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s anyof_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL) >= 0,
+ "%s excluded_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL, 0,
+ PAGEMAP_BITS_ALL) >= 0,
+ "%s required_mask and anyof_mask specified\n", __func__);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 2. Get sd and present pages with anyof_mask */
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get sd and present pages with anyof_mask\n", __func__);
+
+ /* 3. Get sd and present pages with required_mask */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get all the pages with required_mask\n", __func__);
+
+ /* 4. Get sd and present pages with required_mask and anyof_mask */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, PAGE_IS_PRESENT, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get sd and present pages with required_mask and anyof_mask\n",
+ __func__);
+
+ /* 5. Don't get sd pages */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret == 0, "%s Don't get sd pages\n", __func__);
+
+ /* 6. Don't get present pages */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_PRESENT, 0, PAGE_IS_PRESENT, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret == 0, "%s Don't get present pages\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 8. Find written present pages with return mask */
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGE_IS_WRITTEN);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ vec[0].categories == PAGE_IS_WRITTEN,
+ "%s Find written present pages with return mask\n", __func__);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 9. Memory mapped file */
+ fd = open(__FILE__, O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
+
+ ret = stat(__FILE__, &sbuf);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ tmp_buf = malloc(sbuf.st_size);
+ memcpy(tmp_buf, fmem, sbuf.st_size);
+
+ ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS);
+
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+ LEN(vec[0]) == ceilf((float)sbuf.st_size/page_size) &&
+ (vec[0].categories & PAGE_IS_FILE),
+ "%s Memory mapped file\n", __func__);
+
+ munmap(fmem, sbuf.st_size);
+ close(fd);
+
+ /* 10. Create and read/write to a memory mapped file */
+ buf_size = page_size * 10;
+
+ fd = open(__FILE__".tmp2", O_RDWR | O_CREAT, 0666);
+ if (fd < 0)
+ ksft_exit_fail_msg("Read/write to memory: %s\n",
+ strerror(errno));
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, buf_size);
+ wp_addr_range(fmem, buf_size);
+
+ for (i = 0; i < buf_size; i++)
+ fmem[i] = 'z';
+
+ msync(fmem, buf_size, MS_SYNC);
+
+ ret = pagemap_ioctl(fmem, buf_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_FILE, 0,
+ PAGEMAP_BITS_ALL);
+
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+ LEN(vec[0]) == (buf_size/page_size) &&
+ (vec[0].categories & PAGE_IS_WRITTEN),
+ "%s Read/write to memory\n", __func__);
+
+ wp_free(fmem, buf_size);
+ munmap(fmem, buf_size);
+ close(fd);
+
+ free(vec);
+ return 0;
+}
+
+int mprotect_tests(void)
+{
+ int ret;
+ char *mem, *mem2;
+ struct page_region vec;
+ int pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+
+ if (pagemap_fd < 0) {
+ fprintf(stderr, "open() failed\n");
+ exit(1);
+ }
+
+ /* 1. Map two pages */
+ mem = mmap(0, 2 * page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, 2 * page_size);
+ wp_addr_range(mem, 2 * page_size);
+
+ /* Populate both pages. */
+ memset(mem, 1, 2 * page_size);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2, "%s Both pages written\n", __func__);
+
+ /* 2. Start tracking */
+ wp_addr_range(mem, 2 * page_size);
+
+ ksft_test_result(pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0,
+ "%s Both pages are not written (dirty)\n", __func__);
+
+ /* 3. Remap the second page */
+ mem2 = mmap(mem + page_size, page_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0);
+ if (mem2 == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem2, page_size);
+ wp_addr_range(mem2, page_size);
+
+ /* Protect + unprotect. */
+ mprotect(mem, page_size, PROT_NONE);
+ mprotect(mem, 2 * page_size, PROT_READ);
+ mprotect(mem, 2 * page_size, PROT_READ|PROT_WRITE);
+
+ /* Modify both pages. */
+ memset(mem, 2, 2 * page_size);
+
+ /* Protect + unprotect. */
+ mprotect(mem, page_size, PROT_NONE);
+ mprotect(mem, page_size, PROT_READ);
+ mprotect(mem, page_size, PROT_READ|PROT_WRITE);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2,
+ "%s Both pages written after remap and mprotect\n", __func__);
+
+ /* 4. Clear and make the pages written */
+ wp_addr_range(mem, 2 * page_size);
+
+ memset(mem, 'A', 2 * page_size);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2,
+ "%s Clear and make the pages written\n", __func__);
+
+ wp_free(mem, 2 * page_size);
+ munmap(mem, 2 * page_size);
+ return 0;
+}
+
+/* transact test */
+static const unsigned int nthreads = 6, pages_per_thread = 32, access_per_thread = 8;
+static pthread_barrier_t start_barrier, end_barrier;
+static unsigned int extra_thread_faults;
+static unsigned int iter_count = 1000;
+static volatile int finish;
+
+static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
+ int reset, int page_size)
+{
+ struct pm_scan_arg arg = {0};
+ struct page_region rgns[256];
+ int i, j, cnt, ret;
+
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.start = (uintptr_t)mem;
+ arg.max_pages = count;
+ arg.end = (uintptr_t)(mem + count * page_size);
+ arg.vec = (uintptr_t)rgns;
+ arg.vec_len = sizeof(rgns) / sizeof(*rgns);
+ if (reset)
+ arg.flags |= PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC;
+ arg.category_mask = PAGE_IS_WRITTEN;
+ arg.return_mask = PAGE_IS_WRITTEN;
+
+ ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+ if (ret < 0)
+ ksft_exit_fail_msg("ioctl failed\n");
+
+ cnt = 0;
+ for (i = 0; i < ret; ++i) {
+ if (rgns[i].categories != PAGE_IS_WRITTEN)
+ ksft_exit_fail_msg("wrong flags\n");
+
+ for (j = 0; j < LEN(rgns[i]); ++j)
+ cnt++;
+ }
+
+ return cnt;
+}
+
+void *thread_proc(void *mem)
+{
+ int *m = mem;
+ long curr_faults, faults;
+ struct rusage r;
+ unsigned int i;
+ int ret;
+
+ if (getrusage(RUSAGE_THREAD, &r))
+ ksft_exit_fail_msg("getrusage\n");
+
+ curr_faults = r.ru_minflt;
+
+ while (!finish) {
+ ret = pthread_barrier_wait(&start_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ for (i = 0; i < access_per_thread; ++i)
+ __atomic_add_fetch(m + i * (0x1000 / sizeof(*m)), 1, __ATOMIC_SEQ_CST);
+
+ ret = pthread_barrier_wait(&end_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ if (getrusage(RUSAGE_THREAD, &r))
+ ksft_exit_fail_msg("getrusage\n");
+
+ faults = r.ru_minflt - curr_faults;
+ if (faults < access_per_thread)
+ ksft_exit_fail_msg("faults < access_per_thread");
+
+ __atomic_add_fetch(&extra_thread_faults, faults - access_per_thread,
+ __ATOMIC_SEQ_CST);
+ curr_faults = r.ru_minflt;
+ }
+
+ return NULL;
+}
+
+static void transact_test(int page_size)
+{
+ unsigned int i, count, extra_pages;
+ pthread_t th;
+ char *mem;
+ int ret, c;
+
+ if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1))
+ ksft_exit_fail_msg("pthread_barrier_init\n");
+
+ if (pthread_barrier_init(&end_barrier, NULL, nthreads + 1))
+ ksft_exit_fail_msg("pthread_barrier_init\n");
+
+ mem = mmap(NULL, 0x1000 * nthreads * pages_per_thread, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("Error mmap %s.\n", strerror(errno));
+
+ wp_init(mem, 0x1000 * nthreads * pages_per_thread);
+ wp_addr_range(mem, 0x1000 * nthreads * pages_per_thread);
+
+ memset(mem, 0, 0x1000 * nthreads * pages_per_thread);
+
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ ksft_test_result(count > 0, "%s count %d\n", __func__, count);
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ ksft_test_result(count == 0, "%s count %d\n", __func__, count);
+
+ finish = 0;
+ for (i = 0; i < nthreads; ++i)
+ pthread_create(&th, NULL, thread_proc, mem + 0x1000 * i * pages_per_thread);
+
+ extra_pages = 0;
+ for (i = 0; i < iter_count; ++i) {
+ count = 0;
+
+ ret = pthread_barrier_wait(&start_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1,
+ page_size);
+
+ ret = pthread_barrier_wait(&end_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ if (count > nthreads * access_per_thread)
+ ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n",
+ count, nthreads * access_per_thread, i);
+
+ c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ count += c;
+
+ if (c > nthreads * access_per_thread) {
+ ksft_test_result_fail(" %s count > nthreads\n", __func__);
+ return;
+ }
+
+ if (count != nthreads * access_per_thread) {
+ /*
+ * The purpose of the test is to make sure that no page updates are lost
+ * when the page updates and read-resetting soft dirty flags are performed
+ * in parallel. However, it is possible that the application will get the
+ * soft dirty flags twice on the two consecutive read-resets. This seems
+ * unavoidable as soft dirty flag is handled in software through page faults
+ * in kernel. While the updating the flags is supposed to be synchronized
+ * between page fault handling and read-reset, it is possible that
+ * read-reset happens after page fault PTE update but before the application
+ * re-executes write instruction. So read-reset gets the flag, clears write
+ * access and application gets page fault again for the same write.
+ */
+ if (count < nthreads * access_per_thread) {
+ ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count,
+ nthreads * access_per_thread);
+ return;
+ }
+
+ extra_pages += count - nthreads * access_per_thread;
+ }
+ }
+
+ pthread_barrier_wait(&start_barrier);
+ finish = 1;
+ pthread_barrier_wait(&end_barrier);
+
+ ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__,
+ extra_pages,
+ 100.0 * extra_pages / (iter_count * nthreads * access_per_thread),
+ extra_thread_faults);
+}
+
+int main(void)
+{
+ int mem_size, shmid, buf_size, fd, i, ret;
+ char *mem, *map, *fmem;
+ struct stat sbuf;
+
+ ksft_print_header();
+
+ if (init_uffd())
+ return ksft_exit_pass();
+
+ ksft_set_plan(115);
+
+ page_size = getpagesize();
+ hpage_size = read_pmd_pagesize();
+
+ pagemap_fd = open(PAGEMAP, O_RDONLY);
+ if (pagemap_fd < 0)
+ return -EINVAL;
+
+ /* 1. Sanity testing */
+ sanity_tests_sd();
+
+ /* 2. Normal page testing */
+ mem_size = 10 * page_size;
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Page testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 3. Large page testing */
+ mem_size = 512 * 10 * page_size;
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Large Page testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 4. Huge page testing */
+ map = gethugepage(hpage_size);
+ if (map) {
+ wp_init(map, hpage_size);
+ wp_addr_range(map, hpage_size);
+ base_tests("Huge page testing:", map, hpage_size, 0);
+ wp_free(map, hpage_size);
+ free(map);
+ } else {
+ base_tests("Huge page testing:", NULL, 0, 1);
+ }
+
+ /* 5. SHM Hugetlb page testing */
+ mem_size = 2*1024*1024;
+ mem = gethugetlb_mem(mem_size, &shmid);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ shmctl(shmid, IPC_RMID, NULL);
+ } else {
+ base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+ }
+
+ /* 6. Hugetlb page testing */
+ mem = gethugetlb_mem(mem_size, NULL);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb mem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ } else {
+ base_tests("Hugetlb mem testing:", NULL, 0, 1);
+ }
+
+ /* 7. File Hugetlb testing */
+ mem_size = 2*1024*1024;
+ fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL);
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ shmctl(shmid, IPC_RMID, NULL);
+ } else {
+ base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+ }
+ close(fd);
+
+ /* 8. File memory testing */
+ buf_size = page_size * 10;
+
+ fd = open(__FILE__".tmp0", O_RDWR | O_CREAT, 0777);
+ if (fd < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+ strerror(errno));
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ ret = stat(__FILE__".tmp0", &sbuf);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, sbuf.st_size);
+ wp_addr_range(fmem, sbuf.st_size);
+
+ base_tests("File memory testing:", fmem, sbuf.st_size, 0);
+
+ wp_free(fmem, sbuf.st_size);
+ munmap(fmem, sbuf.st_size);
+ close(fd);
+
+ /* 9. File memory testing */
+ buf_size = page_size * 10;
+
+ fd = memfd_create(__FILE__".tmp00", MFD_NOEXEC_SEAL);
+ if (fd < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+ strerror(errno));
+
+ if (ftruncate(fd, buf_size))
+ ksft_exit_fail_msg("Error ftruncate\n");
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, buf_size);
+ wp_addr_range(fmem, buf_size);
+
+ base_tests("File anonymous memory testing:", fmem, buf_size, 0);
+
+ wp_free(fmem, buf_size);
+ munmap(fmem, buf_size);
+ close(fd);
+
+ /* 10. Huge page tests */
+ hpage_unit_tests();
+
+ /* 11. Iterative test */
+ test_simple();
+
+ /* 12. Mprotect test */
+ mprotect_tests();
+
+ /* 13. Transact test */
+ transact_test(page_size);
+
+ /* 14. Sanity testing */
+ sanity_tests();
+
+ /*15. Unmapped address test */
+ unmapped_region_tests();
+
+ /* 16. Userfaultfd tests */
+ userfaultfd_tests();
+
+ close(pagemap_fd);
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 92f3be3dd8e5..1af3156a9db8 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -34,7 +34,7 @@ extern int test_nr;
extern int iteration_nr;
#ifdef __GNUC__
-__attribute__((format(printf, 1, 2)))
+__printf(1, 2)
#endif
static inline void sigsafe_printf(const char *format, ...)
{
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 3e2bc818d566..00757445278e 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -56,6 +56,8 @@ separated by spaces:
memory protection key tests
- soft_dirty
test soft dirty page bit semantics
+- pagemap
+ test pagemap_scan IOCTL
- cow
test copy-on-write semantics
- thp
@@ -221,6 +223,13 @@ CATEGORY="hugetlb" run_test ./hugepage-mremap
CATEGORY="hugetlb" run_test ./hugepage-vmemmap
CATEGORY="hugetlb" run_test ./hugetlb-madvise
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
+# For this test, we need one and just one huge page
+echo 1 > /proc/sys/vm/nr_hugepages
+CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+
if test_selected "hugetlb"; then
echo "NOTE: These hugetlb tests provide minimal coverage. Use"
echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
@@ -303,6 +312,7 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
+echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope
CATEGORY="memfd_secret" run_test ./memfd_secret
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
@@ -342,6 +352,8 @@ then
CATEGORY="soft_dirty" run_test ./soft-dirty
fi
+CATEGORY="pagemap" run_test ./pagemap_ioctl
+
# COW tests
CATEGORY="cow" run_test ./cow
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
index 558c9cd8901c..3082b40492dd 100644
--- a/tools/testing/selftests/mm/vm_util.c
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -269,3 +269,22 @@ int uffd_unregister(int uffd, void *addr, uint64_t len)
return ret;
}
+
+unsigned long get_free_hugepages(void)
+{
+ unsigned long fhp = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return fhp;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
+ break;
+ }
+
+ free(line);
+ fclose(f);
+ return fhp;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
index c7fa61f0dff8..c02990bbd56f 100644
--- a/tools/testing/selftests/mm/vm_util.h
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -51,6 +51,7 @@ int uffd_register(int uffd, void *addr, uint64_t len,
int uffd_unregister(int uffd, void *addr, uint64_t len);
int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
bool miss, bool wp, bool minor, uint64_t *ioctls);
+unsigned long get_free_hugepages(void);
/*
* On ppc64 this will only work with radix 2M hugepage size
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 61939a695f95..9274edfb76ff 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -34,6 +34,7 @@ TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
+TEST_PROGS += netns-name.sh
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -90,6 +91,7 @@ TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
TEST_FILES := settings
@@ -99,6 +101,7 @@ $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
+$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
# Rules to generate bpf obj nat6to4.o
CLANG ?= clang
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24b21b15ed3f..8d7575389f58 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -45,11 +45,13 @@ struct options {
const char *host;
const char *service;
unsigned int size;
+ unsigned int num_pkt;
struct {
unsigned int mark;
unsigned int dontfrag;
unsigned int tclass;
unsigned int hlimit;
+ unsigned int priority;
} sockopt;
struct {
unsigned int family;
@@ -72,6 +74,7 @@ struct options {
} v6;
} opt = {
.size = 13,
+ .num_pkt = 1,
.sock = {
.family = AF_UNSPEC,
.type = SOCK_DGRAM,
@@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[])
{
int o;
- while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
+ while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
switch (o) {
case 's':
opt.silent_send = true;
@@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[])
cs_usage(argv[0]);
}
break;
-
+ case 'P':
+ opt.sockopt.priority = atoi(optarg);
+ break;
case 'm':
opt.mark.ena = true;
opt.mark.val = atoi(optarg);
@@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[])
case 'M':
opt.sockopt.mark = atoi(optarg);
break;
+ case 'n':
+ opt.num_pkt = atoi(optarg);
+ break;
case 'd':
opt.txtime.ena = true;
opt.txtime.delay = atoi(optarg);
@@ -410,6 +418,10 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
&opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+ if (opt.sockopt.priority &&
+ setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+ &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+ error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
}
int main(int argc, char *argv[])
@@ -421,6 +433,7 @@ int main(int argc, char *argv[])
char *buf;
int err;
int fd;
+ int i;
cs_parse_args(argc, argv);
@@ -480,24 +493,27 @@ int main(int argc, char *argv[])
cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
- err = sendmsg(fd, &msg, 0);
- if (err < 0) {
- if (!opt.silent_send)
- fprintf(stderr, "send failed: %s\n", strerror(errno));
- err = ERN_SEND;
- goto err_out;
- } else if (err != (int)opt.size) {
- fprintf(stderr, "short send\n");
- err = ERN_SEND_SHORT;
- goto err_out;
- } else {
- err = ERN_SUCCESS;
+ for (i = 0; i < opt.num_pkt; i++) {
+ err = sendmsg(fd, &msg, 0);
+ if (err < 0) {
+ if (!opt.silent_send)
+ fprintf(stderr, "send failed: %s\n", strerror(errno));
+ err = ERN_SEND;
+ goto err_out;
+ } else if (err != (int)opt.size) {
+ fprintf(stderr, "short send\n");
+ err = ERN_SEND_SHORT;
+ goto err_out;
+ }
}
+ err = ERN_SUCCESS;
- /* Make sure all timestamps have time to loop back */
- usleep(opt.txtime.delay);
+ if (opt.ts.ena) {
+ /* Make sure all timestamps have time to loop back */
+ usleep(opt.txtime.delay);
- cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ }
err_out:
close(fd);
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index e7d2a530618a..66d0db7a2614 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -2437,6 +2437,9 @@ ipv4_mpath_list_test()
run_cmd "ip -n ns2 route add 203.0.113.0/24
nexthop via 172.16.201.2 nexthop via 172.16.202.2"
run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+ run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
+ run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
+ run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
set +e
local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2449,7 +2452,7 @@ ipv4_mpath_list_test()
# words, the FIB lookup tracepoint needs to be triggered for every
# packet.
local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
- run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
list_rcv_eval $tmp_file $diff
@@ -2494,7 +2497,7 @@ ipv6_mpath_list_test()
# words, the FIB lookup tracepoint needs to be triggered for every
# packet.
local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
- run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
list_rcv_eval $tmp_file $diff
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d0c6c499d5da..e4e3e9405056 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -145,14 +145,14 @@ cfg_test_host_common()
# Check basic add, replace and delete behavior.
bridge mdb add dev br0 port br0 grp $grp $state vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp"
+ bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
check_err $? "Failed to add $name host entry"
bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
check_fail $? "Managed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp"
+ bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
check_fail $? "Failed to delete $name host entry"
# Check error cases.
@@ -200,7 +200,7 @@ cfg_test_port_common()
# Check basic add, replace and delete behavior.
bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_err $? "Failed to add $name entry"
bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
@@ -208,31 +208,31 @@ cfg_test_port_common()
check_err $? "Failed to replace $name entry"
bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_fail $? "Failed to delete $name entry"
# Check default protocol and replacement.
bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "static"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "static"
check_err $? "$name entry not added with default \"static\" protocol"
bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
proto 123
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "123"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "123"
check_err $? "Failed to replace protocol of $name entry"
bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
# Check behavior when VLAN is not specified.
bridge mdb add dev br0 port $swp1 $grp_key permanent
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_err $? "$name entry with VLAN 10 not added when VLAN was not specified"
- bridge mdb show dev br0 vid 20 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
check_err $? "$name entry with VLAN 20 not added when VLAN was not specified"
bridge mdb del dev br0 port $swp1 $grp_key permanent
- bridge mdb show dev br0 vid 10 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified"
- bridge mdb show dev br0 vid 20 | grep -q "$grp_key"
+ bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified"
# Check behavior when bridge port is down.
@@ -298,21 +298,21 @@ __cfg_test_port_ip_star_g()
RET=0
bridge mdb add dev br0 port $swp1 grp $grp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "exclude"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
check_err $? "Default filter mode is not \"exclude\""
bridge mdb del dev br0 port $swp1 grp $grp vid 10
# Check basic add and delete behavior.
bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src"
+ bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
check_err $? "(*, G) entry not created"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_err $? "(S, G) entry not created"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q -v "src"
+ bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
check_fail $? "(*, G) entry not deleted"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_fail $? "(S, G) entry not deleted"
## State (permanent / temp) tests.
@@ -321,18 +321,15 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
check_err $? "(*, G) entry not added as \"permanent\" when should"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | \
grep -q "permanent"
check_err $? "(S, G) entry not added as \"permanent\" when should"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) \"permanent\" entry has a pending group timer"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
check_err $? "\"permanent\" source entry has a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -342,18 +339,14 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
check_err $? "\"blocked\" source entry has a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -363,18 +356,14 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode include source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
check_err $? "(S, G) entry not added as \"temp\" when should"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) INCLUDE entry has a pending group timer"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
check_fail $? "Source entry does not have a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -383,8 +372,7 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode include source_list $src1
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 grp $grp src $src1 vid 10 | grep -q " 0.00"
check_err $? "(S, G) entry has a pending group timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -396,11 +384,9 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
filter_mode include source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "include"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
check_err $? "(*, G) INCLUDE not added with \"include\" filter mode"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_fail $? "(S, G) entry marked as \"blocked\" when should not"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -410,11 +396,9 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "exclude"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_err $? "(S, G) entry not marked as \"blocked\" when should"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -426,11 +410,9 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
filter_mode exclude source_list $src1 proto zebra
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "zebra"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "zebra"
check_err $? "(*, G) entry not added with \"zebra\" protocol"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "zebra"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "zebra"
check_err $? "(S, G) entry not marked added with \"zebra\" protocol"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -443,20 +425,16 @@ __cfg_test_port_ip_star_g()
bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
check_err $? "(*, G) entry not marked as \"permanent\" after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "permanent"
check_err $? "(S, G) entry not marked as \"permanent\" after replace"
bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
check_err $? "(*, G) entry not marked as \"temp\" after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
check_err $? "(S, G) entry not marked as \"temp\" after replace"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -467,20 +445,16 @@ __cfg_test_port_ip_star_g()
bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode include source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "include"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
check_err $? "(*, G) not marked with \"include\" filter mode after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_fail $? "(S, G) marked as \"blocked\" after replace"
bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "exclude"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
check_err $? "(*, G) not marked with \"exclude\" filter mode after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "blocked"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
check_err $? "(S, G) not marked as \"blocked\" after replace"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -491,20 +465,20 @@ __cfg_test_port_ip_star_g()
bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1,$src2,$src3
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src1 not created after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2"
+ bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src2 not created after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3"
+ bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src3 not created after replace"
bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1,$src3
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src1"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src1 not created after second replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src2"
+ bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
check_fail $? "(S, G) entry for source $src2 created after second replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -q "src $src3"
+ bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
check_err $? "(S, G) entry for source $src3 not created after second replace"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -515,11 +489,9 @@ __cfg_test_port_ip_star_g()
bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
filter_mode exclude source_list $src1 proto bgp
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep -v "src" | \
- grep -q "bgp"
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "bgp"
check_err $? "(*, G) protocol not changed to \"bgp\" after replace"
- bridge -d mdb show dev br0 vid 10 | grep "$grp" | grep "src" | \
- grep -q "bgp"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "bgp"
check_err $? "(S, G) protocol not changed to \"bgp\" after replace"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -532,8 +504,8 @@ __cfg_test_port_ip_star_g()
bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
filter_mode include source_list $src1
bridge mdb add dev br0 port $swp1 grp $grp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$swp1" | grep "$grp" | \
- grep "$src1" | grep -q "added_by_star_ex"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep "$swp1" | \
+ grep -q "added_by_star_ex"
check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10
@@ -606,27 +578,23 @@ __cfg_test_port_ip_sg()
RET=0
bridge mdb add dev br0 port $swp1 $grp_key vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | grep -q "include"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "include"
check_err $? "Default filter mode is not \"include\""
bridge mdb del dev br0 port $swp1 $grp_key vid 10
# Check that entries can be added as both permanent and temp and that
# group timer is set correctly.
bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
check_err $? "Entry not added as \"permanent\" when should"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_err $? "\"permanent\" entry has a pending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
bridge mdb add dev br0 port $swp1 $grp_key temp vid 10
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
check_err $? "Entry not added as \"temp\" when should"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_fail $? "\"temp\" entry has an unpending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -650,24 +618,19 @@ __cfg_test_port_ip_sg()
# Check that we can replace available attributes.
bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123
bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "111"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "111"
check_err $? "Failed to replace protocol"
bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "permanent"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
check_err $? "Entry not marked as \"permanent\" after replace"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_err $? "Entry has a pending group timer after replace"
bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
- bridge -d mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "temp"
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
check_err $? "Entry not marked as \"temp\" after replace"
- bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q " 0.00"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
check_fail $? "Entry has an unpending group timer after replace"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -675,7 +638,7 @@ __cfg_test_port_ip_sg()
# (*, G) ports need to be added to it.
bridge mdb add dev br0 port $swp2 grp $grp vid 10
bridge mdb add dev br0 port $swp1 $grp_key vid 10
- bridge mdb show dev br0 vid 10 | grep "$grp_key" | grep $swp2 | \
+ bridge mdb get dev br0 $grp_key vid 10 | grep $swp2 | \
grep -q "added_by_star_ex"
check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -1132,7 +1095,7 @@ ctrl_igmpv3_is_in_test()
$MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
- bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
+ bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
check_fail $? "Permanent entry affected by IGMP packet"
# Replace the permanent entry with a temporary one and check that after
@@ -1145,12 +1108,10 @@ ctrl_igmpv3_is_in_test()
$MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
- bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
- grep -q 192.0.2.2
+ bridge -d mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q 192.0.2.2
check_err $? "Source not add to source list"
- bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | \
- grep -q "src 192.0.2.2"
+ bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
check_err $? "(S, G) entry not created for new source"
bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
@@ -1172,8 +1133,7 @@ ctrl_mldv2_is_in_test()
$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
- bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
- grep -q 2001:db8:1::2
+ bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
check_fail $? "Permanent entry affected by MLD packet"
# Replace the permanent entry with a temporary one and check that after
@@ -1186,12 +1146,10 @@ ctrl_mldv2_is_in_test()
$MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
- bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
- grep -q 2001:db8:1::2
+ bridge -d mdb get dev br0 grp ff0e::1 vid 10 | grep -q 2001:db8:1::2
check_err $? "Source not add to source list"
- bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
- grep -q "src 2001:db8:1::2"
+ bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
check_err $? "(S, G) entry not created for new source"
bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10
@@ -1208,8 +1166,8 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
-if ! bridge mdb help 2>&1 | grep -q "replace"; then
- echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+if ! bridge mdb help 2>&1 | grep -q "get"; then
+ echo "SKIP: iproute2 too old, missing bridge mdb get support"
exit $ksft_skip
fi
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 000000000000..24b77bdf41ff
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a 100ms delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+ ./in_netns.sh "$0" __subprocess
+ exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for the delay period (100ms) and see that
+# twenty are now sent.
+sleep 0.1
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values
+echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 154287740172..76e604e4810e 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -36,6 +36,8 @@
#include <sys/un.h>
#include <sys/wait.h>
+#include <io_uring/mini_liburing.h>
+
#define NOTIF_TAG 0xfffffffULL
#define NONZC_TAG 0
#define ZC_TAG 1
@@ -60,272 +62,6 @@ static struct sockaddr_storage cfg_dst_addr;
static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
-struct io_sq_ring {
- unsigned *head;
- unsigned *tail;
- unsigned *ring_mask;
- unsigned *ring_entries;
- unsigned *flags;
- unsigned *array;
-};
-
-struct io_cq_ring {
- unsigned *head;
- unsigned *tail;
- unsigned *ring_mask;
- unsigned *ring_entries;
- struct io_uring_cqe *cqes;
-};
-
-struct io_uring_sq {
- unsigned *khead;
- unsigned *ktail;
- unsigned *kring_mask;
- unsigned *kring_entries;
- unsigned *kflags;
- unsigned *kdropped;
- unsigned *array;
- struct io_uring_sqe *sqes;
-
- unsigned sqe_head;
- unsigned sqe_tail;
-
- size_t ring_sz;
-};
-
-struct io_uring_cq {
- unsigned *khead;
- unsigned *ktail;
- unsigned *kring_mask;
- unsigned *kring_entries;
- unsigned *koverflow;
- struct io_uring_cqe *cqes;
-
- size_t ring_sz;
-};
-
-struct io_uring {
- struct io_uring_sq sq;
- struct io_uring_cq cq;
- int ring_fd;
-};
-
-#ifdef __alpha__
-# ifndef __NR_io_uring_setup
-# define __NR_io_uring_setup 535
-# endif
-# ifndef __NR_io_uring_enter
-# define __NR_io_uring_enter 536
-# endif
-# ifndef __NR_io_uring_register
-# define __NR_io_uring_register 537
-# endif
-#else /* !__alpha__ */
-# ifndef __NR_io_uring_setup
-# define __NR_io_uring_setup 425
-# endif
-# ifndef __NR_io_uring_enter
-# define __NR_io_uring_enter 426
-# endif
-# ifndef __NR_io_uring_register
-# define __NR_io_uring_register 427
-# endif
-#endif
-
-#if defined(__x86_64) || defined(__i386__)
-#define read_barrier() __asm__ __volatile__("":::"memory")
-#define write_barrier() __asm__ __volatile__("":::"memory")
-#else
-
-#define read_barrier() __sync_synchronize()
-#define write_barrier() __sync_synchronize()
-#endif
-
-static int io_uring_setup(unsigned int entries, struct io_uring_params *p)
-{
- return syscall(__NR_io_uring_setup, entries, p);
-}
-
-static int io_uring_enter(int fd, unsigned int to_submit,
- unsigned int min_complete,
- unsigned int flags, sigset_t *sig)
-{
- return syscall(__NR_io_uring_enter, fd, to_submit, min_complete,
- flags, sig, _NSIG / 8);
-}
-
-static int io_uring_register_buffers(struct io_uring *ring,
- const struct iovec *iovecs,
- unsigned nr_iovecs)
-{
- int ret;
-
- ret = syscall(__NR_io_uring_register, ring->ring_fd,
- IORING_REGISTER_BUFFERS, iovecs, nr_iovecs);
- return (ret < 0) ? -errno : ret;
-}
-
-static int io_uring_mmap(int fd, struct io_uring_params *p,
- struct io_uring_sq *sq, struct io_uring_cq *cq)
-{
- size_t size;
- void *ptr;
- int ret;
-
- sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
- ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
- if (ptr == MAP_FAILED)
- return -errno;
- sq->khead = ptr + p->sq_off.head;
- sq->ktail = ptr + p->sq_off.tail;
- sq->kring_mask = ptr + p->sq_off.ring_mask;
- sq->kring_entries = ptr + p->sq_off.ring_entries;
- sq->kflags = ptr + p->sq_off.flags;
- sq->kdropped = ptr + p->sq_off.dropped;
- sq->array = ptr + p->sq_off.array;
-
- size = p->sq_entries * sizeof(struct io_uring_sqe);
- sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
- if (sq->sqes == MAP_FAILED) {
- ret = -errno;
-err:
- munmap(sq->khead, sq->ring_sz);
- return ret;
- }
-
- cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
- ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
- if (ptr == MAP_FAILED) {
- ret = -errno;
- munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
- goto err;
- }
- cq->khead = ptr + p->cq_off.head;
- cq->ktail = ptr + p->cq_off.tail;
- cq->kring_mask = ptr + p->cq_off.ring_mask;
- cq->kring_entries = ptr + p->cq_off.ring_entries;
- cq->koverflow = ptr + p->cq_off.overflow;
- cq->cqes = ptr + p->cq_off.cqes;
- return 0;
-}
-
-static int io_uring_queue_init(unsigned entries, struct io_uring *ring,
- unsigned flags)
-{
- struct io_uring_params p;
- int fd, ret;
-
- memset(ring, 0, sizeof(*ring));
- memset(&p, 0, sizeof(p));
- p.flags = flags;
-
- fd = io_uring_setup(entries, &p);
- if (fd < 0)
- return fd;
- ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq);
- if (!ret)
- ring->ring_fd = fd;
- else
- close(fd);
- return ret;
-}
-
-static int io_uring_submit(struct io_uring *ring)
-{
- struct io_uring_sq *sq = &ring->sq;
- const unsigned mask = *sq->kring_mask;
- unsigned ktail, submitted, to_submit;
- int ret;
-
- read_barrier();
- if (*sq->khead != *sq->ktail) {
- submitted = *sq->kring_entries;
- goto submit;
- }
- if (sq->sqe_head == sq->sqe_tail)
- return 0;
-
- ktail = *sq->ktail;
- to_submit = sq->sqe_tail - sq->sqe_head;
- for (submitted = 0; submitted < to_submit; submitted++) {
- read_barrier();
- sq->array[ktail++ & mask] = sq->sqe_head++ & mask;
- }
- if (!submitted)
- return 0;
-
- if (*sq->ktail != ktail) {
- write_barrier();
- *sq->ktail = ktail;
- write_barrier();
- }
-submit:
- ret = io_uring_enter(ring->ring_fd, submitted, 0,
- IORING_ENTER_GETEVENTS, NULL);
- return ret < 0 ? -errno : ret;
-}
-
-static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
- const void *buf, size_t len, int flags)
-{
- memset(sqe, 0, sizeof(*sqe));
- sqe->opcode = (__u8) IORING_OP_SEND;
- sqe->fd = sockfd;
- sqe->addr = (unsigned long) buf;
- sqe->len = len;
- sqe->msg_flags = (__u32) flags;
-}
-
-static inline void io_uring_prep_sendzc(struct io_uring_sqe *sqe, int sockfd,
- const void *buf, size_t len, int flags,
- unsigned zc_flags)
-{
- io_uring_prep_send(sqe, sockfd, buf, len, flags);
- sqe->opcode = (__u8) IORING_OP_SEND_ZC;
- sqe->ioprio = zc_flags;
-}
-
-static struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
-{
- struct io_uring_sq *sq = &ring->sq;
-
- if (sq->sqe_tail + 1 - sq->sqe_head > *sq->kring_entries)
- return NULL;
- return &sq->sqes[sq->sqe_tail++ & *sq->kring_mask];
-}
-
-static int io_uring_wait_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr)
-{
- struct io_uring_cq *cq = &ring->cq;
- const unsigned mask = *cq->kring_mask;
- unsigned head = *cq->khead;
- int ret;
-
- *cqe_ptr = NULL;
- do {
- read_barrier();
- if (head != *cq->ktail) {
- *cqe_ptr = &cq->cqes[head & mask];
- break;
- }
- ret = io_uring_enter(ring->ring_fd, 0, 1,
- IORING_ENTER_GETEVENTS, NULL);
- if (ret < 0)
- return -errno;
- } while (1);
-
- return 0;
-}
-
-static inline void io_uring_cqe_seen(struct io_uring *ring)
-{
- *(&ring->cq)->khead += 1;
- write_barrier();
-}
-
static unsigned long gettimeofday_ms(void)
{
struct timeval tv;
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 85a8ee9395b3..95b498efacd1 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -182,23 +182,6 @@ chk_msk_inuse()
__chk_nr get_msk_inuse $expected "$msg" 0
}
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex i
-
- port_hex="$(printf "%04X" "${port}")"
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
wait_connected()
{
local listener_ns="${1}"
@@ -222,7 +205,7 @@ echo "a" | \
ip netns exec $ns \
./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
0.0.0.0 >/dev/null &
-wait_local_port_listen $ns 10000
+mptcp_lib_wait_local_port_listen $ns 10000
chk_msk_nr 0 "no msk on netns creation"
chk_msk_listen 10000
@@ -245,7 +228,7 @@ echo "a" | \
ip netns exec $ns \
./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
0.0.0.0 >/dev/null &
-wait_local_port_listen $ns 10001
+mptcp_lib_wait_local_port_listen $ns 10001
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
@@ -266,7 +249,7 @@ for I in `seq 1 $NR_CLIENTS`; do
./mptcp_connect -p $((I+10001)) -l -w 20 \
-t ${timeout_poll} 0.0.0.0 >/dev/null &
done
-wait_local_port_listen $ns $((NR_CLIENTS + 10001))
+mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001))
for I in `seq 1 $NR_CLIENTS`; do
echo "b" | \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 61a2a1988ce6..7898d62fce0b 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -254,31 +254,6 @@ else
set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
fi
-print_file_err()
-{
- ls -l "$1" 1>&2
- echo "Trailing bytes are: "
- tail -c 27 "$1"
-}
-
-check_transfer()
-{
- local in=$1
- local out=$2
- local what=$3
-
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
- echo "[ FAIL ] $what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
-
- return 1
- fi
-
- return 0
-}
-
check_mptcp_disabled()
{
local disabled_ns="ns_disabled-$rndh"
@@ -310,12 +285,6 @@ check_mptcp_disabled()
return 0
}
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
do_ping()
{
local listener_ns="$1"
@@ -324,7 +293,7 @@ do_ping()
local ping_args="-q -c 1"
local rc=0
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
$ipv6 || return 0
ping_args="${ping_args} -6"
fi
@@ -341,38 +310,6 @@ do_ping()
return 0
}
-# $1: ns, $2: MIB counter
-get_mib_counter()
-{
- local listener_ns="${1}"
- local mib="${2}"
-
- # strip the header
- ip netns exec "${listener_ns}" \
- nstat -z -a "${mib}" | \
- tail -n+2 | \
- while read a count c rest; do
- echo $count
- done
-}
-
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex i
-
- port_hex="$(printf "%04X" "${port}")"
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
do_transfer()
{
local listener_ns="$1"
@@ -441,12 +378,12 @@ do_transfer()
nstat -n
fi
- local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ local stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ local stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ local stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ local stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ local stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -454,7 +391,7 @@ do_transfer()
$extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
- wait_local_port_listen "${listener_ns}" "${port}"
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
@@ -504,16 +441,16 @@ do_transfer()
return 1
fi
- check_transfer $sin $cout "file received by client"
+ mptcp_lib_check_transfer $sin $cout "file received by client"
retc=$?
- check_transfer $cin $sout "file received by server"
+ mptcp_lib_check_transfer $cin $sout "file received by server"
rets=$?
- local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ local stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ local stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ local stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ local stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ local stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -542,8 +479,8 @@ do_transfer()
fi
if $checksum; then
- local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
if [ $csum_err_s_nr -gt 0 ]; then
@@ -613,9 +550,8 @@ make_file()
ksize=$((SIZE / 1024))
rem=$((SIZE - (ksize * 1024)))
- dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
- dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $ksize
+ dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
@@ -635,12 +571,12 @@ run_tests_lo()
fi
# skip if we don't want v6
- if ! $ipv6 && is_v6 "${connect_addr}"; then
+ if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
return 0
fi
local local_addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
local_addr="::"
else
local_addr="0.0.0.0"
@@ -708,7 +644,7 @@ run_test_transparent()
TEST_GROUP="${msg}"
# skip if we don't want v6
- if ! $ipv6 && is_v6 "${connect_addr}"; then
+ if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
return 0
fi
@@ -716,7 +652,7 @@ run_test_transparent()
# the required infrastructure in MPTCP sockopt code. To support TOS, the
# following function has been exported (T). Not great but better than
# checking for a specific kernel version.
- if ! mptcp_lib_kallsyms_has "T ip_sock_set_tos$"; then
+ if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
echo "INFO: ${msg} not supported by the kernel: SKIP"
mptcp_lib_result_skip "${TEST_GROUP}"
return
@@ -741,7 +677,7 @@ EOF
fi
local local_addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
local_addr="::"
r6flag="-6"
else
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index ee1f89a872b3..8362ea454af3 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -511,13 +511,6 @@ get_failed_tests_ids()
done | sort -n
}
-print_file_err()
-{
- ls -l "$1" 1>&2
- echo -n "Trailing bytes are: "
- tail -c 27 "$1"
-}
-
check_transfer()
{
local in=$1
@@ -548,8 +541,8 @@ check_transfer()
local sum=$((0${a} + 0${b}))
if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
fail_test "$what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
+ mptcp_lib_print_file_err "$in"
+ mptcp_lib_print_file_err "$out"
return 1
else
@@ -587,49 +580,9 @@ link_failure()
done
}
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex
- port_hex="$(printf "%04X" "${port}")"
-
- local i
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
-# $1: ns ; $2: counter
-get_counter()
-{
- local ns="${1}"
- local counter="${2}"
- local count
-
- count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}')
- if [ -z "${count}" ]; then
- mptcp_lib_fail_if_expected_feature "${counter} counter"
- return 1
- fi
-
- echo "${count}"
-}
-
rm_addr_count()
{
- get_counter "${1}" "MPTcpExtRmAddr"
+ mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr"
}
# $1: ns, $2: old rm_addr counter in $ns
@@ -649,7 +602,7 @@ wait_rm_addr()
rm_sf_count()
{
- get_counter "${1}" "MPTcpExtRmSubflow"
+ mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow"
}
# $1: ns, $2: old rm_sf counter in $ns
@@ -672,26 +625,20 @@ wait_mpj()
local ns="${1}"
local cnt old_cnt
- old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
local i
for i in $(seq 10); do
- cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
done
}
-kill_wait()
-{
- kill $1 > /dev/null 2>&1
- wait $1 2>/dev/null
-}
-
kill_events_pids()
{
- kill_wait $evts_ns1_pid
- kill_wait $evts_ns2_pid
+ mptcp_lib_kill_wait $evts_ns1_pid
+ mptcp_lib_kill_wait $evts_ns2_pid
}
kill_tests_wait()
@@ -901,7 +848,7 @@ pm_nl_set_endpoint()
local id=10
while [ $add_nr_ns1 -gt 0 ]; do
local addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
addr="dead:beef:$counter::1"
else
addr="10.0.$counter.1"
@@ -953,7 +900,7 @@ pm_nl_set_endpoint()
local id=20
while [ $add_nr_ns2 -gt 0 ]; do
local addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
addr="dead:beef:$counter::2"
else
addr="10.0.$counter.2"
@@ -995,7 +942,7 @@ pm_nl_set_endpoint()
pm_nl_flush_endpoint ${connector_ns}
elif [ $rm_nr_ns2 -eq 9 ]; then
local addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
addr="dead:beef:1::2"
else
addr="10.0.1.2"
@@ -1117,7 +1064,7 @@ do_transfer()
fi
local spid=$!
- wait_local_port_listen "${listener_ns}" "${port}"
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
extra_cl_args="$extra_args $extra_cl_args"
if [ "$test_linkfail" -eq 0 ];then
@@ -1199,8 +1146,7 @@ make_file()
local who=$2
local size=$3
- dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $size
print_info "Test file (size $size KB) for $who"
}
@@ -1284,7 +1230,7 @@ chk_csum_nr()
fi
print_check "sum"
- count=$(get_counter ${ns1} "MPTcpExtDataCsumErr")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns1" ]; then
extra_msg="$extra_msg ns1=$count"
fi
@@ -1297,7 +1243,7 @@ chk_csum_nr()
print_ok
fi
print_check "csum"
- count=$(get_counter ${ns2} "MPTcpExtDataCsumErr")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns2" ]; then
extra_msg="$extra_msg ns2=$count"
fi
@@ -1341,7 +1287,7 @@ chk_fail_nr()
fi
print_check "ftx"
- count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx")
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
if [ "$count" != "$fail_tx" ]; then
extra_msg="$extra_msg,tx=$count"
fi
@@ -1355,7 +1301,7 @@ chk_fail_nr()
fi
print_check "failrx"
- count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx")
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
if [ "$count" != "$fail_rx" ]; then
extra_msg="$extra_msg,rx=$count"
fi
@@ -1388,7 +1334,7 @@ chk_fclose_nr()
fi
print_check "ctx"
- count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
@@ -1399,7 +1345,7 @@ chk_fclose_nr()
fi
print_check "fclzrx"
- count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
@@ -1429,20 +1375,24 @@ chk_rst_nr()
fi
print_check "rtx"
- count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx")
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
if [ -z "$count" ]; then
print_skip
- elif [ $count -lt $rst_tx ]; then
+ # accept more rst than expected except if we don't expect any
+ elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } ||
+ { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then
fail_test "got $count MP_RST[s] TX expected $rst_tx"
else
print_ok
fi
print_check "rstrx"
- count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx")
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
if [ -z "$count" ]; then
print_skip
- elif [ "$count" -lt "$rst_rx" ]; then
+ # accept more rst than expected except if we don't expect any
+ elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } ||
+ { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then
fail_test "got $count MP_RST[s] RX expected $rst_rx"
else
print_ok
@@ -1458,7 +1408,7 @@ chk_infi_nr()
local count
print_check "itx"
- count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$infi_tx" ]; then
@@ -1468,7 +1418,7 @@ chk_infi_nr()
fi
print_check "infirx"
- count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$infi_rx" ]; then
@@ -1497,7 +1447,7 @@ chk_join_nr()
fi
print_check "syn"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_nr" ]; then
@@ -1508,7 +1458,7 @@ chk_join_nr()
print_check "synack"
with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
- count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1525,7 +1475,7 @@ chk_join_nr()
fi
print_check "ack"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$ack_nr" ]; then
@@ -1558,8 +1508,8 @@ chk_stale_nr()
print_check "stale"
- stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale")
- recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover")
+ stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale")
+ recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover")
if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then
print_skip
elif [ $stale_nr -lt $stale_min ] ||
@@ -1596,7 +1546,7 @@ chk_add_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
print_check "add"
- count=$(get_counter ${ns2} "MPTcpExtAddAddr")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
@@ -1608,7 +1558,7 @@ chk_add_nr()
fi
print_check "echo"
- count=$(get_counter ${ns1} "MPTcpExtEchoAdd")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$echo_nr" ]; then
@@ -1619,7 +1569,7 @@ chk_add_nr()
if [ $port_nr -gt 0 ]; then
print_check "pt"
- count=$(get_counter ${ns2} "MPTcpExtPortAdd")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$port_nr" ]; then
@@ -1629,7 +1579,7 @@ chk_add_nr()
fi
print_check "syn"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_nr" ]; then
@@ -1640,7 +1590,7 @@ chk_add_nr()
fi
print_check "synack"
- count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1651,7 +1601,7 @@ chk_add_nr()
fi
print_check "ack"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$ack_nr" ]; then
@@ -1662,7 +1612,7 @@ chk_add_nr()
fi
print_check "syn"
- count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_syn_nr" ]; then
@@ -1673,7 +1623,7 @@ chk_add_nr()
fi
print_check "ack"
- count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_ack_nr" ]; then
@@ -1695,7 +1645,7 @@ chk_add_tx_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
print_check "add TX"
- count=$(get_counter ${ns1} "MPTcpExtAddAddrTx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
@@ -1707,7 +1657,7 @@ chk_add_tx_nr()
fi
print_check "echo TX"
- count=$(get_counter ${ns2} "MPTcpExtEchoAddTx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$echo_tx_nr" ]; then
@@ -1745,7 +1695,7 @@ chk_rm_nr()
fi
print_check "rm"
- count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
+ count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$rm_addr_nr" ]; then
@@ -1755,18 +1705,21 @@ chk_rm_nr()
fi
print_check "rmsf"
- count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow")
+ count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
if [ -z "$count" ]; then
print_skip
elif [ -n "$simult" ]; then
local cnt suffix
- cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow")
+ cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow")
# in case of simult flush, the subflow removal count on each side is
# unreliable
count=$((count + cnt))
- [ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+ if [ "$count" != "$rm_subflow_nr" ]; then
+ suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+ extra_msg="$extra_msg simult"
+ fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
print_ok "$suffix"
@@ -1787,7 +1740,7 @@ chk_rm_tx_nr()
local rm_addr_tx_nr=$1
print_check "rm TX"
- count=$(get_counter ${ns2} "MPTcpExtRmAddrTx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$rm_addr_tx_nr" ]; then
@@ -1804,7 +1757,7 @@ chk_prio_nr()
local count
print_check "ptx"
- count=$(get_counter ${ns1} "MPTcpExtMPPrioTx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mp_prio_nr_tx" ]; then
@@ -1814,7 +1767,7 @@ chk_prio_nr()
fi
print_check "prx"
- count=$(get_counter ${ns1} "MPTcpExtMPPrioRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mp_prio_nr_rx" ]; then
@@ -1860,12 +1813,10 @@ chk_mptcp_info()
local cnt2
local dump_stats
- print_check "mptcp_info ${info1:0:8}=$exp1:$exp2"
+ print_check "mptcp_info ${info1:0:15}=$exp1:$exp2"
- cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" |
- sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q')
- cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" |
- sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q')
+ cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1")
+ cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2")
# 'ss' only display active connections and counters that are not 0.
[ -z "$cnt1" ] && cnt1=0
[ -z "$cnt2" ] && cnt2=0
@@ -1883,6 +1834,42 @@ chk_mptcp_info()
fi
}
+# $1: subflows in ns1 ; $2: subflows in ns2
+# number of all subflows, including the initial subflow.
+chk_subflows_total()
+{
+ local cnt1
+ local cnt2
+ local info="subflows_total"
+ local dump_stats
+
+ # if subflows_total counter is supported, use it:
+ if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then
+ chk_mptcp_info $info $1 $info $2
+ return
+ fi
+
+ print_check "$info $1:$2"
+
+ # if not, count the TCP connections that are in fact MPTCP subflows
+ cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv |
+ grep -c tcp-ulp-mptcp)
+ cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv |
+ grep -c tcp-ulp-mptcp)
+
+ if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then
+ fail_test "got subflows $cnt1:$cnt2 expected $1:$2"
+ dump_stats=1
+ else
+ print_ok
+ fi
+
+ if [ "$dump_stats" = 1 ]; then
+ ss -N $ns1 -ti
+ ss -N $ns2 -ti
+ fi
+}
+
chk_link_usage()
{
local ns=$1
@@ -1914,7 +1901,7 @@ wait_attempt_fail()
while [ $time -lt $timeout_ms ]; do
local cnt
- cnt=$(get_counter ${ns} "TcpAttemptFails")
+ cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails")
[ "$cnt" = 1 ] && return 1
time=$((time + 100))
@@ -2305,6 +2292,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_rm_tx_nr 1
chk_rm_nr 1 1
+ chk_rst_nr 0 0
fi
# multiple subflows, remove
@@ -2317,6 +2305,7 @@ remove_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
chk_rm_nr 2 2
+ chk_rst_nr 0 0
fi
# single address, remove
@@ -2329,6 +2318,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
+ chk_rst_nr 0 0
fi
# subflow and signal, remove
@@ -2342,6 +2332,7 @@ remove_tests()
chk_join_nr 2 2 2
chk_add_nr 1 1
chk_rm_nr 1 1
+ chk_rst_nr 0 0
fi
# subflows and signal, remove
@@ -2356,6 +2347,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
+ chk_rst_nr 0 0
fi
# addresses remove
@@ -2370,6 +2362,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert
+ chk_rst_nr 0 0
fi
# invalid addresses remove
@@ -2384,6 +2377,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
+ chk_rst_nr 0 0
fi
# subflows and signal, flush
@@ -2398,6 +2392,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 1 1
chk_rm_nr 1 3 invert simult
+ chk_rst_nr 0 0
fi
# subflows flush
@@ -2417,6 +2412,7 @@ remove_tests()
else
chk_rm_nr 3 3
fi
+ chk_rst_nr 0 0
fi
# addresses flush
@@ -2431,6 +2427,7 @@ remove_tests()
chk_join_nr 3 3 3
chk_add_nr 3 3
chk_rm_nr 3 3 invert simult
+ chk_rst_nr 0 0
fi
# invalid addresses flush
@@ -2445,6 +2442,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
+ chk_rst_nr 0 0
fi
# remove id 0 subflow
@@ -2456,6 +2454,7 @@ remove_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_rm_nr 1 1
+ chk_rst_nr 0 0
fi
# remove id 0 address
@@ -2468,6 +2467,7 @@ remove_tests()
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_rm_nr 1 1 invert
+ chk_rst_nr 0 0 invert
fi
}
@@ -2794,6 +2794,7 @@ backup_tests()
fi
}
+SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED
@@ -2828,13 +2829,13 @@ verify_listener_events()
return
fi
- type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
- family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
- sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ type=$(mptcp_lib_evts_get_info type "$evt" "$e_type")
+ family=$(mptcp_lib_evts_get_info family "$evt" "$e_type")
+ sport=$(mptcp_lib_evts_get_info sport "$evt" "$e_type")
if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+ saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" "$e_type")
else
- saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
+ saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" "$e_type")
fi
if [ $type ] && [ $type = $e_type ] &&
@@ -3220,7 +3221,7 @@ fastclose_tests()
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
+ chk_join_nr 0 0 0 0 0 0 1
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
fi
@@ -3229,8 +3230,7 @@ fastclose_tests()
pedit_action_pkts()
{
tc -n $ns2 -j -s action show action pedit index 100 | \
- grep "packets" | \
- sed 's/.*"packets":\([0-9]\+\),.*/\1/'
+ mptcp_lib_get_info_value \"packets\" packets
}
fail_tests()
@@ -3255,69 +3255,70 @@ fail_tests()
fi
}
+# $1: ns ; $2: addr ; $3: id
userspace_pm_add_addr()
{
- local addr=$1
- local id=$2
+ local evts=$evts_ns1
local tk
- tk=$(grep "type:1," "$evts_ns1" |
- sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
- ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
sleep 1
}
-userspace_pm_rm_sf_addr_ns1()
+# $1: ns ; $2: id
+userspace_pm_rm_addr()
{
- local addr=$1
- local id=$2
- local tk sp da dp
-
- tk=$(grep "type:1," "$evts_ns1" |
- sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
- sp=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- da=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
- dp=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q')
- ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id
- ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \
- lport $sp rip $da rport $dp token $tk
- wait_rm_addr $ns1 1
- wait_rm_sf $ns1 1
+ local evts=$evts_ns1
+ local tk
+ local cnt
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ cnt=$(rm_addr_count ${1})
+ ip netns exec $1 ./pm_nl_ctl rem token $tk id $2
+ wait_rm_addr $1 "${cnt}"
}
+# $1: ns ; $2: addr ; $3: id
userspace_pm_add_sf()
{
- local addr=$1
- local id=$2
+ local evts=$evts_ns1
local tk da dp
- tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
- dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+ da=$(mptcp_lib_evts_get_info daddr4 "$evts")
+ dp=$(mptcp_lib_evts_get_info dport "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
rip $da rport $dp token $tk
sleep 1
}
-userspace_pm_rm_sf_addr_ns2()
+# $1: ns ; $2: addr $3: event type
+userspace_pm_rm_sf()
{
- local addr=$1
- local id=$2
+ local evts=$evts_ns1
+ local t=${3:-1}
+ local ip=4
local tk da dp sp
+ local cnt
- tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
- dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- sp=$(grep "type:10" "$evts_ns2" |
- sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id
- ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ if mptcp_lib_is_v6 $2; then ip=6; fi
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+ da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t)
+ dp=$(mptcp_lib_evts_get_info dport "$evts" $t)
+ sp=$(mptcp_lib_evts_get_info sport "$evts" $t)
+
+ cnt=$(rm_sf_count ${1})
+ ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
rip $da rport $dp token $tk
- wait_rm_addr $ns2 1
- wait_rm_sf $ns2 1
+ wait_rm_sf $1 "${cnt}"
}
userspace_tests()
@@ -3400,18 +3401,21 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- speed=10 \
+ speed=5 \
run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_mpj $ns1
- userspace_pm_add_addr 10.0.2.1 10
+ userspace_pm_add_addr $ns1 10.0.2.1 10
chk_join_nr 1 1 1
chk_add_nr 1 1
chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
- userspace_pm_rm_sf_addr_ns1 10.0.2.1 10
+ userspace_pm_rm_addr $ns1 10
+ userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $SUB_ESTABLISHED
chk_rm_nr 1 1 invert
chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
kill_events_pids
wait $tests_pid
fi
@@ -3421,16 +3425,88 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=10 \
+ speed=5 \
run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_mpj $ns2
- userspace_pm_add_sf 10.0.3.2 20
+ userspace_pm_add_sf $ns2 10.0.3.2 20
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
- userspace_pm_rm_sf_addr_ns2 10.0.3.2 20
+ chk_subflows_total 2 2
+ userspace_pm_rm_addr $ns2 20
+ userspace_pm_rm_sf $ns2 10.0.3.2 $SUB_ESTABLISHED
chk_rm_nr 1 1
chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
+ kill_events_pids
+ wait $tests_pid
+ fi
+
+ # userspace pm create id 0 subflow
+ if reset_with_events "userspace pm create id 0 subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
+ userspace_pm_add_sf $ns2 10.0.3.2 0
+ chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ kill_events_pids
+ wait $tests_pid
+ fi
+
+ # userspace pm remove initial subflow
+ if reset_with_events "userspace pm remove initial subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ userspace_pm_add_sf $ns2 10.0.3.2 20
+ chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ userspace_pm_rm_sf $ns2 10.0.1.2
+ # we don't look at the counter linked to the RM_ADDR but
+ # to the one linked to the subflows that have been removed
+ chk_rm_nr 0 1
+ chk_rst_nr 0 0 invert
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 1 1
+ kill_events_pids
+ wait $tests_pid
+ fi
+
+ # userspace pm send RM_ADDR for ID 0
+ if reset_with_events "userspace pm send RM_ADDR for ID 0" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns2 1 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns1
+ userspace_pm_add_addr $ns1 10.0.2.1 10
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+ userspace_pm_rm_addr $ns1 0
+ # we don't look at the counter linked to the subflows that
+ # have been removed but to the one linked to the RM_ADDR
+ chk_rm_nr 1 0 invert
+ chk_rst_nr 0 0 invert
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 1 1
kill_events_pids
wait $tests_pid
fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 92a5befe8039..022262a2cfe0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -207,3 +207,94 @@ mptcp_lib_result_print_all_tap() {
printf "%s\n" "${subtest}"
done
}
+
+# get the value of keyword $1 in the line marked by keyword $2
+mptcp_lib_get_info_value() {
+ grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+}
+
+# $1: info name ; $2: evts_ns ; $3: event type
+mptcp_lib_evts_get_info() {
+ mptcp_lib_get_info_value "${1}" "^type:${3:-1}," < "${2}"
+}
+
+# $1: PID
+mptcp_lib_kill_wait() {
+ [ "${1}" -eq 0 ] && return 0
+
+ kill -SIGUSR1 "${1}" > /dev/null 2>&1
+ kill "${1}" > /dev/null 2>&1
+ wait "${1}" 2>/dev/null
+}
+
+# $1: IP address
+mptcp_lib_is_v6() {
+ [ -z "${1##*:*}" ]
+}
+
+# $1: ns, $2: MIB counter
+mptcp_lib_get_counter() {
+ local ns="${1}"
+ local counter="${2}"
+ local count
+
+ count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+ awk 'NR==1 {next} {print $2}')
+ if [ -z "${count}" ]; then
+ mptcp_lib_fail_if_expected_feature "${counter} counter"
+ return 1
+ fi
+
+ echo "${count}"
+}
+
+mptcp_lib_make_file() {
+ local name="${1}"
+ local bs="${2}"
+ local size="${3}"
+
+ dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}"
+}
+
+# $1: file
+mptcp_lib_print_file_err() {
+ ls -l "${1}" 1>&2
+ echo "Trailing bytes are: "
+ tail -c 27 "${1}"
+}
+
+# $1: input file ; $2: output file ; $3: what kind of file
+mptcp_lib_check_transfer() {
+ local in="${1}"
+ local out="${2}"
+ local what="${3}"
+
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
+ echo "[ FAIL ] $what does not match (in, out):"
+ mptcp_lib_print_file_err "$in"
+ mptcp_lib_print_file_err "$out"
+
+ return 1
+ fi
+
+ return 0
+}
+
+# $1: ns, $2: port
+mptcp_lib_wait_local_port_listen() {
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex
+ port_hex="$(printf "%04X" "${port}")"
+
+ local _
+ for _ in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \
+ {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 8c8694f21e7d..c643872ddf47 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -11,7 +11,6 @@ cout=""
ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
-mptcp_connect=""
iptables="iptables"
ip6tables="ip6tables"
@@ -136,38 +135,6 @@ check_mark()
return 0
}
-print_file_err()
-{
- ls -l "$1" 1>&2
- echo "Trailing bytes are: "
- tail -c 27 "$1"
-}
-
-check_transfer()
-{
- local in=$1
- local out=$2
- local what=$3
-
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
- echo "[ FAIL ] $what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
- ret=1
-
- return 1
- fi
-
- return 0
-}
-
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
do_transfer()
{
local listener_ns="$1"
@@ -184,7 +151,7 @@ do_transfer()
local mptcp_connect="./mptcp_connect -r 20"
local local_addr ip
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
local_addr="::"
ip=ipv6
else
@@ -239,7 +206,7 @@ do_transfer()
check_mark $connector_ns 4 || retc=1
fi
- check_transfer $cin $sout "file received by server"
+ mptcp_lib_check_transfer $cin $sout "file received by server"
rets=$?
mptcp_lib_result_code "${retc}" "mark ${ip}"
@@ -258,8 +225,7 @@ make_file()
local who=$2
local size=$3
- dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $size
echo "Created $name (size $size KB) containing data sent by $who"
}
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index ce9203b817f8..ae8ad5d6fb9d 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -123,23 +123,6 @@ setup()
grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
}
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex i
-
- port_hex="$(printf "%04X" "${port}")"
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
do_transfer()
{
local cin=$1
@@ -179,7 +162,7 @@ do_transfer()
0.0.0.0 < "$sin" > "$sout" &
local spid=$!
- wait_local_port_listen "${ns3}" "${port}"
+ mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
timeout ${timeout_test} \
ip netns exec ${ns1} \
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index b25a3e33eb25..6167837f48e1 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -108,15 +108,6 @@ test_fail()
mptcp_lib_result_fail "${test_name}"
}
-kill_wait()
-{
- [ $1 -eq 0 ] && return 0
-
- kill -SIGUSR1 $1 > /dev/null 2>&1
- kill $1 > /dev/null 2>&1
- wait $1 2>/dev/null
-}
-
# This function is used in the cleanup trap
#shellcheck disable=SC2317
cleanup()
@@ -128,7 +119,7 @@ cleanup()
for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
$server_evts_pid $client_evts_pid
do
- kill_wait $pid
+ mptcp_lib_kill_wait $pid
done
local netns
@@ -173,22 +164,12 @@ print_title "Init"
print_test "Created network namespaces ns1, ns2"
test_pass
-make_file()
-{
- # Store a chunk of data in a file to transmit over an MPTCP connection
- local name=$1
- local ksize=1
-
- dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
-}
-
make_connection()
{
if [ -z "$file" ]; then
file=$(mktemp)
fi
- make_file "$file" "client"
+ mptcp_lib_make_file "$file" 2 1
local is_v6=$1
local app_port=$app4_port
@@ -210,7 +191,7 @@ make_connection()
fi
:>"$client_evts"
if [ $client_evts_pid -ne 0 ]; then
- kill_wait $client_evts_pid
+ mptcp_lib_kill_wait $client_evts_pid
fi
ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
client_evts_pid=$!
@@ -219,7 +200,7 @@ make_connection()
fi
:>"$server_evts"
if [ $server_evts_pid -ne 0 ]; then
- kill_wait $server_evts_pid
+ mptcp_lib_kill_wait $server_evts_pid
fi
ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
server_evts_pid=$!
@@ -247,14 +228,11 @@ make_connection()
local server_token
local server_serverside
- client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
- client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
- client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
- "$client_evts")
- server_token=$(grep "type:1," "$server_evts" |
- sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
- server_serverside=$(grep "type:1," "$server_evts" |
- sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q')
+ client_token=$(mptcp_lib_evts_get_info token "$client_evts")
+ client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
+ client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+ server_token=$(mptcp_lib_evts_get_info token "$server_evts")
+ server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
@@ -340,16 +318,16 @@ verify_announce_event()
local dport
local id
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
if [ "$e_af" = "v6" ]
then
- addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+ addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
else
- addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+ addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
fi
- dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+ id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
check_expected "type" "token" "addr" "dport" "id"
}
@@ -367,7 +345,7 @@ test_announce()
$client_addr_id dev ns2eth1 > /dev/null 2>&1
local type
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token"
if [ "$type" = "" ]
then
@@ -446,9 +424,9 @@ verify_remove_event()
local token
local id
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+ id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
check_expected "type" "token" "id"
}
@@ -466,7 +444,7 @@ test_remove()
$client_addr_id > /dev/null 2>&1
print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
local type
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
then
test_pass
@@ -479,7 +457,7 @@ test_remove()
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$invalid_id > /dev/null 2>&1
print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
then
test_pass
@@ -583,19 +561,19 @@ verify_subflow_events()
fi
fi
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+ family=$(mptcp_lib_evts_get_info family "$evt" $e_type)
+ dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+ locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type)
+ remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
if [ "$family" = "$AF_INET6" ]
then
- saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
- daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+ saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type)
+ daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
else
- saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
- daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+ saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type)
+ daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
fi
check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
@@ -627,10 +605,10 @@ test_subflows()
"10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
local sport
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from server to client machine
:>"$server_evts"
@@ -666,9 +644,9 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW6 from server to client machine
:>"$server_evts"
@@ -705,9 +683,9 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from server to client machine
:>"$server_evts"
@@ -743,9 +721,9 @@ test_subflows()
"10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from client to server machine
:>"$client_evts"
@@ -782,9 +760,9 @@ test_subflows()
"$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW6 from client to server machine
:>"$client_evts"
@@ -819,9 +797,9 @@ test_subflows()
"10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from client to server machine
:>"$client_evts"
@@ -865,9 +843,9 @@ test_subflows_v4_v6_mix()
"$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from client to server machine
:>"$client_evts"
@@ -896,9 +874,10 @@ test_prio()
# Check TX
print_test "MP_PRIO TX"
- count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ $count != 1 ]; then
+ count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx")
+ if [ -z "$count" ]; then
+ test_skip
+ elif [ $count != 1 ]; then
test_fail "Count != 1: ${count}"
else
test_pass
@@ -906,9 +885,10 @@ test_prio()
# Check RX
print_test "MP_PRIO RX"
- count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ $count != 1 ]; then
+ count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx")
+ if [ -z "$count" ]; then
+ test_skip
+ elif [ $count != 1 ]; then
test_fail "Count != 1: ${count}"
else
test_pass
@@ -933,18 +913,13 @@ verify_listener_events()
print_test "CLOSE_LISTENER $e_saddr:$e_sport"
fi
- type=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
- family=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
- sport=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
+ type=$(mptcp_lib_evts_get_info type $evt $e_type)
+ family=$(mptcp_lib_evts_get_info family $evt $e_type)
+ sport=$(mptcp_lib_evts_get_info sport $evt $e_type)
if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+ saddr=$(mptcp_lib_evts_get_info saddr6 $evt $e_type)
else
- saddr=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
+ saddr=$(mptcp_lib_evts_get_info saddr4 $evt $e_type)
fi
check_expected "type" "family" "saddr" "sport"
@@ -982,7 +957,7 @@ test_listener()
sleep 0.5
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
sleep 0.5
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100755
index 000000000000..4fe0befa13fb
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+ local protocol="${3}"
+ local port_hex
+ local i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ if ip netns exec "${listener_ns}" cat /proc/net/"${protocol}"* | \
+ grep -q "${port_hex}"; then
+ break
+ fi
+ sleep 0.1
+ done
+}
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
new file mode 100755
index 000000000000..7d3d3fc99461
--- /dev/null
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -o pipefail
+
+NS=netns-name-test
+DEV=dummy-dev0
+DEV2=dummy-dev1
+ALT_NAME=some-alt-name
+
+RET_CODE=0
+
+cleanup() {
+ ip netns del $NS
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ RET_CODE=1
+}
+
+ip netns add $NS
+
+#
+# Test basic move without a rename
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns 1 ||
+ fail "Can't perform a netns move"
+ip link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip link del $DEV || fail
+
+#
+# Test move with a conflict
+#
+ip link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns 1 2> /dev/null &&
+ fail "Performed a netns move with a name conflict"
+ip link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $NS link del $DEV || fail
+ip link del $DEV || fail
+
+#
+# Test move with a conflict and rename
+#
+ip link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns 1 name $DEV2 ||
+ fail "Can't perform a netns move with rename"
+ip link del $DEV2 || fail
+ip link del $DEV || fail
+
+#
+# Test dup alt-name with netns move
+#
+ip link add name $DEV type dummy || fail
+ip link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link add name $DEV2 type dummy || fail
+ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail
+
+ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null &&
+ fail "Moved with alt-name dup"
+
+ip link del $DEV || fail
+ip -netns $NS link del $DEV2 || fail
+
+#
+# Test creating alt-name in one net-ns and using in another
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link set dev $DEV netns 1 || fail
+ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move"
+ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
+ fail "Can still find alt-name after move"
+ip link del $DEV || fail
+
+echo -ne "$(basename $0) \t\t\t\t"
+if [ $RET_CODE -eq 0 ]; then
+ echo "[ OK ]"
+else
+ echo "[ FAIL ]"
+fi
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 9c2012d70b08..f8499d4c87f3 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -3,6 +3,8 @@
#
# OVS kernel module self tests
+trap ovs_exit_sig EXIT TERM INT ERR
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -142,6 +144,12 @@ ovs_add_flow () {
return 0
}
+ovs_del_flows () {
+ info "Deleting all flows from DP: sbx:$1 br:$2"
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2"
+ return 0
+}
+
ovs_drop_record_and_run () {
local sbx=$1
shift
@@ -198,6 +206,17 @@ test_drop_reason() {
ip netns exec server ip addr add 172.31.110.20/24 dev s1
ip netns exec server ip link set s1 up
+ # Check if drop reasons can be sent
+ ovs_add_flow "test_drop_reason" dropreason \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null
+ if [ $? == 1 ]; then
+ info "no support for drop reasons - skipping"
+ ovs_exit_sig
+ return $ksft_skip
+ fi
+
+ ovs_del_flows "test_drop_reason" dropreason
+
# Allow ARP
ovs_add_flow "test_drop_reason" dropreason \
'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
@@ -525,7 +544,7 @@ run_test() {
fi
if python3 ovs-dpctl.py -h 2>&1 | \
- grep "Need to install the python" >/dev/null 2>&1; then
+ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}"
return $ksft_skip
fi
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 912dc8c49085..b97e621face9 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -28,8 +28,10 @@ try:
from pyroute2.netlink import nlmsg_atoms
from pyroute2.netlink.exceptions import NetlinkError
from pyroute2.netlink.generic import GenericNetlinkSocket
+ import pyroute2
+
except ModuleNotFoundError:
- print("Need to install the python pyroute2 package.")
+ print("Need to install the python pyroute2 package >= 0.6.")
sys.exit(0)
@@ -1117,12 +1119,14 @@ class ovskey(nla):
"src",
lambda x: str(ipaddress.IPv4Address(x)),
int,
+ convert_ipv4,
),
(
"dst",
"dst",
- lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: str(ipaddress.IPv4Address(x)),
int,
+ convert_ipv4,
),
("tp_src", "tp_src", "%d", int),
("tp_dst", "tp_dst", "%d", int),
@@ -1904,6 +1908,32 @@ class OvsFlow(GenericNetlinkSocket):
raise ne
return reply
+ def del_flows(self, dpifindex):
+ """
+ Send a del message to the kernel that will drop all flows.
+
+ dpifindex should be a valid datapath obtained by calling
+ into the OvsDatapath lookup
+ """
+
+ flowmsg = OvsFlow.ovs_flow_msg()
+ flowmsg["cmd"] = OVS_FLOW_CMD_DEL
+ flowmsg["version"] = OVS_DATAPATH_VERSION
+ flowmsg["reserved"] = 0
+ flowmsg["dpifindex"] = dpifindex
+
+ try:
+ reply = self.nlm_request(
+ flowmsg,
+ msg_type=self.prid,
+ msg_flags=NLM_F_REQUEST | NLM_F_ACK,
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ print(flowmsg)
+ raise ne
+ return reply
+
def dump(self, dpifindex, flowspec=None):
"""
Returns a list of messages containing flows.
@@ -1998,6 +2028,12 @@ def main(argv):
nlmsg_atoms.ovskey = ovskey
nlmsg_atoms.ovsactions = ovsactions
+ # version check for pyroute2
+ prverscheck = pyroute2.__version__.split(".")
+ if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6:
+ print("Need to upgrade the python pyroute2 package to >= 0.6.")
+ sys.exit(0)
+
parser = argparse.ArgumentParser()
parser.add_argument(
"-v",
@@ -2060,6 +2096,9 @@ def main(argv):
addflcmd.add_argument("flow", help="Flow specification")
addflcmd.add_argument("acts", help="Flow actions")
+ delfscmd = subparsers.add_parser("del-flows")
+ delfscmd.add_argument("flsbr", help="Datapath name")
+
args = parser.parse_args()
if args.verbose > 0:
@@ -2143,6 +2182,11 @@ def main(argv):
flow = OvsFlow.ovs_flow_msg()
flow.parse(args.flow, args.acts, rep["dpifindex"])
ovsflow.add_flow(rep["dpifindex"], flow)
+ elif hasattr(args, "flsbr"):
+ rep = ovsdp.info(args.flsbr, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.flsbr)
+ ovsflow.del_flows(rep["dpifindex"])
return 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f838dd370f6a..b3b2dc5a630c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -2048,7 +2048,7 @@ run_test() {
case $ret in
0)
all_skipped=false
- [ $exitcode=$ksft_skip ] && exitcode=0
+ [ $exitcode -eq $ksft_skip ] && exitcode=0
;;
$ksft_skip)
[ $all_skipped = true ] && exitcode=$ksft_skip
diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh
index 116bfeab72fa..e08701c750e3 100755
--- a/tools/testing/selftests/net/route_localnet.sh
+++ b/tools/testing/selftests/net/route_localnet.sh
@@ -18,8 +18,10 @@ setup() {
ip route del 127.0.0.0/8 dev lo table local
ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local
- ifconfig veth0 127.25.3.4/24 up
- ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up
+ ip address add 127.25.3.4/24 dev veth0
+ ip link set dev veth0 up
+ ip netns exec "${PEER_NS}" ip address add 127.25.3.14/24 dev veth1
+ ip netns exec "${PEER_NS}" ip link set dev veth1 up
ip route flush cache
ip netns exec "${PEER_NS}" ip route flush cache
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 5f2b3f6c0d74..38be9706c45f 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -859,7 +859,7 @@ kci_test_gretap()
run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- run_cmd ip -netns "$testns" link set dev $DEV_NS ups
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 31e5f0f8859d..6e996f8063cd 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -337,62 +337,62 @@ basic_common()
# Basic add, replace and delete behavior.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
log_test $? 0 "MDB entry addition"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
log_test $? 0 "MDB entry presence after addition"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
log_test $? 0 "MDB entry replacement"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
log_test $? 0 "MDB entry presence after replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
log_test $? 0 "MDB entry deletion"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\""
- log_test $? 1 "MDB entry presence after deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+ log_test $? 254 "MDB entry presence after deletion"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
log_test $? 255 "Non-existent MDB entry deletion"
# Default protocol and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto static\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto static\""
log_test $? 0 "MDB entry default protocol"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"proto 123\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto 123\""
log_test $? 0 "MDB entry protocol replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
# Default destination port and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" dst_port \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" dst_port \""
log_test $? 1 "MDB entry default destination port"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"dst_port 1234\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"dst_port 1234\""
log_test $? 0 "MDB entry destination port replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
# Default destination VNI and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" vni \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" vni \""
log_test $? 1 "MDB entry default destination VNI"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"vni 1234\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"vni 1234\""
log_test $? 0 "MDB entry destination VNI replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
# Default outgoing interface and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \" via \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" via \""
log_test $? 1 "MDB entry default outgoing interface"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep \"$grp_key\" | grep \"via veth0\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"via veth0\""
log_test $? 0 "MDB entry outgoing interface replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
@@ -550,127 +550,127 @@ star_g_common()
# Basic add, replace and delete behavior.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
log_test $? 0 "(*, G) MDB entry addition with source list"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
log_test $? 0 "(*, G) MDB entry presence after addition"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
log_test $? 0 "(S, G) MDB entry presence after addition"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
log_test $? 0 "(*, G) MDB entry replacement with source list"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
log_test $? 0 "(*, G) MDB entry presence after replacement"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
log_test $? 0 "(S, G) MDB entry presence after replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
log_test $? 0 "(*, G) MDB entry deletion"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \""
- log_test $? 1 "(*, G) MDB entry presence after deletion"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
- log_test $? 1 "(S, G) MDB entry presence after deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+ log_test $? 254 "(*, G) MDB entry presence after deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+ log_test $? 254 "(S, G) MDB entry presence after deletion"
# Default filter mode and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
log_test $? 0 "(*, G) MDB entry default filter mode"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep include"
log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\""
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\""
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\""
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep exclude"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\""
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grep grp $grp src $src1 src_vni 10010"
log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\""
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\" | grep blocked"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\""
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
# Default source list and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep source_list"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep source_list"
log_test $? 1 "(*, G) MDB entry default source list"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src1\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
log_test $? 0 "(S, G) MDB entry of 1st source after removing source"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src2\""
- log_test $? 1 "(S, G) MDB entry of 2nd source after removing source"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \"src $src3\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
+ log_test $? 254 "(S, G) MDB entry of 2nd source after removing source"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
log_test $? 0 "(S, G) MDB entry of 3rd source after removing source"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
# Default protocol and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto static\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto static\""
log_test $? 0 "(*, G) MDB entry default protocol"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto static\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto static\""
log_test $? 0 "(S, G) MDB entry default protocol"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \"proto bgp\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto bgp\""
log_test $? 0 "(*, G) MDB entry protocol after replacement"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \"proto bgp\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto bgp\""
log_test $? 0 "(S, G) MDB entry protocol after replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
# Default destination port and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port \""
log_test $? 1 "(*, G) MDB entry default destination port"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port \""
log_test $? 1 "(S, G) MDB entry default destination port"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" dst_port 1234 \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port 1234 \""
log_test $? 0 "(*, G) MDB entry destination port after replacement"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" dst_port 1234 \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port 1234 \""
log_test $? 0 "(S, G) MDB entry destination port after replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
# Default destination VNI and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni \""
log_test $? 1 "(*, G) MDB entry default destination VNI"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni \""
log_test $? 1 "(S, G) MDB entry default destination VNI"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" vni 1234 \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni 1234 \""
log_test $? 0 "(*, G) MDB entry destination VNI after replacement"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" vni 1234 \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni 1234 \""
log_test $? 0 "(S, G) MDB entry destination VNI after replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
# Default outgoing interface and replacement.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via \""
log_test $? 1 "(*, G) MDB entry default outgoing interface"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via \""
log_test $? 1 "(S, G) MDB entry default outgoing interface"
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep -v \" src \" | grep \" via veth0 \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via veth0 \""
log_test $? 0 "(*, G) MDB entry outgoing interface after replacement"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep \" src \" | grep \" via veth0 \""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via veth0 \""
log_test $? 0 "(S, G) MDB entry outgoing interface after replacement"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
@@ -772,7 +772,7 @@ sg_common()
# Default filter mode.
run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
- run_cmd "bridge -n $ns1 -d -s mdb show dev vx0 | grep $grp | grep include"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src src_vni 10010 | grep include"
log_test $? 0 "(S, G) MDB entry default filter mode"
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
@@ -2296,9 +2296,9 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
-bridge mdb help 2>&1 | grep -q "src_vni"
+bridge mdb help 2>&1 | grep -q "get"
if [ $? -ne 0 ]; then
- echo "SKIP: iproute2 bridge too old, missing VXLAN MDB support"
+ echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support"
exit $ksft_skip
fi
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 0c743752669a..af5dc57c8ce9 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,6 +3,8 @@
#
# Run a series of udpgro functional tests.
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -51,8 +53,7 @@ run_one() {
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen ${PEER_NS} 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
wait $(jobs -p)
@@ -97,7 +98,7 @@ run_one_nat() {
echo "ok" || \
echo "failed"&
- sleep 0.1
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
kill -INT $pid
@@ -118,11 +119,9 @@ run_one_2sock() {
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 12345 udp
./udpgso_bench_tx ${tx_args} -p 12345
- sleep 0.1
- # first UDP GSO socket should be closed at this point
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
wait $(jobs -p)
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 894972877e8b..cb664679b434 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,6 +3,8 @@
#
# Run a series of udpgro benchmarks
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -40,8 +42,7 @@ run_one() {
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
}
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 0a6359bed0b9..dd47fa96f6b3 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,6 +3,8 @@
#
# Run a series of udpgro benchmarks
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
BPF_FILE="../bpf/xdp_dummy.bpf.o"
@@ -45,8 +47,7 @@ run_one() {
echo ${rx_args}
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
}
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index ef90aca4cc96..bced422b78f7 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -7,7 +7,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
- conntrack_sctp_collision.sh
+ conntrack_sctp_collision.sh xt_string.sh
HOSTPKG_CONFIG := pkg-config
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
index bb34329e02a7..99ed5bd6e840 100755
--- a/tools/testing/selftests/netfilter/nft_audit.sh
+++ b/tools/testing/selftests/netfilter/nft_audit.sh
@@ -11,6 +11,12 @@ nft --version >/dev/null 2>&1 || {
exit $SKIP_RC
}
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
logfile=$(mktemp)
rulefile=$(mktemp)
echo "logging into $logfile"
@@ -93,6 +99,12 @@ do_test 'nft add counter t1 c1' \
do_test 'nft add counter t2 c1; add counter t2 c2' \
'table=t2 family=2 entries=2 op=nft_register_obj'
+for ((i = 3; i <= 500; i++)); do
+ echo "add counter t2 c$i"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
# adding/updating quotas
do_test 'nft add quota t1 q1 { 10 bytes }' \
@@ -101,6 +113,12 @@ do_test 'nft add quota t1 q1 { 10 bytes }' \
do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
'table=t2 family=2 entries=2 op=nft_register_obj'
+for ((i = 3; i <= 500; i++)); do
+ echo "add quota t2 q$i { 10 bytes }"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
# changing the quota value triggers obj update path
do_test 'nft add quota t1 q1 { 20 bytes }' \
'table=t1 family=2 entries=1 op=nft_register_obj'
@@ -150,6 +168,40 @@ done
do_test 'nft reset set t1 s' \
'table=t1 family=2 entries=3 op=nft_reset_setelem'
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
# deleting rules
readarray -t handles < <(nft -a list chain t1 c1 | \
diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh
new file mode 100755
index 000000000000..1802653a4728
--- /dev/null
+++ b/tools/testing/selftests/netfilter/xt_string.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+if ! iptables --version >/dev/null 2>&1; then
+ echo "SKIP: Test needs iptables"
+ exit $ksft_skip
+fi
+if ! ip -V >/dev/null 2>&1; then
+ echo "SKIP: Test needs iproute2"
+ exit $ksft_skip
+fi
+if ! nc -h >/dev/null 2>&1; then
+ echo "SKIP: Test needs netcat"
+ exit $ksft_skip
+fi
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+ns="ns-$(mktemp -u XXXXXXXX)"
+trap 'ip netns del $ns' EXIT
+ip netns add "$ns"
+ip -net "$ns" link add d0 type dummy
+ip -net "$ns" link set d0 up
+ip -net "$ns" addr add 10.1.2.1/24 dev d0
+
+#ip netns exec "$ns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT
+
+add_rule() { # (alg, from, to)
+ ip netns exec "$ns" \
+ iptables -A OUTPUT -o d0 -m string \
+ --string "$pattern" --algo $1 --from $2 --to $3
+}
+showrules() { # ()
+ ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+ ip netns exec "$ns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+ showrules | grep -c -- "$*"
+}
+send() { # (offset)
+ ( for ((i = 0; i < $1 - $hdrlen; i++)); do
+ printf " "
+ done
+ printf "$pattern"
+ ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - $patlen))
+if [ $(countrule -c 0 0) -ne 4 ]; then
+ echo "FAIL: rules match data before --from"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - $patlen))
+if [ $(countrule -c 2) -ne 2 ]; then
+ echo "FAIL: only two rules should match at low offset"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1500 - $patlen))
+if [ $(countrule -c 1) -ne 4 ]; then
+ echo "FAIL: all rules should match at end of packet"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1495
+if [ $(countrule -c 1) -ne 1 ]; then
+ echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1500
+if [ $(countrule -c 1) -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at start of second fragment"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen))
+if [ $(countrule -c 1) -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at end of largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen + 1))
+if [ $(countrule -c 1) -ne 0 ]; then
+ echo "FAIL: no rules should match pattern extending largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1600
+if [ $(countrule -c 1) -ne 0 ]; then
+ echo "FAIL: no rule should match pattern past largest --to"
+ showrules
+ ((rc--))
+fi
+
+exit $rc
diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore
index 52f613cdad54..5119f9f7afd2 100644
--- a/tools/testing/selftests/nolibc/.gitignore
+++ b/tools/testing/selftests/nolibc/.gitignore
@@ -1,4 +1,5 @@
/initramfs/
+/initramfs.cpio
/libc-test
/nolibc-test
/run.out
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index dfe66776a331..a0fc07253baf 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -82,7 +82,7 @@ QEMU_ARCH_arm = arm
QEMU_ARCH_mips = mipsel # works with malta_defconfig
QEMU_ARCH_ppc = ppc
QEMU_ARCH_ppc64 = ppc64
-QEMU_ARCH_ppc64le = ppc64le
+QEMU_ARCH_ppc64le = ppc64
QEMU_ARCH_riscv = riscv64
QEMU_ARCH_s390 = s390x
QEMU_ARCH_loongarch = loongarch64
@@ -113,6 +113,7 @@ else
Q=@
endif
+CFLAGS_i386 = $(call cc-option,-m32)
CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
@@ -131,18 +132,20 @@ REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++
help:
@echo "Supported targets under selftests/nolibc:"
- @echo " all call the \"run\" target below"
- @echo " help this help"
- @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
- @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
- @echo " libc-test build an executable using the compiler's default libc instead"
- @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
- @echo " initramfs prepare the initramfs with nolibc-test"
- @echo " defconfig create a fresh new default config (uses \$$XARCH)"
- @echo " kernel (re)build the kernel with the initramfs (uses \$$XARCH)"
- @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
- @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
- @echo " clean clean the sysroot, initramfs, build and output files"
+ @echo " all call the \"run\" target below"
+ @echo " help this help"
+ @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
+ @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
+ @echo " libc-test build an executable using the compiler's default libc instead"
+ @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " initramfs.cpio prepare the initramfs archive with nolibc-test"
+ @echo " initramfs prepare the initramfs tree with nolibc-test"
+ @echo " defconfig create a fresh new default config (uses \$$XARCH)"
+ @echo " kernel (re)build the kernel (uses \$$XARCH)"
+ @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)"
+ @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
+ @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " clean clean the sysroot, initramfs, build and output files"
@echo ""
@echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
@echo ""
@@ -168,17 +171,17 @@ sysroot/$(ARCH)/include:
$(Q)mv sysroot/sysroot sysroot/$(ARCH)
ifneq ($(NOLIBC_SYSROOT),0)
-nolibc-test: nolibc-test.c sysroot/$(ARCH)/include
+nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc
+ -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c -lgcc
else
-nolibc-test: nolibc-test.c
+nolibc-test: nolibc-test.c nolibc-test-linkage.c
$(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
- -nostdlib -static -include ../../../include/nolibc/nolibc.h $< -lgcc
+ -nostdlib -static -include ../../../include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc
endif
-libc-test: nolibc-test.c
- $(QUIET_CC)$(HOSTCC) -o $@ $<
+libc-test: nolibc-test.c nolibc-test-linkage.c
+ $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
# local libc-test
run-libc-test: libc-test
@@ -195,6 +198,9 @@ run-user: nolibc-test
$(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || :
$(Q)$(REPORT) $(CURDIR)/run.out
+initramfs.cpio: kernel nolibc-test
+ $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(srctree)/usr/gen_init_cpio - > initramfs.cpio
+
initramfs: nolibc-test
$(QUIET_MKDIR)mkdir -p initramfs
$(call QUIET_INSTALL, initramfs/init)
@@ -203,17 +209,20 @@ initramfs: nolibc-test
defconfig:
$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare
-kernel: initramfs
+kernel:
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME)
+
+kernel-standalone: initramfs
$(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs
# run the tests after building the kernel
-run: kernel
- $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
+run: kernel initramfs.cpio
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
$(Q)$(REPORT) $(CURDIR)/run.out
# re-run the tests from an existing kernel
rerun:
- $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
$(Q)$(REPORT) $(CURDIR)/run.out
# report with existing test log
@@ -227,6 +236,8 @@ clean:
$(Q)rm -f nolibc-test
$(call QUIET_CLEAN, libc-test)
$(Q)rm -f libc-test
+ $(call QUIET_CLEAN, initramfs.cpio)
+ $(Q)rm -rf initramfs.cpio
$(call QUIET_CLEAN, initramfs)
$(Q)rm -rf initramfs
$(call QUIET_CLEAN, run.out)
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
new file mode 100644
index 000000000000..5ff4c8a1db2a
--- /dev/null
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "nolibc-test-linkage.h"
+
+#ifndef NOLIBC
+#include <errno.h>
+#endif
+
+void *linkage_test_errno_addr(void)
+{
+ return &errno;
+}
+
+int linkage_test_constructor_test_value;
+
+__attribute__((constructor))
+static void constructor1(void)
+{
+ linkage_test_constructor_test_value = 2;
+}
+
+__attribute__((constructor))
+static void constructor2(void)
+{
+ linkage_test_constructor_test_value *= 3;
+}
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.h b/tools/testing/selftests/nolibc/nolibc-test-linkage.h
new file mode 100644
index 000000000000..c66473070d73
--- /dev/null
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NOLIBC_TEST_LINKAGE_H
+#define _NOLIBC_TEST_LINKAGE_H
+
+void *linkage_test_errno_addr(void);
+extern int linkage_test_constructor_test_value;
+
+#endif /* _NOLIBC_TEST_LINKAGE_H */
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index fb3bf91462e2..2f10541e6f38 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -41,6 +41,8 @@
#endif
#endif
+#include "nolibc-test-linkage.h"
+
/* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */
#define SINT_MAX_OF_TYPE(type) (((type)1 << (sizeof(type) * 8 - 2)) - (type)1 + ((type)1 << (sizeof(type) * 8 - 2)))
#define SINT_MIN_OF_TYPE(type) (-SINT_MAX_OF_TYPE(type) - 1)
@@ -57,6 +59,9 @@ static int test_argc;
/* will be used by some test cases as readable file, please don't write it */
static const char *argv0;
+/* will be used by constructor tests */
+static int constructor_test_value;
+
/* definition of a series of tests */
struct test {
const char *name; /* test name */
@@ -594,6 +599,19 @@ int expect_strne(const char *expr, int llen, const char *cmp)
#define CASE_TEST(name) \
case __LINE__: llen += printf("%d %s", test, #name);
+/* constructors validate that they are executed in definition order */
+__attribute__((constructor))
+static void constructor1(void)
+{
+ constructor_test_value = 1;
+}
+
+__attribute__((constructor))
+static void constructor2(void)
+{
+ constructor_test_value *= 2;
+}
+
int run_startup(int min, int max)
{
int test;
@@ -630,7 +648,9 @@ int run_startup(int min, int max)
CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break;
CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
- CASE_TEST(auxv_AT_PAGESZ); EXPECT_GE(1, getauxval(AT_PAGESZ), 4096); break;
+ CASE_TEST(constructor); EXPECT_EQ(1, constructor_test_value, 2); break;
+ CASE_TEST(linkage_errno); EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break;
+ CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break;
case __LINE__:
return ret; /* must be last */
/* note: do not set any defaults so as to permit holes above */
@@ -894,14 +914,14 @@ int run_syscall(int min, int max)
CASE_TEST(lseek_0); EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break;
CASE_TEST(mkdir_root); EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break;
CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
- CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap((void *)1, 0), -1, EINVAL); break;
+ CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break;
CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break;
CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
- CASE_TEST(poll_fault); EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break;
+ CASE_TEST(poll_fault); EXPECT_SYSER(1, poll(NULL, 1, 0), -1, EFAULT); break;
CASE_TEST(prctl); EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break;
CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break;
CASE_TEST(rmdir_blah); EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break;
@@ -910,7 +930,7 @@ int run_syscall(int min, int max)
CASE_TEST(select_stdout); EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break;
CASE_TEST(select_fault); EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break;
CASE_TEST(stat_blah); EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break;
- CASE_TEST(stat_fault); EXPECT_SYSER(1, stat((void *)1, &stat_buf), -1, EFAULT); break;
+ CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break;
CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 7fb902099de4..9024754530b2 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -300,7 +300,7 @@ void test_openat2_flags(void)
ksft_print_msg("openat2 unexpectedly returned ");
if (fdpath)
- ksft_print_msg("%d['%s'] with %X (!= %X)\n",
+ ksft_print_msg("%d['%s'] with %X (!= %llX)\n",
fd, fdpath, fdflags,
test->how.flags);
else
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index 4e86f927880c..01cc37bf611c 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -62,7 +62,7 @@ static void error_report(struct error *err, const char *test_name)
break;
case PIDFD_PASS:
- ksft_test_result_pass("%s test: Passed\n");
+ ksft_test_result_pass("%s test: Passed\n", test_name);
break;
default:
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 00a07e7c571c..c081ae91313a 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -381,13 +381,13 @@ static int test_pidfd_send_signal_syscall_support(void)
static void *test_pidfd_poll_exec_thread(void *priv)
{
- ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+ ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
getpid(), syscall(SYS_gettid));
ksft_print_msg("Child Thread: doing exec of sleep\n");
execl("/bin/sleep", "sleep", str(CHILD_THREAD_MIN_WAIT), (char *)NULL);
- ksft_print_msg("Child Thread: DONE. pid %d tid %d\n",
+ ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n",
getpid(), syscall(SYS_gettid));
return NULL;
}
@@ -427,7 +427,7 @@ static int child_poll_exec_test(void *args)
{
pthread_t t1;
- ksft_print_msg("Child (pidfd): starting. pid %d tid %d\n", getpid(),
+ ksft_print_msg("Child (pidfd): starting. pid %d tid %ld\n", getpid(),
syscall(SYS_gettid));
pthread_create(&t1, NULL, test_pidfd_poll_exec_thread, NULL);
/*
@@ -480,10 +480,10 @@ static void test_pidfd_poll_exec(int use_waitpid)
static void *test_pidfd_poll_leader_exit_thread(void *priv)
{
- ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+ ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
getpid(), syscall(SYS_gettid));
sleep(CHILD_THREAD_MIN_WAIT);
- ksft_print_msg("Child Thread: DONE. pid %d tid %d\n", getpid(), syscall(SYS_gettid));
+ ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", getpid(), syscall(SYS_gettid));
return NULL;
}
@@ -492,7 +492,7 @@ static int child_poll_leader_exit_test(void *args)
{
pthread_t t1, t2;
- ksft_print_msg("Child: starting. pid %d tid %d\n", getpid(), syscall(SYS_gettid));
+ ksft_print_msg("Child: starting. pid %d tid %ld\n", getpid(), syscall(SYS_gettid));
pthread_create(&t1, NULL, test_pidfd_poll_leader_exit_thread, NULL);
pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL);
diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c
index ee71ce52cb6a..56198d4ca2bf 100644
--- a/tools/testing/selftests/proc/proc-empty-vm.c
+++ b/tools/testing/selftests/proc/proc-empty-vm.c
@@ -23,6 +23,9 @@
* /proc/${pid}/smaps
* /proc/${pid}/smaps_rollup
*/
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
#undef NDEBUG
#include <assert.h>
#include <errno.h>
@@ -34,6 +37,7 @@
#include <sys/mman.h>
#include <sys/ptrace.h>
#include <sys/resource.h>
+#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
@@ -42,6 +46,43 @@
#define TEST_VSYSCALL
#endif
+#if defined __amd64__
+ #ifndef SYS_pkey_alloc
+ #define SYS_pkey_alloc 330
+ #endif
+ #ifndef SYS_pkey_free
+ #define SYS_pkey_free 331
+ #endif
+#elif defined __i386__
+ #ifndef SYS_pkey_alloc
+ #define SYS_pkey_alloc 381
+ #endif
+ #ifndef SYS_pkey_free
+ #define SYS_pkey_free 382
+ #endif
+#else
+ #error "SYS_pkey_alloc"
+#endif
+
+static int g_protection_key_support;
+
+static int protection_key_support(void)
+{
+ long rv = syscall(SYS_pkey_alloc, 0, 0);
+ if (rv > 0) {
+ syscall(SYS_pkey_free, (int)rv);
+ return 1;
+ } else if (rv == -1 && errno == ENOSYS) {
+ return 0;
+ } else if (rv == -1 && errno == EINVAL) {
+ // ospke=n
+ return 0;
+ } else {
+ fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, rv, errno);
+ exit(EXIT_FAILURE);
+ }
+}
+
/*
* 0: vsyscall VMA doesn't exist vsyscall=none
* 1: vsyscall VMA is --xp vsyscall=xonly
@@ -60,7 +101,7 @@ static const char proc_pid_maps_vsyscall_2[] =
static const char proc_pid_smaps_vsyscall_0[] = "";
static const char proc_pid_smaps_vsyscall_1[] =
-"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"
+"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"
"Size: 4 kB\n"
"KernelPageSize: 4 kB\n"
"MMUPageSize: 4 kB\n"
@@ -73,6 +114,7 @@ static const char proc_pid_smaps_vsyscall_1[] =
"Private_Dirty: 0 kB\n"
"Referenced: 0 kB\n"
"Anonymous: 0 kB\n"
+"KSM: 0 kB\n"
"LazyFree: 0 kB\n"
"AnonHugePages: 0 kB\n"
"ShmemPmdMapped: 0 kB\n"
@@ -83,14 +125,10 @@ static const char proc_pid_smaps_vsyscall_1[] =
"SwapPss: 0 kB\n"
"Locked: 0 kB\n"
"THPeligible: 0\n"
-/*
- * "ProtectionKey:" field is conditional. It is possible to check it as well,
- * but I don't have such machine.
- */
;
static const char proc_pid_smaps_vsyscall_2[] =
-"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"
+"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"
"Size: 4 kB\n"
"KernelPageSize: 4 kB\n"
"MMUPageSize: 4 kB\n"
@@ -103,6 +141,7 @@ static const char proc_pid_smaps_vsyscall_2[] =
"Private_Dirty: 0 kB\n"
"Referenced: 0 kB\n"
"Anonymous: 0 kB\n"
+"KSM: 0 kB\n"
"LazyFree: 0 kB\n"
"AnonHugePages: 0 kB\n"
"ShmemPmdMapped: 0 kB\n"
@@ -113,10 +152,6 @@ static const char proc_pid_smaps_vsyscall_2[] =
"SwapPss: 0 kB\n"
"Locked: 0 kB\n"
"THPeligible: 0\n"
-/*
- * "ProtectionKey:" field is conditional. It is possible to check it as well,
- * but I'm too tired.
- */
;
static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
@@ -238,19 +273,27 @@ static int test_proc_pid_smaps(pid_t pid)
}
perror("open /proc/${pid}/smaps");
return EXIT_FAILURE;
+ }
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+
+ assert(0 <= rv);
+ assert(rv <= sizeof(buf));
+
+ if (g_vsyscall == 0) {
+ assert(rv == 0);
} else {
- ssize_t rv = read(fd, buf, sizeof(buf));
- close(fd);
- if (g_vsyscall == 0) {
- assert(rv == 0);
- } else {
- size_t len = strlen(g_proc_pid_maps_vsyscall);
- /* TODO "ProtectionKey:" */
- assert(rv > len);
- assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
+ size_t len = strlen(g_proc_pid_smaps_vsyscall);
+ assert(rv > len);
+ assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0);
+
+ if (g_protection_key_support) {
+#define PROTECTION_KEY "ProtectionKey: 0\n"
+ assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY)));
}
- return EXIT_SUCCESS;
}
+
+ return EXIT_SUCCESS;
}
static const char g_smaps_rollup[] =
@@ -303,6 +346,95 @@ static int test_proc_pid_smaps_rollup(pid_t pid)
}
}
+static const char *parse_u64(const char *p, const char *const end, uint64_t *rv)
+{
+ *rv = 0;
+ for (; p != end; p += 1) {
+ if ('0' <= *p && *p <= '9') {
+ assert(!__builtin_mul_overflow(*rv, 10, rv));
+ assert(!__builtin_add_overflow(*rv, *p - '0', rv));
+ } else {
+ break;
+ }
+ }
+ assert(p != end);
+ return p;
+}
+
+/*
+ * There seems to be 2 types of valid output:
+ * "0 A A B 0 0 0\n" for dynamic exeuctables,
+ * "0 0 0 B 0 0 0\n" for static executables.
+ */
+static int test_proc_pid_statm(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ perror("open /proc/${pid}/statm");
+ return EXIT_FAILURE;
+ }
+
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+
+ assert(rv >= 0);
+ assert(rv <= sizeof(buf));
+ if (0) {
+ write(1, buf, rv);
+ }
+
+ const char *p = buf;
+ const char *const end = p + rv;
+
+ /* size */
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == ' ');
+
+ uint64_t resident;
+ p = parse_u64(p, end, &resident);
+ assert(p != end && *p++ == ' ');
+
+ uint64_t shared;
+ p = parse_u64(p, end, &shared);
+ assert(p != end && *p++ == ' ');
+
+ uint64_t text;
+ p = parse_u64(p, end, &text);
+ assert(p != end && *p++ == ' ');
+
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == ' ');
+
+ /* data */
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == ' ');
+
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == '\n');
+
+ assert(p == end);
+
+ /*
+ * "text" is "mm->end_code - mm->start_code" at execve(2) time.
+ * munmap() doesn't change it. It can be anything (just link
+ * statically). It can't be 0 because executing to this point
+ * implies at least 1 page of code.
+ */
+ assert(text > 0);
+
+ /*
+ * These two are always equal. Always 0 for statically linked
+ * executables and sometimes 0 for dynamically linked executables.
+ * There is no way to tell one from another without parsing ELF
+ * which is too much for this test.
+ */
+ assert(resident == shared);
+
+ return EXIT_SUCCESS;
+}
+
int main(void)
{
int rv = EXIT_SUCCESS;
@@ -328,6 +460,8 @@ int main(void)
abort();
}
+ g_protection_key_support = protection_key_support();
+
pid_t pid = fork();
if (pid == -1) {
perror("fork");
@@ -389,11 +523,9 @@ int main(void)
if (rv == EXIT_SUCCESS) {
rv = test_proc_pid_smaps_rollup(pid);
}
- /*
- * TODO test /proc/${pid}/statm, task_statm()
- * ->start_code, ->end_code aren't updated by munmap().
- * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
- */
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_statm(pid);
+ }
/* Cut the rope. */
int wstatus;
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index b8e2ea23cb3f..6e415ddb206f 100644..100755
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -331,3 +331,32 @@ specify_qemu_net () {
echo $1 -net none
fi
}
+
+# Extract the ftrace output from the console log output
+# The ftrace output in the original logs look like:
+# Dumping ftrace buffer:
+# ---------------------------------
+# [...]
+# ---------------------------------
+extract_ftrace_from_console() {
+ awk < "$1" '
+
+ /Dumping ftrace buffer:/ {
+ buffer_count++
+ print "Ftrace dump " buffer_count ":"
+ capture = 1
+ next
+ }
+
+ /---------------------------------/ {
+ if(capture == 1) {
+ capture = 2
+ next
+ } else if(capture == 2) {
+ capture = 0
+ print ""
+ }
+ }
+
+ capture == 2'
+}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 5be670dd4009..de65d77b47ff 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -13,7 +13,7 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
-T=/tmp/kvm-recheck.sh.$$
+T="`mktemp ${TMPDIR-/tmp}/kvm-recheck.sh.XXXXXX`"
trap 'rm -f $T' 0 2
configerrors=0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index b0f36a638a69..7af73ddc148d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -49,6 +49,7 @@ TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
TORTURE_MOD=rcutorture
TORTURE_TRUST_MAKE=""
+debuginfo="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"
resdir=""
configs=""
cpus=0
@@ -68,6 +69,7 @@ usage () {
echo " --cpus N"
echo " --datestamp string"
echo " --defconfig string"
+ echo " --debug-info"
echo " --dryrun batches|scenarios|sched|script"
echo " --duration minutes | <seconds>s | <hours>h | <days>d"
echo " --gdb"
@@ -135,6 +137,15 @@ do
ds=$2
shift
;;
+ --debug-info|--debuginfo)
+ if test -z "$TORTURE_KCONFIG_KCSAN_ARG" && test -z "$TORTURE_BOOT_GDB_ARG"
+ then
+ TORTURE_KCONFIG_KCSAN_ARG="$debuginfo"; export TORTURE_KCONFIG_KCSAN_ARG
+ TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
+ else
+ echo "Ignored redundant --debug-info (implied by --kcsan &c)"
+ fi
+ ;;
--defconfig)
checkarg --defconfig "defconfigtype" "$#" "$2" '^[^/][^/]*$' '^--'
TORTURE_DEFCONFIG=$2
@@ -163,7 +174,7 @@ do
shift
;;
--gdb)
- TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"; export TORTURE_KCONFIG_GDB_ARG
+ TORTURE_KCONFIG_GDB_ARG="$debuginfo"; export TORTURE_KCONFIG_GDB_ARG
TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
;;
@@ -179,7 +190,7 @@ do
shift
;;
--kasan)
- TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ TORTURE_KCONFIG_KASAN_ARG="$debuginfo CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
if test -n "$torture_qemu_mem_default"
then
TORTURE_QEMU_MEM=2G
@@ -191,7 +202,7 @@ do
shift
;;
--kcsan)
- TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
+ TORTURE_KCONFIG_KCSAN_ARG="$debuginfo CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
;;
--kmake-arg|--kmake-args)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 9ab0f6bc172c..b07c11cf6929 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -11,7 +11,7 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
-T=${TMPDIR-/tmp}/parse-console.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/parse-console.sh.XXXXXX`"
file="$1"
title="$2"
@@ -182,3 +182,10 @@ if ! test -s $file.diags
then
rm -f $file.diags
fi
+
+# Call extract_ftrace_from_console function, if the output is empty,
+# don't create $file.ftrace. Otherwise output the results to $file.ftrace
+extract_ftrace_from_console $file > $file.ftrace
+if [ ! -s $file.ftrace ]; then
+ rm -f $file.ftrace
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 12b50a4a881a..d5a0d8a33c27 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -472,7 +472,7 @@ do
if test -n "$firsttime"
then
torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
- torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+ torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
if test -f "$T/last-resdir-kasan"
then
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index 093ea6e8e65c..9003c56cd764 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -11,3 +11,4 @@ CONFIG_FORCE_TASKS_TRACE_RCU=y
#CHECK#CONFIG_TASKS_TRACE_RCU=y
CONFIG_TASKS_TRACE_RCU_READ_MB=n
CONFIG_RCU_EXPERT=y
+CONFIG_DEBUG_OBJECTS=y
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index 5073dbc96125..2deac2031de9 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := resctrl_tests
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index d3cbb829ff6a..bcbca356d56a 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -205,10 +205,11 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
* cache_val: execute benchmark and measure LLC occupancy resctrl
* and perf cache miss for the benchmark
* @param: parameters passed to cache_val()
+ * @span: buffer size for the benchmark
*
* Return: 0 on success. non-zero on failure.
*/
-int cat_val(struct resctrl_val_param *param)
+int cat_val(struct resctrl_val_param *param, size_t span)
{
int memflush = 1, operation = 0, ret = 0;
char *resctrl_val = param->resctrl_val;
@@ -245,7 +246,7 @@ int cat_val(struct resctrl_val_param *param)
if (ret)
break;
- if (run_fill_buf(param->span, memflush, operation, true)) {
+ if (run_fill_buf(span, memflush, operation, true)) {
fprintf(stderr, "Error-running fill buffer\n");
ret = -1;
goto pe_close;
@@ -294,7 +295,7 @@ int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
ret = platform && abs((int)diff_percent) > max_diff_percent &&
(cmt ? (abs(avg_diff) > max_diff) : true);
- ksft_print_msg("%s Check cache miss rate within %d%%\n",
+ ksft_print_msg("%s Check cache miss rate within %lu%%\n",
ret ? "Fail:" : "Pass:", max_diff_percent);
ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 3848dfb46aba..224ba8544d8a 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -41,7 +41,7 @@ static int cat_setup(struct resctrl_val_param *p)
return ret;
}
-static int check_results(struct resctrl_val_param *param)
+static int check_results(struct resctrl_val_param *param, size_t span)
{
char *token_array[8], temp[512];
unsigned long sum_llc_perf_miss = 0;
@@ -76,7 +76,7 @@ static int check_results(struct resctrl_val_param *param)
fclose(fp);
no_of_bits = count_bits(param->mask);
- return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
+ return show_cache_info(sum_llc_perf_miss, no_of_bits, span / 64,
MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
get_vendor() == ARCH_INTEL, false);
}
@@ -96,6 +96,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
char cbm_mask[256];
int count_of_bits;
char pipe_message;
+ size_t span;
/* Get default cbm mask for L3/L2 cache */
ret = get_cbm_mask(cache_type, cbm_mask);
@@ -140,7 +141,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
/* Set param values for parent thread which will be allocated bitmask
* with (max_bits - n) bits
*/
- param.span = cache_size * (count_of_bits - n) / count_of_bits;
+ span = cache_size * (count_of_bits - n) / count_of_bits;
strcpy(param.ctrlgrp, "c2");
strcpy(param.mongrp, "m2");
strcpy(param.filename, RESULT_FILE_NAME2);
@@ -162,23 +163,17 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
param.mask = l_mask_1;
strcpy(param.ctrlgrp, "c1");
strcpy(param.mongrp, "m1");
- param.span = cache_size * n / count_of_bits;
+ span = cache_size * n / count_of_bits;
strcpy(param.filename, RESULT_FILE_NAME1);
param.num_of_runs = 0;
param.cpu_no = sibling_cpu_no;
- } else {
- ret = signal_handler_register();
- if (ret) {
- kill(bm_pid, SIGKILL);
- goto out;
- }
}
remove(param.filename);
- ret = cat_val(&param);
+ ret = cat_val(&param, span);
if (ret == 0)
- ret = check_results(&param);
+ ret = check_results(&param, span);
if (bm_pid == 0) {
/* Tell parent that child is ready */
@@ -208,10 +203,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
}
close(pipefd[0]);
kill(bm_pid, SIGKILL);
- signal_handler_unregister();
}
-out:
cat_test_cleanup();
return ret;
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index cb2197647c6c..50bdbce9fba9 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -27,7 +27,7 @@ static int cmt_setup(struct resctrl_val_param *p)
return 0;
}
-static int check_results(struct resctrl_val_param *param, int no_of_bits)
+static int check_results(struct resctrl_val_param *param, size_t span, int no_of_bits)
{
char *token_array[8], temp[512];
unsigned long sum_llc_occu_resc = 0;
@@ -58,7 +58,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
}
fclose(fp);
- return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span,
+ return show_cache_info(sum_llc_occu_resc, no_of_bits, span,
MAX_DIFF, MAX_DIFF_PERCENT, runs - 1,
true, true);
}
@@ -68,16 +68,17 @@ void cmt_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
+int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd)
{
+ const char * const *cmd = benchmark_cmd;
+ const char *new_cmd[BENCHMARK_ARGS];
unsigned long cache_size = 0;
unsigned long long_mask;
+ char *span_str = NULL;
char cbm_mask[256];
int count_of_bits;
- int ret;
-
- if (!validate_resctrl_feature_request(CMT_STR))
- return -1;
+ size_t span;
+ int ret, i;
ret = get_cbm_mask("L3", cbm_mask);
if (ret)
@@ -105,24 +106,36 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
.cpu_no = cpu_no,
.filename = RESULT_FILE_NAME,
.mask = ~(long_mask << n) & long_mask,
- .span = cache_size * n / count_of_bits,
.num_of_runs = 0,
.setup = cmt_setup,
};
- if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
- sprintf(benchmark_cmd[1], "%zu", param.span);
+ span = cache_size * n / count_of_bits;
+
+ if (strcmp(cmd[0], "fill_buf") == 0) {
+ /* Duplicate the command to be able to replace span in it */
+ for (i = 0; benchmark_cmd[i]; i++)
+ new_cmd[i] = benchmark_cmd[i];
+ new_cmd[i] = NULL;
+
+ ret = asprintf(&span_str, "%zu", span);
+ if (ret < 0)
+ return -1;
+ new_cmd[1] = span_str;
+ cmd = new_cmd;
+ }
remove(RESULT_FILE_NAME);
- ret = resctrl_val(benchmark_cmd, &param);
+ ret = resctrl_val(cmd, &param);
if (ret)
goto out;
- ret = check_results(&param, n);
+ ret = check_results(&param, span, n);
out:
cmt_test_cleanup();
+ free(span_str);
return ret;
}
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 4d2f145804b8..d3bf4368341e 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -12,7 +12,7 @@
#define RESULT_FILE_NAME "result_mba"
#define NUM_OF_RUNS 5
-#define MAX_DIFF_PERCENT 5
+#define MAX_DIFF_PERCENT 8
#define ALLOCATION_MAX 100
#define ALLOCATION_MIN 10
#define ALLOCATION_STEP 10
@@ -141,7 +141,7 @@ void mba_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
+int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd)
{
struct resctrl_val_param param = {
.resctrl_val = MBA_STR,
@@ -149,7 +149,7 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
.mongrp = "m1",
.cpu_no = cpu_no,
.filename = RESULT_FILE_NAME,
- .bw_report = bw_report,
+ .bw_report = "reads",
.setup = mba_setup
};
int ret;
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index c7de6f5977f6..741533f2b075 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -11,7 +11,7 @@
#include "resctrl.h"
#define RESULT_FILE_NAME "result_mbm"
-#define MAX_DIFF_PERCENT 5
+#define MAX_DIFF_PERCENT 8
#define NUM_OF_RUNS 5
static int
@@ -95,7 +95,7 @@ static int mbm_setup(struct resctrl_val_param *p)
return END_OF_TESTS;
/* Set up shemata with 100% allocation on the first run. */
- if (p->num_of_runs == 0)
+ if (p->num_of_runs == 0 && validate_resctrl_feature_request("MB", NULL))
ret = write_schemata(p->ctrlgrp, "100", p->cpu_no,
p->resctrl_val);
@@ -109,16 +109,15 @@ void mbm_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd)
+int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd)
{
struct resctrl_val_param param = {
.resctrl_val = MBM_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
- .span = span,
.cpu_no = cpu_no,
.filename = RESULT_FILE_NAME,
- .bw_report = bw_report,
+ .bw_report = "reads",
.setup = mbm_setup
};
int ret;
@@ -129,7 +128,7 @@ int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd
if (ret)
goto out;
- ret = check_results(span);
+ ret = check_results(DEFAULT_SPAN);
out:
mbm_test_cleanup();
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 838d1a438f33..a33f414f6019 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#define _GNU_SOURCE
#ifndef RESCTRL_H
#define RESCTRL_H
#include <stdio.h>
@@ -28,16 +27,16 @@
#define RESCTRL_PATH "/sys/fs/resctrl"
#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu"
#define INFO_PATH "/sys/fs/resctrl/info"
-#define L3_PATH "/sys/fs/resctrl/info/L3"
-#define MB_PATH "/sys/fs/resctrl/info/MB"
-#define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON"
-#define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features"
#define ARCH_INTEL 1
#define ARCH_AMD 2
#define END_OF_TESTS 1
+#define BENCHMARK_ARGS 64
+
+#define DEFAULT_SPAN (250 * MB)
+
#define PARENT_EXIT(err_msg) \
do { \
perror(err_msg); \
@@ -52,7 +51,6 @@
* @ctrlgrp: Name of the control monitor group (con_mon grp)
* @mongrp: Name of the monitor group (mon grp)
* @cpu_no: CPU number to which the benchmark would be binded
- * @span: Memory bytes accessed in each benchmark iteration
* @filename: Name of file to which the o/p should be written
* @bw_report: Bandwidth report type (reads vs writes)
* @setup: Call back function to setup test environment
@@ -62,7 +60,6 @@ struct resctrl_val_param {
char ctrlgrp[64];
char mongrp[64];
int cpu_no;
- size_t span;
char filename[64];
char *bw_report;
unsigned long mask;
@@ -86,10 +83,9 @@ int get_resource_id(int cpu_no, int *resource_id);
int mount_resctrlfs(void);
int umount_resctrlfs(void);
int validate_bw_report_request(char *bw_report);
-bool validate_resctrl_feature_request(const char *resctrl_val);
+bool validate_resctrl_feature_request(const char *resource, const char *feature);
char *fgrep(FILE *inf, const char *str);
int taskset_benchmark(pid_t bm_pid, int cpu_no);
-void run_benchmark(int signum, siginfo_t *info, void *ucontext);
int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
char *resctrl_val);
int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
@@ -97,21 +93,21 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
int group_fd, unsigned long flags);
int run_fill_buf(size_t span, int memflush, int op, bool once);
-int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
-int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd);
+int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param);
+int mbm_bw_change(int cpu_no, const char * const *benchmark_cmd);
void tests_cleanup(void);
void mbm_test_cleanup(void);
-int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
+int mba_schemata_change(int cpu_no, const char * const *benchmark_cmd);
void mba_test_cleanup(void);
int get_cbm_mask(char *cache_type, char *cbm_mask);
int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
int signal_handler_register(void);
void signal_handler_unregister(void);
-int cat_val(struct resctrl_val_param *param);
+int cat_val(struct resctrl_val_param *param, size_t span);
void cat_test_cleanup(void);
int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
-int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
+int cmt_resctrl_val(int cpu_no, int n, const char * const *benchmark_cmd);
unsigned int count_bits(unsigned long n);
void cmt_test_cleanup(void);
int get_core_sibling(int cpu_no);
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index d511daeb6851..2bbe3045a018 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -10,9 +10,6 @@
*/
#include "resctrl.h"
-#define BENCHMARK_ARGS 64
-#define BENCHMARK_ARG_SIZE 64
-
static int detect_vendor(void)
{
FILE *inf = fopen("/proc/cpuinfo", "r");
@@ -52,8 +49,8 @@ int get_vendor(void)
static void cmd_help(void)
{
- printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
- printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n");
+ printf("usage: resctrl_tests [-h] [-t test list] [-n no_of_bits] [-b benchmark_cmd [option]...]\n");
+ printf("\t-b benchmark_cmd [option]...: run specified benchmark for MBM, MBA and CMT\n");
printf("\t default benchmark is builtin fill_buf\n");
printf("\t-t test list: run tests specified in the test list, ");
printf("e.g. -t mbm,mba,cmt,cat\n");
@@ -70,72 +67,98 @@ void tests_cleanup(void)
cat_test_cleanup();
}
-static void run_mbm_test(char **benchmark_cmd, size_t span,
- int cpu_no, char *bw_report)
+static int test_prepare(void)
{
int res;
- ksft_print_msg("Starting MBM BW change ...\n");
+ res = signal_handler_register();
+ if (res) {
+ ksft_print_msg("Failed to register signal handler\n");
+ return res;
+ }
res = mount_resctrlfs();
if (res) {
- ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+ signal_handler_unregister();
+ ksft_print_msg("Failed to mount resctrl FS\n");
+ return res;
+ }
+ return 0;
+}
+
+static void test_cleanup(void)
+{
+ umount_resctrlfs();
+ signal_handler_unregister();
+}
+
+static void run_mbm_test(const char * const *benchmark_cmd, int cpu_no)
+{
+ int res;
+
+ ksft_print_msg("Starting MBM BW change ...\n");
+
+ if (test_prepare()) {
+ ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
return;
}
- if (!validate_resctrl_feature_request(MBM_STR) || (get_vendor() != ARCH_INTEL)) {
+ if (!validate_resctrl_feature_request("L3_MON", "mbm_total_bytes") ||
+ !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") ||
+ (get_vendor() != ARCH_INTEL)) {
ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n");
- goto umount;
+ goto cleanup;
}
- res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
+ res = mbm_bw_change(cpu_no, benchmark_cmd);
ksft_test_result(!res, "MBM: bw change\n");
if ((get_vendor() == ARCH_INTEL) && res)
ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
-umount:
- umount_resctrlfs();
+cleanup:
+ test_cleanup();
}
-static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report)
+static void run_mba_test(const char * const *benchmark_cmd, int cpu_no)
{
int res;
ksft_print_msg("Starting MBA Schemata change ...\n");
- res = mount_resctrlfs();
- if (res) {
- ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+ if (test_prepare()) {
+ ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
return;
}
- if (!validate_resctrl_feature_request(MBA_STR) || (get_vendor() != ARCH_INTEL)) {
+ if (!validate_resctrl_feature_request("MB", NULL) ||
+ !validate_resctrl_feature_request("L3_MON", "mbm_local_bytes") ||
+ (get_vendor() != ARCH_INTEL)) {
ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n");
- goto umount;
+ goto cleanup;
}
- res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
+ res = mba_schemata_change(cpu_no, benchmark_cmd);
ksft_test_result(!res, "MBA: schemata change\n");
-umount:
- umount_resctrlfs();
+cleanup:
+ test_cleanup();
}
-static void run_cmt_test(char **benchmark_cmd, int cpu_no)
+static void run_cmt_test(const char * const *benchmark_cmd, int cpu_no)
{
int res;
ksft_print_msg("Starting CMT test ...\n");
- res = mount_resctrlfs();
- if (res) {
- ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+ if (test_prepare()) {
+ ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
return;
}
- if (!validate_resctrl_feature_request(CMT_STR)) {
+ if (!validate_resctrl_feature_request("L3_MON", "llc_occupancy") ||
+ !validate_resctrl_feature_request("L3", NULL)) {
ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n");
- goto umount;
+ goto cleanup;
}
res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
@@ -143,8 +166,8 @@ static void run_cmt_test(char **benchmark_cmd, int cpu_no)
if ((get_vendor() == ARCH_INTEL) && res)
ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
-umount:
- umount_resctrlfs();
+cleanup:
+ test_cleanup();
}
static void run_cat_test(int cpu_no, int no_of_bits)
@@ -153,48 +176,53 @@ static void run_cat_test(int cpu_no, int no_of_bits)
ksft_print_msg("Starting CAT test ...\n");
- res = mount_resctrlfs();
- if (res) {
- ksft_exit_fail_msg("Failed to mount resctrl FS\n");
+ if (test_prepare()) {
+ ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
return;
}
- if (!validate_resctrl_feature_request(CAT_STR)) {
+ if (!validate_resctrl_feature_request("L3", NULL)) {
ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n");
- goto umount;
+ goto cleanup;
}
res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
ksft_test_result(!res, "CAT: test\n");
-umount:
- umount_resctrlfs();
+cleanup:
+ test_cleanup();
}
int main(int argc, char **argv)
{
- bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true;
- char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
- char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
- int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0;
- int ben_ind, ben_count, tests = 0;
- size_t span = 250 * MB;
+ bool mbm_test = true, mba_test = true, cmt_test = true;
+ const char *benchmark_cmd[BENCHMARK_ARGS] = {};
+ int c, cpu_no = 1, i, no_of_bits = 0;
+ char *span_str = NULL;
bool cat_test = true;
+ int tests = 0;
+ int ret;
- for (i = 0; i < argc; i++) {
- if (strcmp(argv[i], "-b") == 0) {
- ben_ind = i + 1;
- ben_count = argc - ben_ind;
- argc_new = ben_ind - 1;
- has_ben = true;
- break;
- }
- }
-
- while ((c = getopt(argc_new, argv, "ht:b:n:p:")) != -1) {
+ while ((c = getopt(argc, argv, "ht:b:n:p:")) != -1) {
char *token;
switch (c) {
+ case 'b':
+ /*
+ * First move optind back to the (first) optarg and
+ * then build the benchmark command using the
+ * remaining arguments.
+ */
+ optind--;
+ if (argc - optind >= BENCHMARK_ARGS)
+ ksft_exit_fail_msg("Too long benchmark command");
+
+ /* Extract benchmark command from command line. */
+ for (i = 0; i < argc - optind; i++)
+ benchmark_cmd[i] = argv[i + optind];
+ benchmark_cmd[i] = NULL;
+
+ goto last_arg;
case 't':
token = strtok(optarg, ",");
@@ -243,6 +271,7 @@ int main(int argc, char **argv)
return -1;
}
}
+last_arg:
ksft_print_header();
@@ -254,29 +283,6 @@ int main(int argc, char **argv)
if (geteuid() != 0)
return ksft_exit_skip("Not running as root. Skipping...\n");
- if (has_ben) {
- /* Extract benchmark command from command line. */
- for (i = ben_ind; i < argc; i++) {
- benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i];
- sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]);
- }
- benchmark_cmd[ben_count] = NULL;
- } else {
- /* If no benchmark is given by "-b" argument, use fill_buf. */
- for (i = 0; i < 5; i++)
- benchmark_cmd[i] = benchmark_cmd_area[i];
-
- strcpy(benchmark_cmd[0], "fill_buf");
- sprintf(benchmark_cmd[1], "%zu", span);
- strcpy(benchmark_cmd[2], "1");
- strcpy(benchmark_cmd[3], "0");
- strcpy(benchmark_cmd[4], "false");
- benchmark_cmd[5] = NULL;
- }
-
- sprintf(bw_report, "reads");
- sprintf(bm_type, "fill_buf");
-
if (!check_resctrlfs_support())
return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
@@ -285,13 +291,26 @@ int main(int argc, char **argv)
filter_dmesg();
+ if (!benchmark_cmd[0]) {
+ /* If no benchmark is given by "-b" argument, use fill_buf. */
+ benchmark_cmd[0] = "fill_buf";
+ ret = asprintf(&span_str, "%u", DEFAULT_SPAN);
+ if (ret < 0)
+ ksft_exit_fail_msg("Out of memory!\n");
+ benchmark_cmd[1] = span_str;
+ benchmark_cmd[2] = "1";
+ benchmark_cmd[3] = "0";
+ benchmark_cmd[4] = "false";
+ benchmark_cmd[5] = NULL;
+ }
+
ksft_set_plan(tests ? : 4);
if (mbm_test)
- run_mbm_test(benchmark_cmd, span, cpu_no, bw_report);
+ run_mbm_test(benchmark_cmd, cpu_no);
if (mba_test)
- run_mba_test(benchmark_cmd, cpu_no, bw_report);
+ run_mba_test(benchmark_cmd, cpu_no);
if (cmt_test)
run_cmt_test(benchmark_cmd, cpu_no);
@@ -299,5 +318,6 @@ int main(int argc, char **argv)
if (cat_test)
run_cat_test(cpu_no, no_of_bits);
+ free(span_str);
ksft_finished();
}
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index f0f6c5f6e98b..88789678917b 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -468,7 +468,9 @@ pid_t bm_pid, ppid;
void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
- kill(bm_pid, SIGKILL);
+ /* Only kill child after bm_pid is set after fork() */
+ if (bm_pid)
+ kill(bm_pid, SIGKILL);
umount_resctrlfs();
tests_cleanup();
ksft_print_msg("Ending\n\n");
@@ -482,9 +484,11 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
*/
int signal_handler_register(void)
{
- struct sigaction sigact;
+ struct sigaction sigact = {};
int ret = 0;
+ bm_pid = 0;
+
sigact.sa_sigaction = ctrlc_handler;
sigemptyset(&sigact.sa_mask);
sigact.sa_flags = SA_SIGINFO;
@@ -504,7 +508,7 @@ int signal_handler_register(void)
*/
void signal_handler_unregister(void)
{
- struct sigaction sigact;
+ struct sigaction sigact = {};
sigact.sa_handler = SIG_DFL;
sigemptyset(&sigact.sa_mask);
@@ -622,6 +626,56 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
}
/*
+ * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
+ * in specified signal. Direct benchmark stdio to /dev/null.
+ * @signum: signal number
+ * @info: signal info
+ * @ucontext: user context in signal handling
+ */
+static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
+{
+ int operation, ret, memflush;
+ char **benchmark_cmd;
+ size_t span;
+ bool once;
+ FILE *fp;
+
+ benchmark_cmd = info->si_ptr;
+
+ /*
+ * Direct stdio of child to /dev/null, so that only parent writes to
+ * stdio (console)
+ */
+ fp = freopen("/dev/null", "w", stdout);
+ if (!fp)
+ PARENT_EXIT("Unable to direct benchmark status to /dev/null");
+
+ if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
+ /* Execute default fill_buf benchmark */
+ span = strtoul(benchmark_cmd[1], NULL, 10);
+ memflush = atoi(benchmark_cmd[2]);
+ operation = atoi(benchmark_cmd[3]);
+ if (!strcmp(benchmark_cmd[4], "true"))
+ once = true;
+ else if (!strcmp(benchmark_cmd[4], "false"))
+ once = false;
+ else
+ PARENT_EXIT("Invalid once parameter");
+
+ if (run_fill_buf(span, memflush, operation, once))
+ fprintf(stderr, "Error in running fill buffer\n");
+ } else {
+ /* Execute specified benchmark */
+ ret = execvp(benchmark_cmd[0], benchmark_cmd);
+ if (ret)
+ perror("wrong\n");
+ }
+
+ fclose(stdout);
+ PARENT_EXIT("Unable to run specified benchmark");
+}
+
+/*
* resctrl_val: execute benchmark and measure memory bandwidth on
* the benchmark
* @benchmark_cmd: benchmark command and its arguments
@@ -629,7 +683,7 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
*
* Return: 0 on success. non-zero on failure.
*/
-int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
+int resctrl_val(const char * const *benchmark_cmd, struct resctrl_val_param *param)
{
char *resctrl_val = param->resctrl_val;
unsigned long bw_resc_start = 0;
@@ -706,28 +760,30 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
ksft_print_msg("Benchmark PID: %d\n", bm_pid);
- ret = signal_handler_register();
- if (ret)
- goto out;
-
- value.sival_ptr = benchmark_cmd;
+ /*
+ * The cast removes constness but nothing mutates benchmark_cmd within
+ * the context of this process. At the receiving process, it becomes
+ * argv, which is mutable, on exec() but that's after fork() so it
+ * doesn't matter for the process running the tests.
+ */
+ value.sival_ptr = (void *)benchmark_cmd;
/* Taskset benchmark to specified cpu */
ret = taskset_benchmark(bm_pid, param->cpu_no);
if (ret)
- goto unregister;
+ goto out;
/* Write benchmark to specified control&monitoring grp in resctrl FS */
ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
resctrl_val);
if (ret)
- goto unregister;
+ goto out;
if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
ret = initialize_mem_bw_imc();
if (ret)
- goto unregister;
+ goto out;
initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
param->cpu_no, resctrl_val);
@@ -742,7 +798,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
sizeof(pipe_message)) {
perror("# failed reading message from child process");
close(pipefd[0]);
- goto unregister;
+ goto out;
}
}
close(pipefd[0]);
@@ -751,7 +807,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
perror("# sigqueue SIGUSR1 to child");
ret = errno;
- goto unregister;
+ goto out;
}
/* Give benchmark enough time to fully run */
@@ -780,8 +836,6 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
}
}
-unregister:
- signal_handler_unregister();
out:
kill(bm_pid, SIGKILL);
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index bd36ee206602..5ebd43683876 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -8,6 +8,9 @@
* Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
* Fenghua Yu <fenghua.yu@intel.com>
*/
+#include <fcntl.h>
+#include <limits.h>
+
#include "resctrl.h"
static int find_resctrl_mount(char *buffer)
@@ -292,58 +295,6 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no)
}
/*
- * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
- * in specified signal. Direct benchmark stdio to /dev/null.
- * @signum: signal number
- * @info: signal info
- * @ucontext: user context in signal handling
- *
- * Return: void
- */
-void run_benchmark(int signum, siginfo_t *info, void *ucontext)
-{
- int operation, ret, memflush;
- char **benchmark_cmd;
- size_t span;
- bool once;
- FILE *fp;
-
- benchmark_cmd = info->si_ptr;
-
- /*
- * Direct stdio of child to /dev/null, so that only parent writes to
- * stdio (console)
- */
- fp = freopen("/dev/null", "w", stdout);
- if (!fp)
- PARENT_EXIT("Unable to direct benchmark status to /dev/null");
-
- if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
- /* Execute default fill_buf benchmark */
- span = strtoul(benchmark_cmd[1], NULL, 10);
- memflush = atoi(benchmark_cmd[2]);
- operation = atoi(benchmark_cmd[3]);
- if (!strcmp(benchmark_cmd[4], "true"))
- once = true;
- else if (!strcmp(benchmark_cmd[4], "false"))
- once = false;
- else
- PARENT_EXIT("Invalid once parameter");
-
- if (run_fill_buf(span, memflush, operation, once))
- fprintf(stderr, "Error in running fill buffer\n");
- } else {
- /* Execute specified benchmark */
- ret = execvp(benchmark_cmd[0], benchmark_cmd);
- if (ret)
- perror("wrong\n");
- }
-
- fclose(stdout);
- PARENT_EXIT("Unable to run specified benchmark");
-}
-
-/*
* create_grp - Create a group only if one doesn't exist
* @grp_name: Name of the group
* @grp: Full path and name of the group
@@ -488,9 +439,8 @@ out:
*/
int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
{
- char controlgroup[1024], schema[1024], reason[64];
- int resource_id, ret = 0;
- FILE *fp;
+ char controlgroup[1024], reason[128], schema[1024] = {};
+ int resource_id, fd, schema_len = -1, ret = 0;
if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) &&
strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) &&
@@ -518,28 +468,39 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) ||
!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
- sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
+ schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n",
+ "L3:", resource_id, '=', schemata);
if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
- sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
-
- fp = fopen(controlgroup, "w");
- if (!fp) {
- sprintf(reason, "Failed to open control group");
+ schema_len = snprintf(schema, sizeof(schema), "%s%d%c%s\n",
+ "MB:", resource_id, '=', schemata);
+ if (schema_len < 0 || schema_len >= sizeof(schema)) {
+ snprintf(reason, sizeof(reason),
+ "snprintf() failed with return value : %d", schema_len);
ret = -1;
-
goto out;
}
- if (fprintf(fp, "%s\n", schema) < 0) {
- sprintf(reason, "Failed to write schemata in control group");
- fclose(fp);
+ fd = open(controlgroup, O_WRONLY);
+ if (fd < 0) {
+ snprintf(reason, sizeof(reason),
+ "open() failed : %s", strerror(errno));
ret = -1;
- goto out;
+ goto err_schema_not_empty;
}
- fclose(fp);
+ if (write(fd, schema, schema_len) < 0) {
+ snprintf(reason, sizeof(reason),
+ "write() failed : %s", strerror(errno));
+ close(fd);
+ ret = -1;
+
+ goto err_schema_not_empty;
+ }
+ close(fd);
+err_schema_not_empty:
+ schema[schema_len - 1] = 0;
out:
ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n",
schema, ret ? " # " : "",
@@ -604,63 +565,46 @@ char *fgrep(FILE *inf, const char *str)
/*
* validate_resctrl_feature_request - Check if requested feature is valid.
- * @resctrl_val: Requested feature
+ * @resource: Required resource (e.g., MB, L3, L2, L3_MON, etc.)
+ * @feature: Required monitor feature (in mon_features file). Can only be
+ * set for L3_MON. Must be NULL for all other resources.
*
- * Return: True if the feature is supported, else false. False is also
- * returned if resctrl FS is not mounted.
+ * Return: True if the resource/feature is supported, else false. False is
+ * also returned if resctrl FS is not mounted.
*/
-bool validate_resctrl_feature_request(const char *resctrl_val)
+bool validate_resctrl_feature_request(const char *resource, const char *feature)
{
+ char res_path[PATH_MAX];
struct stat statbuf;
- bool found = false;
char *res;
FILE *inf;
int ret;
- if (!resctrl_val)
+ if (!resource)
return false;
ret = find_resctrl_mount(NULL);
if (ret)
return false;
- if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
- if (!stat(L3_PATH, &statbuf))
- return true;
- } else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
- if (!stat(MB_PATH, &statbuf))
- return true;
- } else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
- !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
- if (!stat(L3_MON_PATH, &statbuf)) {
- inf = fopen(L3_MON_FEATURES_PATH, "r");
- if (!inf)
- return false;
-
- if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
- res = fgrep(inf, "llc_occupancy");
- if (res) {
- found = true;
- free(res);
- }
- }
-
- if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
- res = fgrep(inf, "mbm_total_bytes");
- if (res) {
- free(res);
- res = fgrep(inf, "mbm_local_bytes");
- if (res) {
- found = true;
- free(res);
- }
- }
- }
- fclose(inf);
- }
- }
+ snprintf(res_path, sizeof(res_path), "%s/%s", INFO_PATH, resource);
+
+ if (stat(res_path, &statbuf))
+ return false;
+
+ if (!feature)
+ return true;
+
+ snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource);
+ inf = fopen(res_path, "r");
+ if (!inf)
+ return false;
+
+ res = fgrep(inf, feature);
+ free(res);
+ fclose(inf);
- return found;
+ return !!res;
}
int filter_dmesg(void)
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
index ebdbb3c22e54..f224b84591fb 100644
--- a/tools/testing/selftests/riscv/hwprobe/Makefile
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -2,9 +2,14 @@
# Copyright (C) 2021 ARM Limited
# Originally tools/testing/arm64/abi/Makefile
-TEST_GEN_PROGS := hwprobe
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := hwprobe cbo
include ../../lib.mk
$(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
- $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/cbo: cbo.c sys_hwprobe.S
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
new file mode 100644
index 000000000000..50a2cc8aef38
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * Run with 'taskset -c <cpu-list> cbo' to only execute hwprobe on a
+ * subset of cpus, as well as only executing the tests on those cpus.
+ */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <asm/ucontext.h>
+
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+
+static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
+
+static bool illegal_insn;
+
+static void sigill_handler(int sig, siginfo_t *info, void *context)
+{
+ unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
+ uint32_t insn = *(uint32_t *)regs[0];
+
+ assert(insn == MK_CBO(regs[11]));
+
+ illegal_insn = true;
+ regs[0] += 4;
+}
+
+static void cbo_insn(char *base, int fn)
+{
+ uint32_t insn = MK_CBO(fn);
+
+ asm volatile(
+ "mv a0, %0\n"
+ "li a1, %1\n"
+ ".4byte %2\n"
+ : : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
+}
+
+static void cbo_inval(char *base) { cbo_insn(base, 0); }
+static void cbo_clean(char *base) { cbo_insn(base, 1); }
+static void cbo_flush(char *base) { cbo_insn(base, 2); }
+static void cbo_zero(char *base) { cbo_insn(base, 4); }
+
+static void test_no_zicbom(void *arg)
+{
+ ksft_print_msg("Testing Zicbom instructions remain privileged\n");
+
+ illegal_insn = false;
+ cbo_clean(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.clean\n");
+
+ illegal_insn = false;
+ cbo_flush(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.flush\n");
+
+ illegal_insn = false;
+ cbo_inval(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.inval\n");
+}
+
+static void test_no_zicboz(void *arg)
+{
+ ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
+
+ illegal_insn = false;
+ cbo_zero(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.zero\n");
+}
+
+static bool is_power_of_2(__u64 n)
+{
+ return n != 0 && (n & (n - 1)) == 0;
+}
+
+static void test_zicboz(void *arg)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
+ };
+ cpu_set_t *cpus = (cpu_set_t *)arg;
+ __u64 block_size;
+ int i, j;
+ long rc;
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+ block_size = pair.value;
+ ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
+ is_power_of_2(block_size), "Zicboz block size\n");
+ ksft_print_msg("Zicboz block size: %ld\n", block_size);
+
+ illegal_insn = false;
+ cbo_zero(&mem[block_size]);
+ ksft_test_result(!illegal_insn, "cbo.zero\n");
+
+ if (illegal_insn || !is_power_of_2(block_size)) {
+ ksft_test_result_skip("cbo.zero check\n");
+ return;
+ }
+
+ assert(block_size <= 1024);
+
+ for (i = 0; i < 4096 / block_size; ++i) {
+ if (i % 2)
+ cbo_zero(&mem[i * block_size]);
+ }
+
+ for (i = 0; i < 4096 / block_size; ++i) {
+ char expected = i % 2 ? 0x0 : 0xa5;
+
+ for (j = 0; j < block_size; ++j) {
+ if (mem[i * block_size + j] != expected) {
+ ksft_test_result_fail("cbo.zero check\n");
+ ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
+ i * block_size + j, expected);
+ return;
+ }
+ }
+ }
+
+ ksft_test_result_pass("cbo.zero check\n");
+}
+
+static void check_no_zicboz_cpus(cpu_set_t *cpus)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_IMA_EXT_0,
+ };
+ cpu_set_t one_cpu;
+ int i = 0, c = 0;
+ long rc;
+
+ while (i++ < CPU_COUNT(cpus)) {
+ while (!CPU_ISSET(c, cpus))
+ ++c;
+
+ CPU_ZERO(&one_cpu);
+ CPU_SET(c, &one_cpu);
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
+ assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
+ ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n"
+ "Use taskset to select a set of harts where Zicboz\n"
+ "presence (present or not) is consistent for each hart\n");
+ ++c;
+ }
+}
+
+enum {
+ TEST_ZICBOZ,
+ TEST_NO_ZICBOZ,
+ TEST_NO_ZICBOM,
+};
+
+static struct test_info {
+ bool enabled;
+ unsigned int nr_tests;
+ void (*test_fn)(void *arg);
+} tests[] = {
+ [TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz },
+ [TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz },
+ [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom },
+};
+
+int main(int argc, char **argv)
+{
+ struct sigaction act = {
+ .sa_sigaction = &sigill_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+ struct riscv_hwprobe pair;
+ unsigned int plan = 0;
+ cpu_set_t cpus;
+ long rc;
+ int i;
+
+ if (argc > 1 && !strcmp(argv[1], "--sigill")) {
+ rc = sigaction(SIGILL, &act, NULL);
+ assert(rc == 0);
+ tests[TEST_NO_ZICBOZ].enabled = true;
+ tests[TEST_NO_ZICBOM].enabled = true;
+ }
+
+ rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
+ assert(rc == 0);
+
+ ksft_print_header();
+
+ pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
+ if (rc < 0)
+ ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
+ assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
+ tests[TEST_ZICBOZ].enabled = true;
+ tests[TEST_NO_ZICBOZ].enabled = false;
+ } else {
+ check_no_zicboz_cpus(&cpus);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i)
+ plan += tests[i].enabled ? tests[i].nr_tests : 0;
+
+ if (plan == 0)
+ ksft_print_msg("No tests enabled.\n");
+ else
+ ksft_set_plan(plan);
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (tests[i].enabled)
+ tests[i].test_fn(&cpus);
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
index 09f290a67420..c474891df307 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -1,14 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <stddef.h>
-#include <asm/hwprobe.h>
-
-/*
- * Rather than relying on having a new enough libc to define this, just do it
- * ourselves. This way we don't need to be coupled to a new-enough libc to
- * contain the call.
- */
-long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
- size_t cpu_count, unsigned long *cpus, unsigned int flags);
+#include "hwprobe.h"
+#include "../../kselftest.h"
int main(int argc, char **argv)
{
@@ -16,6 +8,9 @@ int main(int argc, char **argv)
unsigned long cpus;
long out;
+ ksft_print_header();
+ ksft_set_plan(5);
+
/* Fake the CPU_SET ops. */
cpus = -1;
@@ -25,13 +20,16 @@ int main(int argc, char **argv)
*/
for (long i = 0; i < 8; i++)
pairs[i].key = i;
+
out = riscv_hwprobe(pairs, 8, 1, &cpus, 0);
if (out != 0)
- return -1;
+ ksft_exit_fail_msg("hwprobe() failed with %ld\n", out);
+
for (long i = 0; i < 4; ++i) {
/* Fail if the kernel claims not to recognize a base key. */
if ((i < 4) && (pairs[i].key != i))
- return -2;
+ ksft_exit_fail_msg("Failed to recognize base key: key != i, "
+ "key=%ld, i=%ld\n", pairs[i].key, i);
if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
continue;
@@ -39,52 +37,30 @@ int main(int argc, char **argv)
if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
continue;
- return -3;
+ ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
}
- /*
- * This should also work with a NULL CPU set, but should not work
- * with an improperly supplied CPU set.
- */
out = riscv_hwprobe(pairs, 8, 0, 0, 0);
- if (out != 0)
- return -4;
+ ksft_test_result(out == 0, "NULL CPU set\n");
out = riscv_hwprobe(pairs, 8, 0, &cpus, 0);
- if (out == 0)
- return -5;
+ ksft_test_result(out != 0, "Bad CPU set\n");
out = riscv_hwprobe(pairs, 8, 1, 0, 0);
- if (out == 0)
- return -6;
+ ksft_test_result(out != 0, "NULL CPU set with non-zero count\n");
- /*
- * Check that keys work by providing one that we know exists, and
- * checking to make sure the resultig pair is what we asked for.
- */
pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR;
out = riscv_hwprobe(pairs, 1, 1, &cpus, 0);
- if (out != 0)
- return -7;
- if (pairs[0].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
- return -8;
+ ksft_test_result(out == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+ "Existing key is maintained\n");
- /*
- * Check that an unknown key gets overwritten with -1,
- * but doesn't block elements after it.
- */
pairs[0].key = 0x5555;
pairs[1].key = 1;
pairs[1].value = 0xAAAA;
out = riscv_hwprobe(pairs, 2, 0, 0, 0);
- if (out != 0)
- return -9;
-
- if (pairs[0].key != -1)
- return -10;
-
- if ((pairs[1].key != 1) || (pairs[1].value == 0xAAAA))
- return -11;
+ ksft_test_result(out == 0 && pairs[0].key == -1 &&
+ pairs[1].key == 1 && pairs[1].value != 0xAAAA,
+ "Unknown key overwritten with -1 and doesn't block other elements\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
new file mode 100644
index 000000000000..721b0ce73a56
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_RISCV_HWPROBE_H
+#define SELFTEST_RISCV_HWPROBE_H
+#include <stddef.h>
+#include <asm/hwprobe.h>
+
+/*
+ * Rather than relying on having a new enough libc to define this, just do it
+ * ourselves. This way we don't need to be coupled to a new-enough libc to
+ * contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus, unsigned int flags);
+
+#endif
diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile
index 11e0f0568923..c333263f2b27 100644
--- a/tools/testing/selftests/riscv/mm/Makefile
+++ b/tools/testing/selftests/riscv/mm/Makefile
@@ -5,11 +5,11 @@
# Additional include paths needed by kselftest.h and local headers
CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
-TEST_GEN_FILES := testcases/mmap_default testcases/mmap_bottomup
+TEST_GEN_FILES := mmap_default mmap_bottomup
-TEST_PROGS := testcases/run_mmap.sh
+TEST_PROGS := run_mmap.sh
include ../../lib.mk
-$(OUTPUT)/mm: testcases/mmap_default.c testcases/mmap_bottomup.c testcases/mmap_tests.h
+$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h
$(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
index b29379f7e478..1757d19ca89b 100644
--- a/tools/testing/selftests/riscv/mm/testcases/mmap_bottomup.c
+++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <sys/mman.h>
-#include <testcases/mmap_test.h>
+#include <mmap_test.h>
#include "../../kselftest_harness.h"
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c
index d1accb91b726..c63c60b9397e 100644
--- a/tools/testing/selftests/riscv/mm/testcases/mmap_default.c
+++ b/tools/testing/selftests/riscv/mm/mmap_default.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <sys/mman.h>
-#include <testcases/mmap_test.h>
+#include <mmap_test.h>
#include "../../kselftest_harness.h"
diff --git a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
index 9b8434f62f57..9b8434f62f57 100644
--- a/tools/testing/selftests/riscv/mm/testcases/mmap_test.h
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
diff --git a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh
index ca5ad7c48bad..ca5ad7c48bad 100755
--- a/tools/testing/selftests/riscv/mm/testcases/run_mmap.sh
+++ b/tools/testing/selftests/riscv/mm/run_mmap.sh
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index bf951a490bb4..20403d58345c 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1231,7 +1231,7 @@ void *test_membarrier_worker_thread(void *arg)
}
/* Wait for initialization. */
- while (!atomic_load(&args->percpu_list_ptr)) {}
+ while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}
for (i = 0; i < iters; ++i) {
int ret;
@@ -1299,22 +1299,22 @@ void *test_membarrier_manager_thread(void *arg)
test_membarrier_init_percpu_list(&list_a);
test_membarrier_init_percpu_list(&list_b);
- atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+ __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
- while (!atomic_load(&args->stop)) {
+ while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
/* list_a is "active". */
cpu_a = rand() % CPU_SETSIZE;
/*
* As list_b is "inactive", we should never see changes
* to list_b.
*/
- if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+ if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
fprintf(stderr, "Membarrier test failed\n");
abort();
}
/* Make list_b "active". */
- atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+ __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
if (rseq_membarrier_expedited(cpu_a) &&
errno != ENXIO /* missing CPU */) {
perror("sys_membarrier");
@@ -1324,27 +1324,27 @@ void *test_membarrier_manager_thread(void *arg)
* Cpu A should now only modify list_b, so the values
* in list_a should be stable.
*/
- expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+ expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);
cpu_b = rand() % CPU_SETSIZE;
/*
* As list_a is "inactive", we should never see changes
* to list_a.
*/
- if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+ if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
fprintf(stderr, "Membarrier test failed\n");
abort();
}
/* Make list_a "active". */
- atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+ __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
if (rseq_membarrier_expedited(cpu_b) &&
errno != ENXIO /* missing CPU*/) {
perror("sys_membarrier");
abort();
}
/* Remember a value from list_b. */
- expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+ expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
}
test_membarrier_free_percpu_list(&list_a);
@@ -1401,7 +1401,7 @@ void test_membarrier(void)
}
}
- atomic_store(&thread_args.stop, 1);
+ __atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
ret = pthread_join(manager_thread, NULL);
if (ret) {
errno = ret;
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index 98d37cb744fb..07227fab1cc9 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -111,7 +111,7 @@ int main(void)
/* Make sure more than the required minimum. */
stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
- ksft_print_msg("[NOTE]\tthe stack size is %lu\n", stack_size);
+ ksft_print_msg("[NOTE]\tthe stack size is %u\n", stack_size);
ksft_print_header();
ksft_set_plan(3);
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
index fc9f8cde7d42..3b0f17b81ac2 100755
--- a/tools/testing/selftests/static_keys/test_static_keys.sh
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -6,18 +6,18 @@
ksft_skip=4
if ! /sbin/modprobe -q -n test_static_key_base; then
- echo "static_key: module test_static_key_base is not found [SKIP]"
+ echo "static_keys: module test_static_key_base is not found [SKIP]"
exit $ksft_skip
fi
if ! /sbin/modprobe -q -n test_static_keys; then
- echo "static_key: module test_static_keys is not found [SKIP]"
+ echo "static_keys: module test_static_keys is not found [SKIP]"
exit $ksft_skip
fi
if /sbin/modprobe -q test_static_key_base; then
if /sbin/modprobe -q test_static_keys; then
- echo "static_key: ok"
+ echo "static_keys: ok"
/sbin/modprobe -q -r test_static_keys
/sbin/modprobe -q -r test_static_key_base
else
@@ -25,6 +25,6 @@ if /sbin/modprobe -q test_static_key_base; then
/sbin/modprobe -q -r test_static_key_base
fi
else
- echo "static_key: [FAIL]"
+ echo "static_keys: [FAIL]"
exit 1
fi
diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile
index 3c4b7fa05075..e8b3dde4fa16 100644
--- a/tools/testing/selftests/tc-testing/Makefile
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -1,31 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-include ../../../scripts/Makefile.include
-top_srcdir = $(abspath ../../../..)
-APIDIR := $(top_scrdir)/include/uapi
-TEST_GEN_FILES = action.o
+TEST_PROGS += ./tdc.sh
+TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts
include ../lib.mk
-
-PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
-
-ifeq ($(PROBE),)
- CPU ?= probe
-else
- CPU ?= generic
-endif
-
-CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
- | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
-
-CLANG_FLAGS = -I. -I$(APIDIR) \
- $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
-
-$(OUTPUT)/%.o: %.c
- $(CLANG) $(CLANG_FLAGS) \
- -O2 --target=bpf -emit-llvm -c $< -o - | \
- $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
-
-TEST_PROGS += ./tdc.sh
-TEST_FILES := tdc*.py Tdc*.py plugins plugin-lib tc-tests
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index be7b00799b3e..fc8e858ff119 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -195,8 +195,6 @@ directory:
and the other is a test whether the command leaked memory or not.
(This one is a preliminary version, it may not work quite right yet,
but the overall template is there and it should only need tweaks.)
- - buildebpfPlugin.py:
- builds all programs in $EBPFDIR.
ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/action-ebpf b/tools/testing/selftests/tc-testing/action-ebpf
new file mode 100644
index 000000000000..4879479b2ee5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/action-ebpf
Binary files differ
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 5aa8705751f0..012aa33b341b 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -1,12 +1,21 @@
#
+# Network
+#
+
+CONFIG_DUMMY=y
+CONFIG_VETH=y
+
+#
# Core Netfilter Configuration
#
+CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_LABELS=y
CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
CONFIG_NF_NAT=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
deleted file mode 100644
index d34fe06268d2..000000000000
--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
+++ /dev/null
@@ -1,67 +0,0 @@
-'''
-build ebpf program
-'''
-
-import os
-import signal
-from string import Template
-import subprocess
-import time
-from TdcPlugin import TdcPlugin
-from tdc_config import *
-
-class SubPlugin(TdcPlugin):
- def __init__(self):
- self.sub_class = 'buildebpf/SubPlugin'
- self.tap = ''
- super().__init__()
-
- def pre_suite(self, testcount, testidlist):
- super().pre_suite(testcount, testidlist)
-
- if self.args.buildebpf:
- self._ebpf_makeall()
-
- def post_suite(self, index):
- super().post_suite(index)
-
- self._ebpf_makeclean()
-
- def add_args(self, parser):
- super().add_args(parser)
-
- self.argparser_group = self.argparser.add_argument_group(
- 'buildebpf',
- 'options for buildebpfPlugin')
- self.argparser_group.add_argument(
- '--nobuildebpf', action='store_false', default=True,
- dest='buildebpf',
- help='Don\'t build eBPF programs')
-
- return self.argparser
-
- def _ebpf_makeall(self):
- if self.args.buildebpf:
- self._make('all')
-
- def _ebpf_makeclean(self):
- if self.args.buildebpf:
- self._make('clean')
-
- def _make(self, target):
- command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target)
- proc = subprocess.Popen(command,
- shell=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=os.environ.copy())
- (rawout, serr) = proc.communicate()
-
- if proc.returncode != 0 and len(serr) > 0:
- foutput = serr.decode("utf-8")
- else:
- foutput = rawout.decode("utf-8")
-
- proc.stdout.close()
- proc.stderr.close()
- return proc, foutput
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index b62429b0fcdb..bb19b8b76d3b 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -9,43 +9,13 @@ from TdcPlugin import TdcPlugin
from tdc_config import *
-def prepare_suite(obj, test):
- original = obj.args.NAMES
-
- if 'skip' in test and test['skip'] == 'yes':
- return
-
- if 'nsPlugin' not in test['plugins']:
- return
-
- shadow = {}
- shadow['IP'] = original['IP']
- shadow['TC'] = original['TC']
- shadow['NS'] = '{}-{}'.format(original['NS'], test['random'])
- shadow['DEV0'] = '{}id{}'.format(original['DEV0'], test['id'])
- shadow['DEV1'] = '{}id{}'.format(original['DEV1'], test['id'])
- shadow['DUMMY'] = '{}id{}'.format(original['DUMMY'], test['id'])
- shadow['DEV2'] = original['DEV2']
- obj.args.NAMES = shadow
-
- if obj.args.namespace:
- obj._ns_create()
- else:
- obj._ports_create()
-
- # Make sure the netns is visible in the fs
- while True:
- obj._proc_check()
- try:
- ns = obj.args.NAMES['NS']
- f = open('/run/netns/{}'.format(ns))
- f.close()
- break
- except:
- time.sleep(0.1)
- continue
-
- obj.args.NAMES = original
+try:
+ from pyroute2 import netns
+ from pyroute2 import IPRoute
+ netlink = True
+except ImportError:
+ netlink = False
+ print("!!! Consider installing pyroute2 !!!")
class SubPlugin(TdcPlugin):
def __init__(self):
@@ -53,64 +23,71 @@ class SubPlugin(TdcPlugin):
super().__init__()
def pre_suite(self, testcount, testlist):
- from itertools import cycle
-
super().pre_suite(testcount, testlist)
- print("Setting up namespaces and devices...")
+ def prepare_test(self, test):
+ if 'skip' in test and test['skip'] == 'yes':
+ return
- with Pool(self.args.mp) as p:
- it = zip(cycle([self]), testlist)
- p.starmap(prepare_suite, it)
+ if 'nsPlugin' not in test['plugins']:
+ return
- def pre_case(self, caseinfo, test_skip):
+ if netlink == True:
+ self._nl_ns_create()
+ else:
+ self._ipr2_ns_create()
+
+ # Make sure the netns is visible in the fs
+ ticks = 20
+ while True:
+ if ticks == 0:
+ raise TimeoutError
+ self._proc_check()
+ try:
+ ns = self.args.NAMES['NS']
+ f = open('/run/netns/{}'.format(ns))
+ f.close()
+ break
+ except:
+ time.sleep(0.1)
+ ticks -= 1
+ continue
+
+ def pre_case(self, test, test_skip):
if self.args.verbose:
print('{}.pre_case'.format(self.sub_class))
if test_skip:
return
+ self.prepare_test(test)
def post_case(self):
if self.args.verbose:
print('{}.post_case'.format(self.sub_class))
- if self.args.namespace:
- self._ns_destroy()
+ if netlink == True:
+ self._nl_ns_destroy()
else:
- self._ports_destroy()
+ self._ipr2_ns_destroy()
def post_suite(self, index):
if self.args.verbose:
print('{}.post_suite'.format(self.sub_class))
# Make sure we don't leak resources
- for f in os.listdir('/run/netns/'):
- cmd = self._replace_keywords("$IP netns del {}".format(f))
+ cmd = self._replace_keywords("$IP -a netns del")
- if self.args.verbose > 3:
- print('_exec_cmd: command "{}"'.format(cmd))
-
- subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+ if self.args.verbose > 3:
+ print('_exec_cmd: command "{}"'.format(cmd))
- def add_args(self, parser):
- super().add_args(parser)
- self.argparser_group = self.argparser.add_argument_group(
- 'netns',
- 'options for nsPlugin(run commands in net namespace)')
- self.argparser_group.add_argument(
- '-N', '--no-namespace', action='store_false', default=True,
- dest='namespace', help='Don\'t run commands in namespace')
- return self.argparser
+ subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def adjust_command(self, stage, command):
super().adjust_command(stage, command)
cmdform = 'list'
cmdlist = list()
- if not self.args.namespace:
- return command
-
if self.args.verbose:
print('{}.adjust_command'.format(self.sub_class))
@@ -138,63 +115,90 @@ class SubPlugin(TdcPlugin):
print('adjust_command: return command [{}]'.format(command))
return command
- def _ports_create_cmds(self):
- cmds = []
-
- cmds.append(self._replace_keywords('link add $DEV0 type veth peer name $DEV1'))
- cmds.append(self._replace_keywords('link set $DEV0 up'))
- cmds.append(self._replace_keywords('link add $DUMMY type dummy'))
- if not self.args.namespace:
- cmds.append(self._replace_keywords('link set $DEV1 up'))
-
- return cmds
-
- def _ports_create(self):
- self._exec_cmd_batched('pre', self._ports_create_cmds())
+ def _nl_ns_create(self):
+ ns = self.args.NAMES["NS"];
+ dev0 = self.args.NAMES["DEV0"];
+ dev1 = self.args.NAMES["DEV1"];
+ dummy = self.args.NAMES["DUMMY"];
- def _ports_destroy_cmd(self):
- return self._replace_keywords('link del $DEV0')
-
- def _ports_destroy(self):
- self._exec_cmd('post', self._ports_destroy_cmd())
-
- def _ns_create_cmds(self):
+ if self.args.verbose:
+ print('{}._nl_ns_create'.format(self.sub_class))
+
+ netns.create(ns)
+ netns.pushns(newns=ns)
+ with IPRoute() as ip:
+ ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'})
+ ip.link('add', ifname=dummy, kind='dummy')
+ ticks = 20
+ while True:
+ if ticks == 0:
+ raise TimeoutError
+ try:
+ dev1_idx = ip.link_lookup(ifname=dev1)[0]
+ dummy_idx = ip.link_lookup(ifname=dummy)[0]
+ ip.link('set', index=dev1_idx, state='up')
+ ip.link('set', index=dummy_idx, state='up')
+ break
+ except:
+ time.sleep(0.1)
+ ticks -= 1
+ continue
+ netns.popns()
+
+ with IPRoute() as ip:
+ ticks = 20
+ while True:
+ if ticks == 0:
+ raise TimeoutError
+ try:
+ dev0_idx = ip.link_lookup(ifname=dev0)[0]
+ ip.link('set', index=dev0_idx, state='up')
+ break
+ except:
+ time.sleep(0.1)
+ ticks -= 1
+ continue
+
+ def _ipr2_ns_create_cmds(self):
cmds = []
- if self.args.namespace:
- ns = self.args.NAMES['NS']
+ ns = self.args.NAMES['NS']
- cmds.append(self._replace_keywords('netns add {}'.format(ns)))
- cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
- cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
- cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
- cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+ cmds.append(self._replace_keywords('netns add {}'.format(ns)))
+ cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0'))
+ cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
+ cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns)))
+ cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
+ cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
+ cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+ cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns)))
- if self.args.device:
- cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
- cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
+ if self.args.device:
+ cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
+ cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
return cmds
- def _ns_create(self):
+ def _ipr2_ns_create(self):
'''
Create the network namespace in which the tests will be run and set up
the required network devices for it.
'''
- self._ports_create()
- self._exec_cmd_batched('pre', self._ns_create_cmds())
+ self._exec_cmd_batched('pre', self._ipr2_ns_create_cmds())
+
+ def _nl_ns_destroy(self):
+ ns = self.args.NAMES['NS']
+ netns.remove(ns)
- def _ns_destroy_cmd(self):
+ def _ipr2_ns_destroy_cmd(self):
return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS']))
- def _ns_destroy(self):
+ def _ipr2_ns_destroy(self):
'''
Destroy the network namespace for testing (and any associated network
devices as well)
'''
- if self.args.namespace:
- self._exec_cmd('post', self._ns_destroy_cmd())
- self._ports_destroy()
+ self._exec_cmd('post', self._ipr2_ns_destroy_cmd())
@cached_property
def _proc(self):
diff --git a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh
index f5335e8ad6b4..f5335e8ad6b4 100755
--- a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh
+++ b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 91832400ddbd..6e00bf32ef9a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -54,9 +54,6 @@
"actions",
"bpf"
],
- "plugins": {
- "requires": "buildebpfPlugin"
- },
"setup": [
[
"$TC action flush action bpf",
@@ -65,10 +62,10 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ok index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
"$TC action flush action bpf"
@@ -81,9 +78,6 @@
"actions",
"bpf"
],
- "plugins": {
- "requires": "buildebpfPlugin"
- },
"setup": [
[
"$TC action flush action bpf",
@@ -92,10 +86,10 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ko index 667",
"expExitCode": "255",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ko\\] id [0-9].*index 667 ref",
"matchCount": "0",
"teardown": [
[
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
index 013fb983bc3f..725d406a30ac 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
@@ -52,17 +52,16 @@
],
"plugins": {
"requires": [
- "buildebpfPlugin",
"nsPlugin"
]
},
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
- "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ok",
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ok",
"expExitCode": "0",
"verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
- "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?",
"matchCount": "1",
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
@@ -77,17 +76,16 @@
],
"plugins": {
"requires": [
- "buildebpfPlugin",
"nsPlugin"
]
},
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
- "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ko",
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ko",
"expExitCode": "1",
"verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
- "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?",
"matchCount": "0",
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index ddc7c355be0a..24bd0c2a3014 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -272,5 +272,62 @@
"teardown": [
"$TC qdisc del dev $DEV1 parent root drr"
]
+ },
+ {
+ "id": "bd32",
+ "name": "Try to delete hashtable referenced by another u32 filter",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1",
+ "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
+ },
+ {
+ "id": "4585",
+ "name": "Delete small tree of u32 hashtables and filters",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1",
+ "matchPattern": "protocol ip pref 2 u32",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
index 0ddb8e1b4369..c98c339424d4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
@@ -9,8 +9,7 @@
"plugins": {
"requires": "nsPlugin"
},
- "setup": [
- ],
+ "setup": [],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
@@ -126,8 +125,7 @@
"verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc hfsc 1: root refcnt [0-9]+",
"matchCount": "0",
- "teardown": [
- ]
+ "teardown": []
},
{
"id": "8436",
@@ -139,8 +137,7 @@
"plugins": {
"requires": "nsPlugin"
},
- "setup": [
- ],
+ "setup": [],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
"expExitCode": "0",
"verifyCmd": "$TC class show dev $DUMMY",
@@ -149,5 +146,28 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "bef4",
+ "name": "HFSC rt inner class upgrade to sc",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 1",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt rate 8"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1:1 classid 1:2 hfsc rt rate 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hfsc 1:1 parent 1: sc m1 0bit d 0us m2 8bit.*rt m1 0bit d 0us m2 8bit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 0599635c4bc6..2d603ef2e375 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -170,11 +170,11 @@
"setup": [
"echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
"$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI",
- "./taprio_wait_for_admin.sh $TC $ETH"
+ "./scripts/taprio_wait_for_admin.sh $TC $ETH"
],
"cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 clockid CLOCK_TAI",
"expExitCode": "2",
- "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
+ "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
"matchPattern": "0",
"matchCount": "1",
"teardown": [
@@ -195,11 +195,11 @@
"setup": [
"echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
"$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2",
- "./taprio_wait_for_admin.sh $TC $ETH"
+ "./scripts/taprio_wait_for_admin.sh $TC $ETH"
],
"cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 flags 0x2",
"expExitCode": "2",
- "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
+ "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
"matchPattern": "0",
"matchCount": "1",
"teardown": [
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a6718192aff3..caeacc691587 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -497,11 +497,6 @@ def prepare_run(pm, args, testlist):
pm.call_post_suite(1)
return emergency_exit_message
- if args.verbose:
- print('give test rig 2 seconds to stabilize')
-
- time.sleep(2)
-
def purge_run(pm, index):
pm.call_post_suite(index)
@@ -616,7 +611,7 @@ def test_runner_mp(pm, args, alltests):
batches.insert(0, serial)
print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial)))
- print("Using {} batches".format(len(batches)))
+ print("Using {} batches and {} workers".format(len(batches), args.mp))
# We can't pickle these objects so workaround them
global mp_pm
@@ -1017,12 +1012,17 @@ def main():
parser = pm.call_add_args(parser)
(args, remaining) = parser.parse_known_args()
args.NAMES = NAMES
+ args.mp = min(args.mp, 4)
pm.set_args(args)
check_default_settings(args, remaining, pm)
if args.verbose > 2:
print('args is {}'.format(args))
- set_operation_mode(pm, parser, args, remaining)
+ try:
+ set_operation_mode(pm, parser, args, remaining)
+ except KeyboardInterrupt:
+ # Cleanup on Ctrl-C
+ pm.call_post_suite(None)
if __name__ == "__main__":
main()
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index eb357bd7923c..407fa53822a0 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -1,7 +1,68 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-modprobe netdevsim
-modprobe sch_teql
-./tdc.py -c actions --nobuildebpf
-./tdc.py -c qdisc
+# If a module is required and was not compiled
+# the test that requires it will fail anyways
+try_modprobe() {
+ modprobe -q -R "$1"
+ if [ $? -ne 0 ]; then
+ echo "Module $1 not found... skipping."
+ else
+ modprobe "$1"
+ fi
+}
+
+try_modprobe netdevsim
+try_modprobe act_bpf
+try_modprobe act_connmark
+try_modprobe act_csum
+try_modprobe act_ct
+try_modprobe act_ctinfo
+try_modprobe act_gact
+try_modprobe act_gate
+try_modprobe act_ipt
+try_modprobe act_mirred
+try_modprobe act_mpls
+try_modprobe act_nat
+try_modprobe act_pedit
+try_modprobe act_police
+try_modprobe act_sample
+try_modprobe act_simple
+try_modprobe act_skbedit
+try_modprobe act_skbmod
+try_modprobe act_tunnel_key
+try_modprobe act_vlan
+try_modprobe cls_basic
+try_modprobe cls_bpf
+try_modprobe cls_cgroup
+try_modprobe cls_flow
+try_modprobe cls_flower
+try_modprobe cls_fw
+try_modprobe cls_matchall
+try_modprobe cls_route
+try_modprobe cls_u32
+try_modprobe em_canid
+try_modprobe em_cmp
+try_modprobe em_ipset
+try_modprobe em_ipt
+try_modprobe em_meta
+try_modprobe em_nbyte
+try_modprobe em_text
+try_modprobe em_u32
+try_modprobe sch_cake
+try_modprobe sch_cbs
+try_modprobe sch_choke
+try_modprobe sch_codel
+try_modprobe sch_drr
+try_modprobe sch_etf
+try_modprobe sch_ets
+try_modprobe sch_fq
+try_modprobe sch_fq_codel
+try_modprobe sch_fq_pie
+try_modprobe sch_gred
+try_modprobe sch_hfsc
+try_modprobe sch_hhf
+try_modprobe sch_htb
+try_modprobe sch_teql
+./tdc.py -J`nproc` -c actions
+./tdc.py -J`nproc` -c qdisc
diff --git a/tools/testing/selftests/tdx/.gitignore b/tools/testing/selftests/tdx/.gitignore
new file mode 100644
index 000000000000..5db4d15cc673
--- /dev/null
+++ b/tools/testing/selftests/tdx/.gitignore
@@ -0,0 +1 @@
+tdx_guest_test
diff --git a/tools/testing/selftests/thermal/intel/power_floor/Makefile b/tools/testing/selftests/thermal/intel/power_floor/Makefile
new file mode 100644
index 000000000000..9b88e57dbba5
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := power_floor_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
new file mode 100644
index 000000000000..0326b39a11b9
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define POWER_FLOOR_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_enable"
+#define POWER_FLOOR_STATUS_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_status"
+
+void power_floor_exit(int signum)
+{
+ int fd;
+
+ /* Disable feature via sysfs knob */
+
+ fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open power floor enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "0\n", 2) < 0) {
+ perror("Can' disable power floor notifications\n");
+ exit(1);
+ }
+
+ printf("Disabled power floor notifications\n");
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ struct pollfd ufd;
+ char status_str[3];
+ int fd, ret;
+
+ if (signal(SIGINT, power_floor_exit) == SIG_IGN)
+ signal(SIGINT, SIG_IGN);
+ if (signal(SIGHUP, power_floor_exit) == SIG_IGN)
+ signal(SIGHUP, SIG_IGN);
+ if (signal(SIGTERM, power_floor_exit) == SIG_IGN)
+ signal(SIGTERM, SIG_IGN);
+
+ /* Enable feature via sysfs knob */
+ fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open power floor enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "1\n", 2) < 0) {
+ perror("Can' enable power floor notifications\n");
+ exit(1);
+ }
+
+ close(fd);
+
+ printf("Enabled power floor notifications\n");
+
+ while (1) {
+ fd = open(POWER_FLOOR_STATUS_ATTRIBUTE, O_RDONLY);
+ if (fd < 0) {
+ perror("Unable to power floor status file\n");
+ exit(1);
+ }
+
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, status_str, sizeof(status_str)) < 0) {
+ fprintf(stderr, "Failed to read from:%s\n",
+ POWER_FLOOR_STATUS_ATTRIBUTE);
+ exit(1);
+ }
+
+ ufd.fd = fd;
+ ufd.events = POLLPRI;
+
+ ret = poll(&ufd, 1, -1);
+ if (ret < 0) {
+ perror("poll error");
+ exit(1);
+ } else if (ret == 0) {
+ printf("Poll Timeout\n");
+ } else {
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, status_str, sizeof(status_str)) < 0)
+ exit(0);
+
+ printf("power floor status: %s\n", status_str);
+ }
+
+ close(fd);
+ }
+}
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/Makefile b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
new file mode 100644
index 000000000000..37ff3286283b
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := workload_hint_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
new file mode 100644
index 000000000000..217c3a641c53
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/notification_delay_ms"
+#define WORKLOAD_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_hint_enable"
+#define WORKLOAD_TYPE_INDEX_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_type_index"
+
+static const char * const workload_types[] = {
+ "idle",
+ "battery_life",
+ "sustained",
+ "bursty",
+ NULL
+};
+
+#define WORKLOAD_TYPE_MAX_INDEX 3
+
+void workload_hint_exit(int signum)
+{
+ int fd;
+
+ /* Disable feature via sysfs knob */
+
+ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload type feature enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "0\n", 2) < 0) {
+ perror("Can' disable workload hints\n");
+ exit(1);
+ }
+
+ printf("Disabled workload type prediction\n");
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ struct pollfd ufd;
+ char index_str[4];
+ int fd, ret, index;
+ char delay_str[64];
+ int delay = 0;
+
+ printf("Usage: workload_hint_test [notification delay in milli seconds]\n");
+
+ if (argc > 1) {
+ ret = sscanf(argv[1], "%d", &delay);
+ if (ret < 0) {
+ printf("Invalid delay\n");
+ exit(1);
+ }
+
+ printf("Setting notification delay to %d ms\n", delay);
+ if (delay < 0)
+ exit(1);
+
+ sprintf(delay_str, "%s\n", argv[1]);
+
+ sprintf(delay_str, "%s\n", argv[1]);
+ fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload notification delay\n");
+ exit(1);
+ }
+
+ if (write(fd, delay_str, strlen(delay_str)) < 0) {
+ perror("Can't set delay\n");
+ exit(1);
+ }
+
+ close(fd);
+ }
+
+ if (signal(SIGINT, workload_hint_exit) == SIG_IGN)
+ signal(SIGINT, SIG_IGN);
+ if (signal(SIGHUP, workload_hint_exit) == SIG_IGN)
+ signal(SIGHUP, SIG_IGN);
+ if (signal(SIGTERM, workload_hint_exit) == SIG_IGN)
+ signal(SIGTERM, SIG_IGN);
+
+ /* Enable feature via sysfs knob */
+ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload type feature enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "1\n", 2) < 0) {
+ perror("Can' enable workload hints\n");
+ exit(1);
+ }
+
+ close(fd);
+
+ printf("Enabled workload type prediction\n");
+
+ while (1) {
+ fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY);
+ if (fd < 0) {
+ perror("Unable to open workload type file\n");
+ exit(1);
+ }
+
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, index_str, sizeof(index_str)) < 0) {
+ fprintf(stderr, "Failed to read from:%s\n",
+ WORKLOAD_TYPE_INDEX_ATTRIBUTE);
+ exit(1);
+ }
+
+ ufd.fd = fd;
+ ufd.events = POLLPRI;
+
+ ret = poll(&ufd, 1, -1);
+ if (ret < 0) {
+ perror("poll error");
+ exit(1);
+ } else if (ret == 0) {
+ printf("Poll Timeout\n");
+ } else {
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, index_str, sizeof(index_str)) < 0)
+ exit(0);
+
+ ret = sscanf(index_str, "%d", &index);
+ if (ret < 0)
+ break;
+ if (index > WORKLOAD_TYPE_MAX_INDEX)
+ printf("Invalid workload type index\n");
+ else
+ printf("workload type:%s\n", workload_types[index]);
+ }
+
+ close(fd);
+ }
+}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
index eb3e79ed7b4a..edb5acacf214 100644
--- a/tools/testing/selftests/timers/nsleep-lat.c
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -118,7 +118,7 @@ int nanosleep_lat_test(int clockid, long long ns)
clock_gettime(clockid, &end);
if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
- printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+ ksft_print_msg("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
return -1;
}
@@ -132,20 +132,23 @@ int nanosleep_lat_test(int clockid, long long ns)
}
if (latency/count > UNRESONABLE_LATENCY) {
- printf("Large abs latency: %lld ns :", latency/count);
+ ksft_print_msg("Large abs latency: %lld ns :", latency/count);
return -1;
}
return 0;
}
-
+#define SKIPPED_CLOCK_COUNT 3
int main(int argc, char **argv)
{
long long length;
int clockid, ret;
+ ksft_print_header();
+ ksft_set_plan(NR_CLOCKIDS - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT);
+
for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
/* Skip cputime clockids since nanosleep won't increment cputime */
@@ -154,9 +157,6 @@ int main(int argc, char **argv)
clockid == CLOCK_HWSPECIFIC)
continue;
- printf("nsleep latency %-26s ", clockstring(clockid));
- fflush(stdout);
-
length = 10;
while (length <= (NSEC_PER_SEC * 10)) {
ret = nanosleep_lat_test(clockid, length);
@@ -167,14 +167,12 @@ int main(int argc, char **argv)
}
if (ret == UNSUPPORTED) {
- printf("[UNSUPPORTED]\n");
- continue;
- }
- if (ret < 0) {
- printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_test_result_skip("%s\n", clockstring(clockid));
+ } else {
+ ksft_test_result(ret >= 0, "%s\n",
+ clockstring(clockid));
}
- printf("[OK]\n");
}
- return ksft_exit_pass();
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 8a17c0e8d82b..d49dd3ffd0d9 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -76,22 +76,21 @@ static int check_diff(struct timeval start, struct timeval end)
static int check_itimer(int which)
{
+ const char *name;
int err;
struct timeval start, end;
struct itimerval val = {
.it_value.tv_sec = DELAY,
};
- printf("Check itimer ");
-
if (which == ITIMER_VIRTUAL)
- printf("virtual... ");
+ name = "ITIMER_VIRTUAL";
else if (which == ITIMER_PROF)
- printf("prof... ");
+ name = "ITIMER_PROF";
else if (which == ITIMER_REAL)
- printf("real... ");
-
- fflush(stdout);
+ name = "ITIMER_REAL";
+ else
+ return -1;
done = 0;
@@ -104,13 +103,13 @@ static int check_itimer(int which)
err = gettimeofday(&start, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
err = setitimer(which, &val, NULL);
if (err < 0) {
- perror("Can't set timer\n");
+ ksft_perror("Can't set timer");
return -1;
}
@@ -123,20 +122,18 @@ static int check_itimer(int which)
err = gettimeofday(&end, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
- if (!check_diff(start, end))
- printf("[OK]\n");
- else
- printf("[FAIL]\n");
+ ksft_test_result(check_diff(start, end) == 0, "%s\n", name);
return 0;
}
static int check_timer_create(int which)
{
+ const char *type;
int err;
timer_t id;
struct timeval start, end;
@@ -144,31 +141,32 @@ static int check_timer_create(int which)
.it_value.tv_sec = DELAY,
};
- printf("Check timer_create() ");
if (which == CLOCK_THREAD_CPUTIME_ID) {
- printf("per thread... ");
+ type = "thread";
} else if (which == CLOCK_PROCESS_CPUTIME_ID) {
- printf("per process... ");
+ type = "process";
+ } else {
+ ksft_print_msg("Unknown timer_create() type %d\n", which);
+ return -1;
}
- fflush(stdout);
done = 0;
err = timer_create(which, NULL, &id);
if (err < 0) {
- perror("Can't create timer\n");
+ ksft_perror("Can't create timer");
return -1;
}
signal(SIGALRM, sig_handler);
err = gettimeofday(&start, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
err = timer_settime(id, 0, &val, NULL);
if (err < 0) {
- perror("Can't set timer\n");
+ ksft_perror("Can't set timer");
return -1;
}
@@ -176,14 +174,12 @@ static int check_timer_create(int which)
err = gettimeofday(&end, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
- if (!check_diff(start, end))
- printf("[OK]\n");
- else
- printf("[FAIL]\n");
+ ksft_test_result(check_diff(start, end) == 0,
+ "timer_create() per %s\n", type);
return 0;
}
@@ -220,25 +216,25 @@ static int check_timer_distribution(void)
.it_interval.tv_nsec = 1000 * 1000,
};
- printf("Check timer_create() per process signal distribution... ");
- fflush(stdout);
-
remain = nthreads + 1; /* worker threads + this thread */
signal(SIGALRM, distribution_handler);
err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
if (err < 0) {
- perror("Can't create timer\n");
+ ksft_perror("Can't create timer");
return -1;
}
err = timer_settime(id, 0, &val, NULL);
if (err < 0) {
- perror("Can't set timer\n");
+ ksft_perror("Can't set timer");
return -1;
}
for (i = 0; i < nthreads; i++) {
- if (pthread_create(&threads[i], NULL, distribution_thread, NULL)) {
- perror("Can't create thread\n");
+ err = pthread_create(&threads[i], NULL, distribution_thread,
+ NULL);
+ if (err) {
+ ksft_print_msg("Can't create thread: %s (%d)\n",
+ strerror(errno), errno);
return -1;
}
}
@@ -247,25 +243,30 @@ static int check_timer_distribution(void)
while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
for (i = 0; i < nthreads; i++) {
- if (pthread_join(threads[i], NULL)) {
- perror("Can't join thread\n");
+ err = pthread_join(threads[i], NULL);
+ if (err) {
+ ksft_print_msg("Can't join thread: %s (%d)\n",
+ strerror(errno), errno);
return -1;
}
}
if (timer_delete(id)) {
- perror("Can't delete timer\n");
+ ksft_perror("Can't delete timer");
return -1;
}
- printf("[OK]\n");
+ ksft_test_result_pass("check_timer_distribution\n");
return 0;
}
int main(int argc, char **argv)
{
- printf("Testing posix timers. False negative may happen on CPU execution \n");
- printf("based timers if other threads run on the CPU...\n");
+ ksft_print_header();
+ ksft_set_plan(6);
+
+ ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
+ ksft_print_msg("based timers if other threads run on the CPU...\n");
if (check_itimer(ITIMER_VIRTUAL) < 0)
return ksft_exit_fail();
@@ -294,5 +295,5 @@ int main(int argc, char **argv)
if (check_timer_distribution() < 0)
return ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
index 5cebfb356345..dbe55f3a66f4 100644
--- a/tools/testing/selftests/uevent/uevent_filtering.c
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -78,7 +78,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
{
int sk_fd, ret;
socklen_t sk_addr_len;
- int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+ int rcv_buf_sz = __UEVENT_BUFFER_SIZE;
uint64_t sync_add = 1;
struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
char buf[__UEVENT_BUFFER_SIZE] = { 0 };
@@ -121,6 +121,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
if ((size_t)sk_addr_len != sizeof(sk_addr)) {
fprintf(stderr, "Invalid socket address size\n");
+ ret = -1;
goto on_error;
}
@@ -147,11 +148,12 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
close(sync_fd);
if (ret != sizeof(sync_add)) {
+ ret = -1;
fprintf(stderr, "Failed to synchronize with parent process\n");
goto on_error;
}
- fret = 0;
+ ret = 0;
for (;;) {
ssize_t r;
@@ -187,7 +189,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
on_error:
close(sk_fd);
- return fret;
+ return ret;
}
int trigger_uevent(unsigned int times)
diff --git a/tools/testing/selftests/user_events/.gitignore b/tools/testing/selftests/user_events/.gitignore
new file mode 100644
index 000000000000..f570febd211b
--- /dev/null
+++ b/tools/testing/selftests/user_events/.gitignore
@@ -0,0 +1,4 @@
+abi_test
+dyn_test
+ftrace_test
+perf_test
diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c
index 8202f1327c39..cef1ff1af223 100644
--- a/tools/testing/selftests/user_events/abi_test.c
+++ b/tools/testing/selftests/user_events/abi_test.c
@@ -24,6 +24,18 @@
const char *data_file = "/sys/kernel/tracing/user_events_data";
const char *enable_file = "/sys/kernel/tracing/events/user_events/__abi_event/enable";
+static bool event_exists(void)
+{
+ int fd = open(enable_file, O_RDWR);
+
+ if (fd < 0)
+ return false;
+
+ close(fd);
+
+ return true;
+}
+
static int change_event(bool enable)
{
int fd = open(enable_file, O_RDWR);
@@ -47,7 +59,22 @@ static int change_event(bool enable)
return ret;
}
-static int reg_enable(long *enable, int size, int bit)
+static int event_delete(void)
+{
+ int fd = open(data_file, O_RDWR);
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ ret = ioctl(fd, DIAG_IOCSDEL, "__abi_event");
+
+ close(fd);
+
+ return ret;
+}
+
+static int reg_enable_flags(void *enable, int size, int bit, int flags)
{
struct user_reg reg = {0};
int fd = open(data_file, O_RDWR);
@@ -58,6 +85,7 @@ static int reg_enable(long *enable, int size, int bit)
reg.size = sizeof(reg);
reg.name_args = (__u64)"__abi_event";
+ reg.flags = flags;
reg.enable_bit = bit;
reg.enable_addr = (__u64)enable;
reg.enable_size = size;
@@ -69,7 +97,12 @@ static int reg_enable(long *enable, int size, int bit)
return ret;
}
-static int reg_disable(long *enable, int bit)
+static int reg_enable(void *enable, int size, int bit)
+{
+ return reg_enable_flags(enable, size, bit, 0);
+}
+
+static int reg_disable(void *enable, int bit)
{
struct user_unreg reg = {0};
int fd = open(data_file, O_RDWR);
@@ -90,7 +123,8 @@ static int reg_disable(long *enable, int bit)
}
FIXTURE(user) {
- long check;
+ int check;
+ long check_long;
bool umount;
};
@@ -99,6 +133,7 @@ FIXTURE_SETUP(user) {
change_event(false);
self->check = 0;
+ self->check_long = 0;
}
FIXTURE_TEARDOWN(user) {
@@ -126,6 +161,26 @@ TEST_F(user, enablement) {
ASSERT_EQ(0, change_event(false));
}
+TEST_F(user, flags) {
+ /* USER_EVENT_REG_PERSIST is allowed */
+ ASSERT_EQ(0, reg_enable_flags(&self->check, sizeof(int), 0,
+ USER_EVENT_REG_PERSIST));
+ ASSERT_EQ(0, reg_disable(&self->check, 0));
+
+ /* Ensure it exists after close and disable */
+ ASSERT_TRUE(event_exists());
+
+ /* Ensure we can delete it */
+ ASSERT_EQ(0, event_delete());
+
+ /* USER_EVENT_REG_MAX or above is not allowed */
+ ASSERT_EQ(-1, reg_enable_flags(&self->check, sizeof(int), 0,
+ USER_EVENT_REG_MAX));
+
+ /* Ensure it does not exist after invalid flags */
+ ASSERT_FALSE(event_exists());
+}
+
TEST_F(user, bit_sizes) {
/* Allow 0-31 bits for 32-bit */
ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 0));
@@ -136,9 +191,9 @@ TEST_F(user, bit_sizes) {
#if BITS_PER_LONG == 8
/* Allow 0-64 bits for 64-bit */
- ASSERT_EQ(0, reg_enable(&self->check, sizeof(long), 63));
- ASSERT_NE(0, reg_enable(&self->check, sizeof(long), 64));
- ASSERT_EQ(0, reg_disable(&self->check, 63));
+ ASSERT_EQ(0, reg_enable(&self->check_long, sizeof(long), 63));
+ ASSERT_NE(0, reg_enable(&self->check_long, sizeof(long), 64));
+ ASSERT_EQ(0, reg_disable(&self->check_long, 63));
#endif
/* Disallowed sizes (everything beside 4 and 8) */
@@ -200,7 +255,7 @@ static int clone_check(void *check)
for (i = 0; i < 10; ++i) {
usleep(100000);
- if (*(long *)check)
+ if (*(int *)check)
return 0;
}
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
index a85980190bea..bdf9ab127488 100644
--- a/tools/testing/selftests/user_events/dyn_test.c
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -17,9 +17,25 @@
#include "../kselftest_harness.h"
#include "user_events_selftests.h"
+const char *dyn_file = "/sys/kernel/tracing/dynamic_events";
const char *abi_file = "/sys/kernel/tracing/user_events_data";
const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable";
+static int event_delete(void)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ ret = ioctl(fd, DIAG_IOCSDEL, "__test_event");
+
+ close(fd);
+
+ return ret;
+}
+
static bool wait_for_delete(void)
{
int i;
@@ -64,7 +80,31 @@ static int unreg_event(int fd, int *check, int bit)
return ioctl(fd, DIAG_IOCSUNREG, &unreg);
}
-static int parse(int *check, const char *value)
+static int parse_dyn(const char *value)
+{
+ int fd = open(dyn_file, O_RDWR | O_APPEND);
+ int len = strlen(value);
+ int ret;
+
+ if (fd == -1)
+ return -1;
+
+ ret = write(fd, value, len);
+
+ if (ret == len)
+ ret = 0;
+ else
+ ret = -1;
+
+ close(fd);
+
+ if (ret == 0)
+ event_delete();
+
+ return ret;
+}
+
+static int parse_abi(int *check, const char *value)
{
int fd = open(abi_file, O_RDWR);
int ret;
@@ -90,6 +130,18 @@ static int parse(int *check, const char *value)
return ret;
}
+static int parse(int *check, const char *value)
+{
+ int abi_ret = parse_abi(check, value);
+ int dyn_ret = parse_dyn(value);
+
+ /* Ensure both ABI and DYN parse the same way */
+ if (dyn_ret != abi_ret)
+ return -1;
+
+ return dyn_ret;
+}
+
static int check_match(int *check, const char *first, const char *second, bool *match)
{
int fd = open(abi_file, O_RDWR);
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 7e8c937627dd..0b872c0a42d2 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -14,6 +14,7 @@ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap
check_initial_reg_state sigreturn iopl ioperm \
test_vsyscall mov_ss_trap \
syscall_arg_fault fsgsbase_restore sigaltstack
+TARGETS_C_BOTHBITS += nx_stack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
@@ -109,3 +110,6 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S
# state.
$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
+
+$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
+$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index eb0e46905bf9..8f9b06d9ce03 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -573,7 +573,7 @@ int do_uring(unsigned long lam)
char path[PATH_MAX] = {0};
/* get current process path */
- if (readlink("/proc/self/exe", path, PATH_MAX) <= 0)
+ if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
return 1;
int file_fd = open(path, O_RDONLY);
@@ -680,14 +680,14 @@ static int handle_execve(struct testcases *test)
perror("Fork failed.");
ret = 1;
} else if (pid == 0) {
- char path[PATH_MAX];
+ char path[PATH_MAX] = {0};
/* Set LAM mode in parent process */
if (set_lam(lam) != 0)
return 1;
/* Get current binary's path and the binary was run by execve */
- if (readlink("/proc/self/exe", path, PATH_MAX) <= 0)
+ if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
exit(-1);
/* run binary to get LAM mode and return to parent process */
diff --git a/tools/testing/selftests/x86/nx_stack.c b/tools/testing/selftests/x86/nx_stack.c
new file mode 100644
index 000000000000..ea4a4e246879
--- /dev/null
+++ b/tools/testing/selftests/x86/nx_stack.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2023 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that userspace stack is NX. Requires linking with -Wl,-z,noexecstack
+ * because I don't want to bother with PT_GNU_STACK detection.
+ *
+ * Fill the stack with INT3's and then try to execute some of them:
+ * SIGSEGV -- good, SIGTRAP -- bad.
+ *
+ * Regular stack is completely overwritten before testing.
+ * Test doesn't exit SIGSEGV handler after first fault at INT3.
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#define PAGE_SIZE 4096
+
+/*
+ * This is memset(rsp, 0xcc, -1); but down.
+ * It will SIGSEGV when bottom of the stack is reached.
+ * Byte-size access is important! (see rdi tweak in the signal handler).
+ */
+void make_stack1(void);
+asm(
+".pushsection .text\n"
+".globl make_stack1\n"
+".align 16\n"
+"make_stack1:\n"
+ "mov $0xcc, %al\n"
+#if defined __amd64__
+ "mov %rsp, %rdi\n"
+ "mov $-1, %rcx\n"
+#elif defined __i386__
+ "mov %esp, %edi\n"
+ "mov $-1, %ecx\n"
+#else
+#error
+#endif
+ "std\n"
+ "rep stosb\n"
+ /* unreachable */
+ "hlt\n"
+".type make_stack1,@function\n"
+".size make_stack1,.-make_stack1\n"
+".popsection\n"
+);
+
+/*
+ * memset(p, 0xcc, -1);
+ * It will SIGSEGV when top of the stack is reached.
+ */
+void make_stack2(uint64_t p);
+asm(
+".pushsection .text\n"
+".globl make_stack2\n"
+".align 16\n"
+"make_stack2:\n"
+ "mov $0xcc, %al\n"
+#if defined __amd64__
+ "mov $-1, %rcx\n"
+#elif defined __i386__
+ "mov $-1, %ecx\n"
+#else
+#error
+#endif
+ "cld\n"
+ "rep stosb\n"
+ /* unreachable */
+ "hlt\n"
+".type make_stack2,@function\n"
+".size make_stack2,.-make_stack2\n"
+".popsection\n"
+);
+
+static volatile int test_state = 0;
+static volatile unsigned long stack_min_addr;
+
+#if defined __amd64__
+#define RDI REG_RDI
+#define RIP REG_RIP
+#define RIP_STRING "rip"
+#elif defined __i386__
+#define RDI REG_EDI
+#define RIP REG_EIP
+#define RIP_STRING "eip"
+#else
+#error
+#endif
+
+static void sigsegv(int _, siginfo_t *__, void *uc_)
+{
+ /*
+ * Some Linux versions didn't clear DF before entering signal
+ * handler. make_stack1() doesn't have a chance to clear DF
+ * either so we clear it by hand here.
+ */
+ asm volatile ("cld" ::: "memory");
+
+ ucontext_t *uc = uc_;
+
+ if (test_state == 0) {
+ /* Stack is faulted and cleared from RSP to the lowest address. */
+ stack_min_addr = ++uc->uc_mcontext.gregs[RDI];
+ if (1) {
+ printf("stack min %lx\n", stack_min_addr);
+ }
+ uc->uc_mcontext.gregs[RIP] = (uintptr_t)&make_stack2;
+ test_state = 1;
+ } else if (test_state == 1) {
+ /* Stack has been cleared from top to bottom. */
+ unsigned long stack_max_addr = uc->uc_mcontext.gregs[RDI];
+ if (1) {
+ printf("stack max %lx\n", stack_max_addr);
+ }
+ /* Start faulting pages on stack and see what happens. */
+ uc->uc_mcontext.gregs[RIP] = stack_max_addr - PAGE_SIZE;
+ test_state = 2;
+ } else if (test_state == 2) {
+ /* Stack page is NX -- good, test next page. */
+ uc->uc_mcontext.gregs[RIP] -= PAGE_SIZE;
+ if (uc->uc_mcontext.gregs[RIP] == stack_min_addr) {
+ /* One more SIGSEGV and test ends. */
+ test_state = 3;
+ }
+ } else {
+ printf("PASS\tAll stack pages are NX\n");
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+static void sigtrap(int _, siginfo_t *__, void *uc_)
+{
+ const ucontext_t *uc = uc_;
+ unsigned long rip = uc->uc_mcontext.gregs[RIP];
+ printf("FAIL\texecutable page on the stack: " RIP_STRING " %lx\n", rip);
+ _exit(EXIT_FAILURE);
+}
+
+int main(void)
+{
+ {
+ struct sigaction act = {};
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = &sigsegv;
+ int rv = sigaction(SIGSEGV, &act, NULL);
+ assert(rv == 0);
+ }
+ {
+ struct sigaction act = {};
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = &sigtrap;
+ int rv = sigaction(SIGTRAP, &act, NULL);
+ assert(rv == 0);
+ }
+ {
+ struct rlimit rlim;
+ int rv = getrlimit(RLIMIT_STACK, &rlim);
+ assert(rv == 0);
+ /* Cap stack at time-honored 8 MiB value. */
+ rlim.rlim_max = rlim.rlim_cur;
+ if (rlim.rlim_max > 8 * 1024 * 1024) {
+ rlim.rlim_max = 8 * 1024 * 1024;
+ }
+ rv = setrlimit(RLIMIT_STACK, &rlim);
+ assert(rv == 0);
+ }
+ {
+ /*
+ * We don't know now much stack SIGSEGV handler uses.
+ * Bump this by 1 page every time someone complains,
+ * or rewrite it in assembly.
+ */
+ const size_t len = SIGSTKSZ;
+ void *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(p != MAP_FAILED);
+ stack_t ss = {};
+ ss.ss_sp = p;
+ ss.ss_size = len;
+ int rv = sigaltstack(&ss, NULL);
+ assert(rv == 0);
+ }
+ make_stack1();
+ /*
+ * Unreachable, but if _this_ INT3 is ever reached, it's a bug somewhere.
+ * Fold it into main SIGTRAP pathway.
+ */
+ __builtin_trap();
+}