diff options
Diffstat (limited to 'tools')
371 files changed, 16952 insertions, 2707 deletions
diff --git a/tools/arch/arm64/vdso b/tools/arch/arm64/vdso new file mode 120000 index 000000000000..233c7a26f6e5 --- /dev/null +++ b/tools/arch/arm64/vdso @@ -0,0 +1 @@ +../../../arch/arm64/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/loongarch/vdso b/tools/arch/loongarch/vdso new file mode 120000 index 000000000000..ebda43a82db7 --- /dev/null +++ b/tools/arch/loongarch/vdso @@ -0,0 +1 @@ +../../../arch/loongarch/vdso
\ No newline at end of file diff --git a/tools/arch/powerpc/vdso b/tools/arch/powerpc/vdso new file mode 120000 index 000000000000..4e676d1d1cb4 --- /dev/null +++ b/tools/arch/powerpc/vdso @@ -0,0 +1 @@ +../../../arch/powerpc/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/s390/vdso b/tools/arch/s390/vdso new file mode 120000 index 000000000000..6cf4c1cebdcd --- /dev/null +++ b/tools/arch/s390/vdso @@ -0,0 +1 @@ +../../../arch/s390/kernel/vdso64
\ No newline at end of file diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index e0c25b75327e..d751eb8585d0 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -1,451 +1,1053 @@ -# The basic row format is: -# LEAF, SUBLEAF, register_name, bits, short_name, long_description - -# Leaf 00H - 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs - -# Leaf 01H - 1, 0, EAX, 3:0, stepping, Stepping ID - 1, 0, EAX, 7:4, model, Model - 1, 0, EAX, 11:8, family, Family ID - 1, 0, EAX, 13:12, processor, Processor Type - 1, 0, EAX, 19:16, model_ext, Extended Model ID - 1, 0, EAX, 27:20, family_ext, Extended Family ID - - 1, 0, EBX, 7:0, brand, Brand Index - 1, 0, EBX, 15:8, clflush_size, CLFLUSH line size (value * 8) in bytes - 1, 0, EBX, 23:16, max_cpu_id, Maxim number of addressable logic cpu in this package - 1, 0, EBX, 31:24, apic_id, Initial APIC ID - - 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3) - 1, 0, ECX, 1, pclmulqdq, PCLMULQDQ instruction supported - 1, 0, ECX, 2, dtes64, DS area uses 64-bit layout - 1, 0, ECX, 3, mwait, MONITOR/MWAIT supported - 1, 0, ECX, 4, ds_cpl, CPL Qualified Debug Store which allows for branch message storage qualified by CPL - 1, 0, ECX, 5, vmx, Virtual Machine Extensions supported - 1, 0, ECX, 6, smx, Safer Mode Extension supported - 1, 0, ECX, 7, eist, Enhanced Intel SpeedStep Technology - 1, 0, ECX, 8, tm2, Thermal Monitor 2 - 1, 0, ECX, 9, ssse3, Supplemental Streaming SIMD Extensions 3 (SSSE3) - 1, 0, ECX, 10, l1_ctx_id, L1 data cache could be set to either adaptive mode or shared mode (check IA32_MISC_ENABLE bit 24 definition) - 1, 0, ECX, 11, sdbg, IA32_DEBUG_INTERFACE MSR for silicon debug supported - 1, 0, ECX, 12, fma, FMA extensions using YMM state supported - 1, 0, ECX, 13, cmpxchg16b, 'CMPXCHG16B - Compare and Exchange Bytes' supported - 1, 0, ECX, 14, xtpr_update, xTPR Update Control supported - 1, 0, ECX, 15, pdcm, Perfmon and Debug Capability present - 1, 0, ECX, 17, pcid, Process-Context Identifiers feature present - 1, 0, ECX, 18, dca, Prefetching data from a memory mapped device supported - 1, 0, ECX, 19, sse4_1, SSE4.1 feature present - 1, 0, ECX, 20, sse4_2, SSE4.2 feature present - 1, 0, ECX, 21, x2apic, x2APIC supported - 1, 0, ECX, 22, movbe, MOVBE instruction supported - 1, 0, ECX, 23, popcnt, POPCNT instruction supported - 1, 0, ECX, 24, tsc_deadline_timer, LAPIC supports one-shot operation using a TSC deadline value - 1, 0, ECX, 25, aesni, AESNI instruction supported - 1, 0, ECX, 26, xsave, XSAVE/XRSTOR processor extended states (XSETBV/XGETBV/XCR0) - 1, 0, ECX, 27, osxsave, OS has set CR4.OSXSAVE bit to enable XSETBV/XGETBV/XCR0 - 1, 0, ECX, 28, avx, AVX instruction supported - 1, 0, ECX, 29, f16c, 16-bit floating-point conversion instruction supported - 1, 0, ECX, 30, rdrand, RDRAND instruction supported - - 1, 0, EDX, 0, fpu, x87 FPU on chip - 1, 0, EDX, 1, vme, Virtual-8086 Mode Enhancement - 1, 0, EDX, 2, de, Debugging Extensions - 1, 0, EDX, 3, pse, Page Size Extensions - 1, 0, EDX, 4, tsc, Time Stamp Counter - 1, 0, EDX, 5, msr, RDMSR and WRMSR Support - 1, 0, EDX, 6, pae, Physical Address Extensions - 1, 0, EDX, 7, mce, Machine Check Exception - 1, 0, EDX, 8, cx8, CMPXCHG8B instr - 1, 0, EDX, 9, apic, APIC on Chip - 1, 0, EDX, 11, sep, SYSENTER and SYSEXIT instrs - 1, 0, EDX, 12, mtrr, Memory Type Range Registers - 1, 0, EDX, 13, pge, Page Global Bit - 1, 0, EDX, 14, mca, Machine Check Architecture - 1, 0, EDX, 15, cmov, Conditional Move Instrs - 1, 0, EDX, 16, pat, Page Attribute Table - 1, 0, EDX, 17, pse36, 36-Bit Page Size Extension - 1, 0, EDX, 18, psn, Processor Serial Number - 1, 0, EDX, 19, clflush, CLFLUSH instr -# 1, 0, EDX, 20, - 1, 0, EDX, 21, ds, Debug Store - 1, 0, EDX, 22, acpi, Thermal Monitor and Software Controlled Clock Facilities - 1, 0, EDX, 23, mmx, Intel MMX Technology - 1, 0, EDX, 24, fxsr, XSAVE and FXRSTOR Instrs - 1, 0, EDX, 25, sse, SSE - 1, 0, EDX, 26, sse2, SSE2 - 1, 0, EDX, 27, ss, Self Snoop - 1, 0, EDX, 28, hit, Max APIC IDs - 1, 0, EDX, 29, tm, Thermal Monitor -# 1, 0, EDX, 30, - 1, 0, EDX, 31, pbe, Pending Break Enable - -# Leaf 02H -# cache and TLB descriptor info - -# Leaf 03H -# Precessor Serial Number, introduced on Pentium III, not valid for -# latest models - -# Leaf 04H -# thread/core and cache topology - 4, 0, EAX, 4:0, cache_type, Cache type like instr/data or unified - 4, 0, EAX, 7:5, cache_level, Cache Level (starts at 1) - 4, 0, EAX, 8, cache_self_init, Cache Self Initialization - 4, 0, EAX, 9, fully_associate, Fully Associative cache -# 4, 0, EAX, 13:10, resvd, resvd - 4, 0, EAX, 25:14, max_logical_id, Max number of addressable IDs for logical processors sharing the cache - 4, 0, EAX, 31:26, max_phy_id, Max number of addressable IDs for processors in phy package - - 4, 0, EBX, 11:0, cache_linesize, Size of a cache line in bytes - 4, 0, EBX, 21:12, cache_partition, Physical Line partitions - 4, 0, EBX, 31:22, cache_ways, Ways of associativity - 4, 0, ECX, 31:0, cache_sets, Number of Sets - 1 - 4, 0, EDX, 0, c_wbinvd, 1 means WBINVD/INVD is not ganranteed to act upon lower level caches of non-originating threads sharing this cache - 4, 0, EDX, 1, c_incl, Whether cache is inclusive of lower cache level - 4, 0, EDX, 2, c_comp_index, Complex Cache Indexing - -# Leaf 05H -# MONITOR/MWAIT - 5, 0, EAX, 15:0, min_mon_size, Smallest monitor line size in bytes - 5, 0, EBX, 15:0, max_mon_size, Largest monitor line size in bytes - 5, 0, ECX, 0, mwait_ext, Enum of Monitor-Mwait extensions supported - 5, 0, ECX, 1, mwait_irq_break, Largest monitor line size in bytes - 5, 0, EDX, 3:0, c0_sub_stats, Number of C0* sub C-states supported using MWAIT - 5, 0, EDX, 7:4, c1_sub_stats, Number of C1* sub C-states supported using MWAIT - 5, 0, EDX, 11:8, c2_sub_stats, Number of C2* sub C-states supported using MWAIT - 5, 0, EDX, 15:12, c3_sub_stats, Number of C3* sub C-states supported using MWAIT - 5, 0, EDX, 19:16, c4_sub_stats, Number of C4* sub C-states supported using MWAIT - 5, 0, EDX, 23:20, c5_sub_stats, Number of C5* sub C-states supported using MWAIT - 5, 0, EDX, 27:24, c6_sub_stats, Number of C6* sub C-states supported using MWAIT - 5, 0, EDX, 31:28, c7_sub_stats, Number of C7* sub C-states supported using MWAIT - -# Leaf 06H -# Thermal & Power Management - - 6, 0, EAX, 0, dig_temp, Digital temperature sensor supported - 6, 0, EAX, 1, turbo, Intel Turbo Boost - 6, 0, EAX, 2, arat, Always running APIC timer -# 6, 0, EAX, 3, resv, Reserved - 6, 0, EAX, 4, pln, Power limit notifications supported - 6, 0, EAX, 5, ecmd, Clock modulation duty cycle extension supported - 6, 0, EAX, 6, ptm, Package thermal management supported - 6, 0, EAX, 7, hwp, HWP base register - 6, 0, EAX, 8, hwp_notify, HWP notification - 6, 0, EAX, 9, hwp_act_window, HWP activity window - 6, 0, EAX, 10, hwp_energy, HWP energy performance preference - 6, 0, EAX, 11, hwp_pkg_req, HWP package level request -# 6, 0, EAX, 12, resv, Reserved - 6, 0, EAX, 13, hdc, HDC base registers supported - 6, 0, EAX, 14, turbo3, Turbo Boost Max 3.0 - 6, 0, EAX, 15, hwp_cap, Highest Performance change supported - 6, 0, EAX, 16, hwp_peci, HWP PECI override is supported - 6, 0, EAX, 17, hwp_flex, Flexible HWP is supported - 6, 0, EAX, 18, hwp_fast, Fast access mode for the IA32_HWP_REQUEST MSR is supported -# 6, 0, EAX, 19, resv, Reserved - 6, 0, EAX, 20, hwp_ignr, Ignoring Idle Logical Processor HWP request is supported - - 6, 0, EBX, 3:0, therm_irq_thresh, Number of Interrupt Thresholds in Digital Thermal Sensor - 6, 0, ECX, 0, aperfmperf, Presence of IA32_MPERF and IA32_APERF - 6, 0, ECX, 3, energ_bias, Performance-energy bias preference supported - -# Leaf 07H -# ECX == 0 -# AVX512 refers to https://en.wikipedia.org/wiki/AVX-512 -# XXX: Do we really need to enumerate each and every AVX512 sub features - - 7, 0, EBX, 0, fsgsbase, RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE supported - 7, 0, EBX, 1, tsc_adjust, TSC_ADJUST MSR supported - 7, 0, EBX, 2, sgx, Software Guard Extensions - 7, 0, EBX, 3, bmi1, BMI1 - 7, 0, EBX, 4, hle, Hardware Lock Elision - 7, 0, EBX, 5, avx2, AVX2 -# 7, 0, EBX, 6, fdp_excp_only, x87 FPU Data Pointer updated only on x87 exceptions - 7, 0, EBX, 7, smep, Supervisor-Mode Execution Prevention - 7, 0, EBX, 8, bmi2, BMI2 - 7, 0, EBX, 9, rep_movsb, Enhanced REP MOVSB/STOSB - 7, 0, EBX, 10, invpcid, INVPCID instruction - 7, 0, EBX, 11, rtm, Restricted Transactional Memory - 7, 0, EBX, 12, rdt_m, Intel RDT Monitoring capability - 7, 0, EBX, 13, depc_fpu_cs_ds, Deprecates FPU CS and FPU DS - 7, 0, EBX, 14, mpx, Memory Protection Extensions - 7, 0, EBX, 15, rdt_a, Intel RDT Allocation capability - 7, 0, EBX, 16, avx512f, AVX512 Foundation instr - 7, 0, EBX, 17, avx512dq, AVX512 Double and Quadword AVX512 instr - 7, 0, EBX, 18, rdseed, RDSEED instr - 7, 0, EBX, 19, adx, ADX instr - 7, 0, EBX, 20, smap, Supervisor Mode Access Prevention - 7, 0, EBX, 21, avx512ifma, AVX512 Integer Fused Multiply Add -# 7, 0, EBX, 22, resvd, resvd - 7, 0, EBX, 23, clflushopt, CLFLUSHOPT instr - 7, 0, EBX, 24, clwb, CLWB instr - 7, 0, EBX, 25, intel_pt, Intel Processor Trace instr - 7, 0, EBX, 26, avx512pf, Prefetch - 7, 0, EBX, 27, avx512er, AVX512 Exponent Reciproca instr - 7, 0, EBX, 28, avx512cd, AVX512 Conflict Detection instr - 7, 0, EBX, 29, sha, Intel Secure Hash Algorithm Extensions instr - 7, 0, EBX, 30, avx512bw, AVX512 Byte & Word instr - 7, 0, EBX, 31, avx512vl, AVX512 Vector Length Extentions (VL) - 7, 0, ECX, 0, prefetchwt1, X - 7, 0, ECX, 1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions - 7, 0, ECX, 2, umip, User-mode Instruction Prevention - - 7, 0, ECX, 3, pku, Protection Keys for User-mode pages - 7, 0, ECX, 4, ospke, CR4 PKE set to enable protection keys -# 7, 0, ECX, 16:5, resvd, resvd - 7, 0, ECX, 21:17, mawau, The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode - 7, 0, ECX, 22, rdpid, RDPID and IA32_TSC_AUX -# 7, 0, ECX, 29:23, resvd, resvd - 7, 0, ECX, 30, sgx_lc, SGX Launch Configuration -# 7, 0, ECX, 31, resvd, resvd - -# Leaf 08H -# - - -# Leaf 09H -# Direct Cache Access (DCA) information - 9, 0, ECX, 31:0, dca_cap, The value of IA32_PLATFORM_DCA_CAP +# SPDX-License-Identifier: CC0-1.0 +# Generator: x86-cpuid-db v1.0 -# Leaf 0AH -# Architectural Performance Monitoring # -# Do we really need to print out the PMU related stuff? -# Does normal user really care about it? +# Auto-generated file. +# Please submit all updates and bugfixes to https://x86-cpuid.org # - 0xA, 0, EAX, 7:0, pmu_ver, Performance Monitoring Unit version - 0xA, 0, EAX, 15:8, pmu_gp_cnt_num, Numer of general-purose PMU counters per logical CPU - 0xA, 0, EAX, 23:16, pmu_cnt_bits, Bit wideth of PMU counter - 0xA, 0, EAX, 31:24, pmu_ebx_bits, Length of EBX bit vector to enumerate PMU events - - 0xA, 0, EBX, 0, pmu_no_core_cycle_evt, Core cycle event not available - 0xA, 0, EBX, 1, pmu_no_instr_ret_evt, Instruction retired event not available - 0xA, 0, EBX, 2, pmu_no_ref_cycle_evt, Reference cycles event not available - 0xA, 0, EBX, 3, pmu_no_llc_ref_evt, Last-level cache reference event not available - 0xA, 0, EBX, 4, pmu_no_llc_mis_evt, Last-level cache misses event not available - 0xA, 0, EBX, 5, pmu_no_br_instr_ret_evt, Branch instruction retired event not available - 0xA, 0, EBX, 6, pmu_no_br_mispredict_evt, Branch mispredict retired event not available - - 0xA, 0, ECX, 4:0, pmu_fixed_cnt_num, Performance Monitoring Unit version - 0xA, 0, ECX, 12:5, pmu_fixed_cnt_bits, Numer of PMU counters per logical CPU - -# Leaf 0BH -# Extended Topology Enumeration Leaf -# - - 0xB, 0, EAX, 4:0, id_shift, Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type - 0xB, 0, EBX, 15:0, cpu_nr, Number of logical processors at this level type - 0xB, 0, ECX, 15:8, lvl_type, 0-Invalid 1-SMT 2-Core - 0xB, 0, EDX, 31:0, x2apic_id, x2APIC ID the current logical processor - - -# Leaf 0DH -# Processor Extended State - 0xD, 0, EAX, 0, x87, X87 state - 0xD, 0, EAX, 1, sse, SSE state - 0xD, 0, EAX, 2, avx, AVX state - 0xD, 0, EAX, 4:3, mpx, MPX state - 0xD, 0, EAX, 7:5, avx512, AVX-512 state - 0xD, 0, EAX, 9, pkru, PKRU state - - 0xD, 0, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0 - 0xD, 0, ECX, 31:0, max_sz_xsave, Maximum size (bytes) of the XSAVE/XRSTOR save area - - 0xD, 1, EAX, 0, xsaveopt, XSAVEOPT available - 0xD, 1, EAX, 1, xsavec, XSAVEC and compacted form supported - 0xD, 1, EAX, 2, xgetbv, XGETBV supported - 0xD, 1, EAX, 3, xsaves, XSAVES/XRSTORS and IA32_XSS supported - - 0xD, 1, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0 - 0xD, 1, ECX, 8, pt, PT state - 0xD, 1, ECX, 11, cet_usr, CET user state - 0xD, 1, ECX, 12, cet_supv, CET supervisor state - 0xD, 1, ECX, 13, hdc, HDC state - 0xD, 1, ECX, 16, hwp, HWP state - -# Leaf 0FH -# Intel RDT Monitoring - - 0xF, 0, EBX, 31:0, rmid_range, Maximum range (zero-based) of RMID within this physical processor of all types - 0xF, 0, EDX, 1, l3c_rdt_mon, L3 Cache RDT Monitoring supported - - 0xF, 1, ECX, 31:0, rmid_range, Maximum range (zero-based) of RMID of this types - 0xF, 1, EDX, 0, l3c_ocp_mon, L3 Cache occupancy Monitoring supported - 0xF, 1, EDX, 1, l3c_tbw_mon, L3 Cache Total Bandwidth Monitoring supported - 0xF, 1, EDX, 2, l3c_lbw_mon, L3 Cache Local Bandwidth Monitoring supported +# The basic row format is: +# LEAF, SUBLEAVES, reg, bits, short_name , long_description + +# Leaf 0H +# Maximum standard leaf number + CPU vendor string + + 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported + 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 + 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 + 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 + +# Leaf 1H +# CPU FMS (Family/Model/Stepping) + standard feature flags + + 1, 0, eax, 3:0, stepping , Stepping ID + 1, 0, eax, 7:4, base_model , Base CPU model ID + 1, 0, eax, 11:8, base_family_id , Base CPU family ID + 1, 0, eax, 13:12, cpu_type , CPU type + 1, 0, eax, 19:16, ext_model , Extended CPU model ID + 1, 0, eax, 27:20, ext_family , Extended CPU family ID + 1, 0, ebx, 7:0, brand_id , Brand index + 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size + 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count + 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID + 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) + 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support + 1, 0, ecx, 2, dtes64 , 64-bit DS save area + 1, 0, ecx, 3, monitor , MONITOR/MWAIT support + 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store + 1, 0, ecx, 5, vmx , Virtual Machine Extensions + 1, 0, ecx, 6, smx , Safer Mode Extensions + 1, 0, ecx, 7, est , Enhanced Intel SpeedStep + 1, 0, ecx, 8, tm2 , Thermal Monitor 2 + 1, 0, ecx, 9, ssse3 , Supplemental SSE3 + 1, 0, ecx, 10, cid , L1 Context ID + 1, 0, ecx, 11, sdbg , Sillicon Debug + 1, 0, ecx, 12, fma , FMA extensions using YMM state + 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support + 1, 0, ecx, 14, xtpr , xTPR Update Control + 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability + 1, 0, ecx, 17, pcid , Process-context identifiers + 1, 0, ecx, 18, dca , Direct Cache Access + 1, 0, ecx, 19, sse4_1 , SSE4.1 + 1, 0, ecx, 20, sse4_2 , SSE4.2 + 1, 0, ecx, 21, x2apic , X2APIC support + 1, 0, ecx, 22, movbe , MOVBE instruction support + 1, 0, ecx, 23, popcnt , POPCNT instruction support + 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation + 1, 0, ecx, 25, aes , AES instructions + 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support + 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS + 1, 0, ecx, 28, avx , AVX instructions support + 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support + 1, 0, ecx, 30, rdrand , RDRAND instruction support + 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system + 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87) + 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions + 1, 0, edx, 2, de , Debugging Extensions + 1, 0, edx, 3, pse , Page Size Extension + 1, 0, edx, 4, tsc , Time Stamp Counter + 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support) + 1, 0, edx, 6, pae , Physical Address Extensions + 1, 0, edx, 7, mce , Machine Check Exception + 1, 0, edx, 8, cx8 , CMPXCHG8B instruction + 1, 0, edx, 9, apic , APIC on-chip + 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs + 1, 0, edx, 12, mtrr , Memory Type Range Registers + 1, 0, edx, 13, pge , Page Global Extensions + 1, 0, edx, 14, mca , Machine Check Architecture + 1, 0, edx, 15, cmov , Conditional Move Instruction + 1, 0, edx, 16, pat , Page Attribute Table + 1, 0, edx, 17, pse36 , Page Size Extension (36-bit) + 1, 0, edx, 18, pn , Processor Serial Number + 1, 0, edx, 19, clflush , CLFLUSH instruction + 1, 0, edx, 21, dts , Debug Store + 1, 0, edx, 22, acpi , Thermal monitor and clock control + 1, 0, edx, 23, mmx , MMX instructions + 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions + 1, 0, edx, 25, sse , SSE instructions + 1, 0, edx, 26, sse2 , SSE2 instructions + 1, 0, edx, 27, ss , Self Snoop + 1, 0, edx, 28, ht , Hyper-threading + 1, 0, edx, 29, tm , Thermal Monitor + 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved + 1, 0, edx, 31, pbe , Pending Break Enable + +# Leaf 2H +# Intel cache and TLB information one-byte descriptors + + 2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried + 2, 0, eax, 15:8, desc1 , Descriptor #1 + 2, 0, eax, 23:16, desc2 , Descriptor #2 + 2, 0, eax, 30:24, desc3 , Descriptor #3 + 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set + 2, 0, ebx, 7:0, desc4 , Descriptor #4 + 2, 0, ebx, 15:8, desc5 , Descriptor #5 + 2, 0, ebx, 23:16, desc6 , Descriptor #6 + 2, 0, ebx, 30:24, desc7 , Descriptor #7 + 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set + 2, 0, ecx, 7:0, desc8 , Descriptor #8 + 2, 0, ecx, 15:8, desc9 , Descriptor #9 + 2, 0, ecx, 23:16, desc10 , Descriptor #10 + 2, 0, ecx, 30:24, desc11 , Descriptor #11 + 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set + 2, 0, edx, 7:0, desc12 , Descriptor #12 + 2, 0, edx, 15:8, desc13 , Descriptor #13 + 2, 0, edx, 23:16, desc14 , Descriptor #14 + 2, 0, edx, 30:24, desc15 , Descriptor #15 + 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set + +# Leaf 4H +# Intel deterministic cache parameters + + 4, 31:0, eax, 4:0, cache_type , Cache type field + 4, 31:0, eax, 7:5, cache_level , Cache level (1-based) + 4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level + 4, 31:0, eax, 9, fully_associative , Fully-associative cache + 4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache + 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package + 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) + 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) + 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) + 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) + 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches + 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches + 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function) + +# Leaf 5H +# MONITOR/MWAIT instructions enumeration + + 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes + 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes + 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported + 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported + 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT + 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT + 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT + 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT + 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT + 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT + 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT + 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT + +# Leaf 6H +# Thermal and Power Management enumeration + + 6, 0, eax, 0, dtherm , Digital temprature sensor + 6, 0, eax, 1, turbo_boost , Intel Turbo Boost + 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) + 6, 0, eax, 4, pln , Power Limit Notification (PLN) event + 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension + 6, 0, eax, 6, pts , Package thermal management + 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported + 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR) + 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported + 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference + 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request + 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported + 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0 + 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change + 6, 0, eax, 16, hwp_peci_override , HWP PECI override + 6, 0, eax, 17, hwp_flexible , Flexible HWP + 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode + 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported + 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported + 6, 0, eax, 23, thread_director , Intel thread director support + 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported + 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds + 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface) + 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support + 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director + 6, 0, edx, 0, perfcap_reporting , Performance capability reporting + 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting + 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages + 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based + +# Leaf 7H +# Extended CPU features enumeration + + 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves + 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support + 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported + 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) + 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1 + 7, 0, ebx, 4, hle , Hardware Lock Elision + 7, 0, ebx, 5, avx2 , AVX2 instruction set + 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions + 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection + 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2 + 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB + 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID) + 7, 0, ebx, 11, rtm , Intel restricted transactional memory + 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring + 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) + 7, 0, ebx, 14, mpx , Intel memory protection extensions + 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent + 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions + 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions + 7, 0, ebx, 18, rdseed , RDSEED instruction + 7, 0, ebx, 19, adx , ADCX/ADOX instructions + 7, 0, ebx, 20, smap , Supervisor mode access prevention + 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add + 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction + 7, 0, ebx, 24, clwb , CLWB instruction + 7, 0, ebx, 25, intel_pt , Intel processor trace + 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions + 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs + 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs + 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions + 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions + 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions + 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) + 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs + 7, 0, ecx, 2, umip , User mode instruction protection + 7, 0, ecx, 3, pku , Protection keys for user-space + 7, 0, ecx, 4, ospke , OS protection keys enable + 7, 0, ecx, 5, waitpkg , WAITPKG instructions + 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2 + 7, 0, ecx, 7, cet_ss , CET shadow stack features + 7, 0, ecx, 8, gfni , Galois field new instructions + 7, 0, ecx, 9, vaes , Vector AES instrs + 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support + 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions + 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle + 7, 0, ecx, 13, tme , Intel total memory encryption + 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW + 7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging) + 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode + 7, 0, ecx, 22, rdpid , RDPID instruction + 7, 0, ecx, 23, key_locker , Intel key locker support + 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection + 7, 0, ecx, 25, cldemote , CLDEMOTE instruction + 7, 0, ecx, 27, movdiri , MOVDIRI instruction + 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction + 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S}) + 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration + 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages + 7, 0, edx, 1, sgx_keys , Intel SGX attestation services + 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions + 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision + 7, 0, edx, 4, fsrm , Fast short REP MOV + 7, 0, edx, 5, uintr , CPU supports user interrupts + 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions + 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available + 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support + 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts + 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported + 7, 0, edx, 14, serialize , SERIALIZE instruction + 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part' + 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking + 7, 0, edx, 18, pconfig , PCONFIG instruction + 7, 0, edx, 19, arch_lbr , Intel architectural LBRs + 7, 0, edx, 20, ibt , CET indirect branch tracking + 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support + 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions + 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support + 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support + 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions) + 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors + 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR + 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR + 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR + 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable + 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions + 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions + 7, 1, eax, 6, lass , Linear address space separation + 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions + 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported + 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB + 7, 1, eax, 11, fsrs , Fast short REP STOSB + 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB + 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions + 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS + 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing) + 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations + 7, 1, eax, 22, hreset , History reset support + 7, 1, eax, 23, avx_ifma , Integer fused multiply add + 7, 1, eax, 26, lam , Linear address masking + 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions + 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs) + 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions + 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions + 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids) + 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions + 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use + 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable + 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S} + 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S} + 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U + 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S + 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed + 7, 2, edx, 6, uclock_disable , UC-lock disable is supported + +# Leaf 9H +# Intel DCA (Direct Cache Access) enumeration + + 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS + +# Leaf AH +# Intel PMU (Performance Monitoring Unit) enumeration + + 0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID + 0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU + 0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters + 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector + 0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available + 0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available + 0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available + 0xa, 0, ebx, 3, no_llc_ref_evt , LLC-reference event not available + 0xa, 0, ebx, 4, no_llc_miss_evt , LLC-misses event not available + 0xa, 0, ebx, 5, no_br_insn_ret_evt , Branch instruction retired event not available + 0xa, 0, ebx, 6, no_br_mispredict_evt , Branch mispredict retired event not available + 0xa, 0, ebx, 7, no_td_slots_evt , Topdown slots event not available + 0xa, 0, ecx, 31:0, pmu_fcounters_bitmap , Fixed-function PMU counters support bitmap + 0xa, 0, edx, 4:0, pmu_n_fcounters , Number of fixed PMU counters + 0xa, 0, edx, 12:5, pmu_fcounters_nbits , Bitwidth of PMU fixed counters + 0xa, 0, edx, 15, anythread_depr , AnyThread deprecation + +# Leaf BH +# CPUs v1 extended topology enumeration + + 0xb, 1:0, eax, 4:0, x2apic_id_shift , Bit width of this level (previous levels inclusive) + 0xb, 1:0, ebx, 15:0, domain_lcpus_count , Logical CPUs count across all instances of this domain + 0xb, 1:0, ecx, 7:0, domain_nr , This domain level (subleaf ID) + 0xb, 1:0, ecx, 15:8, domain_type , This domain type + 0xb, 1:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU + +# Leaf DH +# Processor extended state enumeration + + 0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported + 0xd, 0, eax, 1, xcr0_sse , XCR0.SEE (bit 1) supported + 0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported + 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) + 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) + 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) + 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) + 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) + 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg) + 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET supervisor state) + 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) support (CET user state) + 0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) + 0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA) + 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTR area byte size, for XCR0 enabled features + 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTR area max byte size, all CPU features + 0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling) + 0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction + 0xd, 1, eax, 1, xsavec , XSAVEC instruction + 0xd, 1, eax, 2, xgetbv1 , XGETBV instruction with ECX = 1 + 0xd, 1, eax, 3, xsaves , XSAVES/XRSTORS instructions (and XSS MSR) + 0xd, 1, eax, 4, xfd , Extended feature disable support + 0xd, 1, ebx, 31:0, xsave_sz_xcr0_xmms_enabled, XSAVE area size, all XCR0 and XMMS features enabled + 0xd, 1, ecx, 8, xss_pt , PT state, supported + 0xd, 1, ecx, 10, xss_pasid , PASID state, supported + 0xd, 1, ecx, 11, xss_cet_u , CET user state, supported + 0xd, 1, ecx, 12, xss_cet_p , CET supervisor state, supported + 0xd, 1, ecx, 13, xss_hdc , HDC state, supported + 0xd, 1, ecx, 14, xss_uintr , UINTR state, supported + 0xd, 1, ecx, 15, xss_lbr , LBR state, supported + 0xd, 1, ecx, 16, xss_hwp , HWP state, supported + 0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes + 0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes + 0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit + 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned + +# Leaf FH +# Intel RDT / AMD PQoS resource monitoring + + 0xf, 0, ebx, 31:0, core_rmid_max , RMID max, within this core, all types (0-based) + 0xf, 0, edx, 1, cqm_llc , LLC QoS-monitoring supported + 0xf, 1, eax, 7:0, l3c_qm_bitwidth , L3 QoS-monitoring counter bitwidth (24-based) + 0xf, 1, eax, 8, l3c_qm_overflow_bit , QM_CTR MSR bit 61 is an overflow bit + 0xf, 1, ebx, 31:0, l3c_qm_conver_factor , QM_CTR MSR conversion factor to bytes + 0xf, 1, ecx, 31:0, l3c_qm_rmid_max , L3 QoS-monitoring max RMID + 0xf, 1, edx, 0, cqm_occup_llc , L3 QoS occupancy monitoring supported + 0xf, 1, edx, 1, cqm_mbm_total , L3 QoS total bandwidth monitoring supported + 0xf, 1, edx, 2, cqm_mbm_local , L3 QoS local bandwidth monitoring supported # Leaf 10H -# Intel RDT Allocation - - 0x10, 0, EBX, 1, l3c_rdt_alloc, L3 Cache Allocation supported - 0x10, 0, EBX, 2, l2c_rdt_alloc, L2 Cache Allocation supported - 0x10, 0, EBX, 3, mem_bw_alloc, Memory Bandwidth Allocation supported - +# Intel RDT / AMD PQoS allocation enumeration + + 0x10, 0, ebx, 1, cat_l3 , L3 Cache Allocation Technology supported + 0x10, 0, ebx, 2, cat_l2 , L2 Cache Allocation Technology supported + 0x10, 0, ebx, 3, mba , Memory Bandwidth Allocation supported + 0x10, 2:1, eax, 4:0, cat_cbm_len , L3/L2_CAT capacity bitmask length, minus-one notation + 0x10, 2:1, ebx, 31:0, cat_units_bitmap , L3/L2_CAT bitmap of allocation units + 0x10, 2:1, ecx, 1, l3_cat_cos_infreq_updates, L3_CAT COS updates should be infrequent + 0x10, 2:1, ecx, 2, cdp_l3 , L3/L2_CAT CDP (Code and Data Prioritization) + 0x10, 2:1, ecx, 3, cat_sparse_1s , L3/L2_CAT non-contiguous 1s value supported + 0x10, 2:1, edx, 15:0, cat_cos_max , L3/L2_CAT max COS (Class of Service) supported + 0x10, 3, eax, 11:0, mba_max_delay , Max MBA throttling value; minus-one notation + 0x10, 3, ecx, 0, per_thread_mba , Per-thread MBA controls are supported + 0x10, 3, ecx, 2, mba_delay_linear , Delay values are linear + 0x10, 3, edx, 15:0, mba_cos_max , MBA max Class of Service supported # Leaf 12H -# SGX Capability -# -# Some detailed SGX features not added yet - - 0x12, 0, EAX, 0, sgx1, L3 Cache Allocation supported - 0x12, 1, EAX, 0, sgx2, L3 Cache Allocation supported - +# Intel Software Guard Extensions (SGX) enumeration + + 0x12, 0, eax, 0, sgx1 , SGX1 leaf functions supported + 0x12, 0, eax, 1, sgx2 , SGX2 leaf functions supported + 0x12, 0, eax, 5, enclv_leaves , ENCLV leaves (E{INC,DEC}VIRTCHILD, ESETCONTEXT) supported + 0x12, 0, eax, 6, encls_leaves , ENCLS leaves (ENCLS ETRACKC, ERDINFO, ELDBC, ELDUC) supported + 0x12, 0, eax, 7, enclu_everifyreport2 , ENCLU leaf EVERIFYREPORT2 supported + 0x12, 0, eax, 10, encls_eupdatesvn , ENCLS leaf EUPDATESVN supported + 0x12, 0, eax, 11, sgx_edeccssa , ENCLU leaf EDECCSSA supported + 0x12, 0, ebx, 0, miscselect_exinfo , SSA.MISC frame: reporting #PF and #GP exceptions inside enclave supported + 0x12, 0, ebx, 1, miscselect_cpinfo , SSA.MISC frame: reporting #CP exceptions inside enclave supported + 0x12, 0, edx, 7:0, max_enclave_sz_not64 , Maximum enclave size in non-64-bit mode (log2) + 0x12, 0, edx, 15:8, max_enclave_sz_64 , Maximum enclave size in 64-bit mode (log2) + 0x12, 1, eax, 0, secs_attr_init , ATTRIBUTES.INIT supported (enclave initialized by EINIT) + 0x12, 1, eax, 1, secs_attr_debug , ATTRIBUTES.DEBUG supported (enclave permits debugger read/write) + 0x12, 1, eax, 2, secs_attr_mode64bit , ATTRIBUTES.MODE64BIT supported (enclave runs in 64-bit mode) + 0x12, 1, eax, 4, secs_attr_provisionkey , ATTRIBUTES.PROVISIONKEY supported (provisioning key available) + 0x12, 1, eax, 5, secs_attr_einittoken_key, ATTRIBUTES.EINITTOKEN_KEY supported (EINIT token key available) + 0x12, 1, eax, 6, secs_attr_cet , ATTRIBUTES.CET supported (enable CET attributes) + 0x12, 1, eax, 7, secs_attr_kss , ATTRIBUTES.KSS supported (Key Separation and Sharing enabled) + 0x12, 1, eax, 10, secs_attr_aexnotify , ATTRIBUTES.AEXNOTIFY supported (enclave threads may get AEX notifications + 0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported + 0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SEE (bit 1) supported + 0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported + 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) + 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) + 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) + 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) + 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) + 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg) + 0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) + 0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA) + 0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout) + 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31] + 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51] + 0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding + 0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31] + 0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51] # Leaf 14H -# Intel Processor Tracer -# +# Intel Processor Trace enumeration + + 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf + 0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible + 0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode + 0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation + 0x14, 0, ebx, 3, mtc_timing , MTC timing packet; COFI-based packets suppression + 0x14, 0, ebx, 4, ptwrite , PTWRITE support + 0x14, 0, ebx, 5, power_event_trace , Power Event Trace support + 0x14, 0, ebx, 6, psb_pmi_preserve , PSB and PMI preservation support + 0x14, 0, ebx, 7, event_trace , Event Trace packet generation through IA32_RTIT_CTL.EventEn + 0x14, 0, ebx, 8, tnt_disable , TNT packet generation disable through IA32_RTIT_CTL.DisTNT + 0x14, 0, ecx, 0, topa_output , ToPA output scheme support + 0x14, 0, ecx, 1, topa_multiple_entries , ToPA tables can hold multiple entries + 0x14, 0, ecx, 2, single_range_output , Single-range output scheme supported + 0x14, 0, ecx, 3, trance_transport_output, Trace Transport subsystem output support + 0x14, 0, ecx, 31, ip_payloads_lip , IP payloads have LIP values (CS base included) + 0x14, 1, eax, 2:0, num_address_ranges , Filtering number of configurable Address Ranges + 0x14, 1, eax, 31:16, mtc_periods_bmp , Bitmap of supported MTC period encodings + 0x14, 1, ebx, 15:0, cycle_thresholds_bmp , Bitmap of supported Cycle Threshold encodings + 0x14, 1, ebx, 31:16, psb_periods_bmp , Bitmap of supported Configurable PSB frequency encodings # Leaf 15H -# Time Stamp Counter and Nominal Core Crystal Clock Information +# Intel TSC (Time Stamp Counter) enumeration - 0x15, 0, EAX, 31:0, tsc_denominator, The denominator of the TSC/”core crystal clock” ratio - 0x15, 0, EBX, 31:0, tsc_numerator, The numerator of the TSC/”core crystal clock” ratio - 0x15, 0, ECX, 31:0, nom_freq, Nominal frequency of the core crystal clock in Hz + 0x15, 0, eax, 31:0, tsc_denominator , Denominator of the TSC/'core crystal clock' ratio + 0x15, 0, ebx, 31:0, tsc_numerator , Numerator of the TSC/'core crystal clock' ratio + 0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz # Leaf 16H -# Processor Frequency Information +# Intel processor fequency enumeration - 0x16, 0, EAX, 15:0, cpu_base_freq, Processor Base Frequency in MHz - 0x16, 0, EBX, 15:0, cpu_max_freq, Maximum Frequency in MHz - 0x16, 0, ECX, 15:0, bus_freq, Bus (Reference) Frequency in MHz + 0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz + 0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz + 0x16, 0, ecx, 15:0, bus_mhz , Bus reference frequency, in MHz # Leaf 17H -# System-On-Chip Vendor Attribute - - 0x17, 0, EAX, 31:0, max_socid, Maximum input value of supported sub-leaf - 0x17, 0, EBX, 15:0, soc_vid, SOC Vendor ID - 0x17, 0, EBX, 16, std_vid, SOC Vendor ID is assigned via an industry standard scheme - 0x17, 0, ECX, 31:0, soc_pid, SOC Project ID assigned by vendor - 0x17, 0, EDX, 31:0, soc_sid, SOC Stepping ID +# Intel SoC vendor attributes enumeration + + 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf + 0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID + 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumaeratoion scheme (not Intel) + 0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor + 0x17, 0, edx, 31:0, soc_stepping_id , Soc project stepping ID, assigned by vendor + 0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3) + 0x17, 3:1, ebx, 31:0, vendor_brand_b , Vendor Brand ID string, bytes subleaf_nr * (4 -> 7) + 0x17, 3:1, ecx, 31:0, vendor_brand_c , Vendor Brand ID string, bytes subleaf_nr * (8 -> 11) + 0x17, 3:1, edx, 31:0, vendor_brand_d , Vendor Brand ID string, bytes subleaf_nr * (12 -> 15) # Leaf 18H -# Deterministic Address Translation Parameters - +# Intel determenestic address translation (TLB) parameters + + 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf + 0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported + 0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported + 0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported + 0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported + 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct + 0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity + 0x18, 31:0, ecx, 31:0, n_sets , Number of sets + 0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type) + 0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based) + 0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure + 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressible IDs for logical CPUs sharing this TLB - 1 # Leaf 19H -# Key Locker Leaf +# Intel Key Locker enumeration + 0x19, 0, eax, 0, kl_cpl0_only , CPL0-only key Locker restriction supported + 0x19, 0, eax, 1, kl_no_encrypt , No-encrypt key locker restriction supported + 0x19, 0, eax, 2, kl_no_decrypt , No-decrypt key locker restriction supported + 0x19, 0, ebx, 0, aes_keylocker , AES key locker instructions supported + 0x19, 0, ebx, 2, aes_keylocker_wide , AES wide key locker instructions supported + 0x19, 0, ebx, 4, kl_msr_iwkey , Key locker MSRs and IWKEY backups supported + 0x19, 0, ecx, 0, loadiwkey_no_backup , LOADIWKEY NoBackup parameter supported + 0x19, 0, ecx, 1, iwkey_rand , IWKEY randomization (KeySource encoding 1) supported # Leaf 1AH -# Hybrid Information - - 0x1A, 0, EAX, 31:24, core_type, 20H-Intel_Atom 40H-Intel_Core - +# Intel hybrid CPUs identification (e.g. Atom, Core) + + 0x1a, 0, eax, 23:0, core_native_model , This core's native model ID + 0x1a, 0, eax, 31:24, core_type , This core's type + +# Leaf 1BH +# Intel PCONFIG (Platform configuration) enumeration + + 0x1b, 31:0, eax, 11:0, pconfig_subleaf_type , CPUID 0x1b subleaf type + 0x1b, 31:0, ebx, 31:0, pconfig_target_id_x , A supported PCONFIG target ID + 0x1b, 31:0, ecx, 31:0, pconfig_target_id_y , A supported PCONFIG target ID + 0x1b, 31:0, edx, 31:0, pconfig_target_id_z , A supported PCONFIG target ID + +# Leaf 1CH +# Intel LBR (Last Branch Record) enumeration + + 0x1c, 0, eax, 0, lbr_depth_8 , Max stack depth (number of LBR entries) = 8 + 0x1c, 0, eax, 1, lbr_depth_16 , Max stack depth (number of LBR entries) = 16 + 0x1c, 0, eax, 2, lbr_depth_24 , Max stack depth (number of LBR entries) = 24 + 0x1c, 0, eax, 3, lbr_depth_32 , Max stack depth (number of LBR entries) = 32 + 0x1c, 0, eax, 4, lbr_depth_40 , Max stack depth (number of LBR entries) = 40 + 0x1c, 0, eax, 5, lbr_depth_48 , Max stack depth (number of LBR entries) = 48 + 0x1c, 0, eax, 6, lbr_depth_56 , Max stack depth (number of LBR entries) = 56 + 0x1c, 0, eax, 7, lbr_depth_64 , Max stack depth (number of LBR entries) = 64 + 0x1c, 0, eax, 30, lbr_deep_c_reset , LBRs maybe cleared on MWAIT C-state > C1 + 0x1c, 0, eax, 31, lbr_ip_is_lip , LBR IP contain Last IP, otherwise effective IP + 0x1c, 0, ebx, 0, lbr_cpl , CPL filtering (non-zero IA32_LBR_CTL[2:1]) supported + 0x1c, 0, ebx, 1, lbr_branch_filter , Branch filtering (non-zero IA32_LBR_CTL[22:16]) supported + 0x1c, 0, ebx, 2, lbr_call_stack , Call-stack mode (IA32_LBR_CTL[3] = 1) supported + 0x1c, 0, ecx, 0, lbr_mispredict , Branch misprediction bit supported (IA32_LBR_x_INFO[63]) + 0x1c, 0, ecx, 1, lbr_timed_lbr , Timed LBRs (CPU cycles since last LBR entry) supported + 0x1c, 0, ecx, 2, lbr_branch_type , Branch type field (IA32_LBR_INFO_x[59:56]) supported + 0x1c, 0, ecx, 19:16, lbr_events_gpc_bmp , LBR PMU-events logging support; bitmap for first 4 GP (general-purpose) Counters + +# Leaf 1DH +# Intel AMX (Advanced Matrix Extensions) tile information + + 0x1d, 0, eax, 31:0, amx_max_palette , Highest palette ID / subleaf ID + 0x1d, 1, eax, 15:0, amx_palette_size , AMX palette total tiles size, in bytes + 0x1d, 1, eax, 31:16, amx_tile_size , AMX single tile's size, in bytes + 0x1d, 1, ebx, 15:0, amx_tile_row_size , AMX tile single row's size, in bytes + 0x1d, 1, ebx, 31:16, amx_palette_nr_tiles , AMX palette number of tiles + 0x1d, 1, ecx, 15:0, amx_tile_nr_rows , AMX tile max number of rows + +# Leaf 1EH +# Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration + + 0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns) + 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maxiumum SIMD dimension, N (column bytes) # Leaf 1FH -# V2 Extended Topology - A preferred superset to leaf 0BH - - -# According to SDM -# 40000000H - 4FFFFFFFH is invalid range +# Intel extended topology enumeration v2 + + 0x1f, 5:0, eax, 4:0, x2apic_id_shift , Bit width of this level (previous levels inclusive) + 0x1f, 5:0, ebx, 15:0, domain_lcpus_count , Logical CPUs count across all instances of this domain + 0x1f, 5:0, ecx, 7:0, domain_level , This domain level (subleaf ID) + 0x1f, 5:0, ecx, 15:8, domain_type , This domain type + 0x1f, 5:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU + +# Leaf 20H +# Intel HRESET (History Reset) enumeration + + 0x20, 0, eax, 31:0, hreset_nr_subleaves , CPUID 0x20 max subleaf + 1 + 0x20, 0, ebx, 0, hreset_thread_director , HRESET of Intel thread director is supported + +# Leaf 21H +# Intel TD (Trust Domain) guest execution environment enumeration + + 0x21, 0, ebx, 31:0, tdx_vendorid_0 , TDX vendor ID string bytes 0 - 3 + 0x21, 0, ecx, 31:0, tdx_vendorid_2 , CPU vendor ID string bytes 8 - 11 + 0x21, 0, edx, 31:0, tdx_vendorid_1 , CPU vendor ID string bytes 4 - 7 + +# Leaf 23H +# Intel Architectural Performance Monitoring Extended (ArchPerfmonExt) + + 0x23, 0, eax, 1, subleaf_1_counters , Subleaf 1, PMU counters bitmaps, is valid + 0x23, 0, eax, 3, subleaf_3_events , Subleaf 3, PMU events bitmaps, is valid + 0x23, 0, ebx, 0, unitmask2 , IA32_PERFEVTSELx MSRs UnitMask2 is supported + 0x23, 0, ebx, 1, zbit , IA32_PERFEVTSELx MSRs Z-bit is supported + 0x23, 1, eax, 31:0, pmu_gp_counters_bitmap , General-purpose PMU counters bitmap + 0x23, 1, ebx, 31:0, pmu_f_counters_bitmap , Fixed PMU counters bitmap + 0x23, 3, eax, 0, core_cycles_evt , Core cycles event supported + 0x23, 3, eax, 1, insn_retired_evt , Instructions retired event supported + 0x23, 3, eax, 2, ref_cycles_evt , Reference cycles event supported + 0x23, 3, eax, 3, llc_refs_evt , Last-level cache references event supported + 0x23, 3, eax, 4, llc_misses_evt , Last-level cache misses event supported + 0x23, 3, eax, 5, br_insn_ret_evt , Branch instruction retired event supported + 0x23, 3, eax, 6, br_mispr_evt , Branch mispredict retired event supported + 0x23, 3, eax, 7, td_slots_evt , Topdown slots event supported + 0x23, 3, eax, 8, td_backend_bound_evt , Topdown backend bound event supported + 0x23, 3, eax, 9, td_bad_spec_evt , Topdown bad speculation event supported + 0x23, 3, eax, 10, td_frontend_bound_evt , Topdown frontend bound event supported + 0x23, 3, eax, 11, td_retiring_evt , Topdown retiring event support + +# Leaf 40000000H +# Maximum hypervisor standard leaf + hypervisor vendor string + +0x40000000, 0, eax, 31:0, max_hyp_leaf , Maximum hypervisor standard leaf number +0x40000000, 0, ebx, 31:0, hypervisor_id_0 , Hypervisor ID string bytes 0 - 3 +0x40000000, 0, ecx, 31:0, hypervisor_id_1 , Hypervisor ID string bytes 4 - 7 +0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11 + +# Leaf 80000000H +# Maximum extended leaf number + CPU vendor string (AMD) + +0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported +0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3 +0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11 +0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7 # Leaf 80000001H -# Extended Processor Signature and Feature Bits - -0x80000001, 0, EAX, 27:20, extfamily, Extended family -0x80000001, 0, EAX, 19:16, extmodel, Extended model -0x80000001, 0, EAX, 11:8, basefamily, Description of Family -0x80000001, 0, EAX, 11:8, basemodel, Model numbers vary with product -0x80000001, 0, EAX, 3:0, stepping, Processor stepping (revision) for a specific model - -0x80000001, 0, EBX, 31:28, pkgtype, Specifies the package type - -0x80000001, 0, ECX, 0, lahf_lm, LAHF/SAHF available in 64-bit mode -0x80000001, 0, ECX, 1, cmplegacy, Core multi-processing legacy mode -0x80000001, 0, ECX, 2, svm, Indicates support for: VMRUN, VMLOAD, VMSAVE, CLGI, VMMCALL, and INVLPGA -0x80000001, 0, ECX, 3, extapicspace, Extended APIC register space -0x80000001, 0, ECX, 4, altmovecr8, Indicates support for LOCK MOV CR0 means MOV CR8 -0x80000001, 0, ECX, 5, lzcnt, LZCNT -0x80000001, 0, ECX, 6, sse4a, EXTRQ, INSERTQ, MOVNTSS, and MOVNTSD instruction support -0x80000001, 0, ECX, 7, misalignsse, Misaligned SSE Mode -0x80000001, 0, ECX, 8, prefetchw, PREFETCHW -0x80000001, 0, ECX, 9, osvw, OS Visible Work-around support -0x80000001, 0, ECX, 10, ibs, Instruction Based Sampling -0x80000001, 0, ECX, 11, xop, Extended operation support -0x80000001, 0, ECX, 12, skinit, SKINIT and STGI support -0x80000001, 0, ECX, 13, wdt, Watchdog timer support -0x80000001, 0, ECX, 15, lwp, Lightweight profiling support -0x80000001, 0, ECX, 16, fma4, Four-operand FMA instruction support -0x80000001, 0, ECX, 17, tce, Translation cache extension -0x80000001, 0, ECX, 22, TopologyExtensions, Indicates support for Core::X86::Cpuid::CachePropEax0 and Core::X86::Cpuid::ExtApicId -0x80000001, 0, ECX, 23, perfctrextcore, Indicates support for Core::X86::Msr::PERF_CTL0 - 5 and Core::X86::Msr::PERF_CTR -0x80000001, 0, ECX, 24, perfctrextdf, Indicates support for Core::X86::Msr::DF_PERF_CTL and Core::X86::Msr::DF_PERF_CTR -0x80000001, 0, ECX, 26, databreakpointextension, Indicates data breakpoint support for Core::X86::Msr::DR0_ADDR_MASK, Core::X86::Msr::DR1_ADDR_MASK, Core::X86::Msr::DR2_ADDR_MASK and Core::X86::Msr::DR3_ADDR_MASK -0x80000001, 0, ECX, 27, perftsc, Performance time-stamp counter supported -0x80000001, 0, ECX, 28, perfctrextllc, Indicates support for L3 performance counter extensions -0x80000001, 0, ECX, 29, mwaitextended, MWAITX and MONITORX capability is supported -0x80000001, 0, ECX, 30, admskextn, Indicates support for address mask extension (to 32 bits and to all 4 DRs) for instruction breakpoints - -0x80000001, 0, EDX, 0, fpu, x87 floating point unit on-chip -0x80000001, 0, EDX, 1, vme, Virtual-mode enhancements -0x80000001, 0, EDX, 2, de, Debugging extensions, IO breakpoints, CR4.DE -0x80000001, 0, EDX, 3, pse, Page-size extensions (4 MB pages) -0x80000001, 0, EDX, 4, tsc, Time stamp counter, RDTSC/RDTSCP instructions, CR4.TSD -0x80000001, 0, EDX, 5, msr, Model-specific registers (MSRs), with RDMSR and WRMSR instructions -0x80000001, 0, EDX, 6, pae, Physical-address extensions (PAE) -0x80000001, 0, EDX, 7, mce, Machine Check Exception, CR4.MCE -0x80000001, 0, EDX, 8, cmpxchg8b, CMPXCHG8B instruction -0x80000001, 0, EDX, 9, apic, advanced programmable interrupt controller (APIC) exists and is enabled -0x80000001, 0, EDX, 11, sysret, SYSCALL/SYSRET supported -0x80000001, 0, EDX, 12, mtrr, Memory-type range registers -0x80000001, 0, EDX, 13, pge, Page global extension, CR4.PGE -0x80000001, 0, EDX, 14, mca, Machine check architecture, MCG_CAP -0x80000001, 0, EDX, 15, cmov, Conditional move instructions, CMOV, FCOMI, FCMOV -0x80000001, 0, EDX, 16, pat, Page attribute table -0x80000001, 0, EDX, 17, pse36, Page-size extensions -0x80000001, 0, EDX, 20, exec_dis, Execute Disable Bit available -0x80000001, 0, EDX, 22, mmxext, AMD extensions to MMX instructions -0x80000001, 0, EDX, 23, mmx, MMX instructions -0x80000001, 0, EDX, 24, fxsr, FXSAVE and FXRSTOR instructions -0x80000001, 0, EDX, 25, ffxsr, FXSAVE and FXRSTOR instruction optimizations -0x80000001, 0, EDX, 26, 1gb_page, 1GB page supported -0x80000001, 0, EDX, 27, rdtscp, RDTSCP and IA32_TSC_AUX are available -0x80000001, 0, EDX, 29, lm, 64b Architecture supported -0x80000001, 0, EDX, 30, threednowext, AMD extensions to 3DNow! instructions -0x80000001, 0, EDX, 31, threednow, 3DNow! instructions - -# Leaf 80000002H/80000003H/80000004H -# Processor Brand String +# Extended CPU feature identifiers + +0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID +0x80000001, 0, eax, 7:4, e_base_model , Base processor model +0x80000001, 0, eax, 11:8, e_base_family , Base processor family +0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model +0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family +0x80000001, 0, ebx, 15:0, brand_id , Brand ID +0x80000001, 0, ebx, 31:28, pkg_type , Package type +0x80000001, 0, ecx, 0, lahf_lm , LAHF and SAHF in 64-bit mode +0x80000001, 0, ecx, 1, cmp_legacy , Multi-processing legacy mode (No HT) +0x80000001, 0, ecx, 2, svm , Secure Virtual Machine +0x80000001, 0, ecx, 3, extapic , Extended APIC space +0x80000001, 0, ecx, 4, cr8_legacy , LOCK MOV CR0 means MOV CR8 +0x80000001, 0, ecx, 5, abm , LZCNT advanced bit manipulation +0x80000001, 0, ecx, 6, sse4a , SSE4A support +0x80000001, 0, ecx, 7, misalignsse , Misaligned SSE mode +0x80000001, 0, ecx, 8, 3dnowprefetch , 3DNow PREFETCH/PREFETCHW support +0x80000001, 0, ecx, 9, osvw , OS visible workaround +0x80000001, 0, ecx, 10, ibs , Instruction based sampling +0x80000001, 0, ecx, 11, xop , XOP: extended operation (AVX instructions) +0x80000001, 0, ecx, 12, skinit , SKINIT/STGI support +0x80000001, 0, ecx, 13, wdt , Watchdog timer support +0x80000001, 0, ecx, 15, lwp , Lightweight profiling +0x80000001, 0, ecx, 16, fma4 , 4-operand FMA instruction +0x80000001, 0, ecx, 17, tce , Translation cache extension +0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c) +0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations +0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d) +0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions +0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions +0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension +0x80000001, 0, ecx, 27, ptsc , Performance time-stamp counter +0x80000001, 0, ecx, 28, perfctr_llc , LLC (L3) performance counter extensions +0x80000001, 0, ecx, 29, mwaitx , MWAITX/MONITORX support +0x80000001, 0, ecx, 30, addr_mask_ext , Breakpoint address mask extension (to bit 31) +0x80000001, 0, edx, 0, e_fpu , Floating-Point Unit on-chip (x87) +0x80000001, 0, edx, 1, e_vme , Virtual-8086 Mode Extensions +0x80000001, 0, edx, 2, e_de , Debugging Extensions +0x80000001, 0, edx, 3, e_pse , Page Size Extension +0x80000001, 0, edx, 4, e_tsc , Time Stamp Counter +0x80000001, 0, edx, 5, e_msr , Model-Specific Registers (RDMSR and WRMSR support) +0x80000001, 0, edx, 6, pae , Physical Address Extensions +0x80000001, 0, edx, 7, mce , Machine Check Exception +0x80000001, 0, edx, 8, cx8 , CMPXCHG8B instruction +0x80000001, 0, edx, 9, apic , APIC on-chip +0x80000001, 0, edx, 11, syscall , SYSCALL and SYSRET instructions +0x80000001, 0, edx, 12, mtrr , Memory Type Range Registers +0x80000001, 0, edx, 13, pge , Page Global Extensions +0x80000001, 0, edx, 14, mca , Machine Check Architecture +0x80000001, 0, edx, 15, cmov , Conditional Move Instruction +0x80000001, 0, edx, 16, pat , Page Attribute Table +0x80000001, 0, edx, 17, pse36 , Page Size Extension (36-bit) +0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit +0x80000001, 0, edx, 20, nx , No-execute page protection +0x80000001, 0, edx, 22, mmxext , AMD MMX extensions +0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions +0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations +0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support +0x80000001, 0, edx, 27, rdtscp , RDTSCP instruction +0x80000001, 0, edx, 29, lm , Long mode (x86-64, 64-bit support) +0x80000001, 0, edx, 30, 3dnowext , AMD 3DNow extensions +0x80000001, 0, edx, 31, 3dnow , 3DNow instructions + +# Leaf 80000002H +# CPU brand ID string, bytes 0 - 15 + +0x80000002, 0, eax, 31:0, cpu_brandid_0 , CPU brand ID string, bytes 0 - 3 +0x80000002, 0, ebx, 31:0, cpu_brandid_1 , CPU brand ID string, bytes 4 - 7 +0x80000002, 0, ecx, 31:0, cpu_brandid_2 , CPU brand ID string, bytes 8 - 11 +0x80000002, 0, edx, 31:0, cpu_brandid_3 , CPU brand ID string, bytes 12 - 15 + +# Leaf 80000003H +# CPU brand ID string, bytes 16 - 31 + +0x80000003, 0, eax, 31:0, cpu_brandid_4 , CPU brand ID string bytes, 16 - 19 +0x80000003, 0, ebx, 31:0, cpu_brandid_5 , CPU brand ID string bytes, 20 - 23 +0x80000003, 0, ecx, 31:0, cpu_brandid_6 , CPU brand ID string bytes, 24 - 27 +0x80000003, 0, edx, 31:0, cpu_brandid_7 , CPU brand ID string bytes, 28 - 31 + +# Leaf 80000004H +# CPU brand ID string, bytes 32 - 47 + +0x80000004, 0, eax, 31:0, cpu_brandid_8 , CPU brand ID string, bytes 32 - 35 +0x80000004, 0, ebx, 31:0, cpu_brandid_9 , CPU brand ID string, bytes 36 - 39 +0x80000004, 0, ecx, 31:0, cpu_brandid_10 , CPU brand ID string, bytes 40 - 43 +0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47 # Leaf 80000005H -# Reserved +# AMD L1 cache and L1 TLB enumeration + +0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages +0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages +0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entires, 2M and 4M pages +0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages +0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages +0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages +0x80000005, 0, ebx, 23:16, l1_dtlb_4k_nentries , L1 DTLB #entries, 4K pages +0x80000005, 0, ebx, 31:24, l1_dtlb_4k_assoc , L1 DTLB associativity, 4K pages +0x80000005, 0, ecx, 7:0, l1_dcache_line_size , L1 dcache line size, in bytes +0x80000005, 0, ecx, 15:8, l1_dcache_nlines , L1 dcache lines per tag +0x80000005, 0, ecx, 23:16, l1_dcache_assoc , L1 dcache associativity +0x80000005, 0, ecx, 31:24, l1_dcache_size_kb , L1 dcache size, in KB +0x80000005, 0, edx, 7:0, l1_icache_line_size , L1 icache line size, in bytes +0x80000005, 0, edx, 15:8, l1_icache_nlines , L1 icache lines per tag +0x80000005, 0, edx, 23:16, l1_icache_assoc , L1 icache associativity +0x80000005, 0, edx, 31:24, l1_icache_size_kb , L1 icache size, in KB # Leaf 80000006H -# Extended L2 Cache Features - -0x80000006, 0, ECX, 7:0, clsize, Cache Line size in bytes -0x80000006, 0, ECX, 15:12, l2c_assoc, L2 Associativity -0x80000006, 0, ECX, 31:16, csize, Cache size in 1K units - +# (Mostly AMD) L2 TLB, L2 cache, and L3 cache enumeration + +0x80000006, 0, eax, 11:0, l2_itlb_2m_4m_nentries , L2 iTLB #entries, 2M and 4M pages +0x80000006, 0, eax, 15:12, l2_itlb_2m_4m_assoc , L2 iTLB associativity, 2M and 4M pages +0x80000006, 0, eax, 27:16, l2_dtlb_2m_4m_nentries , L2 dTLB #entries, 2M and 4M pages +0x80000006, 0, eax, 31:28, l2_dtlb_2m_4m_assoc , L2 dTLB associativity, 2M and 4M pages +0x80000006, 0, ebx, 11:0, l2_itlb_4k_nentries , L2 iTLB #entries, 4K pages +0x80000006, 0, ebx, 15:12, l2_itlb_4k_assoc , L2 iTLB associativity, 4K pages +0x80000006, 0, ebx, 27:16, l2_dtlb_4k_nentries , L2 dTLB #entries, 4K pages +0x80000006, 0, ebx, 31:28, l2_dtlb_4k_assoc , L2 dTLB associativity, 4K pages +0x80000006, 0, ecx, 7:0, l2_line_size , L2 cache line size, in bytes +0x80000006, 0, ecx, 11:8, l2_nlines , L2 cache number of lines per tag +0x80000006, 0, ecx, 15:12, l2_assoc , L2 cache associativity +0x80000006, 0, ecx, 31:16, l2_size_kb , L2 cache size, in KB +0x80000006, 0, edx, 7:0, l3_line_size , L3 cache line size, in bytes +0x80000006, 0, edx, 11:8, l3_nlines , L3 cache number of lines per tag +0x80000006, 0, edx, 15:12, l3_assoc , L3 cache associativity +0x80000006, 0, edx, 31:18, l3_size_range , L3 cache size range # Leaf 80000007H - -0x80000007, 0, EDX, 8, nonstop_tsc, Invariant TSC available - +# CPU power management (mostly AMD) and AMD RAS enumeration + +0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal +0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors +0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs +0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs) +0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio +0x80000007, 0, edx, 0, digital_temp , Digital temprature sensor +0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling +0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling +0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip) +0x80000007, 0, edx, 4, hw_thermal_control , Hardware thermal control +0x80000007, 0, edx, 5, sw_thermal_control , Software thermal control +0x80000007, 0, edx, 6, 100mhz_steps , 100 MHz multiplier control +0x80000007, 0, edx, 7, hw_pstate , Hardware P-state control +0x80000007, 0, edx, 8, constant_tsc , TSC ticks at constant rate across all P and C states +0x80000007, 0, edx, 9, cpb , Core performance boost +0x80000007, 0, edx, 10, eff_freq_ro , Read-only effective frequency interface +0x80000007, 0, edx, 11, proc_feedback , Processor feedback interface (deprecated) +0x80000007, 0, edx, 12, acc_power , Processor power reporting interface +0x80000007, 0, edx, 13, connected_standby , CPU Connected Standby support +0x80000007, 0, edx, 14, rapl , Runtime Average Power Limit interface # Leaf 80000008H - -0x80000008, 0, EAX, 7:0, phy_adr_bits, Physical Address Bits -0x80000008, 0, EAX, 15:8, lnr_adr_bits, Linear Address Bits -0x80000007, 0, EBX, 9, wbnoinvd, WBNOINVD - -# 0x8000001E -# EAX: Extended APIC ID -0x8000001E, 0, EAX, 31:0, extended_apic_id, Extended APIC ID -# EBX: Core Identifiers -0x8000001E, 0, EBX, 7:0, core_id, Identifies the logical core ID -0x8000001E, 0, EBX, 15:8, threads_per_core, The number of threads per core is threads_per_core + 1 -# ECX: Node Identifiers -0x8000001E, 0, ECX, 7:0, node_id, Node ID -0x8000001E, 0, ECX, 10:8, nodes_per_processor, Nodes per processor { 0: 1 node, else reserved } - -# 8000001F: AMD Secure Encryption -0x8000001F, 0, EAX, 0, sme, Secure Memory Encryption -0x8000001F, 0, EAX, 1, sev, Secure Encrypted Virtualization -0x8000001F, 0, EAX, 2, vmpgflush, VM Page Flush MSR -0x8000001F, 0, EAX, 3, seves, SEV Encrypted State -0x8000001F, 0, EBX, 5:0, c-bit, Page table bit number used to enable memory encryption -0x8000001F, 0, EBX, 11:6, mem_encrypt_physaddr_width, Reduction of physical address space in bits with SME enabled -0x8000001F, 0, ECX, 31:0, num_encrypted_guests, Maximum ASID value that may be used for an SEV-enabled guest -0x8000001F, 0, EDX, 31:0, minimum_sev_asid, Minimum ASID value that must be used for an SEV-enabled, SEV-ES-disabled guest +# CPU capacity parameters and extended feature flags (mostly AMD) + +0x80000008, 0, eax, 7:0, phys_addr_bits , Max physical address bits +0x80000008, 0, eax, 15:8, virt_addr_bits , Max virtual address bits +0x80000008, 0, eax, 23:16, guest_phys_addr_bits , Max nested-paging guest physical address bits +0x80000008, 0, ebx, 0, clzero , CLZERO supported +0x80000008, 0, ebx, 1, irperf , Instruction retired counter MSR +0x80000008, 0, ebx, 2, xsaveerptr , XSAVE/XRSTOR always saves/restores FPU error pointers +0x80000008, 0, ebx, 3, invlpgb , INVLPGB broadcasts a TLB invalidate to all threads +0x80000008, 0, ebx, 4, rdpru , RDPRU (Read Processor Register at User level) supported +0x80000008, 0, ebx, 6, mba , Memory Bandwidth Allocation (AMD bit) +0x80000008, 0, ebx, 8, mcommit , MCOMMIT (Memory commit) supported +0x80000008, 0, ebx, 9, wbnoinvd , WBNOINVD supported +0x80000008, 0, ebx, 12, amd_ibpb , Indirect Branch Prediction Barrier +0x80000008, 0, ebx, 13, wbinvd_int , Interruptible WBINVD/WBNOINVD +0x80000008, 0, ebx, 14, amd_ibrs , Indirect Branch Restricted Speculation +0x80000008, 0, ebx, 15, amd_stibp , Single Thread Indirect Branch Prediction mode +0x80000008, 0, ebx, 16, ibrs_always_on , IBRS always-on preferred +0x80000008, 0, ebx, 17, amd_stibp_always_on , STIBP always-on preferred +0x80000008, 0, ebx, 18, ibrs_fast , IBRS is preferred over software solution +0x80000008, 0, ebx, 19, ibrs_same_mode , IBRS provides same mode protection +0x80000008, 0, ebx, 20, no_efer_lmsle , EFER[LMSLE] bit (Long-Mode Segment Limit Enable) unsupported +0x80000008, 0, ebx, 21, tlb_flush_nested , INVLPGB RAX[5] bit can be set (nested translations) +0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number +0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable +0x80000008, 0, ebx, 25, virt_ssbd , virtualized SSBD (Speculative Store Bypass Disable) +0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW) +0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control +0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable +0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion +0x80000008, 0, ebx, 30, ibpb_ret , IBPB clears RSB/RAS too +0x80000008, 0, ebx, 31, brs , Branch Sampling supported +0x80000008, 0, ecx, 7:0, cpu_nthreads , Number of physical threads - 1 +0x80000008, 0, ecx, 15:12, apicid_coreid_len , Number of thread core ID bits (shift) in APIC ID +0x80000008, 0, ecx, 17:16, perf_tsc_len , Performance time-stamp counter size +0x80000008, 0, edx, 15:0, invlpgb_max_pages , INVLPGB maximum page count +0x80000008, 0, edx, 31:16, rdpru_max_reg_id , RDPRU max register ID (ECX input) + +# Leaf 8000000AH +# AMD SVM (Secure Virtual Machine) enumeration + +0x8000000a, 0, eax, 7:0, svm_version , SVM revision number +0x8000000a, 0, ebx, 31:0, svm_nasid , Number of address space identifiers (ASID) +0x8000000a, 0, edx, 0, npt , Nested paging +0x8000000a, 0, edx, 1, lbrv , LBR virtualization +0x8000000a, 0, edx, 2, svm_lock , SVM lock +0x8000000a, 0, edx, 3, nrip_save , NRIP save support on #VMEXIT +0x8000000a, 0, edx, 4, tsc_scale , MSR based TSC rate control +0x8000000a, 0, edx, 5, vmcb_clean , VMCB clean bits support +0x8000000a, 0, edx, 6, flushbyasid , Flush by ASID + Extended VMCB TLB_Control +0x8000000a, 0, edx, 7, decodeassists , Decode Assists support +0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter +0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold +0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller +0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt) +0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag +0x8000000a, 0, edx, 17, gmet , Guest mode execution trap +0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC +0x8000000a, 0, edx, 19, sss_check , Supervisor Shadow Stack restrictions +0x8000000a, 0, edx, 20, v_spec_ctrl , Virtual SPEC_CTRL +0x8000000a, 0, edx, 21, ro_gpt , Read-Only guest page table support +0x8000000a, 0, edx, 23, h_mce_override , Host MCE override +0x8000000a, 0, edx, 24, tlbsync_int , TLBSYNC intercept + INVLPGB/TLBSYNC in VMCB +0x8000000a, 0, edx, 25, vnmi , NMI virtualization +0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization +0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change +0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check + +# Leaf 80000019H +# AMD TLB 1G-pages enumeration + +0x80000019, 0, eax, 11:0, l1_itlb_1g_nentries , L1 iTLB #entries, 1G pages +0x80000019, 0, eax, 15:12, l1_itlb_1g_assoc , L1 iTLB associativity, 1G pages +0x80000019, 0, eax, 27:16, l1_dtlb_1g_nentries , L1 dTLB #entries, 1G pages +0x80000019, 0, eax, 31:28, l1_dtlb_1g_assoc , L1 dTLB associativity, 1G pages +0x80000019, 0, ebx, 11:0, l2_itlb_1g_nentries , L2 iTLB #entries, 1G pages +0x80000019, 0, ebx, 15:12, l2_itlb_1g_assoc , L2 iTLB associativity, 1G pages +0x80000019, 0, ebx, 27:16, l2_dtlb_1g_nentries , L2 dTLB #entries, 1G pages +0x80000019, 0, ebx, 31:28, l2_dtlb_1g_assoc , L2 dTLB associativity, 1G pages + +# Leaf 8000001AH +# AMD instruction optimizations enumeration + +0x8000001a, 0, eax, 0, fp_128 , Internal FP/SIMD exec data path is 128-bits wide +0x8000001a, 0, eax, 1, movu_preferred , SSE: MOVU* better than MOVL*/MOVH* +0x8000001a, 0, eax, 2, fp_256 , internal FP/SSE exec data path is 256-bits wide + +# Leaf 8000001BH +# AMD IBS (Instruction-Based Sampling) enumeration + +0x8000001b, 0, eax, 0, ibs_flags_valid , IBS feature flags valid +0x8000001b, 0, eax, 1, ibs_fetch_sampling , IBS fetch sampling supported +0x8000001b, 0, eax, 2, ibs_op_sampling , IBS execution sampling supported +0x8000001b, 0, eax, 3, ibs_rdwr_op_counter , IBS read/write of op counter supported +0x8000001b, 0, eax, 4, ibs_op_count , IBS OP counting mode supported +0x8000001b, 0, eax, 5, ibs_branch_target , IBS branch target address reporting supported +0x8000001b, 0, eax, 6, ibs_op_counters_ext , IBS IbsOpCurCnt/IbsOpMaxCnt extend by 7 bits +0x8000001b, 0, eax, 7, ibs_rip_invalid_chk , IBS invalid RIP indication supported +0x8000001b, 0, eax, 8, ibs_op_branch_fuse , IBS fused branch micro-op indication supported +0x8000001b, 0, eax, 9, ibs_fetch_ctl_ext , IBS Fetch Control Extended MSR (0xc001103c) supported +0x8000001b, 0, eax, 10, ibs_op_data_4 , IBS op data 4 MSR supported +0x8000001b, 0, eax, 11, ibs_l3_miss_filter , IBS L3-miss filtering supported (Zen4+) + +# Leaf 8000001CH +# AMD LWP (Lightweight Profiling) + +0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS) +0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS +0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS +0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS +0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS +0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS +0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS +0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS +0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS +0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS +0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords +0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes +0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included) +0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block +0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31) +0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference +0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded +0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version +0x8000001c, 0, ecx, 23:16, lwp_buf_min_sz , LWP event ring buffer min size, in units of 32 event records +0x8000001c, 0, ecx, 28, lwp_branch_predict , Branches Retired events can be filtered +0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCP) supported +0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level +0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency +0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware +0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW +0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW +0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW +0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW +0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW +0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW +0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW +0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW +0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW + +# Leaf 8000001DH +# AMD deterministic cache parameters + +0x8000001d, 31:0, eax, 4:0, cache_type , Cache type field +0x8000001d, 31:0, eax, 7:5, cache_level , Cache level (1-based) +0x8000001d, 31:0, eax, 8, cache_self_init , Self-initializing cache level +0x8000001d, 31:0, eax, 9, fully_associative , Fully-associative cache +0x8000001d, 31:0, eax, 25:14, num_threads_sharing , Number of logical CPUs sharing cache +0x8000001d, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) +0x8000001d, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) +0x8000001d, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) +0x8000001d, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) +0x8000001d, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches +0x8000001d, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches + +# Leaf 8000001EH +# AMD CPU topology enumeration + +0x8000001e, 0, eax, 31:0, ext_apic_id , Extended APIC ID +0x8000001e, 0, ebx, 7:0, core_id , Unique per-socket logical core unit ID +0x8000001e, 0, ebx, 15:8, core_nthreas , #Threads per core (zero-based) +0x8000001e, 0, ecx, 7:0, node_id , Node (die) ID of invoking logical CPU +0x8000001e, 0, ecx, 10:8, nnodes_per_socket , #nodes in invoking logical CPU's package/socket + +# Leaf 8000001FH +# AMD encrypted memory capabilities enumeration (SME/SEV) + +0x8000001f, 0, eax, 0, sme , Secure Memory Encryption supported +0x8000001f, 0, eax, 1, sev , Secure Encrypted Virtualization supported +0x8000001f, 0, eax, 2, vm_page_flush , VM Page Flush MSR (0xc001011e) available +0x8000001f, 0, eax, 3, sev_es , SEV Encrypted State supported +0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported +0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported +0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported +0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadwo stack supported +0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported +0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX +0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains +0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor +0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported +0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported +0x8000001f, 0, eax, 14, debug_swap , SEV-ES: full debug state swap is supported +0x8000001f, 0, eax, 15, disallow_host_ibs , SEV-ES: Disallowing IBS use by the host is supported +0x8000001f, 0, eax, 16, virt_transparent_enc , Virtual Transparent Encryption +0x8000001f, 0, eax, 17, vmgexit_paremeter , VmgexitParameter is supported in SEV_FEATURES +0x8000001f, 0, eax, 18, virt_tom_msr , Virtual TOM MSR is supported +0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests +0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported +0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported +0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000h) is supported +0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported +0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption +0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits +0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported +0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests +0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Mininum ASID for SEV-enabled SEV-ES-disabled guest + +# Leaf 80000020H +# AMD Platform QoS extended feature IDs + +0x80000020, 0, ebx, 1, mba , Memory Bandwidth Allocation support +0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support +0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support +0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support +0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size +0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based) +0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size +0x80000020, 2, edx, 31:0, smba_cos_max , SMBA max Class of Service number (zero-based) +0x80000020, 3, ebx, 7:0, bmec_num_events , BMEC number of bandwidth events available +0x80000020, 3, ecx, 0, bmec_local_reads , Local NUMA reads can be tracked +0x80000020, 3, ecx, 1, bmec_remote_reads , Remote NUMA reads can be tracked +0x80000020, 3, ecx, 2, bmec_local_nontemp_wr , Local NUMA non-temporal writes can be tracked +0x80000020, 3, ecx, 3, bmec_remote_nontemp_wr , Remote NUMA non-temporal writes can be tracked +0x80000020, 3, ecx, 4, bmec_local_slow_mem_rd , Local NUMA slow-memory reads can be tracked +0x80000020, 3, ecx, 5, bmec_remote_slow_mem_rd, Remote NUMA slow-memory reads can be tracked +0x80000020, 3, ecx, 6, bmec_all_dirty_victims , Dirty QoS victims to all types of memory can be tracked + +# Leaf 80000021H +# AMD extended features enumeration 2 + +0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints +0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing +0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC +0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported +0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base +0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported +0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported +0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present +0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported +0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported +0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported +0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported +0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported +0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units + +# Leaf 80000022H +# AMD Performance Monitoring v2 enumeration + +0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported +0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack) +0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported +0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core perfomance counters +0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries +0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters +0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters +0x80000022, 0, ecx, 31:0, active_umc_bitmask , Active UMCs bitmask + +# Leaf 80000023H +# AMD Secure Multi-key Encryption enumeration + +0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported +0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys + +# Leaf 80000026H +# AMD extended topology enumeration v2 + +0x80000026, 3:0, eax, 4:0, x2apic_id_shift , Bit width of this level (previous levels inclusive) +0x80000026, 3:0, eax, 29, core_has_pwreff_ranking, This core has a power efficiency ranking +0x80000026, 3:0, eax, 30, domain_has_hybrid_cores, This domain level has hybrid (E, P) cores +0x80000026, 3:0, eax, 31, domain_core_count_asymm, The 'Core' domain has asymmetric cores count +0x80000026, 3:0, ebx, 15:0, domain_lcpus_count , Number of logical CPUs at this domain instance +0x80000026, 3:0, ebx, 23:16, core_pwreff_ranking , This core's static power efficiency ranking +0x80000026, 3:0, ebx, 27:24, core_native_model_id , This core's native model ID +0x80000026, 3:0, ebx, 31:28, core_type , This core's type +0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID) +0x80000026, 3:0, ecx, 15:8, domain_type , This domain type +0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 24b7d017ec2c..1b25c0a95d3f 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -7,7 +7,8 @@ #include <string.h> #include <getopt.h> -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define min(a, b) (((a) < (b)) ? (a) : (b)) typedef unsigned int u32; typedef unsigned long long u64; @@ -76,7 +77,6 @@ struct cpuid_range { */ struct cpuid_range *leafs_basic, *leafs_ext; -static int num_leafs; static bool is_amd; static bool show_details; static bool show_raw; @@ -98,27 +98,17 @@ static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) static inline bool has_subleafs(u32 f) { - if (f == 0x7 || f == 0xd) - return true; - - if (is_amd) { - if (f == 0x8000001d) + u32 with_subleaves[] = { + 0x4, 0x7, 0xb, 0xd, 0xf, 0x10, 0x12, + 0x14, 0x17, 0x18, 0x1b, 0x1d, 0x1f, 0x23, + 0x8000001d, 0x80000020, 0x80000026, + }; + + for (unsigned i = 0; i < ARRAY_SIZE(with_subleaves); i++) + if (f == with_subleaves[i]) return true; - return false; - } - switch (f) { - case 0x4: - case 0xb: - case 0xf: - case 0x10: - case 0x14: - case 0x18: - case 0x1f: - return true; - default: - return false; - } + return false; } static void leaf_print_raw(struct subleaf *leaf) @@ -204,15 +194,12 @@ static void raw_dump_range(struct cpuid_range *range) } } -#define MAX_SUBLEAF_NUM 32 +#define MAX_SUBLEAF_NUM 64 struct cpuid_range *setup_cpuid_range(u32 input_eax) { - u32 max_func, idx_func; - int subleaf; + u32 max_func, idx_func, subleaf, max_subleaf; + u32 eax, ebx, ecx, edx, f = input_eax; struct cpuid_range *range; - u32 eax, ebx, ecx, edx; - u32 f = input_eax; - int max_subleaf; bool allzero; eax = input_eax; @@ -246,7 +233,6 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); if (allzero) continue; - num_leafs++; if (!has_subleafs(f)) continue; @@ -257,11 +243,18 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) * Some can provide the exact number of subleafs, * others have to be tried (0xf) */ - if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18) - max_subleaf = (eax & 0xff) + 1; - + if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18 || f == 0x1d) + max_subleaf = min((eax & 0xff) + 1, max_subleaf); if (f == 0xb) max_subleaf = 2; + if (f == 0x1f) + max_subleaf = 6; + if (f == 0x23) + max_subleaf = 4; + if (f == 0x80000020) + max_subleaf = 4; + if (f == 0x80000026) + max_subleaf = 5; for (subleaf = 1; subleaf < max_subleaf; subleaf++) { eax = f; @@ -272,7 +265,6 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) eax, ebx, ecx, edx); if (allzero) continue; - num_leafs++; } } @@ -313,6 +305,8 @@ static int parse_line(char *line) struct bits_desc *bdesc; int reg_index; char *start, *end; + u32 subleaf_start, subleaf_end; + unsigned bit_start, bit_end; /* Skip comments and NULL line */ if (line[0] == '#' || line[0] == '\n') @@ -351,13 +345,25 @@ static int parse_line(char *line) return 0; /* subleaf */ - sub = strtoul(tokens[1], NULL, 0); - if ((int)sub > func->nr) - return -1; + buf = tokens[1]; + end = strtok(buf, ":"); + start = strtok(NULL, ":"); + subleaf_end = strtoul(end, NULL, 0); + + /* A subleaf range is given? */ + if (start) { + subleaf_start = strtoul(start, NULL, 0); + subleaf_end = min(subleaf_end, (u32)(func->nr - 1)); + if (subleaf_start > subleaf_end) + return 0; + } else { + subleaf_start = subleaf_end; + if (subleaf_start > (u32)(func->nr - 1)) + return 0; + } - leaf = &func->leafs[sub]; + /* register */ buf = tokens[2]; - if (strcasestr(buf, "EAX")) reg_index = R_EAX; else if (strcasestr(buf, "EBX")) @@ -369,23 +375,23 @@ static int parse_line(char *line) else goto err_exit; - reg = &leaf->info[reg_index]; - bdesc = ®->descs[reg->nr++]; - /* bit flag or bits field */ buf = tokens[3]; - end = strtok(buf, ":"); - bdesc->end = strtoul(end, NULL, 0); - bdesc->start = bdesc->end; - - /* start != NULL means it is bit fields */ start = strtok(NULL, ":"); - if (start) - bdesc->start = strtoul(start, NULL, 0); - - strcpy(bdesc->simp, tokens[4]); - strcpy(bdesc->detail, tokens[5]); + bit_end = strtoul(end, NULL, 0); + bit_start = (start) ? strtoul(start, NULL, 0) : bit_end; + + for (sub = subleaf_start; sub <= subleaf_end; sub++) { + leaf = &func->leafs[sub]; + reg = &leaf->info[reg_index]; + bdesc = ®->descs[reg->nr++]; + + bdesc->end = bit_end; + bdesc->start = bit_start; + strcpy(bdesc->simp, strtok(tokens[4], " \t")); + strcpy(bdesc->detail, tokens[5]); + } return 0; err_exit: @@ -452,8 +458,9 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) if (start == end) { /* single bit flag */ if (value & (1 << start)) - printf("\t%-20s %s%s\n", + printf("\t%-20s %s%s%s\n", bdesc->simp, + show_flags_only ? "" : "\t\t\t", show_details ? "-" : "", show_details ? bdesc->detail : "" ); diff --git a/tools/arch/x86/vdso b/tools/arch/x86/vdso new file mode 120000 index 000000000000..7eb962fd3454 --- /dev/null +++ b/tools/arch/x86/vdso @@ -0,0 +1 @@ +../../../arch/x86/entry/vdso/
\ No newline at end of file diff --git a/tools/crypto/ccp/dbc.c b/tools/crypto/ccp/dbc.c index a807df0f0597..80248d3d3a5a 100644 --- a/tools/crypto/ccp/dbc.c +++ b/tools/crypto/ccp/dbc.c @@ -57,7 +57,6 @@ int process_param(int fd, int msg_index, __u8 *signature, int *data) .msg_index = msg_index, .param = *data, }; - int ret; assert(signature); assert(data); diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile index d29c9c49e251..ed565eb52275 100644 --- a/tools/gpio/Makefile +++ b/tools/gpio/Makefile @@ -78,7 +78,7 @@ $(OUTPUT)gpio-watch: $(GPIO_WATCH_IN) clean: rm -f $(ALL_PROGRAMS) rm -f $(OUTPUT)include/linux/gpio.h - find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(bindir); \ diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c index 54fdf59dd320..ba0866eb3581 100644 --- a/tools/gpio/gpio-hammer.c +++ b/tools/gpio/gpio-hammer.c @@ -54,7 +54,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines, fprintf(stdout, "Hammer lines ["); for (i = 0; i < num_lines; i++) { - fprintf(stdout, "%d", lines[i]); + fprintf(stdout, "%u", lines[i]); if (i != (num_lines - 1)) fprintf(stdout, ", "); } @@ -89,7 +89,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int num_lines, fprintf(stdout, "["); for (i = 0; i < num_lines; i++) { - fprintf(stdout, "%d: %d", lines[i], + fprintf(stdout, "%u: %d", lines[i], gpiotools_test_bit(values.bits, i)); if (i != (num_lines - 1)) fprintf(stdout, ", "); diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 2e60e2c212cd..34ffcec264ab 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -52,7 +52,7 @@ $(OUTPUT)hv_fcopy_uio_daemon: $(HV_FCOPY_UIO_DAEMON_IN) clean: rm -f $(ALL_PROGRAMS) - find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(sbindir); \ diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 3ce316cc9f97..7a00f3066a98 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -296,6 +296,13 @@ static int hv_fcopy_start(struct hv_start_fcopy *smsg_in) file_name = (char *)malloc(file_size * sizeof(char)); path_name = (char *)malloc(path_size * sizeof(char)); + if (!file_name || !path_name) { + free(file_name); + free(path_name); + syslog(LOG_ERR, "Can't allocate memory for file name and/or path name"); + return HV_E_FAIL; + } + wcstoutf8(file_name, (__u16 *)in_file_name, file_size); wcstoutf8(path_name, (__u16 *)in_path_name, path_size); diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus index 099f2c44dbed..f83698f14da2 100644..100755 --- a/tools/hv/lsvmbus +++ b/tools/hv/lsvmbus @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 import os diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h index 7ce02a223732..8e548ac8f740 100644 --- a/tools/include/asm/alternative.h +++ b/tools/include/asm/alternative.h @@ -2,8 +2,18 @@ #ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H #define _TOOLS_ASM_ALTERNATIVE_ASM_H +#if defined(__s390x__) +#ifdef __ASSEMBLY__ +.macro ALTERNATIVE oldinstr, newinstr, feature + \oldinstr +.endm +#endif +#else + /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ #define ALTERNATIVE # #endif + +#endif diff --git a/tools/include/generated/asm-offsets.h b/tools/include/generated/asm-offsets.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/include/generated/asm-offsets.h diff --git a/tools/include/generated/asm/cpucap-defs.h b/tools/include/generated/asm/cpucap-defs.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/include/generated/asm/cpucap-defs.h diff --git a/tools/include/generated/asm/sysreg-defs.h b/tools/include/generated/asm/sysreg-defs.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/include/generated/asm/sysreg-defs.h diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 6f7f22ac9da5..4366da278033 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -2,6 +2,8 @@ #ifndef _TOOLS_LINUX_COMPILER_H_ #define _TOOLS_LINUX_COMPILER_H_ +#ifndef __ASSEMBLY__ + #include <linux/compiler_types.h> #ifndef __compiletime_error @@ -224,4 +226,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#endif /* __ASSEMBLY__ */ + #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h index bc763d500262..a48ff086899c 100644 --- a/tools/include/linux/linkage.h +++ b/tools/include/linux/linkage.h @@ -1,4 +1,8 @@ #ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H #define _TOOLS_INCLUDE_LINUX_LINKAGE_H +#define SYM_FUNC_START(x) .globl x; x: + +#define SYM_FUNC_END(x) + #endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index e69c26abe1ea..a1f55fb24bb3 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -35,6 +35,7 @@ all_files := \ stackprotector.h \ std.h \ stdarg.h \ + stdbool.h \ stdint.h \ stdlib.h \ string.h \ diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-aarch64.h index b23ac1f04035..06fdef7b291a 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-aarch64.h @@ -142,13 +142,13 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "mov x0, sp\n" /* save stack pointer to x0, as arg1 of _start_c */ "and sp, x0, -16\n" /* sp must be 16-byte aligned in the callee */ "bl _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_AARCH64_H */ diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h index cae4afa7c1c7..6180ff99ab43 100644 --- a/tools/include/nolibc/arch-arm.h +++ b/tools/include/nolibc/arch-arm.h @@ -185,15 +185,15 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( - "mov %r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ - "and ip, %r0, #-8\n" /* sp must be 8-byte aligned in the callee */ + "mov r0, sp\n" /* save stack pointer to %r0, as arg1 of _start_c */ + "and ip, r0, #-8\n" /* sp must be 8-byte aligned in the callee */ "mov sp, ip\n" "bl _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_ARM_H */ diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h index 28c26a00a762..ff5afc35bbd8 100644 --- a/tools/include/nolibc/arch-i386.h +++ b/tools/include/nolibc/arch-i386.h @@ -162,7 +162,7 @@ * 2) The deepest stack frame should be set to zero * */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ @@ -174,7 +174,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_I386_H */ diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h index 3f8ef8f86c0f..fb519545959e 100644 --- a/tools/include/nolibc/arch-loongarch.h +++ b/tools/include/nolibc/arch-loongarch.h @@ -149,14 +149,14 @@ #endif /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ LONG_BSTRINS " $sp, $zero, 3, 0\n" /* $sp must be 16-byte aligned */ "bl _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_LOONGARCH_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 62cc50ef3288..1791a8ce58da 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -179,7 +179,7 @@ }) /* startup code, note that it's called __start on MIPS */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector __start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) { __asm__ volatile ( ".set push\n" @@ -194,11 +194,13 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "li $t0, -8\n" "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ - "jal _start_c\n" /* transfer to c runtime */ + "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ + "ori $t9, %lo(_start_c)\n" + "jalr $t9\n" /* transfer to c runtime */ " nop\n" /* delayed slot */ ".set pop\n" ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_MIPS_H */ diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h index ac212e6185b2..ee2fdb8d601d 100644 --- a/tools/include/nolibc/arch-powerpc.h +++ b/tools/include/nolibc/arch-powerpc.h @@ -172,7 +172,7 @@ _ret; \ }) -#ifndef __powerpc64__ +#if !defined(__powerpc64__) && !defined(__clang__) /* FIXME: For 32-bit PowerPC, with newer gcc compilers (e.g. gcc 13.1.0), * "omit-frame-pointer" fails with __attribute__((no_stack_protector)) but * works with __attribute__((__optimize__("-fno-stack-protector"))) @@ -184,7 +184,7 @@ #endif /* !__powerpc64__ */ /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { #ifdef __powerpc64__ #if _CALL_ELF == 2 @@ -215,7 +215,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "bl _start_c\n" /* transfer to c runtime */ ); #endif - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_POWERPC_H */ diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h index 1927c643c739..8827bf936212 100644 --- a/tools/include/nolibc/arch-riscv.h +++ b/tools/include/nolibc/arch-riscv.h @@ -140,7 +140,7 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( ".option push\n" @@ -151,7 +151,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "andi sp, a0, -16\n" /* sp must be 16-byte aligned */ "call _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #endif /* _NOLIBC_ARCH_RISCV_H */ diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index 5d60fd43f883..2ec13d8b9a2d 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -139,7 +139,7 @@ }) /* startup code */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */ @@ -147,7 +147,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "xc 0(8,%r15), 0(%r15)\n" /* clear backchain */ "brasl %r14, _start_c\n" /* transfer to c runtime */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } struct s390_mmap_arg_struct { diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86_64.h index 68609f421934..1e40620a2b33 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86_64.h @@ -161,7 +161,7 @@ * 2) The deepest stack frame should be zero (the %rbp). * */ -void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_stack_protector _start(void) +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) { __asm__ volatile ( "xor %ebp, %ebp\n" /* zero the stack frame */ @@ -170,7 +170,7 @@ void __attribute__((weak, noreturn, optimize("Os", "omit-frame-pointer"))) __no_ "call _start_c\n" /* transfer to c runtime */ "hlt\n" /* ensure it does not return */ ); - __builtin_unreachable(); + __nolibc_entrypoint_epilogue(); } #define NOLIBC_ARCH_HAS_MEMMOVE @@ -193,10 +193,10 @@ __asm__ ( "movq %rdi, %rdx\n\t" "subq %rsi, %rdx\n\t" "cmpq %rcx, %rdx\n\t" - "jb .Lbackward_copy\n\t" + "jb 1f\n\t" "rep movsb\n\t" "retq\n" -".Lbackward_copy:" +"1:" /* backward copy */ "leaq -1(%rdi, %rcx, 1), %rdi\n\t" "leaq -1(%rsi, %rcx, 1), %rsi\n\t" "std\n\t" diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index beddc3665d69..9bc6a706a332 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -6,20 +6,30 @@ #ifndef _NOLIBC_COMPILER_H #define _NOLIBC_COMPILER_H +#if defined(__has_attribute) +# define __nolibc_has_attribute(attr) __has_attribute(attr) +#else +# define __nolibc_has_attribute(attr) 0 +#endif + +#if __nolibc_has_attribute(naked) +# define __nolibc_entrypoint __attribute__((naked)) +# define __nolibc_entrypoint_epilogue() +#else +# define __nolibc_entrypoint __attribute__((optimize("Os", "omit-frame-pointer"))) +# define __nolibc_entrypoint_epilogue() __builtin_unreachable() +#endif /* __nolibc_has_attribute(naked) */ + #if defined(__SSP__) || defined(__SSP_STRONG__) || defined(__SSP_ALL__) || defined(__SSP_EXPLICIT__) #define _NOLIBC_STACKPROTECTOR #endif /* defined(__SSP__) ... */ -#if defined(__has_attribute) -# if __has_attribute(no_stack_protector) -# define __no_stack_protector __attribute__((no_stack_protector)) -# else -# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) -# endif +#if __nolibc_has_attribute(no_stack_protector) +# define __no_stack_protector __attribute__((no_stack_protector)) #else # define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) -#endif /* defined(__has_attribute) */ +#endif /* __nolibc_has_attribute(no_stack_protector) */ #endif /* _NOLIBC_COMPILER_H */ diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h index 43b551468c2a..bbcd5fd09806 100644 --- a/tools/include/nolibc/crt.h +++ b/tools/include/nolibc/crt.h @@ -13,23 +13,24 @@ const unsigned long *_auxv __attribute__((weak)); static void __stack_chk_init(void); static void exit(int); -extern void (*const __preinit_array_start[])(void) __attribute__((weak)); -extern void (*const __preinit_array_end[])(void) __attribute__((weak)); +extern void (*const __preinit_array_start[])(int, char **, char**) __attribute__((weak)); +extern void (*const __preinit_array_end[])(int, char **, char**) __attribute__((weak)); -extern void (*const __init_array_start[])(void) __attribute__((weak)); -extern void (*const __init_array_end[])(void) __attribute__((weak)); +extern void (*const __init_array_start[])(int, char **, char**) __attribute__((weak)); +extern void (*const __init_array_end[])(int, char **, char**) __attribute__((weak)); extern void (*const __fini_array_start[])(void) __attribute__((weak)); extern void (*const __fini_array_end[])(void) __attribute__((weak)); -__attribute__((weak)) +__attribute__((weak,used)) void _start_c(long *sp) { long argc; char **argv; char **envp; int exitcode; - void (* const *func)(void); + void (* const *ctor_func)(int, char **, char **); + void (* const *dtor_func)(void); const unsigned long *auxv; /* silence potential warning: conflicting types for 'main' */ int _nolibc_main(int, char **, char **) __asm__ ("main"); @@ -66,16 +67,16 @@ void _start_c(long *sp) ; _auxv = auxv; - for (func = __preinit_array_start; func < __preinit_array_end; func++) - (*func)(); - for (func = __init_array_start; func < __init_array_end; func++) - (*func)(); + for (ctor_func = __preinit_array_start; ctor_func < __preinit_array_end; ctor_func++) + (*ctor_func)(argc, argv, envp); + for (ctor_func = __init_array_start; ctor_func < __init_array_end; ctor_func++) + (*ctor_func)(argc, argv, envp); /* go to application */ exitcode = _nolibc_main(argc, argv, envp); - for (func = __fini_array_end; func > __fini_array_start;) - (*--func)(); + for (dtor_func = __fini_array_end; dtor_func > __fini_array_start;) + (*--dtor_func)(); exit(exitcode); } diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 989e707263a4..92436b1e4441 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -74,7 +74,8 @@ * -I../nolibc -o hello hello.c -lgcc * * The available standard (but limited) include files are: - * ctype.h, errno.h, signal.h, stdarg.h, stdio.h, stdlib.h, string.h, time.h + * ctype.h, errno.h, signal.h, stdarg.h, stdbool.h stdio.h, stdlib.h, + * string.h, time.h * * In addition, the following ones are expected to be provided by the compiler: * float.h, stddef.h diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h index 13f1d0e60387..1d0d5259ec41 100644 --- a/tools/include/nolibc/stackprotector.h +++ b/tools/include/nolibc/stackprotector.h @@ -18,7 +18,7 @@ * triggering stack protector errors themselves */ -__attribute__((weak,noreturn,section(".text.nolibc_stack_chk"))) +__attribute__((weak,used,noreturn,section(".text.nolibc_stack_chk"))) void __stack_chk_fail(void) { pid_t pid; @@ -34,7 +34,7 @@ void __stack_chk_fail_local(void) __stack_chk_fail(); } -__attribute__((weak,section(".data.nolibc_stack_chk"))) +__attribute__((weak,used,section(".data.nolibc_stack_chk"))) uintptr_t __stack_chk_guard; static __no_stack_protector void __stack_chk_init(void) diff --git a/tools/include/nolibc/stdbool.h b/tools/include/nolibc/stdbool.h new file mode 100644 index 000000000000..60feece22f17 --- /dev/null +++ b/tools/include/nolibc/stdbool.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * Boolean types support for NOLIBC + * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_STDBOOL_H +#define _NOLIBC_STDBOOL_H + +#define bool _Bool +#define true 1 +#define false 0 + +#define __bool_true_false_are_defined 1 + +#endif /* _NOLIBC_STDBOOL_H */ diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h index f9ab28421e6d..9ec9c24f38c0 100644 --- a/tools/include/nolibc/string.h +++ b/tools/include/nolibc/string.h @@ -7,6 +7,7 @@ #ifndef _NOLIBC_STRING_H #define _NOLIBC_STRING_H +#include "arch.h" #include "std.h" static void *malloc(size_t len); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index f329ee44627a..1fb3cb2636e6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2851,7 +2851,7 @@ union bpf_attr { * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, - * **TCP_BPF_RTO_MIN**. + * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports the following *optname*\ s: * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. @@ -7080,6 +7080,7 @@ enum { TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ + TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ }; enum { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 43742ac5b00d..7c308f04e7a0 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -93,6 +93,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, + NETDEV_A_PAGE_POOL_DMABUF, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,6 +132,7 @@ enum { NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, + NETDEV_A_QUEUE_DMABUF, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -174,6 +176,16 @@ enum { }; enum { + NETDEV_A_DMABUF_IFINDEX = 1, + NETDEV_A_DMABUF_QUEUES, + NETDEV_A_DMABUF_FD, + NETDEV_A_DMABUF_ID, + + __NETDEV_A_DMABUF_MAX, + NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) +}; + +enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, NETDEV_CMD_DEV_DEL_NTF, @@ -186,6 +198,7 @@ enum { NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, + NETDEV_CMD_BIND_RX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/memory-model/Documentation/README b/tools/memory-model/Documentation/README index 304162743a5b..9999c1effdb6 100644 --- a/tools/memory-model/Documentation/README +++ b/tools/memory-model/Documentation/README @@ -9,6 +9,8 @@ depending on what you know and what you would like to learn. Please note that the documents later in this list assume that the reader understands the material provided by documents earlier in this list. +If LKMM-specific terms lost you, glossary.txt might help you. + o You are new to Linux-kernel concurrency: simple.txt o You have some background in Linux-kernel concurrency, and would @@ -21,6 +23,9 @@ o You are familiar with the Linux-kernel concurrency primitives that you need, and just want to get started with LKMM litmus tests: litmus-tests.txt +o You would like to access lock-protected shared variables without + having their corresponding locks held: locking.txt + o You are familiar with Linux-kernel concurrency, and would like a detailed intuitive understanding of LKMM, including situations involving more than two threads: recipes.txt @@ -28,12 +33,18 @@ o You are familiar with Linux-kernel concurrency, and would o You would like a detailed understanding of what your compiler can and cannot do to control dependencies: control-dependencies.txt +o You would like to mark concurrent normal accesses to shared + variables so that intentional "racy" accesses can be properly + documented, especially when you are responding to complaints + from KCSAN: access-marking.txt + o You are familiar with Linux-kernel concurrency and the use of LKMM, and would like a quick reference: cheatsheet.txt o You are familiar with Linux-kernel concurrency and the use of LKMM, and would like to learn about LKMM's requirements, - rationale, and implementation: explanation.txt + rationale, and implementation: explanation.txt and + herd-representation.txt o You are interested in the publications related to LKMM, including hardware manuals, academic literature, standards-committee @@ -61,10 +72,21 @@ control-dependencies.txt explanation.txt Detailed description of the memory model. +glossary.txt + Brief definitions of LKMM-related terms. + +herd-representation.txt + The (abstract) representation of the Linux-kernel concurrency + primitives in terms of events. + litmus-tests.txt The format, features, capabilities, and limitations of the litmus tests that LKMM can evaluate. +locking.txt + Rules for accessing lock-protected shared variables outside of + their corresponding critical sections. + ordering.txt Overview of the Linux kernel's low-level memory-ordering primitives by category. diff --git a/tools/memory-model/Documentation/herd-representation.txt b/tools/memory-model/Documentation/herd-representation.txt new file mode 100644 index 000000000000..ed988906f2b7 --- /dev/null +++ b/tools/memory-model/Documentation/herd-representation.txt @@ -0,0 +1,110 @@ +# +# Legend: +# R, a Load event +# W, a Store event +# F, a Fence event +# LKR, a Lock-Read event +# LKW, a Lock-Write event +# UL, an Unlock event +# LF, a Lock-Fail event +# RL, a Read-Locked event +# RU, a Read-Unlocked event +# R*, a Load event included in RMW +# W*, a Store event included in RMW +# SRCU, a Sleepable-Read-Copy-Update event +# +# po, a Program-Order link +# rmw, a Read-Modify-Write link - every rmw link is a po link +# +# By convention, a blank line in a cell means "same as the preceding line". +# +# Disclaimer. The table includes representations of "add" and "and" operations; +# corresponding/identical representations of "sub", "inc", "dec" and "or", "xor", +# "andnot" operations are omitted. +# + ------------------------------------------------------------------------------ + | C macro | Events | + ------------------------------------------------------------------------------ + | Non-RMW ops | | + ------------------------------------------------------------------------------ + | READ_ONCE | R[once] | + | atomic_read | | + | WRITE_ONCE | W[once] | + | atomic_set | | + | smp_load_acquire | R[acquire] | + | atomic_read_acquire | | + | smp_store_release | W[release] | + | atomic_set_release | | + | smp_store_mb | W[once] ->po F[mb] | + | smp_mb | F[mb] | + | smp_rmb | F[rmb] | + | smp_wmb | F[wmb] | + | smp_mb__before_atomic | F[before-atomic] | + | smp_mb__after_atomic | F[after-atomic] | + | spin_unlock | UL | + | spin_is_locked | On success: RL | + | | On failure: RU | + | smp_mb__after_spinlock | F[after-spinlock] | + | smp_mb__after_unlock_lock | F[after-unlock-lock] | + | rcu_read_lock | F[rcu-lock] | + | rcu_read_unlock | F[rcu-unlock] | + | synchronize_rcu | F[sync-rcu] | + | rcu_dereference | R[once] | + | rcu_assign_pointer | W[release] | + | srcu_read_lock | R[srcu-lock] | + | srcu_down_read | | + | srcu_read_unlock | W[srcu-unlock] | + | srcu_up_read | | + | synchronize_srcu | SRCU[sync-srcu] | + | smp_mb__after_srcu_read_unlock | F[after-srcu-read-unlock] | + ------------------------------------------------------------------------------ + | RMW ops w/o return value | | + ------------------------------------------------------------------------------ + | atomic_add | R*[noreturn] ->rmw W*[once] | + | atomic_and | | + | spin_lock | LKR ->po LKW | + ------------------------------------------------------------------------------ + | RMW ops w/ return value | | + ------------------------------------------------------------------------------ + | atomic_add_return | F[mb] ->po R*[once] | + | | ->rmw W*[once] ->po F[mb] | + | atomic_fetch_add | | + | atomic_fetch_and | | + | atomic_xchg | | + | xchg | | + | atomic_add_negative | | + | atomic_add_return_relaxed | R*[once] ->rmw W*[once] | + | atomic_fetch_add_relaxed | | + | atomic_fetch_and_relaxed | | + | atomic_xchg_relaxed | | + | xchg_relaxed | | + | atomic_add_negative_relaxed | | + | atomic_add_return_acquire | R*[acquire] ->rmw W*[once] | + | atomic_fetch_add_acquire | | + | atomic_fetch_and_acquire | | + | atomic_xchg_acquire | | + | xchg_acquire | | + | atomic_add_negative_acquire | | + | atomic_add_return_release | R*[once] ->rmw W*[release] | + | atomic_fetch_add_release | | + | atomic_fetch_and_release | | + | atomic_xchg_release | | + | xchg_release | | + | atomic_add_negative_release | | + ------------------------------------------------------------------------------ + | Conditional RMW ops | | + ------------------------------------------------------------------------------ + | atomic_cmpxchg | On success: F[mb] ->po R*[once] | + | | ->rmw W*[once] ->po F[mb] | + | | On failure: R*[once] | + | cmpxchg | | + | atomic_add_unless | | + | atomic_cmpxchg_relaxed | On success: R*[once] ->rmw W*[once] | + | | On failure: R*[once] | + | atomic_cmpxchg_acquire | On success: R*[acquire] ->rmw W*[once] | + | | On failure: R*[once] | + | atomic_cmpxchg_release | On success: R*[once] ->rmw W*[release] | + | | On failure: R*[once] | + | spin_trylock | On success: LKR ->po LKW | + | | On failure: LF | + ------------------------------------------------------------------------------ diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt index 4c789ec8334f..21f06c1d1b70 100644 --- a/tools/memory-model/Documentation/simple.txt +++ b/tools/memory-model/Documentation/simple.txt @@ -266,5 +266,5 @@ More complex use cases ====================== If the alternatives above do not do what you need, please look at the -recipes-pairs.txt file to peel off the next layer of the memory-ordering +recipes.txt file to peel off the next layer of the memory-ordering onion. diff --git a/tools/mm/Makefile b/tools/mm/Makefile index 15791c1c5b28..f5725b5c23aa 100644 --- a/tools/mm/Makefile +++ b/tools/mm/Makefile @@ -23,7 +23,7 @@ $(LIBS): $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) clean: - $(RM) page-types slabinfo page_owner_sort + $(RM) page-types slabinfo page_owner_sort thp_swap_allocator_test make -C $(LIB_DIR) clean sbindir ?= /usr/sbin diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index 8d5595b6c59f..fa050d5a48cd 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -71,12 +71,12 @@ /* [32-] kernel hacking assistances */ #define KPF_RESERVED 32 #define KPF_MLOCKED 33 -#define KPF_MAPPEDTODISK 34 +#define KPF_OWNER_2 34 #define KPF_PRIVATE 35 #define KPF_PRIVATE_2 36 #define KPF_OWNER_PRIVATE 37 #define KPF_ARCH 38 -#define KPF_UNCACHED 39 +#define KPF_UNCACHED 39 /* unused */ #define KPF_SOFTDIRTY 40 #define KPF_ARCH_2 41 @@ -129,12 +129,11 @@ static const char * const page_flag_names[] = { [KPF_RESERVED] = "r:reserved", [KPF_MLOCKED] = "m:mlocked", - [KPF_MAPPEDTODISK] = "d:mappedtodisk", + [KPF_OWNER_2] = "d:owner_2", [KPF_PRIVATE] = "P:private", [KPF_PRIVATE_2] = "p:private_2", [KPF_OWNER_PRIVATE] = "O:owner_private", [KPF_ARCH] = "h:arch", - [KPF_UNCACHED] = "c:uncached", [KPF_SOFTDIRTY] = "f:softdirty", [KPF_ARCH_2] = "H:arch_2", @@ -472,9 +471,9 @@ static int bit_mask_ok(uint64_t flags) static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme) { - /* Anonymous pages overload PG_mappedtodisk */ - if ((flags & BIT(ANON)) && (flags & BIT(MAPPEDTODISK))) - flags ^= BIT(MAPPEDTODISK) | BIT(ANON_EXCLUSIVE); + /* Anonymous pages use PG_owner_2 for anon_exclusive */ + if ((flags & BIT(ANON)) && (flags & BIT(OWNER_2))) + flags ^= BIT(OWNER_2) | BIT(ANON_EXCLUSIVE); /* SLUB overloads several page flags */ if (flags & BIT(SLAB)) { diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore index c18dd8d83cee..296c4035dbf2 100644 --- a/tools/net/ynl/lib/.gitignore +++ b/tools/net/ynl/lib/.gitignore @@ -1 +1,2 @@ __pycache__/ +*.d diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index fcb18a5a6d70..e16cef160bc2 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -696,14 +696,14 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) addr.nl_family = AF_NETLINK; if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) { __perr(yse, "unable to bind to a socket address"); - goto err_close_sock;; + goto err_close_sock; } memset(&addr, 0, sizeof(addr)); addrlen = sizeof(addr); if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) { __perr(yse, "unable to read socket address"); - goto err_close_sock;; + goto err_close_sock; } ys->portid = addr.nl_pid; ys->seq = random(); diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index d42c1d605969..c22c22bf2cb7 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -388,6 +388,8 @@ class NetlinkProtocol: def decode(self, ynl, nl_msg, op): msg = self._decode(nl_msg) + if op is None: + op = ynl.rsp_by_value[msg.cmd()] fixed_header_size = ynl._struct_size(op.fixed_header) msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg @@ -921,8 +923,7 @@ class YnlFamily(SpecFamily): print("Netlink done while checking for ntf!?") continue - op = self.rsp_by_value[nl_msg.cmd()] - decoded = self.nlproto.decode(self, nl_msg, op) + decoded = self.nlproto.decode(self, nl_msg, None) if decoded.cmd() not in self.async_msg_ids: print("Unexpected msg id done while checking for ntf", decoded) continue @@ -980,7 +981,7 @@ class YnlFamily(SpecFamily): if nl_msg.extack: self._decode_extack(req_msg, op, nl_msg.extack) else: - op = self.rsp_by_value[nl_msg.cmd()] + op = None req_flags = [] if nl_msg.error: diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c index 3e7b29bd55d5..22609d44c89a 100644 --- a/tools/net/ynl/samples/netdev.c +++ b/tools/net/ynl/samples/netdev.c @@ -79,7 +79,10 @@ int main(int argc, char **argv) goto err_close; printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): "); - scanf("%d", &ifindex); + if (scanf("%d", &ifindex) != 1) { + fprintf(stderr, "Error: unable to parse input\n"); + goto err_destroy; + } if (ifindex > 0) { struct netdev_dev_get_req *req; @@ -119,6 +122,7 @@ int main(int argc, char **argv) err_close: fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_destroy: ynl_sock_destroy(ys); return 2; } diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 51529fabd517..717530bc9c52 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2668,13 +2668,15 @@ def main(): cw.p('#define ' + hdr_prot) cw.nl() + hdr_file=os.path.basename(args.out_file[:-2]) + ".h" + if args.mode == 'kernel': cw.p('#include <net/netlink.h>') cw.p('#include <net/genetlink.h>') cw.nl() if not args.header: if args.out_file: - cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"') + cw.p(f'#include "{hdr_file}"') cw.nl() headers = ['uapi/' + parsed.uapi_header] headers += parsed.kernel_family.get('headers', []) @@ -2686,7 +2688,7 @@ def main(): if family_contains_bitfield32(parsed): cw.p('#include <linux/netlink.h>') else: - cw.p(f'#include "{parsed.name}-user.h"') + cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') headers = [parsed.uapi_header] for definition in parsed['definitions']: diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 5c9335fff2d3..9a95871afc95 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -691,7 +691,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, "%c%" PRIu64, /* session up time */ - csv_sep, (curr - daemon->start) / 60); + csv_sep, (uint64_t)((curr - daemon->start) / 60)); fprintf(out, "\n"); } else { @@ -702,7 +702,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, " lock: %s/lock\n", daemon->base); fprintf(out, " up: %" PRIu64 " minutes\n", - (curr - daemon->start) / 60); + (uint64_t)((curr - daemon->start) / 60)); } } @@ -730,7 +730,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, "%c%" PRIu64, /* session up time */ - csv_sep, (curr - session->start) / 60); + csv_sep, (uint64_t)((curr - session->start) / 60)); fprintf(out, "\n"); } else { @@ -747,7 +747,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); fprintf(out, " up: %" PRIu64 " minutes\n", - (curr - session->start) / 60); + (uint64_t)((curr - session->start) / 60)); } } diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 40132655ccd1..c76f53a90a7b 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -456,11 +456,13 @@ static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __ /** * Test perf_pmu__match() that's used to search for a PMU given a name passed * on the command line. The name that's passed may also be a filename type glob - * match. + * match. If the name does not match, perf_pmu__match() attempts to match the + * alias of the PMU, if provided. */ static int test__pmu_match(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { struct perf_pmu test_pmu; + test_pmu.alias_name = NULL; test_pmu.name = "pmuname"; TEST_ASSERT_EQUAL("Exact match", perf_pmu__match(&test_pmu, "pmuname"), true); diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index b4cb3fe5cc25..bc4e92c0c08b 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -286,6 +286,9 @@ static void account_end_timestamp(struct lock_contention *con) goto next; for (int i = 0; i < total_cpus; i++) { + if (cpu_data[i].lock == 0) + continue; + update_lock_stat(stat_fd, -1, end_ts, aggr_mode, &cpu_data[i]); } diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3be882b2e845..31a223eaf8e6 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -20,6 +20,7 @@ #include "util/env.h" #include "util/kvm-stat.h" #include "util/kwork.h" +#include "util/sample.h" #include "util/lock-contention.h" #include <internal/lib.h> #include "../builtin.h" diff --git a/tools/power/cpupower/bindings/python/.gitignore b/tools/power/cpupower/bindings/python/.gitignore new file mode 100644 index 000000000000..5c9a1f0212dd --- /dev/null +++ b/tools/power/cpupower/bindings/python/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +raw_pylibcpupower_wrap.c +*.o +*.so +*.py +!test_raw_pylibcpupower.py +# git keeps ignoring this file, use git add -f raw_libcpupower.i +!raw_pylibcpupower.i diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile new file mode 100644 index 000000000000..dc09c5b66ead --- /dev/null +++ b/tools/power/cpupower/bindings/python/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Makefile for libcpupower's Python bindings +# +# This Makefile expects you have already run the makefile for cpupower to build +# the .o files in the lib directory for the bindings to be created. + +CC := gcc +HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi) +HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi) + +LIB_DIR := ../../lib +PY_INCLUDE = $(firstword $(shell python-config --includes)) +OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) + +all: _raw_pylibcpupower.so + +_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o + $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so + +raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c + $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) + +raw_pylibcpupower_wrap.c: raw_pylibcpupower.i +ifeq ($(HAVE_SWIG),0) + $(error "swig was not found. Make sure you have it installed and in the PATH to generate the bindings.") +else ifeq ($(HAVE_PYCONFIG),0) + $(error "python-config was not found. Make sure you have it installed and in the PATH to generate the bindings.") +endif + swig -python raw_pylibcpupower.i + +# Will only clean the bindings folder; will not clean the actual cpupower folder +clean: + rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README new file mode 100644 index 000000000000..0a4bb2581e8a --- /dev/null +++ b/tools/power/cpupower/bindings/python/README @@ -0,0 +1,59 @@ +This folder contains the necessary files to build the Python bindings for +libcpupower (aside from the libcpupower object files). + + +requirements +------------ + +* You need the object files in the libcpupower directory compiled by +cpupower's makefile. +* The SWIG program must be installed. +* The Python's development libraries installed. + +Please check that your version of SWIG is compatible with the version of Python +installed on your machine by checking the SWIG changelog on their website. +https://swig.org/ + +Note that while SWIG itself is GPL v3+ licensed; the resulting output, +the bindings code: is permissively licensed + the license of libcpupower's .o +files. For these bindings that means GPL v2. + +Please see https://swig.org/legal.html and the discussion [1] for more details. + +[1] +https://lore.kernel.org/linux-pm/Zqv9BOjxLAgyNP5B@hatbackup/ + + +build +----- + +Install SWIG and the Python development files provided by your distribution. + +Build the object files for libcpupower by running make in the cpupower +directory. + +Return to the directory this README is in to run: + +$ make + + +testing +------- + +Please verify the _raw_pylibcpupower.so and raw_pylibcpupower.py files have +been created. + +To run the test script: + +$ python test_raw_pylibcpupower.py + + +credits +------- + +Original Bindings Author: +John B. Wyatt IV +jwyatt@redhat.com +sageofredondo@gmail.com + +Copyright (C) 2024 Red Hat diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.i b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i new file mode 100644 index 000000000000..96556d87a745 --- /dev/null +++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +%module raw_pylibcpupower +%{ +#include "../../lib/cpupower_intern.h" +#include "../../lib/acpi_cppc.h" +#include "../../lib/cpufreq.h" +#include "../../lib/cpuidle.h" +#include "../../lib/cpupower.h" +#include "../../lib/powercap.h" +%} + +/* + * cpupower_intern.h + */ + +#define PATH_TO_CPU "/sys/devices/system/cpu/" +#define MAX_LINE_LEN 4096 +#define SYSFS_PATH_MAX 255 + +int is_valid_path(const char *path); + +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); + +unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen); + +/* + * acpi_cppc.h + */ + +enum acpi_cppc_value { + HIGHEST_PERF, + LOWEST_PERF, + NOMINAL_PERF, + LOWEST_NONLINEAR_PERF, + LOWEST_FREQ, + NOMINAL_FREQ, + REFERENCE_PERF, + WRAPAROUND_TIME, + MAX_CPPC_VALUE_FILES +}; + +unsigned long acpi_cppc_get_data(unsigned int cpu, + enum acpi_cppc_value which); + +/* + * cpufreq.h + */ + +struct cpufreq_policy { + unsigned long min; + unsigned long max; + char *governor; +}; + +struct cpufreq_available_governors { + char *governor; + struct cpufreq_available_governors *next; + struct cpufreq_available_governors *first; +}; + +struct cpufreq_available_frequencies { + unsigned long frequency; + struct cpufreq_available_frequencies *next; + struct cpufreq_available_frequencies *first; +}; + + +struct cpufreq_affected_cpus { + unsigned int cpu; + struct cpufreq_affected_cpus *next; + struct cpufreq_affected_cpus *first; +}; + +struct cpufreq_stats { + unsigned long frequency; + unsigned long long time_in_state; + struct cpufreq_stats *next; + struct cpufreq_stats *first; +}; + +unsigned long cpufreq_get_freq_kernel(unsigned int cpu); + +unsigned long cpufreq_get_freq_hardware(unsigned int cpu); + +#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu); + +unsigned long cpufreq_get_transition_latency(unsigned int cpu); + +int cpufreq_get_hardware_limits(unsigned int cpu, + unsigned long *min, + unsigned long *max); + +char *cpufreq_get_driver(unsigned int cpu); + +void cpufreq_put_driver(char *ptr); + +struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu); + +void cpufreq_put_policy(struct cpufreq_policy *policy); + +struct cpufreq_available_governors +*cpufreq_get_available_governors(unsigned int cpu); + +void cpufreq_put_available_governors( + struct cpufreq_available_governors *first); + +struct cpufreq_available_frequencies +*cpufreq_get_available_frequencies(unsigned int cpu); + +void cpufreq_put_available_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_available_frequencies +*cpufreq_get_boost_frequencies(unsigned int cpu); + +void cpufreq_put_boost_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned + int cpu); + +void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first); + +struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned + int cpu); + +void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first); + +struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu, + unsigned long long *total_time); + +void cpufreq_put_stats(struct cpufreq_stats *stats); + +unsigned long cpufreq_get_transitions(unsigned int cpu); + +int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); + +int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq); + +int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq); + +int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); + +int cpufreq_set_frequency(unsigned int cpu, + unsigned long target_frequency); + +unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, + const char **table, + unsigned int index, + unsigned int size); + +/* + * cpuidle.h + */ + +int cpuidle_is_state_disabled(unsigned int cpu, + unsigned int idlestate); +int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, + unsigned int disable); +unsigned long cpuidle_state_latency(unsigned int cpu, + unsigned int idlestate); +unsigned long cpuidle_state_usage(unsigned int cpu, + unsigned int idlestate); +unsigned long long cpuidle_state_time(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_name(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_desc(unsigned int cpu, + unsigned int idlestate); +unsigned int cpuidle_state_count(unsigned int cpu); + +char *cpuidle_get_governor(void); + +char *cpuidle_get_driver(void); + +/* + * cpupower.h + */ + +struct cpupower_topology { + /* Amount of CPU cores, packages and threads per core in the system */ + unsigned int cores; + unsigned int pkgs; + unsigned int threads; /* per core */ + + /* Array gets mallocated with cores entries, holding per core info */ + struct cpuid_core_info *core_info; +}; + +struct cpuid_core_info { + int pkg; + int core; + int cpu; + + /* flags */ + unsigned int is_online:1; +}; + +int get_cpu_topology(struct cpupower_topology *cpu_top); + +void cpu_topology_release(struct cpupower_topology cpu_top); + +int cpupower_is_cpu_online(unsigned int cpu); + +/* + * powercap.h + */ + +struct powercap_zone { + char name[MAX_LINE_LEN]; + /* + * sys_name relative to PATH_TO_POWERCAP, + * do not forget the / in between + */ + char sys_name[SYSFS_PATH_MAX]; + int tree_depth; + struct powercap_zone *parent; + struct powercap_zone *children[POWERCAP_MAX_CHILD_ZONES]; + /* More possible caps or attributes to be added? */ + uint32_t has_power_uw:1, + has_energy_uj:1; + +}; + +int powercap_walk_zones(struct powercap_zone *zone, + int (*f)(struct powercap_zone *zone)); + +struct powercap_zone *powercap_init_zones(void); + +int powercap_get_enabled(int *mode); + +int powercap_set_enabled(int mode); + +int powercap_get_driver(char *driver, int buflen); + +int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val); + +int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode); + +int powercap_zone_set_enabled(struct powercap_zone *zone, int mode); diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py new file mode 100755 index 000000000000..3d6f62b9556a --- /dev/null +++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only + +import raw_pylibcpupower as p + +# Simple function call + +""" +Get cstate count +""" +cpu_cstates_count = p.cpuidle_state_count(0) +if cpu_cstates_count > -1: + print(f"CPU 0 has {cpu_cstates_count} c-states") +else: + print(f"cstate count error: return code: {cpu_cstates_count}") + +""" +Disable cstate (will fail if the above is 0, ex: a virtual machine) +""" +cstate_disabled = p.cpuidle_state_disable(0, 0, 1) +if cpu_cstates_count == 0: + print(f"CPU 0 has {cpu_cstates_count} c-states") +else: + print(f"cstate count error: return code: {cpu_cstates_count}") + +match cstate_disabled: + case 0: + print(f"CPU state disabled") + case -1: + print(f"Idlestate not available") + case _: + print(f"Not documented") + + +# Pointer example + +topo = p.cpupower_topology() +total_cpus = p.get_cpu_topology(topo) +if total_cpus > 0: + print(f"Number of total cpus: {total_cpus} and number of cores: {topo.cores}") +else: + print(f"Error: could not get cpu topology") diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 479c5971aa6d..0ecac009273c 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -116,6 +116,7 @@ enum idlestate_value { IDLESTATE_USAGE, IDLESTATE_POWER, IDLESTATE_LATENCY, + IDLESTATE_RESIDENCY, IDLESTATE_TIME, IDLESTATE_DISABLE, MAX_IDLESTATE_VALUE_FILES @@ -125,6 +126,7 @@ static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = { [IDLESTATE_USAGE] = "usage", [IDLESTATE_POWER] = "power", [IDLESTATE_LATENCY] = "latency", + [IDLESTATE_RESIDENCY] = "residency", [IDLESTATE_TIME] = "time", [IDLESTATE_DISABLE] = "disable", }; @@ -254,6 +256,12 @@ unsigned long cpuidle_state_latency(unsigned int cpu, return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY); } +unsigned long cpuidle_state_residency(unsigned int cpu, + unsigned int idlestate) +{ + return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_RESIDENCY); +} + unsigned long cpuidle_state_usage(unsigned int cpu, unsigned int idlestate) { diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h index 2e10fead2e1e..2ab404d40259 100644 --- a/tools/power/cpupower/lib/cpuidle.h +++ b/tools/power/cpupower/lib/cpuidle.h @@ -8,6 +8,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, unsigned int disable); unsigned long cpuidle_state_latency(unsigned int cpu, unsigned int idlestate); +unsigned long cpuidle_state_residency(unsigned int cpu, + unsigned int idlestate); unsigned long cpuidle_state_usage(unsigned int cpu, unsigned int idlestate); unsigned long long cpuidle_state_time(unsigned int cpu, diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c index a7a59c6bacda..94a0c69e55ef 100644 --- a/tools/power/cpupower/lib/powercap.c +++ b/tools/power/cpupower/lib/powercap.c @@ -78,6 +78,14 @@ int powercap_get_enabled(int *mode) } /* + * TODO: implement function. Returns dummy 0 for now. + */ +int powercap_set_enabled(int mode) +{ + return 0; +} + +/* * Hardcoded, because rapl is the only powercap implementation - * this needs to get more generic if more powercap implementations * should show up diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 44126a87fa7a..e0d17f0de3fe 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -64,6 +64,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) printf(_("Latency: %lu\n"), cpuidle_state_latency(cpu, idlestate)); + printf(_("Residency: %lu\n"), + cpuidle_state_residency(cpu, idlestate)); printf(_("Usage: %lu\n"), cpuidle_state_usage(cpu, idlestate)); printf(_("Duration: %llu\n"), @@ -115,6 +117,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) printf(_("promotion[--] demotion[--] ")); printf(_("latency[%03lu] "), cpuidle_state_latency(cpu, cstate)); + printf(_("residency[%05lu] "), + cpuidle_state_residency(cpu, cstate)); printf(_("usage[%08lu] "), cpuidle_state_usage(cpu, cstate)); printf(_("duration[%020Lu] \n"), diff --git a/tools/power/pm-graph/.gitignore b/tools/power/pm-graph/.gitignore new file mode 100644 index 000000000000..37762a8a06d6 --- /dev/null +++ b/tools/power/pm-graph/.gitignore @@ -0,0 +1,3 @@ +# sleepgraph.py artifacts +suspend-[0-9]*-[0-9]* +suspend-[0-9]*-[0-9]*-x[0-9]* diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile index b5310832c19c..aeddbaf2d4c4 100644 --- a/tools/power/pm-graph/Makefile +++ b/tools/power/pm-graph/Makefile @@ -1,51 +1,86 @@ # SPDX-License-Identifier: GPL-2.0 -PREFIX ?= /usr -DESTDIR ?= +# +# Copyright (c) 2013, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Authors: +# Todd Brandt <todd.e.brandt@linux.intel.com> + +# Prefix to the directories we're installing to +DESTDIR ?= + +# Directory definitions. These are default and most probably +# do not need to be changed. Please note that DESTDIR is +# added in front of any of them + +BINDIR ?= /usr/bin +MANDIR ?= /usr/share/man +LIBDIR ?= /usr/lib + +# Toolchain: what tools do we use, and what options do they need: +INSTALL = /usr/bin/install +INSTALL_DATA = ${INSTALL} -m 644 all: @echo "Nothing to build" install : uninstall - install -d $(DESTDIR)$(PREFIX)/lib/pm-graph - install sleepgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph - install bootgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph - install -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/cgskip.txt $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - - install -d $(DESTDIR)$(PREFIX)/bin - ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph - ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph - - install -d $(DESTDIR)$(PREFIX)/share/man/man8 - install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 - install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 + $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) sleepgraph.py $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) bootgraph.py $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/cgskip.txt $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-x2-proc.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + + $(INSTALL) -d $(DESTDIR)$(BINDIR) + ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(BINDIR)/bootgraph + ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(BINDIR)/sleepgraph + + $(INSTALL) -d $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) bootgraph.8 $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) sleepgraph.8 $(DESTDIR)$(MANDIR)/man8 uninstall : - rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8 - rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8 + rm -f $(DESTDIR)$(MANDIR)/man8/bootgraph.8 + rm -f $(DESTDIR)$(MANDIR)/man8/sleepgraph.8 - rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph - rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph + rm -f $(DESTDIR)$(BINDIR)/bootgraph + rm -f $(DESTDIR)$(BINDIR)/sleepgraph - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/config/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/config/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/config ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph/config; \ fi; - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__ ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__; \ fi; - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph; \ fi; + +help: + @echo 'Building targets:' + @echo ' all - Nothing to build' + @echo ' install - Install the program and create necessary directories' + @echo ' uninstall - Remove installed files and directories' + +.PHONY: all install uninstall help diff --git a/tools/rcu/rcu-updaters.sh b/tools/rcu/rcu-updaters.sh index 4ef1397927bb..8a5df3f22550 100755 --- a/tools/rcu/rcu-updaters.sh +++ b/tools/rcu/rcu-updaters.sh @@ -21,12 +21,10 @@ fi bpftrace -e 'kprobe:kvfree_call_rcu, kprobe:call_rcu, kprobe:call_rcu_tasks, - kprobe:call_rcu_tasks_rude, kprobe:call_rcu_tasks_trace, kprobe:call_srcu, kprobe:rcu_barrier, kprobe:rcu_barrier_tasks, - kprobe:rcu_barrier_tasks_rude, kprobe:rcu_barrier_tasks_trace, kprobe:srcu_barrier, kprobe:synchronize_rcu, diff --git a/tools/sound/dapm-graph b/tools/sound/dapm-graph index 57d78f6df041..f14bdfedee8f 100755 --- a/tools/sound/dapm-graph +++ b/tools/sound/dapm-graph @@ -8,6 +8,8 @@ set -eu +STYLE_COMPONENT_ON="color=dodgerblue;style=bold" +STYLE_COMPONENT_OFF="color=gray40;style=filled;fillcolor=gray90" STYLE_NODE_ON="shape=box,style=bold,color=green4" STYLE_NODE_OFF="shape=box,style=filled,color=gray30,fillcolor=gray95" @@ -132,11 +134,17 @@ process_dapm_widget() # Collect any links. We could use "in" links or "out" links, # let's use "in" links if echo "${line}" | grep -q '^in '; then + local w_route=$(echo "$line" | awk -F\" '{print $2}') local w_src=$(echo "$line" | awk -F\" '{print $6 "_" $4}' | sed 's/^(null)_/ROOT_/') dbg_echo " - Input route from: ${w_src}" - echo " \"${w_src}\" -> \"$w_tag\"" >> "${links_file}" + dbg_echo " - Route: ${w_route}" + local w_edge_attrs="" + if [ "${w_route}" != "static" ]; then + w_edge_attrs=" [label=\"${w_route}\"]" + fi + echo " \"${w_src}\" -> \"$w_tag\"${w_edge_attrs}" >> "${links_file}" fi done @@ -150,16 +158,20 @@ process_dapm_widget() # # $1 = temporary work dir # $2 = component directory -# $3 = forced component name (extracted for path if empty) +# $3 = "ROOT" for the root card directory, empty otherwise process_dapm_component() { local tmp_dir="${1}" local c_dir="${2}" local c_name="${3}" + local is_component=0 local dot_file="${tmp_dir}/main.dot" local links_file="${tmp_dir}/links.dot" + local c_attribs="" if [ -z "${c_name}" ]; then + is_component=1 + # Extract directory name into component name: # "./cs42l51.0-004a/dapm" -> "cs42l51.0-004a" c_name="$(basename $(dirname "${c_dir}"))" @@ -167,11 +179,23 @@ process_dapm_component() dbg_echo " * Component: ${c_name}" - echo "" >> "${dot_file}" - echo " subgraph \"${c_name}\" {" >> "${dot_file}" - echo " cluster = true" >> "${dot_file}" - echo " label = \"${c_name}\"" >> "${dot_file}" - echo " color=dodgerblue" >> "${dot_file}" + if [ ${is_component} = 1 ]; then + if [ -f "${c_dir}/bias_level" ]; then + c_onoff=$(sed -n -e 1p "${c_dir}/bias_level" | awk '{print $1}') + dbg_echo " - bias_level: ${c_onoff}" + if [ "$c_onoff" = "On" ]; then + c_attribs="${STYLE_COMPONENT_ON}" + elif [ "$c_onoff" = "Off" ]; then + c_attribs="${STYLE_COMPONENT_OFF}" + fi + fi + + echo "" >> "${dot_file}" + echo " subgraph \"${c_name}\" {" >> "${dot_file}" + echo " cluster = true" >> "${dot_file}" + echo " label = \"${c_name}\"" >> "${dot_file}" + echo " ${c_attribs}" >> "${dot_file}" + fi # Create empty file to ensure it will exist in all cases >"${links_file}" @@ -181,7 +205,9 @@ process_dapm_component() process_dapm_widget "${tmp_dir}" "${c_name}" "${w_file}" done - echo " }" >> "${dot_file}" + if [ ${is_component} = 1 ]; then + echo " }" >> "${dot_file}" + fi cat "${links_file}" >> "${dot_file}" } @@ -200,7 +226,7 @@ process_dapm_tree() echo "digraph G {" > "${dot_file}" echo " fontname=\"sans-serif\"" >> "${dot_file}" echo " node [fontname=\"sans-serif\"]" >> "${dot_file}" - + echo " edge [fontname=\"sans-serif\"]" >> "${dot_file}" # Process root directory (no component) process_dapm_component "${tmp_dir}" "${dapm_dir}/dapm" "ROOT" diff --git a/tools/spi/spidev_fdx.c b/tools/spi/spidev_fdx.c index 7d2a867cd4ae..bc9c4f6c3ba8 100644 --- a/tools/spi/spidev_fdx.c +++ b/tools/spi/spidev_fdx.c @@ -99,7 +99,7 @@ static void dumpstat(const char *name, int fd) return; } - printf("%s: spi mode 0x%x, %d bits %sper word, %d Hz max\n", + printf("%s: spi mode 0x%x, %d bits %sper word, %u Hz max\n", name, mode, bits, lsb ? "(lsb first) " : "", speed); } diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh index 78dac34264be..c4f2432750f4 100644..100755 --- a/tools/testing/fault-injection/failcmd.sh +++ b/tools/testing/fault-injection/failcmd.sh @@ -64,6 +64,14 @@ ENVIRONMENT EOF } +exit_if_not_hex() { + local value="$1" + if ! [[ $value =~ ^0x[0-9a-fA-F]+$ ]]; then + echo "Error: The provided value '$value' is not a valid hexadecimal number." >&2 + exit 1 + fi +} + if [ $UID != 0 ]; then echo must be run as root >&2 exit 1 @@ -160,18 +168,22 @@ while true; do shift 2 ;; --require-start) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/require-start shift 2 ;; --require-end) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/require-end shift 2 ;; --reject-start) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/reject-start shift 2 ;; --reject-end) + exit_if_not_hex "$2" echo $2 > $FAULTATTR/reject-end shift 2 ;; diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 7254c110ff23..61931c4926fd 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -72,7 +72,8 @@ class LinuxSourceTreeOperations: raise ConfigError(e.output.decode()) def make(self, jobs: int, build_dir: str, make_options: Optional[List[str]]) -> None: - command = ['make', 'ARCH=' + self._linux_arch, 'O=' + build_dir, '--jobs=' + str(jobs)] + command = ['make', 'all', 'compile_commands.json', 'ARCH=' + self._linux_arch, + 'O=' + build_dir, '--jobs=' + str(jobs)] if make_options: command.extend(make_options) if self._cross_compile: diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index f6c6e5474c3a..1cf82acb2a3e 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -20,7 +20,7 @@ void memblock_free_pages(struct page *page, unsigned long pfn, { } -static inline void accept_memory(phys_addr_t start, phys_addr_t end) +static inline void accept_memory(phys_addr_t start, unsigned long size) { } diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore index 49bccb90c35b..ce167a761981 100644 --- a/tools/testing/radix-tree/.gitignore +++ b/tools/testing/radix-tree/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +generated/autoconf.h generated/bit-length.h generated/map-shift.h idr.c diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index d1acd7d58850..8b3591a51e1f 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -1,77 +1,29 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \ - -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined -LDFLAGS += -fsanitize=address -fsanitize=undefined -LDLIBS+= -lpthread -lurcu -TARGETS = main idr-test multiorder xarray maple -LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o -CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o maple.o $(LIBS) -OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \ - regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \ - iteration_check_2.o benchmark.o - -ifndef SHIFT - SHIFT=3 -endif +.PHONY: clean -ifeq ($(BUILD), 32) - CFLAGS += -m32 - LDFLAGS += -m32 -LONG_BIT := 32 -endif - -ifndef LONG_BIT -LONG_BIT := $(shell getconf LONG_BIT) -endif +TARGETS = main idr-test multiorder xarray maple +CORE_OFILES = $(SHARED_OFILES) xarray.o maple.o test.o +OFILES = main.o $(CORE_OFILES) regression1.o regression2.o \ + regression3.o regression4.o tag_check.o multiorder.o idr-test.o \ + iteration_check.o iteration_check_2.o benchmark.o targets: generated/map-shift.h generated/bit-length.h $(TARGETS) +include ../shared/shared.mk + main: $(OFILES) idr-test.o: ../../../lib/test_ida.c idr-test: idr-test.o $(CORE_OFILES) -xarray: $(CORE_OFILES) +xarray: $(CORE_OFILES) xarray.o -maple: $(CORE_OFILES) +maple: $(CORE_OFILES) maple.o multiorder: multiorder.o $(CORE_OFILES) clean: - $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h generated/bit-length.h - -vpath %.c ../../lib - -$(OFILES): Makefile *.h */*.h generated/map-shift.h generated/bit-length.h \ - ../../include/linux/*.h \ - ../../include/asm/*.h \ - ../../../include/linux/xarray.h \ - ../../../include/linux/maple_tree.h \ - ../../../include/linux/radix-tree.h \ - ../../../lib/radix-tree.h \ - ../../../include/linux/idr.h - -radix-tree.c: ../../../lib/radix-tree.c - sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ - -idr.c: ../../../lib/idr.c - sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ - -xarray.o: ../../../lib/xarray.c ../../../lib/test_xarray.c - -maple.o: ../../../lib/maple_tree.c ../../../lib/test_maple_tree.c - -generated/map-shift.h: - @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ - echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \ - generated/map-shift.h; \ - fi - -generated/bit-length.h: FORCE - @if ! grep -qws CONFIG_$(LONG_BIT)BIT generated/bit-length.h; then \ - echo "Generating $@"; \ - echo "#define CONFIG_$(LONG_BIT)BIT 1" > $@; \ - fi + $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/* -FORCE: ; +$(OFILES): $(SHARED_DEPS) *.h diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index cd1cf05503b4..c5b00aca9def 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -120,7 +120,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas.alloc->slot[0] == NULL); mas_push_node(&mas, mn); mas_reset(&mas); - mas_nomem(&mas, GFP_KERNEL); /* free */ + mas_destroy(&mas); mtree_unlock(mt); @@ -144,7 +144,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); mas.status = ma_start; - mas_nomem(&mas, GFP_KERNEL); + mas_destroy(&mas); /* Allocate 3 nodes, will fail. */ mas_node_count(&mas, 3); /* Drop the lock and allocate 3 nodes. */ @@ -161,7 +161,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != 3); /* Free. */ mas_reset(&mas); - mas_nomem(&mas, GFP_KERNEL); + mas_destroy(&mas); /* Set allocation request to 1. */ mas_set_alloc_req(&mas, 1); @@ -277,6 +277,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) } mas_reset(&mas); MT_BUG_ON(mt, mas_nomem(&mas, GFP_KERNEL)); + mas_destroy(&mas); } @@ -299,7 +300,7 @@ static noinline void __init check_new_node(struct maple_tree *mt) } MT_BUG_ON(mt, mas_allocated(&mas) != total); mas_reset(&mas); - mas_nomem(&mas, GFP_KERNEL); /* Free. */ + mas_destroy(&mas); /* Free. */ MT_BUG_ON(mt, mas_allocated(&mas) != 0); for (i = 1; i < 128; i++) { @@ -35847,6 +35848,7 @@ static noinline void __init check_nomem(struct maple_tree *mt) mas_store(&ms, &ms); /* insert 1 -> &ms */ mas_nomem(&ms, GFP_KERNEL); /* Node allocated in here. */ mtree_unlock(mt); + mas_destroy(&ms); mtree_destroy(mt); } @@ -36224,6 +36226,97 @@ static noinline void __init check_mtree_dup(struct maple_tree *mt) extern void test_kmem_cache_bulk(void); +/* callback function used for check_nomem_writer_race() */ +static void writer2(void *maple_tree) +{ + struct maple_tree *mt = (struct maple_tree *)maple_tree; + MA_STATE(mas, mt, 6, 10); + + mtree_lock(mas.tree); + mas_store(&mas, xa_mk_value(0xC)); + mas_destroy(&mas); + mtree_unlock(mas.tree); +} + +/* + * check_nomem_writer_race() - test a possible race in the mas_nomem() path + * @mt: The tree to build. + * + * There is a possible race condition in low memory conditions when mas_nomem() + * gives up its lock. A second writer can chagne the entry that the primary + * writer executing the mas_nomem() path is modifying. This test recreates this + * scenario to ensure we are handling it correctly. + */ +static void check_nomem_writer_race(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, 5); + + mt_set_non_kernel(0); + /* setup root with 2 values with NULL in between */ + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); + mtree_store_range(mt, 11, 15, xa_mk_value(0xB), GFP_KERNEL); + + /* setup writer 2 that will trigger the race condition */ + mt_set_private(mt); + mt_set_callback(writer2); + + mtree_lock(mt); + /* erase 0-5 */ + mas_erase(&mas); + + /* index 6-10 should retain the value from writer 2 */ + check_load(mt, 6, xa_mk_value(0xC)); + mtree_unlock(mt); + + /* test for the same race but with mas_store_gfp() */ + mtree_store_range(mt, 0, 5, xa_mk_value(0xA), GFP_KERNEL); + mtree_store_range(mt, 6, 10, NULL, GFP_KERNEL); + + mas_set_range(&mas, 0, 5); + mtree_lock(mt); + mas_store_gfp(&mas, NULL, GFP_KERNEL); + + /* ensure write made by writer 2 is retained */ + check_load(mt, 6, xa_mk_value(0xC)); + + mt_set_private(NULL); + mt_set_callback(NULL); + mtree_unlock(mt); +} + + /* test to simulate expanding a vma from [0x7fffffffe000, 0x7ffffffff000) + * to [0x7ffde4ca1000, 0x7ffffffff000) and then shrinking the vma to + * [0x7ffde4ca1000, 0x7ffde4ca2000) + */ +static inline int check_vma_modification(struct maple_tree *mt) +{ + MA_STATE(mas, mt, 0, 0); + + mtree_lock(mt); + /* vma with old start and old end */ + __mas_set_range(&mas, 0x7fffffffe000, 0x7ffffffff000 - 1); + mas_preallocate(&mas, xa_mk_value(1), GFP_KERNEL); + mas_store_prealloc(&mas, xa_mk_value(1)); + + /* next write occurs partly in previous range [0, 0x7fffffffe000)*/ + mas_prev_range(&mas, 0); + /* expand vma to {0x7ffde4ca1000, 0x7ffffffff000) */ + __mas_set_range(&mas, 0x7ffde4ca1000, 0x7ffffffff000 - 1); + mas_preallocate(&mas, xa_mk_value(1), GFP_KERNEL); + mas_store_prealloc(&mas, xa_mk_value(1)); + + /* shrink vma to [0x7ffde4ca1000, 7ffde4ca2000) */ + __mas_set_range(&mas, 0x7ffde4ca2000, 0x7ffffffff000 - 1); + mas_preallocate(&mas, NULL, GFP_KERNEL); + mas_store_prealloc(&mas, NULL); + mt_dump(mt, mt_dump_hex); + + mas_destroy(&mas); + mtree_unlock(mt); + return 0; +} + void farmer_tests(void) { struct maple_node *node; @@ -36231,6 +36324,10 @@ void farmer_tests(void) mt_dump(&tree, mt_dump_dec); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN | MT_FLAGS_USE_RCU); + check_vma_modification(&tree); + mtree_destroy(&tree); + tree.ma_root = xa_mk_value(0); mt_dump(&tree, mt_dump_dec); @@ -36257,6 +36354,10 @@ void farmer_tests(void) check_dfs_preorder(&tree); mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE | MT_FLAGS_USE_RCU); + check_nomem_writer_race(&tree); + mtree_destroy(&tree); + mt_init_flags(&tree, MT_FLAGS_ALLOC_RANGE); check_prealloc(&tree); mtree_destroy(&tree); diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c index d0e53bff1eb6..253208a8541b 100644 --- a/tools/testing/radix-tree/xarray.c +++ b/tools/testing/radix-tree/xarray.c @@ -4,17 +4,9 @@ * Copyright (c) 2018 Matthew Wilcox <willy@infradead.org> */ -#define XA_DEBUG +#include "xarray-shared.h" #include "test.h" -#define module_init(x) -#define module_exit(x) -#define MODULE_AUTHOR(x) -#define MODULE_DESCRIPTION(X) -#define MODULE_LICENSE(x) -#define dump_stack() assert(0) - -#include "../../../lib/xarray.c" #undef XA_DEBUG #include "../../../lib/test_xarray.c" diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index bc8fe9e8f7f2..b38199965f99 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 +TARGETS += acct TARGETS += alsa TARGETS += amd-pstate TARGETS += arm64 @@ -65,9 +66,11 @@ TARGETS += net/af_unix TARGETS += net/forwarding TARGETS += net/hsr TARGETS += net/mptcp +TARGETS += net/netfilter TARGETS += net/openvswitch +TARGETS += net/packetdrill +TARGETS += net/rds TARGETS += net/tcp_ao -TARGETS += net/netfilter TARGETS += nsfs TARGETS += perf_events TARGETS += pidfd @@ -107,7 +110,6 @@ TARGETS += tmpfs TARGETS += tpm2 TARGETS += tty TARGETS += uevent -TARGETS += user TARGETS += user_events TARGETS += vDSO TARGETS += mm diff --git a/tools/testing/selftests/acct/.gitignore b/tools/testing/selftests/acct/.gitignore new file mode 100644 index 000000000000..7e78aac19038 --- /dev/null +++ b/tools/testing/selftests/acct/.gitignore @@ -0,0 +1,3 @@ +acct_syscall +config +process_log
\ No newline at end of file diff --git a/tools/testing/selftests/acct/Makefile b/tools/testing/selftests/acct/Makefile new file mode 100644 index 000000000000..7e025099cf65 --- /dev/null +++ b/tools/testing/selftests/acct/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := acct_syscall +CFLAGS += -Wall + +include ../lib.mk
\ No newline at end of file diff --git a/tools/testing/selftests/acct/acct_syscall.c b/tools/testing/selftests/acct/acct_syscall.c new file mode 100644 index 000000000000..e44e8fe1f4a3 --- /dev/null +++ b/tools/testing/selftests/acct/acct_syscall.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* kselftest for acct() system call + * The acct() system call enables or disables process accounting. + */ + +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <sys/wait.h> + +#include "../kselftest.h" + +int main(void) +{ + char filename[] = "process_log"; + FILE *fp; + pid_t child_pid; + int sz; + + // Setting up kselftest framework + ksft_print_header(); + ksft_set_plan(1); + + // Check if test is run a root + if (geteuid()) { + ksft_test_result_skip("This test needs root to run!\n"); + return 1; + } + + // Create file to log closed processes + fp = fopen(filename, "w"); + + if (!fp) { + ksft_test_result_error("%s.\n", strerror(errno)); + ksft_finished(); + return 1; + } + + acct(filename); + + // Handle error conditions + if (errno) { + ksft_test_result_error("%s.\n", strerror(errno)); + fclose(fp); + ksft_finished(); + return 1; + } + + // Create child process and wait for it to terminate. + + child_pid = fork(); + + if (child_pid < 0) { + ksft_test_result_error("Creating a child process to log failed\n"); + acct(NULL); + return 1; + } else if (child_pid > 0) { + wait(NULL); + fseek(fp, 0L, SEEK_END); + sz = ftell(fp); + + acct(NULL); + + if (sz <= 0) { + ksft_test_result_fail("Terminated child process not logged\n"); + ksft_exit_fail(); + return 1; + } + + ksft_test_result_pass("Successfully logged terminated process.\n"); + fclose(fp); + ksft_exit_pass(); + return 0; + } + + return 1; +} diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index c1ce39874e2b..25be68025290 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -12,9 +12,9 @@ LDLIBS+=-lpthread OVERRIDE_TARGETS = 1 -TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver +TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver utimer-test -TEST_GEN_PROGS_EXTENDED := libatest.so +TEST_GEN_PROGS_EXTENDED := libatest.so global-timer TEST_FILES := conf.d pcm-test.conf diff --git a/tools/testing/selftests/alsa/global-timer.c b/tools/testing/selftests/alsa/global-timer.c new file mode 100644 index 000000000000..c15ec0ba851a --- /dev/null +++ b/tools/testing/selftests/alsa/global-timer.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This tool is used by the utimer test, and it allows us to + * count the ticks of a global timer in a certain time frame + * (which is set by `timeout` parameter). + * + * Author: Ivan Orlov <ivan.orlov0322@gmail.com> + */ +#include <stdio.h> +#include <stdlib.h> +#include <alsa/asoundlib.h> +#include <time.h> + +static int ticked; +static void async_callback(snd_async_handler_t *ahandler) +{ + ticked++; +} + +static char timer_name[64]; +static void bind_to_timer(int device, int subdevice, int timeout) +{ + snd_timer_t *handle; + snd_timer_params_t *params; + snd_async_handler_t *ahandler; + + time_t end; + + sprintf(timer_name, "hw:CLASS=%d,SCLASS=%d,DEV=%d,SUBDEV=%d", + SND_TIMER_CLASS_GLOBAL, SND_TIMER_SCLASS_NONE, + device, subdevice); + + snd_timer_params_alloca(¶ms); + + if (snd_timer_open(&handle, timer_name, SND_TIMER_OPEN_NONBLOCK) < 0) { + perror("Can't open the timer"); + exit(EXIT_FAILURE); + } + + snd_timer_params_set_auto_start(params, 1); + snd_timer_params_set_ticks(params, 1); + if (snd_timer_params(handle, params) < 0) { + perror("Can't set timer params"); + exit(EXIT_FAILURE); + } + + if (snd_async_add_timer_handler(&ahandler, handle, async_callback, NULL) < 0) { + perror("Can't create a handler"); + exit(EXIT_FAILURE); + } + end = time(NULL) + timeout; + if (snd_timer_start(handle) < 0) { + perror("Failed to start the timer"); + exit(EXIT_FAILURE); + } + printf("Timer has started\n"); + while (time(NULL) <= end) { + /* + * Waiting for the timeout to elapse. Can't use sleep here, as it gets + * constantly interrupted by the signal from the timer (SIGIO) + */ + } + snd_timer_stop(handle); + snd_timer_close(handle); +} + +int main(int argc, char *argv[]) +{ + int device, subdevice, timeout; + + if (argc < 4) { + perror("Usage: %s <device> <subdevice> <timeout>"); + return EXIT_FAILURE; + } + + setlinebuf(stdout); + + device = atoi(argv[1]); + subdevice = atoi(argv[2]); + timeout = atoi(argv[3]); + + bind_to_timer(device, subdevice, timeout); + + printf("Total ticks count: %d\n", ticked); + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/alsa/utimer-test.c b/tools/testing/selftests/alsa/utimer-test.c new file mode 100644 index 000000000000..32ee3ce57721 --- /dev/null +++ b/tools/testing/selftests/alsa/utimer-test.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This test covers the functionality of userspace-driven ALSA timers. Such timers + * are purely virtual (so they don't directly depend on the hardware), and they could be + * created and triggered by userspace applications. + * + * Author: Ivan Orlov <ivan.orlov0322@gmail.com> + */ +#include "../kselftest_harness.h" +#include <sound/asound.h> +#include <unistd.h> +#include <fcntl.h> +#include <limits.h> +#include <sys/ioctl.h> +#include <stdlib.h> +#include <pthread.h> +#include <string.h> + +#define FRAME_RATE 8000 +#define PERIOD_SIZE 4410 +#define UTIMER_DEFAULT_ID -1 +#define UTIMER_DEFAULT_FD -1 +#define NANO 1000000000ULL +#define TICKS_COUNT 10 +#define TICKS_RECORDING_DELTA 5 +#define TIMER_OUTPUT_BUF_LEN 1024 +#define TIMER_FREQ_SEC 1 +#define RESULT_PREFIX_LEN strlen("Total ticks count: ") + +enum timer_app_event { + TIMER_APP_STARTED, + TIMER_APP_RESULT, + TIMER_NO_EVENT, +}; + +FIXTURE(timer_f) { + struct snd_timer_uinfo *utimer_info; +}; + +FIXTURE_SETUP(timer_f) { + int timer_dev_fd; + + if (geteuid()) + SKIP(return, "This test needs root to run!"); + + self->utimer_info = calloc(1, sizeof(*self->utimer_info)); + ASSERT_NE(NULL, self->utimer_info); + + /* Resolution is the time the period of frames takes in nanoseconds */ + self->utimer_info->resolution = (NANO / FRAME_RATE * PERIOD_SIZE); + + timer_dev_fd = open("/dev/snd/timer", O_RDONLY); + ASSERT_GE(timer_dev_fd, 0); + + ASSERT_EQ(ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, self->utimer_info), 0); + ASSERT_GE(self->utimer_info->fd, 0); + + close(timer_dev_fd); +} + +FIXTURE_TEARDOWN(timer_f) { + close(self->utimer_info->fd); + free(self->utimer_info); +} + +static void *ticking_func(void *data) +{ + int i; + int *fd = (int *)data; + + for (i = 0; i < TICKS_COUNT; i++) { + /* Well, trigger the timer! */ + ioctl(*fd, SNDRV_TIMER_IOCTL_TRIGGER, NULL); + sleep(TIMER_FREQ_SEC); + } + + return NULL; +} + +static enum timer_app_event parse_timer_output(const char *s) +{ + if (strstr(s, "Timer has started")) + return TIMER_APP_STARTED; + if (strstr(s, "Total ticks count")) + return TIMER_APP_RESULT; + + return TIMER_NO_EVENT; +} + +static int parse_timer_result(const char *s) +{ + char *end; + long d; + + d = strtol(s + RESULT_PREFIX_LEN, &end, 10); + if (end == s + RESULT_PREFIX_LEN) + return -1; + + return d; +} + +/* + * This test triggers the timer and counts ticks at the same time. The amount + * of the timer trigger calls should be equal to the amount of ticks received. + */ +TEST_F(timer_f, utimer) { + char command[64]; + pthread_t ticking_thread; + int total_ticks = 0; + FILE *rfp; + char *buf = malloc(TIMER_OUTPUT_BUF_LEN); + + ASSERT_NE(buf, NULL); + + /* The timeout should be the ticks interval * count of ticks + some delta */ + sprintf(command, "./global-timer %d %d %d", SNDRV_TIMER_GLOBAL_UDRIVEN, + self->utimer_info->id, TICKS_COUNT * TIMER_FREQ_SEC + TICKS_RECORDING_DELTA); + + rfp = popen(command, "r"); + while (fgets(buf, TIMER_OUTPUT_BUF_LEN, rfp)) { + buf[TIMER_OUTPUT_BUF_LEN - 1] = 0; + switch (parse_timer_output(buf)) { + case TIMER_APP_STARTED: + /* global-timer waits for timer to trigger, so start the ticking thread */ + pthread_create(&ticking_thread, NULL, ticking_func, + &self->utimer_info->fd); + break; + case TIMER_APP_RESULT: + total_ticks = parse_timer_result(buf); + break; + case TIMER_NO_EVENT: + break; + } + } + pthread_join(ticking_thread, NULL); + ASSERT_EQ(total_ticks, TICKS_COUNT); + pclose(rfp); +} + +TEST(wrong_timers_test) { + int timer_dev_fd; + int utimer_fd; + size_t i; + struct snd_timer_uinfo wrong_timer = { + .resolution = 0, + .id = UTIMER_DEFAULT_ID, + .fd = UTIMER_DEFAULT_FD, + }; + + timer_dev_fd = open("/dev/snd/timer", O_RDONLY); + ASSERT_GE(timer_dev_fd, 0); + + utimer_fd = ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, &wrong_timer); + ASSERT_LT(utimer_fd, 0); + /* Check that id was not updated */ + ASSERT_EQ(wrong_timer.id, UTIMER_DEFAULT_ID); + + /* Test the NULL as an argument is processed correctly */ + ASSERT_LT(ioctl(timer_dev_fd, SNDRV_TIMER_IOCTL_CREATE, NULL), 0); + + close(timer_dev_fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index d8909b2b535a..f2d6007a2b98 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -156,6 +156,12 @@ static void pmull_sigill(void) asm volatile(".inst 0x0ee0e000" : : : ); } +static void poe_sigill(void) +{ + /* mrs x0, POR_EL0 */ + asm volatile("mrs x0, S3_3_C10_C2_4" : : : "x0"); +} + static void rng_sigill(void) { asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); @@ -602,6 +608,14 @@ static const struct hwcap_data { .sigill_fn = pmull_sigill, }, { + .name = "POE", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_POE, + .cpuinfo = "poe", + .sigill_fn = poe_sigill, + .sigill_reliable = true, + }, + { .name = "RNG", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_RNG, diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index e4fa507cbdd0..b51d21f78cf9 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -163,10 +163,10 @@ static void test_hw_debug(pid_t child, int type, const char *type_name) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_perror("PTRACE_TRACEME"); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_perror("raise(SIGSTOP)"); return EXIT_SUCCESS; } diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index 1ce5b5eac386..b2f2bfd5c6aa 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -2,6 +2,7 @@ mangle_* fake_sigreturn_* fpmr_* +poe_* sme_* ssve_* sve_* diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index 8f5febaf1a9a..edb3613513b8 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -23,7 +23,7 @@ $(TEST_GEN_PROGS): $(PROGS) # Common test-unit targets to build common-layout test-cases executables # Needs secondary expansion to properly include the testcase c-file in pre-reqs COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \ - signals.S + signals.S sve_helpers.c COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h .SECONDEXPANSION: diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.c b/tools/testing/selftests/arm64/signal/sve_helpers.c new file mode 100644 index 000000000000..0acc121af306 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/sve_helpers.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 ARM Limited + * + * Common helper functions for SVE and SME functionality. + */ + +#include <stdbool.h> +#include <kselftest.h> +#include <asm/sigcontext.h> +#include <sys/prctl.h> + +unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls; + +int sve_fill_vls(bool use_sme, int min_vls) +{ + int vq, vl; + int pr_set_vl = use_sme ? PR_SME_SET_VL : PR_SVE_SET_VL; + int len_mask = use_sme ? PR_SME_VL_LEN_MASK : PR_SVE_VL_LEN_MASK; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(pr_set_vl, vq * 16); + if (vl == -1) + return KSFT_FAIL; + + vl &= len_mask; + + /* + * Unlike SVE, SME does not require the minimum vector length + * to be implemented, or the VLs to be consecutive, so any call + * to the prctl might return the single implemented VL, which + * might be larger than 16. So to avoid this loop never + * terminating, bail out here when we find a higher VL than + * we asked for. + * See the ARM ARM, DDI 0487K.a, B1.4.2: I_QQRNR and I_NWYBP. + */ + if (vq < sve_vq_from_vl(vl)) + break; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + if (nvls < min_vls) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return KSFT_SKIP; + } + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h new file mode 100644 index 000000000000..50948ce471cc --- /dev/null +++ b/tools/testing/selftests/arm64/signal/sve_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 ARM Limited + * + * Common helper functions for SVE and SME functionality. + */ + +#ifndef __SVE_HELPERS_H__ +#define __SVE_HELPERS_H__ + +#include <stdbool.h> + +#define VLS_USE_SVE false +#define VLS_USE_SME true + +extern unsigned int vls[]; +extern unsigned int nvls; + +int sve_fill_vls(bool use_sme, int min_vls); + +#endif diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c index ebd5815b54bb..dfd6a2badf9f 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c @@ -6,44 +6,28 @@ * handler, this is not supported and is expected to segfault. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" struct fake_sigframe sf; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 2); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SME_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least two VLs */ - if (nvls < 2) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static int fake_sigreturn_ssve_change_vl(struct tdescr *td, @@ -51,30 +35,30 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td, { size_t resv_sz, offset; struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); - struct sve_context *sve; + struct za_context *za; /* Get a signal context with a SME ZA frame in it */ if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); - head = get_header(head, SVE_MAGIC, resv_sz, &offset); + head = get_header(head, ZA_MAGIC, resv_sz, &offset); if (!head) { - fprintf(stderr, "No SVE context\n"); + fprintf(stderr, "No ZA context\n"); return 1; } - if (head->size != sizeof(struct sve_context)) { + if (head->size != sizeof(struct za_context)) { fprintf(stderr, "Register data present, aborting\n"); return 1; } - sve = (struct sve_context *)head; + za = (struct za_context *)head; /* No changes are supported; init left us at minimum VL so go to max */ fprintf(stderr, "Attempting to change VL from %d to %d\n", - sve->vl, vls[0]); - sve->vl = vls[0]; + za->vl, vls[0]); + za->vl = vls[0]; fake_sigreturn(&sf, sizeof(sf), 0); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c index e2a452190511..e1ccf8f85a70 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c @@ -12,40 +12,22 @@ #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" struct fake_sigframe sf; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sve_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SVE, 2); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SVE_VL_LEN_MASK; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least two VLs */ - if (nvls < 2) { - fprintf(stderr, "Only %d VL supported\n", nvls); + if (res == KSFT_SKIP) td->result = KSFT_SKIP; - return false; - } - return true; + return false; } static int fake_sigreturn_sve_change_vl(struct tdescr *td, diff --git a/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c new file mode 100644 index 000000000000..36bd9940ee05 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Arm Limited + * + * Verify that the POR_EL0 register context in signal frames is set up as + * expected. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <unistd.h> +#include <asm/sigcontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +#define SYS_POR_EL0 "S3_3_C10_C2_4" + +static uint64_t get_por_el0(void) +{ + uint64_t val; + + asm volatile( + "mrs %0, " SYS_POR_EL0 "\n" + : "=r"(val) + : + : ); + + return val; +} + +int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct poe_context *poe_ctx; + size_t offset; + bool in_sigframe; + bool have_poe; + __u64 orig_poe; + + have_poe = getauxval(AT_HWCAP2) & HWCAP2_POE; + if (have_poe) + orig_poe = get_por_el0(); + + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + poe_ctx = (struct poe_context *) + get_header(head, POE_MAGIC, td->live_sz, &offset); + + in_sigframe = poe_ctx != NULL; + + fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n", + in_sigframe ? "present" : "absent", + have_poe ? "with" : "without"); + + td->pass = (in_sigframe == have_poe); + + /* + * Check that the value we read back was the one present at + * the time that the signal was triggered. + */ + if (have_poe && poe_ctx) { + if (poe_ctx->por_el0 != orig_poe) { + fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n", + poe_ctx->por_el0, orig_poe); + td->pass = false; + } + } + + return 0; +} + +struct tdescr tde = { + .name = "POR_EL0", + .descr = "Validate that POR_EL0 is present as expected", + .timeout = 3, + .run = poe_present, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 3d37daafcff5..6dbe48cf8b09 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -6,51 +6,31 @@ * set up as expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 64]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; + if (!res) + return true; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_ssve_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c index 9dc5f128bbc0..5557e116e973 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c @@ -6,51 +6,31 @@ * signal frames is set up as expected when enabled simultaneously. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; + if (!res) + return true; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c index 8b16eabbb769..8143eb1c58c1 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c @@ -6,47 +6,31 @@ * expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 64]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sve_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SVE, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SVE_VL_LEN_MASK; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); + if (!res) + return true; - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_sve_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 674b88cc8c39..e6daa94fcd2e 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -6,29 +6,6 @@ #include "testcases.h" -struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, - size_t resv_sz, size_t *offset) -{ - size_t offs = 0; - struct _aarch64_ctx *found = NULL; - - if (!head || resv_sz < HDR_SZ) - return found; - - while (offs <= resv_sz - HDR_SZ && - head->magic != magic && head->magic) { - offs += head->size; - head = GET_RESV_NEXT_HEAD(head); - } - if (head->magic == magic) { - found = head; - if (offset) - *offset = offs; - } - - return found; -} - bool validate_extra_context(struct extra_context *extra, char **err, void **extra_data, size_t *extra_size) { @@ -184,6 +161,10 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) if (head->size != sizeof(struct esr_context)) *err = "Bad size for esr_context"; break; + case POE_MAGIC: + if (head->size != sizeof(struct poe_context)) + *err = "Bad size for poe_context"; + break; case TPIDR2_MAGIC: if (head->size != sizeof(struct tpidr2_context)) *err = "Bad size for tpidr2_context"; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 7727126347e0..9872b8912714 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -26,6 +26,9 @@ #define HDR_SZ \ sizeof(struct _aarch64_ctx) +#define GET_UC_RESV_HEAD(uc) \ + (struct _aarch64_ctx *)(&(uc->uc_mcontext.__reserved)) + #define GET_SF_RESV_HEAD(sf) \ (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved) @@ -88,8 +91,29 @@ struct fake_sigframe { bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err); -struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, - size_t resv_sz, size_t *offset); +static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *found = NULL; + + if (!head || resv_sz < HDR_SZ) + return found; + + while (offs <= resv_sz - HDR_SZ && + head->magic != magic && head->magic) { + offs += head->size; + head = GET_RESV_NEXT_HEAD(head); + } + if (head->magic == magic) { + found = head; + if (offset) + *offset = offs; + } + + return found; +} + static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head, size_t resv_sz, diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c index 4d6f94b6178f..ce26e9c2fa5e 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c @@ -6,47 +6,31 @@ * expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SME_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); + if (!res) + return true; - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index 174ad6656696..b9e13f27f1f9 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -6,51 +6,31 @@ * expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SME_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; + if (!res) + return true; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_za_regs(void) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 11ee801e75e7..6c3b4d4f173a 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -34,6 +34,12 @@ DECLARE_TRACE(bpf_testmod_test_write_bare, TP_ARGS(task, ctx) ); +/* Used in bpf_testmod_test_read() to test __nullable suffix */ +DECLARE_TRACE(bpf_testmod_test_nullable_bare, + TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable), + TP_ARGS(ctx__nullable) +); + #undef BPF_TESTMOD_DECLARE_TRACE #ifdef DECLARE_TRACE_WRITABLE #define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index c73d04bc9e9d..8835761d9a12 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -394,6 +394,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, if (bpf_testmod_loop_test(101) > 100) trace_bpf_testmod_test_read(current, &ctx); + trace_bpf_testmod_test_nullable_bare(NULL); + /* Magic number to enable writable tp */ if (len == 64) { struct bpf_testmod_test_writable_ctx writable = { @@ -470,7 +472,7 @@ uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, struct testmod_uprobe { struct path path; - loff_t offset; + struct uprobe *uprobe; struct uprobe_consumer consumer; }; @@ -484,25 +486,25 @@ static int testmod_register_uprobe(loff_t offset) { int err = -EBUSY; - if (uprobe.offset) + if (uprobe.uprobe) return -EBUSY; mutex_lock(&testmod_uprobe_mutex); - if (uprobe.offset) + if (uprobe.uprobe) goto out; err = kern_path("/proc/self/exe", LOOKUP_FOLLOW, &uprobe.path); if (err) goto out; - err = uprobe_register_refctr(d_real_inode(uprobe.path.dentry), - offset, 0, &uprobe.consumer); - if (err) + uprobe.uprobe = uprobe_register(d_real_inode(uprobe.path.dentry), + offset, 0, &uprobe.consumer); + if (IS_ERR(uprobe.uprobe)) { + err = PTR_ERR(uprobe.uprobe); path_put(&uprobe.path); - else - uprobe.offset = offset; - + uprobe.uprobe = NULL; + } out: mutex_unlock(&testmod_uprobe_mutex); return err; @@ -512,10 +514,11 @@ static void testmod_unregister_uprobe(void) { mutex_lock(&testmod_uprobe_mutex); - if (uprobe.offset) { - uprobe_unregister(d_real_inode(uprobe.path.dentry), - uprobe.offset, &uprobe.consumer); - uprobe.offset = 0; + if (uprobe.uprobe) { + uprobe_unregister_nosync(uprobe.uprobe, &uprobe.consumer); + uprobe_unregister_sync(); + path_put(&uprobe.path); + uprobe.uprobe = NULL; } mutex_unlock(&testmod_uprobe_mutex); diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 7eafcf91b02e..e63d74ce046f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3551,6 +3551,40 @@ static struct btf_raw_test raw_tests[] = { BTF_STR_SEC("\0x\0?.foo bar:buz"), }, { + .descr = "datasec: name with non-printable first char not is ok", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC ?.data */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0\7foo"), + .err_str = "Invalid name", + .btf_load_err = true, +}, +{ + .descr = "datasec: name '\\0' is not ok", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC \0 */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0"), + .err_str = "Invalid name", + .btf_load_err = true, +}, +{ .descr = "type name '?foo' is not ok", .raw_types = { /* union ?foo; */ diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 7cfac53c0d58..b614a5272dfd 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -9,6 +9,7 @@ enum test_setup_type { SETUP_SYSCALL_SLEEP, SETUP_SKB_PROG, + SETUP_SKB_PROG_TP, }; static struct { @@ -28,6 +29,7 @@ static struct { {"test_dynptr_clone", SETUP_SKB_PROG}, {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, + {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP}, }; static void verify_success(const char *prog_name, enum test_setup_type setup_type) @@ -35,7 +37,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; - int err; + int err; skel = dynptr_success__open(); if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) @@ -47,7 +49,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto cleanup; - bpf_program__set_autoload(prog, true); + bpf_program__set_autoload(prog, true); err = dynptr_success__load(skel); if (!ASSERT_OK(err, "dynptr_success__load")) @@ -87,6 +89,37 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ break; } + case SETUP_SKB_PROG_TP: + { + struct __sk_buff skb = {}; + struct bpf_object *obj; + int aux_prog_fd; + + /* Just use its test_run to trigger kfree_skb tracepoint */ + err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &aux_prog_fd); + if (!ASSERT_OK(err, "prog_load sched cls")) + goto cleanup; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; + + err = bpf_prog_test_run_opts(aux_prog_fd, &topts); + bpf_link__destroy(link); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } } ASSERT_EQ(skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 3171047414a7..cfcc90cb7ffb 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -378,8 +378,8 @@ struct test tests[] = { .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = - __bpf_constant_htons(MAGIC_BYTES) - - sizeof(struct iphdr), + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, @@ -407,8 +407,8 @@ struct test tests[] = { .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = - __bpf_constant_htons(MAGIC_BYTES) - - sizeof(struct iphdr), + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, @@ -436,8 +436,8 @@ struct test tests[] = { .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = - __bpf_constant_htons(MAGIC_BYTES) - - sizeof(struct iphdr), + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), .tcp.doff = 5, .tcp.source = 99, .tcp.dest = 9090, diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index 7d4a9b3d3722..e12255121c15 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c @@ -154,6 +154,51 @@ err_out: close(sfd); } +static void test_nonstandard_opt(int family) +{ + struct setget_sockopt__bss *bss = skel->bss; + struct bpf_link *getsockopt_link = NULL; + int sfd = -1, fd = -1, cfd = -1, flags; + socklen_t flagslen = sizeof(flags); + + memset(bss, 0, sizeof(*bss)); + + sfd = start_server(family, SOCK_STREAM, + family == AF_INET6 ? addr6_str : addr4_str, 0, 0); + if (!ASSERT_GE(sfd, 0, "start_server")) + return; + + fd = connect_to_fd(sfd, 0); + if (!ASSERT_GE(fd, 0, "connect_to_fd_server")) + goto err_out; + + /* cgroup/getsockopt prog will intercept getsockopt() below and + * retrieve the tcp socket bpf_sock_ops_cb_flags value for the + * accept()ed socket; this was set earlier in the passive established + * callback for the accept()ed socket via bpf_setsockopt(). + */ + getsockopt_link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd); + if (!ASSERT_OK_PTR(getsockopt_link, "getsockopt prog")) + goto err_out; + + cfd = accept(sfd, NULL, 0); + if (!ASSERT_GE(cfd, 0, "accept")) + goto err_out; + + if (!ASSERT_OK(getsockopt(cfd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, &flagslen), + "getsockopt_flags")) + goto err_out; + ASSERT_EQ(flags & BPF_SOCK_OPS_STATE_CB_FLAG, BPF_SOCK_OPS_STATE_CB_FLAG, + "cb_flags_set"); +err_out: + close(sfd); + if (fd != -1) + close(fd); + if (cfd != -1) + close(cfd); + bpf_link__destroy(getsockopt_link); +} + void test_setget_sockopt(void) { cg_fd = test__join_cgroup(CG_NAME); @@ -191,6 +236,8 @@ void test_setget_sockopt(void) test_udp(AF_INET); test_ktls(AF_INET6); test_ktls(AF_INET); + test_nonstandard_opt(AF_INET); + test_nonstandard_opt(AF_INET6); done: setget_sockopt__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 1337153eb0ad..82bfb266741c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -451,11 +451,11 @@ out: #define MAX_EVENTS 10 static void test_sockmap_skb_verdict_shutdown(void) { + int n, err, map, verdict, c1 = -1, p1 = -1; struct epoll_event ev, events[MAX_EVENTS]; - int n, err, map, verdict, s, c1 = -1, p1 = -1; struct test_sockmap_pass_prog *skel; - int epollfd; int zero = 0; + int epollfd; char b; skel = test_sockmap_pass_prog__open_and_load(); @@ -469,10 +469,7 @@ static void test_sockmap_skb_verdict_shutdown(void) if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; - s = socket_loopback(AF_INET, SOCK_STREAM); - if (s < 0) - goto out; - err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); if (err < 0) goto out; @@ -506,8 +503,8 @@ out: static void test_sockmap_skb_verdict_fionread(bool pass_prog) { + int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1; int expected, zero = 0, sent, recvd, avail; - int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; struct test_sockmap_pass_prog *pass = NULL; struct test_sockmap_drop_prog *drop = NULL; char buf[256] = "0123456789"; @@ -534,11 +531,8 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; - s = socket_loopback(AF_INET, SOCK_STREAM); - if (!ASSERT_GT(s, -1, "socket_loopback(s)")) - goto out; - err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1); - if (!ASSERT_OK(err, "create_socket_pairs(s)")) + err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) goto out; err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); @@ -570,16 +564,12 @@ out: static void test_sockmap_skb_verdict_peek_helper(int map) { - int err, s, c1, p1, zero = 0, sent, recvd, avail; + int err, c1, p1, zero = 0, sent, recvd, avail; char snd[256] = "0123456789"; char rcv[256] = "0"; - s = socket_loopback(AF_INET, SOCK_STREAM); - if (!ASSERT_GT(s, -1, "socket_loopback(s)")) - return; - - err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); - if (!ASSERT_OK(err, "create_pairs(s)")) + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pair()")) return; err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index e880f97bc44d..38e35c72bdaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -3,6 +3,9 @@ #include <linux/vm_sockets.h> +/* include/linux/net.h */ +#define SOCK_TYPE_MASK 0xf + #define IO_TIMEOUT_SEC 30 #define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 @@ -14,6 +17,17 @@ #define __always_unused __attribute__((__unused__)) +/* include/linux/cleanup.h */ +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) + +#define take_fd(fd) __get_and_null(fd, -EBADF) + #define _FAIL(errnum, fmt...) \ ({ \ error_at_line(0, (errnum), __func__, __LINE__, fmt); \ @@ -179,6 +193,14 @@ __ret; \ }) +static inline void close_fd(int *fd) +{ + if (*fd >= 0) + xclose(*fd); +} + +#define __close_fd __attribute__((cleanup(close_fd))) + static inline int poll_connect(int fd, unsigned int timeout_sec) { struct timeval timeout = { .tv_sec = timeout_sec }; @@ -312,54 +334,6 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); } -static inline int create_pair(int s, int family, int sotype, int *c, int *p) -{ - struct sockaddr_storage addr; - socklen_t len; - int err = 0; - - len = sizeof(addr); - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - - *c = xsocket(family, sotype, 0); - if (*c < 0) - return errno; - err = xconnect(*c, sockaddr(&addr), len); - if (err) { - err = errno; - goto close_cli0; - } - - *p = xaccept_nonblock(s, NULL, NULL); - if (*p < 0) { - err = errno; - goto close_cli0; - } - return err; -close_cli0: - close(*c); - return err; -} - -static inline int create_socket_pairs(int s, int family, int sotype, - int *c0, int *c1, int *p0, int *p1) -{ - int err; - - err = create_pair(s, family, sotype, c0, p0); - if (err) - return err; - - err = create_pair(s, family, sotype, c1, p1); - if (err) { - close(*c0); - close(*p0); - } - return err; -} - static inline int enable_reuseport(int s, int progfd) { int err, one = 1; @@ -412,5 +386,84 @@ static inline int socket_loopback(int family, int sotype) return socket_loopback_reuseport(family, sotype, -1); } +static inline int create_pair(int family, int sotype, int *p0, int *p1) +{ + __close_fd int s, c = -1, p = -1; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err; + + s = socket_loopback(family, sotype); + if (s < 0) + return s; + + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + + c = xsocket(family, sotype, 0); + if (c < 0) + return c; + + err = connect(c, sockaddr(&addr), len); + if (err) { + if (errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + return err; + } + + err = poll_connect(c, IO_TIMEOUT_SEC); + if (err) { + FAIL_ERRNO("poll_connect"); + return err; + } + } + + switch (sotype & SOCK_TYPE_MASK) { + case SOCK_DGRAM: + err = xgetsockname(c, sockaddr(&addr), &len); + if (err) + return err; + + err = xconnect(s, sockaddr(&addr), len); + if (err) + return err; + + *p0 = take_fd(s); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + return p; + + *p0 = take_fd(p); + break; + default: + FAIL("Unsupported socket type %#x", sotype); + return -EOPNOTSUPP; + } + + *p1 = take_fd(c); + return 0; +} + +static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, + int *p0, int *p1) +{ + int err; + + err = create_pair(family, sotype, c0, p0); + if (err) + return err; + + err = create_pair(family, sotype, c1, p1); + if (err) { + close(*c0); + close(*p0); + } + + return err; +} #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index ebb5f6336052..4ee1148d22be 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -677,7 +677,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); - int s, c0, c1, p0, p1; + int c0, c1, p0, p1; unsigned int pass; int err, n; u32 key; @@ -685,13 +685,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, zero_verdict_count(verd_mapfd); - s = socket_loopback(family, sotype | SOCK_NONBLOCK); - if (s < 0) - return; - - err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); + err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1, + &p0, &p1); if (err) - goto close_srv; + return; err = add_to_sockmap(sock_mapfd, p0, p1); if (err) @@ -722,8 +719,6 @@ close: xclose(c1); xclose(p0); xclose(c0); -close_srv: - xclose(s); } static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, @@ -909,7 +904,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk static void redir_partial(int family, int sotype, int sock_map, int parser_map) { - int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; + int c0 = -1, c1 = -1, p0 = -1, p1 = -1; int err, n, key, value; char buf[] = "abc"; @@ -919,13 +914,10 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) if (err) return; - s = socket_loopback(family, sotype | SOCK_NONBLOCK); - if (s < 0) - goto clean_parser_map; - - err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); + err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1, + &p0, &p1); if (err) - goto close_srv; + goto clean_parser_map; err = add_to_sockmap(sock_map, p0, p1); if (err) @@ -944,8 +936,6 @@ close: xclose(p0); xclose(c1); xclose(p1); -close_srv: - xclose(s); clean_parser_map: key = 0; @@ -1500,49 +1490,7 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma /* Returns two connected loopback vsock sockets */ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) { - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int s, p, c; - - s = socket_loopback(AF_VSOCK, sotype); - if (s < 0) - return -1; - - c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0); - if (c == -1) - goto close_srv; - - if (getsockname(s, sockaddr(&addr), &len) < 0) - goto close_cli; - - if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) { - FAIL_ERRNO("connect"); - goto close_cli; - } - - len = sizeof(addr); - p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC); - if (p < 0) - goto close_cli; - - if (poll_connect(c, IO_TIMEOUT_SEC) < 0) { - FAIL_ERRNO("poll_connect"); - goto close_acc; - } - - *v0 = p; - *v1 = c; - - return 0; - -close_acc: - close(p); -close_cli: - close(c); -close_srv: - close(s); - - return -1; + return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1); } static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, @@ -1691,44 +1639,7 @@ static void test_reuseport(struct test_sockmap_listen *skel, static int inet_socketpair(int family, int type, int *s, int *c) { - struct sockaddr_storage addr; - socklen_t len; - int p0, c0; - int err; - - p0 = socket_loopback(family, type | SOCK_NONBLOCK); - if (p0 < 0) - return p0; - - len = sizeof(addr); - err = xgetsockname(p0, sockaddr(&addr), &len); - if (err) - goto close_peer0; - - c0 = xsocket(family, type | SOCK_NONBLOCK, 0); - if (c0 < 0) { - err = c0; - goto close_peer0; - } - err = xconnect(c0, sockaddr(&addr), len); - if (err) - goto close_cli0; - err = xgetsockname(c0, sockaddr(&addr), &len); - if (err) - goto close_cli0; - err = xconnect(p0, sockaddr(&addr), len); - if (err) - goto close_cli0; - - *s = p0; - *c = c0; - return 0; - -close_cli0: - xclose(c0); -close_peer0: - xclose(p0); - return err; + return create_pair(family, type | SOCK_NONBLOCK, s, c); } static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, @@ -1795,11 +1706,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; int err; - if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) + if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) return; c0 = sfd[0], p0 = sfd[1]; - err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); + err = inet_socketpair(family, type, &p1, &c1); if (err) goto close; @@ -1847,7 +1758,7 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; int err; - err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); + err = inet_socketpair(family, type, &p0, &c0); if (err) return; @@ -1882,7 +1793,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, -1, verdict_map, REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_DGRAM, + unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, -1, verdict_map, REDIR_EGRESS, NO_FLAGS); diff --git a/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c new file mode 100644 index 000000000000..accc42e01f8a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_tp_btf_nullable.skel.h" + +void test_tp_btf_nullable(void) +{ + if (!env.has_testmod) { + test__skip(); + return; + } + + RUN_TESTS(test_tp_btf_nullable); +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 68b8c6eca508..8f36c9de7591 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -6,6 +6,7 @@ #include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include <linux/if_ether.h> #include "bpf_misc.h" #include "bpf_kfuncs.h" @@ -1254,6 +1255,30 @@ int skb_invalid_ctx(void *ctx) return 0; } +SEC("fentry/skb_tx_error") +__failure __msg("must be referenced or trusted") +int BPF_PROG(skb_invalid_ctx_fentry, void *skb) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_skb(skb, 0, &ptr); + + return 0; +} + +SEC("fexit/skb_tx_error") +__failure __msg("must be referenced or trusted") +int BPF_PROG(skb_invalid_ctx_fexit, void *skb) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_skb(skb, 0, &ptr); + + return 0; +} + /* Reject writes to dynptr slot for uninit arg */ SEC("?raw_tp") __failure __msg("potential write to dynptr at off=-16") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 5985920d162e..bfcc85686cf0 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -5,6 +5,7 @@ #include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include "bpf_misc.h" #include "bpf_kfuncs.h" #include "errno.h" @@ -544,3 +545,25 @@ int test_dynptr_skb_strcmp(struct __sk_buff *skb) return 1; } + +SEC("tp_btf/kfree_skb") +int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location) +{ + __u8 write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* since tp_btf skbs are read only, writes should fail */ + ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + if (ret != -EINVAL) { + err = 2; + return 1; + } + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index 60518aed1ffc..6dd4318debbf 100644 --- a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -59,6 +59,8 @@ static const struct sockopt_test sol_tcp_tests[] = { { .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, }, { .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, }, { .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, }, + { .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS, + .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, }, { .opt = 0, }, }; @@ -353,11 +355,30 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, return 1; } +SEC("cgroup/getsockopt") +int _getsockopt(struct bpf_sockopt *ctx) +{ + struct bpf_sock *sk = ctx->sk; + int *optval = ctx->optval; + struct tcp_sock *tp; + + if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS) + return 1; + + tp = bpf_core_cast(sk, struct tcp_sock); + if (ctx->optval + sizeof(int) <= ctx->optval_end) { + *optval = tp->bpf_sock_ops_cb_flags; + ctx->retval = 0; + } + return 1; +} + SEC("sockops") int skops_sockopt(struct bpf_sock_ops *skops) { struct bpf_sock *bpf_sk = skops->sk; struct sock *sk; + int flags; if (!bpf_sk) return 1; @@ -384,9 +405,8 @@ int skops_sockopt(struct bpf_sock_ops *skops) nr_passive += !(bpf_test_sockopt(skops, sk) || test_tcp_maxseg(skops, sk) || test_tcp_saved_syn(skops, sk)); - bpf_sock_ops_cb_flags_set(skops, - skops->bpf_sock_ops_cb_flags | - BPF_SOCK_OPS_STATE_CB_FLAG); + flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG; + bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags)); break; case BPF_SOCK_OPS_STATE_CB: if (skops->args[1] == BPF_TCP_CLOSE_WAIT) diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index 44ee0d037f95..eb5cca1fce16 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -486,17 +486,10 @@ static int tcp_validate_cookie(struct tcp_syncookie *ctx) goto err; mssind = (cookie & (3 << 6)) >> 6; - if (ctx->ipv4) { - if (mssind > ARRAY_SIZE(msstab4)) - goto err; - + if (ctx->ipv4) ctx->attrs.mss = msstab4[mssind]; - } else { - if (mssind > ARRAY_SIZE(msstab6)) - goto err; - + else ctx->attrs.mss = msstab6[mssind]; - } ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK; ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale; diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c new file mode 100644 index 000000000000..bba3e37f749b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" +#include "bpf_misc.h" + +SEC("tp_btf/bpf_testmod_test_nullable_bare") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) +{ + return nullable_ctx->len; +} + +SEC("tp_btf/bpf_testmod_test_nullable_bare") +int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx) +{ + if (nullable_ctx) + return nullable_ctx->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 92af633faea8..11f047b8af75 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -325,6 +325,25 @@ out: return zc_avail; } +#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" +static unsigned int get_max_skb_frags(void) +{ + unsigned int max_skb_frags = 0; + FILE *file; + + file = fopen(MAX_SKB_FRAGS_PATH, "r"); + if (!file) { + ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH); + return 0; + } + + if (fscanf(file, "%u", &max_skb_frags) != 1) + ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH); + + fclose(file); + return max_skb_frags; +} + static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"busy-poll", no_argument, 0, 'b'}, @@ -2245,13 +2264,24 @@ static int testapp_poll_rxq_tmout(struct test_spec *test) static int testapp_too_many_frags(struct test_spec *test) { - struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {}; + struct pkt *pkts; u32 max_frags, i; + int ret; - if (test->mode == TEST_MODE_ZC) + if (test->mode == TEST_MODE_ZC) { max_frags = test->ifobj_tx->xdp_zc_max_segs; - else - max_frags = XSK_DESC__MAX_SKB_FRAGS; + } else { + max_frags = get_max_skb_frags(); + if (!max_frags) { + ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n"); + max_frags = 17; + } + max_frags += 1; + } + + pkts = calloc(2 * max_frags + 2, sizeof(struct pkt)); + if (!pkts) + return TEST_FAILURE; test->mtu = MAX_ETH_JUMBO_SIZE; @@ -2281,7 +2311,10 @@ static int testapp_too_many_frags(struct test_spec *test) pkts[2 * max_frags + 1].valid = true; pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2); - return testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); + + free(pkts); + return ret; } static int xsk_load_xdp_programs(struct ifobject *ifobj) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 885c948c5d83..e46e823f6a1a 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -55,7 +55,6 @@ #define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024) #define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024) #define XSK_DESC__INVALID_OPTION (0xffff) -#define XSK_DESC__MAX_SKB_FRAGS 18 #define HUGEPAGE_SIZE (2 * 1024 * 1024) #define PKT_DUMP_NB_TO_PRINT 16 #define RUN_ALL_TESTS UINT_MAX diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 432db923bced..1e2d46636a0c 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -141,6 +141,16 @@ long cg_read_long(const char *cgroup, const char *control) return atol(buf); } +long cg_read_long_fd(int fd) +{ + char buf[128]; + + if (pread(fd, buf, sizeof(buf), 0) <= 0) + return -1; + + return atol(buf); +} + long cg_read_key_long(const char *cgroup, const char *control, const char *key) { char buf[PAGE_SIZE]; @@ -183,6 +193,18 @@ int cg_write(const char *cgroup, const char *control, char *buf) return ret == len ? 0 : ret; } +/* + * Returns fd on success, or -1 on failure. + * (fd should be closed with close() as usual) + */ +int cg_open(const char *cgroup, const char *control, int flags) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/%s", cgroup, control); + return open(path, flags); +} + int cg_write_numeric(const char *cgroup, const char *control, long value) { char buf[64]; diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index e8d04ac9e3d2..19b131ee7707 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -34,9 +34,11 @@ extern int cg_read_strcmp(const char *cgroup, const char *control, extern int cg_read_strstr(const char *cgroup, const char *control, const char *needle); extern long cg_read_long(const char *cgroup, const char *control); +extern long cg_read_long_fd(int fd); long cg_read_key_long(const char *cgroup, const char *control, const char *key); extern long cg_read_lc(const char *cgroup, const char *control); extern int cg_write(const char *cgroup, const char *control, char *buf); +extern int cg_open(const char *cgroup, const char *control, int flags); int cg_write_numeric(const char *cgroup, const char *control, long value); extern int cg_run(const char *cgroup, int (*fn)(const char *cgroup, void *arg), diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 7c08cc153367..03c1bdaed2c3 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -84,6 +84,20 @@ echo member > test/cpuset.cpus.partition echo "" > test/cpuset.cpus [[ $RESULT -eq 0 ]] && skip_test "Child cgroups are using cpuset!" +# +# If isolated CPUs have been reserved at boot time (as shown in +# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7 +# that will be used by this script for testing purpose. If not, some of +# the tests may fail incorrectly. These isolated CPUs will also be removed +# before being compared with the expected results. +# +BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +if [[ -n "$BOOT_ISOLCPUS" ]] +then + [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] && + skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" + echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" +fi cleanup() { online_cpus @@ -321,7 +335,7 @@ TEST_MATRIX=( # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # - # Incorrect change to cpuset.cpus invalidates partition root + # Incorrect change to cpuset.cpus[.exclusive] invalidates partition root # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. @@ -365,6 +379,16 @@ TEST_MATRIX=( # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + # Child partition root that try to take all CPUs from parent partition + # with tasks will remain invalid. + " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1" + " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + + # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't + # affect cpuset.cpus.exclusive.effective. + " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3" + # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: @@ -632,7 +656,8 @@ check_cgroup_states() # Note that isolated CPUs from the sched/domains context include offline # CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may # not be included in the cpuset.cpus.isolated control file which contains -# only CPUs in isolated partitions. +# only CPUs in isolated partitions as well as those that are isolated at +# boot time. # # $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>} # <isolcpus1> - expected sched/domains value @@ -659,18 +684,21 @@ check_isolcpus() fi # - # Check the debug isolated cpumask, if present + # Check cpuset.cpus.isolated cpumask # - [[ -f $ISCPUS ]] && { + if [[ -z "$BOOT_ISOLCPUS" ]] + then + ISOLCPUS=$(cat $ISCPUS) + else + ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") + fi + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + # Take a 50ms pause and try again + pause 0.05 ISOLCPUS=$(cat $ISCPUS) - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { - # Take a 50ms pause and try again - pause 0.05 - ISOLCPUS=$(cat $ISCPUS) - } - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 - ISOLCPUS= } + [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + ISOLCPUS= # # Use the sched domain in debugfs to check isolated CPUs, if available @@ -703,6 +731,9 @@ check_isolcpus() fi done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU + [[ -n "BOOT_ISOLCPUS" ]] && + ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//") + [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] } @@ -720,7 +751,8 @@ test_fail() } # -# Check to see if there are unexpected isolated CPUs left +# Check to see if there are unexpected isolated CPUs left beyond the boot +# time isolated ones. # null_isolcpus_check() { diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh new file mode 100755 index 000000000000..42a6628fb8bc --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpuset_v1_base.sh @@ -0,0 +1,77 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Basc test for cpuset v1 interfaces write/read +# + +skip_test() { + echo "$1" + echo "Test SKIPPED" + exit 4 # ksft_skip +} + +write_test() { + dir=$1 + interface=$2 + value=$3 + original=$(cat $dir/$interface) + echo "testing $interface $value" + echo $value > $dir/$interface + new=$(cat $dir/$interface) + [[ $value -ne $(cat $dir/$interface) ]] && { + echo "$interface write $value failed: new:$new" + exit 1 + } +} + +[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" + +# Find cpuset v1 mount point +CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk '{print $3}') +[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!" + +# +# Create a test cpuset, read write test +# +TDIR=test$$ +[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR + +ITF_MATRIX=( + #interface value expect root_only + 'cpuset.cpus 0-1 0-1 0' + 'cpuset.mem_exclusive 1 1 0' + 'cpuset.mem_exclusive 0 0 0' + 'cpuset.mem_hardwall 1 1 0' + 'cpuset.mem_hardwall 0 0 0' + 'cpuset.memory_migrate 1 1 0' + 'cpuset.memory_migrate 0 0 0' + 'cpuset.memory_spread_page 1 1 0' + 'cpuset.memory_spread_page 0 0 0' + 'cpuset.memory_spread_slab 1 1 0' + 'cpuset.memory_spread_slab 0 0 0' + 'cpuset.mems 0 0 0' + 'cpuset.sched_load_balance 1 1 0' + 'cpuset.sched_load_balance 0 0 0' + 'cpuset.sched_relax_domain_level 2 2 0' + 'cpuset.memory_pressure_enabled 1 1 1' + 'cpuset.memory_pressure_enabled 0 0 1' +) + +run_test() +{ + cnt="${ITF_MATRIX[@]}" + for i in "${ITF_MATRIX[@]}" ; do + args=($i) + root_only=${args[3]} + [[ $root_only -eq 1 ]] && { + write_test "$CPUSET" "${args[0]}" "${args[1]}" "${args[2]}" + continue + } + write_test "$CPUSET/$TDIR" "${args[0]}" "${args[1]}" "${args[2]}" + done +} + +run_test +rmdir $CPUSET/$TDIR +echo "Test PASSED" +exit 0 diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index 41ae8047b889..16f5d74ae762 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -161,13 +161,16 @@ cleanup: /* * This test create a memory cgroup, allocates * some anonymous memory and some pagecache - * and check memory.current and some memory.stat values. + * and checks memory.current, memory.peak, and some memory.stat values. */ -static int test_memcg_current(const char *root) +static int test_memcg_current_peak(const char *root) { int ret = KSFT_FAIL; - long current; + long current, peak, peak_reset; char *memcg; + bool fd2_closed = false, fd3_closed = false, fd4_closed = false; + int peak_fd = -1, peak_fd2 = -1, peak_fd3 = -1, peak_fd4 = -1; + struct stat ss; memcg = cg_name(root, "memcg_test"); if (!memcg) @@ -180,15 +183,124 @@ static int test_memcg_current(const char *root) if (current != 0) goto cleanup; + peak = cg_read_long(memcg, "memory.peak"); + if (peak != 0) + goto cleanup; + if (cg_run(memcg, alloc_anon_50M_check, NULL)) goto cleanup; + peak = cg_read_long(memcg, "memory.peak"); + if (peak < MB(50)) + goto cleanup; + + /* + * We'll open a few FDs for the same memory.peak file to exercise the free-path + * We need at least three to be closed in a different order than writes occurred to test + * the linked-list handling. + */ + peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); + + if (peak_fd == -1) { + if (errno == ENOENT) + ret = KSFT_SKIP; + goto cleanup; + } + + /* + * Before we try to use memory.peak's fd, try to figure out whether + * this kernel supports writing to that file in the first place. (by + * checking the writable bit on the file's st_mode) + */ + if (fstat(peak_fd, &ss)) + goto cleanup; + + if ((ss.st_mode & S_IWUSR) == 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + peak_fd2 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); + + if (peak_fd2 == -1) + goto cleanup; + + peak_fd3 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); + + if (peak_fd3 == -1) + goto cleanup; + + /* any non-empty string resets, but make it clear */ + static const char reset_string[] = "reset\n"; + + peak_reset = write(peak_fd, reset_string, sizeof(reset_string)); + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + peak_reset = write(peak_fd2, reset_string, sizeof(reset_string)); + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + peak_reset = write(peak_fd3, reset_string, sizeof(reset_string)); + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + /* Make sure a completely independent read isn't affected by our FD-local reset above*/ + peak = cg_read_long(memcg, "memory.peak"); + if (peak < MB(50)) + goto cleanup; + + fd2_closed = true; + if (close(peak_fd2)) + goto cleanup; + + peak_fd4 = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); + + if (peak_fd4 == -1) + goto cleanup; + + peak_reset = write(peak_fd4, reset_string, sizeof(reset_string)); + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + peak = cg_read_long_fd(peak_fd); + if (peak > MB(30) || peak < 0) + goto cleanup; + if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) goto cleanup; + peak = cg_read_long(memcg, "memory.peak"); + if (peak < MB(50)) + goto cleanup; + + /* Make sure everything is back to normal */ + peak = cg_read_long_fd(peak_fd); + if (peak < MB(50)) + goto cleanup; + + peak = cg_read_long_fd(peak_fd4); + if (peak < MB(50)) + goto cleanup; + + fd3_closed = true; + if (close(peak_fd3)) + goto cleanup; + + fd4_closed = true; + if (close(peak_fd4)) + goto cleanup; + ret = KSFT_PASS; cleanup: + close(peak_fd); + if (!fd2_closed) + close(peak_fd2); + if (!fd3_closed) + close(peak_fd3); + if (!fd4_closed) + close(peak_fd4); cg_destroy(memcg); free(memcg); @@ -817,13 +929,19 @@ cleanup: /* * This test checks that memory.swap.max limits the amount of - * anonymous memory which can be swapped out. + * anonymous memory which can be swapped out. Additionally, it verifies that + * memory.swap.peak reflects the high watermark and can be reset. */ -static int test_memcg_swap_max(const char *root) +static int test_memcg_swap_max_peak(const char *root) { int ret = KSFT_FAIL; char *memcg; - long max; + long max, peak; + struct stat ss; + int swap_peak_fd = -1, mem_peak_fd = -1; + + /* any non-empty string resets */ + static const char reset_string[] = "foobarbaz"; if (!is_swap_enabled()) return KSFT_SKIP; @@ -840,6 +958,61 @@ static int test_memcg_swap_max(const char *root) goto cleanup; } + swap_peak_fd = cg_open(memcg, "memory.swap.peak", + O_RDWR | O_APPEND | O_CLOEXEC); + + if (swap_peak_fd == -1) { + if (errno == ENOENT) + ret = KSFT_SKIP; + goto cleanup; + } + + /* + * Before we try to use memory.swap.peak's fd, try to figure out + * whether this kernel supports writing to that file in the first + * place. (by checking the writable bit on the file's st_mode) + */ + if (fstat(swap_peak_fd, &ss)) + goto cleanup; + + if ((ss.st_mode & S_IWUSR) == 0) { + ret = KSFT_SKIP; + goto cleanup; + } + + mem_peak_fd = cg_open(memcg, "memory.peak", O_RDWR | O_APPEND | O_CLOEXEC); + + if (mem_peak_fd == -1) + goto cleanup; + + if (cg_read_long(memcg, "memory.swap.peak")) + goto cleanup; + + if (cg_read_long_fd(swap_peak_fd)) + goto cleanup; + + /* switch the swap and mem fds into local-peak tracking mode*/ + int peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); + + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + if (cg_read_long_fd(swap_peak_fd)) + goto cleanup; + + if (cg_read_long(memcg, "memory.peak")) + goto cleanup; + + if (cg_read_long_fd(mem_peak_fd)) + goto cleanup; + + peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + if (cg_read_long_fd(mem_peak_fd)) + goto cleanup; + if (cg_read_strcmp(memcg, "memory.max", "max\n")) goto cleanup; @@ -862,6 +1035,61 @@ static int test_memcg_swap_max(const char *root) if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) goto cleanup; + peak = cg_read_long(memcg, "memory.peak"); + if (peak < MB(29)) + goto cleanup; + + peak = cg_read_long(memcg, "memory.swap.peak"); + if (peak < MB(29)) + goto cleanup; + + peak = cg_read_long_fd(mem_peak_fd); + if (peak < MB(29)) + goto cleanup; + + peak = cg_read_long_fd(swap_peak_fd); + if (peak < MB(29)) + goto cleanup; + + /* + * open, reset and close the peak swap on another FD to make sure + * multiple extant fds don't corrupt the linked-list + */ + peak_reset = cg_write(memcg, "memory.swap.peak", (char *)reset_string); + if (peak_reset) + goto cleanup; + + peak_reset = cg_write(memcg, "memory.peak", (char *)reset_string); + if (peak_reset) + goto cleanup; + + /* actually reset on the fds */ + peak_reset = write(swap_peak_fd, reset_string, sizeof(reset_string)); + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + peak_reset = write(mem_peak_fd, reset_string, sizeof(reset_string)); + if (peak_reset != sizeof(reset_string)) + goto cleanup; + + peak = cg_read_long_fd(swap_peak_fd); + if (peak > MB(10)) + goto cleanup; + + /* + * The cgroup is now empty, but there may be a page or two associated + * with the open FD accounted to it. + */ + peak = cg_read_long_fd(mem_peak_fd); + if (peak > MB(1)) + goto cleanup; + + if (cg_read_long(memcg, "memory.peak") < MB(29)) + goto cleanup; + + if (cg_read_long(memcg, "memory.swap.peak") < MB(29)) + goto cleanup; + if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) goto cleanup; @@ -869,9 +1097,29 @@ static int test_memcg_swap_max(const char *root) if (max <= 0) goto cleanup; + peak = cg_read_long(memcg, "memory.peak"); + if (peak < MB(29)) + goto cleanup; + + peak = cg_read_long(memcg, "memory.swap.peak"); + if (peak < MB(29)) + goto cleanup; + + peak = cg_read_long_fd(mem_peak_fd); + if (peak < MB(29)) + goto cleanup; + + peak = cg_read_long_fd(swap_peak_fd); + if (peak < MB(19)) + goto cleanup; + ret = KSFT_PASS; cleanup: + if (mem_peak_fd != -1 && close(mem_peak_fd)) + ret = KSFT_FAIL; + if (swap_peak_fd != -1 && close(swap_peak_fd)) + ret = KSFT_FAIL; cg_destroy(memcg); free(memcg); @@ -1295,7 +1543,7 @@ struct memcg_test { const char *name; } tests[] = { T(test_memcg_subtree_control), - T(test_memcg_current), + T(test_memcg_current_peak), T(test_memcg_min), T(test_memcg_low), T(test_memcg_high), @@ -1303,7 +1551,7 @@ struct memcg_test { T(test_memcg_max), T(test_memcg_reclaim), T(test_memcg_oom_events), - T(test_memcg_swap_max), + T(test_memcg_swap_max_peak), T(test_memcg_sock), T(test_memcg_oom_group_leaf_events), T(test_memcg_oom_group_parent_events), diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c index 190096017f80..40de679248b8 100644 --- a/tools/testing/selftests/cgroup/test_zswap.c +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -263,15 +263,13 @@ out: static int attempt_writeback(const char *cgroup, void *arg) { long pagesize = sysconf(_SC_PAGESIZE); - char *test_group = arg; size_t memsize = MB(4); char buf[pagesize]; long zswap_usage; - bool wb_enabled; + bool wb_enabled = *(bool *) arg; int ret = -1; char *mem; - wb_enabled = cg_read_long(test_group, "memory.zswap.writeback"); mem = (char *)malloc(memsize); if (!mem) return ret; @@ -288,12 +286,12 @@ static int attempt_writeback(const char *cgroup, void *arg) memcpy(&mem[i], buf, pagesize); /* Try and reclaim allocated memory */ - if (cg_write_numeric(test_group, "memory.reclaim", memsize)) { + if (cg_write_numeric(cgroup, "memory.reclaim", memsize)) { ksft_print_msg("Failed to reclaim all of the requested memory\n"); goto out; } - zswap_usage = cg_read_long(test_group, "memory.zswap.current"); + zswap_usage = cg_read_long(cgroup, "memory.zswap.current"); /* zswpin */ for (int i = 0; i < memsize; i += pagesize) { @@ -303,7 +301,7 @@ static int attempt_writeback(const char *cgroup, void *arg) } } - if (cg_write_numeric(test_group, "memory.zswap.max", zswap_usage/2)) + if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage/2)) goto out; /* @@ -312,7 +310,7 @@ static int attempt_writeback(const char *cgroup, void *arg) * If writeback is disabled, memory reclaim will fail as zswap is limited and * it can't writeback to swap. */ - ret = cg_write_numeric(test_group, "memory.reclaim", memsize); + ret = cg_write_numeric(cgroup, "memory.reclaim", memsize); if (!wb_enabled) ret = (ret == -EAGAIN) ? 0 : -1; @@ -321,12 +319,41 @@ out: return ret; } +static int test_zswap_writeback_one(const char *cgroup, bool wb) +{ + long zswpwb_before, zswpwb_after; + + zswpwb_before = get_cg_wb_count(cgroup); + if (zswpwb_before != 0) { + ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before); + return -1; + } + + if (cg_run(cgroup, attempt_writeback, (void *) &wb)) + return -1; + + /* Verify that zswap writeback occurred only if writeback was enabled */ + zswpwb_after = get_cg_wb_count(cgroup); + if (zswpwb_after < 0) + return -1; + + if (wb != !!zswpwb_after) { + ksft_print_msg("zswpwb_after is %ld while wb is %s", + zswpwb_after, wb ? "enabled" : "disabled"); + return -1; + } + + return 0; +} + /* Test to verify the zswap writeback path */ static int test_zswap_writeback(const char *root, bool wb) { - long zswpwb_before, zswpwb_after; int ret = KSFT_FAIL; - char *test_group; + char *test_group, *test_group_child = NULL; + + if (cg_read_strcmp(root, "memory.zswap.writeback", "1")) + return KSFT_SKIP; test_group = cg_name(root, "zswap_writeback_test"); if (!test_group) @@ -336,29 +363,35 @@ static int test_zswap_writeback(const char *root, bool wb) if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0")) goto out; - zswpwb_before = get_cg_wb_count(test_group); - if (zswpwb_before != 0) { - ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before); + if (test_zswap_writeback_one(test_group, wb)) goto out; - } - if (cg_run(test_group, attempt_writeback, (void *) test_group)) + /* Reset memory.zswap.max to max (modified by attempt_writeback), and + * set up child cgroup, whose memory.zswap.writeback is hardcoded to 1. + * Thus, the parent's setting shall be what's in effect. */ + if (cg_write(test_group, "memory.zswap.max", "max")) + goto out; + if (cg_write(test_group, "cgroup.subtree_control", "+memory")) goto out; - /* Verify that zswap writeback occurred only if writeback was enabled */ - zswpwb_after = get_cg_wb_count(test_group); - if (zswpwb_after < 0) + test_group_child = cg_name(test_group, "zswap_writeback_test_child"); + if (!test_group_child) + goto out; + if (cg_create(test_group_child)) + goto out; + if (cg_write(test_group_child, "memory.zswap.writeback", "1")) goto out; - if (wb != !!zswpwb_after) { - ksft_print_msg("zswpwb_after is %ld while wb is %s", - zswpwb_after, wb ? "enabled" : "disabled"); + if (test_zswap_writeback_one(test_group_child, wb)) goto out; - } ret = KSFT_PASS; out: + if (test_group_child) { + cg_destroy(test_group_child); + free(test_group_child); + } cg_destroy(test_group); free(test_group); return ret; diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile index ce262d097269..8e99f87f5d7c 100644 --- a/tools/testing/selftests/core/Makefile +++ b/tools/testing/selftests/core/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := close_range_test +TEST_GEN_PROGS := close_range_test unshare_test include ../lib.mk diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 12b4eb9d0434..e0d9851fe1c9 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -26,6 +26,10 @@ #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) #endif +#ifndef F_CREATED_QUERY +#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4) +#endif + static inline int sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags) { @@ -624,4 +628,39 @@ TEST(close_range_bitmap_corruption) EXPECT_EQ(0, WEXITSTATUS(status)); } +TEST(fcntl_created) +{ + for (int i = 0; i < 101; i++) { + int fd; + char path[PATH_MAX]; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + SKIP(return, + "Skipping test since /dev/null does not exist"); + } + + /* We didn't create "/dev/null". */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); + close(fd); + + sprintf(path, "aaaa_%d", i); + fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600); + ASSERT_GE(fd, 0); + + /* We created "aaaa_%d". */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1); + close(fd); + + fd = open(path, O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + + /* We're opening it again, so no positive creation check. */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); + close(fd); + unlink(path); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/core/unshare_test.c b/tools/testing/selftests/core/unshare_test.c new file mode 100644 index 000000000000..7fec9dfb1b0e --- /dev/null +++ b/tools/testing/selftests/core/unshare_test.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <linux/kernel.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> +#include <sys/resource.h> +#include <linux/close_range.h> + +#include "../kselftest_harness.h" +#include "../clone3/clone3_selftests.h" + +TEST(unshare_EMFILE) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + int fd; + ssize_t n, n2; + static char buf[512], buf2[512]; + struct rlimit rlimit; + int nr_open; + + fd = open("/proc/sys/fs/nr_open", O_RDWR); + ASSERT_GE(fd, 0); + + n = read(fd, buf, sizeof(buf)); + ASSERT_GT(n, 0); + ASSERT_EQ(buf[n - 1], '\n'); + + ASSERT_EQ(sscanf(buf, "%d", &nr_open), 1); + + ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit)); + + /* bump fs.nr_open */ + n2 = sprintf(buf2, "%d\n", nr_open + 1024); + lseek(fd, 0, SEEK_SET); + write(fd, buf2, n2); + + /* bump ulimit -n */ + rlimit.rlim_cur = nr_open + 1024; + rlimit.rlim_max = nr_open + 1024; + EXPECT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit)) { + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + exit(EXIT_FAILURE); + } + + /* get a descriptor past the old fs.nr_open */ + EXPECT_GE(dup2(2, nr_open + 64), 0) { + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + exit(EXIT_FAILURE); + } + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + EXPECT_GE(pid, 0) { + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + exit(EXIT_FAILURE); + } + + if (pid == 0) { + int err; + + /* restore fs.nr_open */ + lseek(fd, 0, SEEK_SET); + write(fd, buf, n); + /* ... and now unshare(CLONE_FILES) must fail with EMFILE */ + err = unshare(CLONE_FILES); + EXPECT_EQ(err, -1) + exit(EXIT_FAILURE); + EXPECT_EQ(errno, EMFILE) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index a8b1dbc0a3a5..e350c521b467 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -231,6 +231,21 @@ do_suspend() for i in `seq 1 $2`; do printf "Starting $1\n" + + if [ "$3" = "rtc" ]; then + if ! command -v rtcwake &> /dev/null; then + printf "rtcwake could not be found, please install it.\n" + return 1 + fi + + rtcwake -m $filename -s 15 + + if [ $? -ne 0 ]; then + printf "Failed to suspend using RTC wake alarm\n" + return 1 + fi + fi + echo $filename > $SYSFS/power/state printf "Came out of $1\n" diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index a0eb84cf7167..f12ff7416e41 100755 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -24,6 +24,8 @@ helpme() [-t <basic: Basic cpufreq testing suspend: suspend/resume, hibernate: hibernate/resume, + suspend_rtc: suspend/resume back using the RTC wakeup alarm, + hibernate_rtc: hibernate/resume back using the RTC wakeup alarm, modtest: test driver or governor modules. Only to be used with -d or -g options, sptest1: Simple governor switch to produce lockdep. sptest2: Concurrent governor switch to produce lockdep. @@ -76,7 +78,8 @@ parse_arguments() helpme ;; - t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic)) + t) # --func_type (Function to perform: basic, suspend, hibernate, + # suspend_rtc, hibernate_rtc, modtest, sptest1/2/3/4 (default: basic)) FUNC=$OPTARG ;; @@ -121,6 +124,14 @@ do_test() do_suspend "hibernate" 1 ;; + "suspend_rtc") + do_suspend "suspend" 1 rtc + ;; + + "hibernate_rtc") + do_suspend "hibernate" 1 rtc + ;; + "modtest") # Do we have modules in place? if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore index e65ef9d9cedc..2ab675fecb6b 100644 --- a/tools/testing/selftests/damon/.gitignore +++ b/tools/testing/selftests/damon/.gitignore @@ -3,3 +3,4 @@ huge_count_read_write debugfs_target_ids_read_before_terminate_race debugfs_target_ids_pid_leak access_memory +access_memory_even diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 1e2e98cc809d..5b2a6a5dd1af 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -25,4 +25,6 @@ TEST_PROGS += debugfs_target_ids_pid_leak.sh TEST_PROGS += sysfs_update_removed_scheme_dir.sh TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py +EXTRA_CLEAN = __pycache__ + include ../lib.mk diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py index 2e8a74aff543..2e8a74aff543 100644..100755 --- a/tools/testing/selftests/damon/damon_nr_regions.py +++ b/tools/testing/selftests/damon/damon_nr_regions.py diff --git a/tools/testing/selftests/damon/damos_apply_interval.py b/tools/testing/selftests/damon/damos_apply_interval.py index f04d43702481..f04d43702481 100644..100755 --- a/tools/testing/selftests/damon/damos_apply_interval.py +++ b/tools/testing/selftests/damon/damos_apply_interval.py diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py index 7d4c6bb2e3cd..7d4c6bb2e3cd 100644..100755 --- a/tools/testing/selftests/damon/damos_quota.py +++ b/tools/testing/selftests/damon/damos_quota.py diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py index 18246f3b62f7..18246f3b62f7 100644..100755 --- a/tools/testing/selftests/damon/damos_quota_goal.py +++ b/tools/testing/selftests/damon/damos_quota_goal.py diff --git a/tools/testing/selftests/damon/damos_tried_regions.py b/tools/testing/selftests/damon/damos_tried_regions.py index 3b347eb28bd2..3b347eb28bd2 100644..100755 --- a/tools/testing/selftests/damon/damos_tried_regions.py +++ b/tools/testing/selftests/damon/damos_tried_regions.py diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh index 31fe33c2b032..31fe33c2b032 100644..100755 --- a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh +++ b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh index fc793c4c9aea..fc793c4c9aea 100644..100755 --- a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh +++ b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py index 28c887a0108f..28c887a0108f 100644..100755 --- a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py +++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py index 90ad7409a7a6..90ad7409a7a6 100644..100755 --- a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py +++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index e54f382bcb02..39fb97a8c1df 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,8 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_INCLUDES := $(wildcard lib/py/*.py) +TEST_INCLUDES := $(wildcard lib/py/*.py) \ + ../../net/net_helper.sh \ + ../../net/lib.sh \ TEST_PROGS := \ + netcons_basic.sh \ ping.py \ queues.py \ stats.py \ diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index f6a58ce8a230..a2d8af60876d 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -1,2 +1,6 @@ CONFIG_IPV6=y CONFIG_NETDEVSIM=m +CONFIG_CONFIGFS_FS=y +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index 026d98976c35..05b6fbb3fcdd 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import errno import time import os from lib.py import ksft_run, ksft_exit, ksft_pr @@ -61,7 +62,7 @@ def test_pp_alloc(cfg, netdevnl): try: stats = get_stats() except NlError as e: - if e.nl_msg.error == -95: + if e.nl_msg.error == -errno.EOPNOTSUPP: stats = {} else: raise diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 011508ca604b..9d7adb3cf33b 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -3,7 +3,7 @@ import datetime import random -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt +from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx @@ -90,10 +90,10 @@ def _send_traffic_check(cfg, port, name, params): ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts)) if params.get('noise'): ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2, - "traffic on other queues:" + str(cnts)) + f"traffic on other queues ({name})':" + str(cnts)) if params.get('empty'): ksft_eq(sum(cnts[i] for i in params['empty']), 0, - "traffic on inactive queues: " + str(cnts)) + f"traffic on inactive queues ({name}): " + str(cnts)) def test_rss_key_indir(cfg): @@ -302,6 +302,78 @@ def test_hitless_key_update(cfg): ksft_eq(carrier1 - carrier0, 0) +def test_rss_context_dump(cfg): + """ + Test dumping RSS contexts. This tests mostly exercises the kernel APIs. + """ + + # Get a random key of the right size + data = get_rss(cfg) + if 'rss-hash-key' in data: + key_data = _rss_key_rand(len(data['rss-hash-key'])) + key = _rss_key_str(key_data) + else: + key_data = [] + key = "ba:ad" + + ids = [] + try: + ids.append(ethtool_create(cfg, "-X", f"context new")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + except CmdExitFailure: + if not ids: + raise KsftSkipEx("Unable to add any contexts") + ksft_pr(f"Added only {len(ids)} out of 3 contexts") + + expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids]) + + # Dump all + ctxs = cfg.ethnl.rss_get({}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # Sanity-check the results + for data in ctxs: + ksft_ne(set(data['indir']), {0}, "indir table is all zero") + ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") + + # More specific checks + if len(ids) > 1 and data.get('context') == ids[1]: + ksft_eq(set(data['indir']), {0, 1}, + "ctx1 - indir table mismatch") + if len(ids) > 2 and data.get('context') == ids[2]: + ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch") + + # Ifindex filter + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ctx_tuples = set(tuples) + ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump") + ksft_eq(expect_tuples, ctx_tuples) + + # Skip ctx 0 + expect_tuples.remove((cfg.ifname, -1)) + + ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # And finally both with ifindex and skip main + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True) + ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]) + ksft_eq(expect_tuples, ctx_tuples) + + def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): """ Test separating traffic into RSS contexts. @@ -542,7 +614,7 @@ def main() -> None: ksft_run([test_rss_key_indir, test_rss_queue_reconfigure, test_rss_resize, test_hitless_key_update, test_rss_context, test_rss_context4, test_rss_context32, - test_rss_context_queue_reconfigure, + test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg], args=(cfg, )) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index a5e800b8f103..1ea9bb695e94 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,6 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import ksft_setup from lib.py import cmd, ethtool, ip from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -14,7 +15,7 @@ def _load_env_file(src_path): src_dir = Path(src_path).parent.resolve() if not (src_dir / "net.config").exists(): - return env + return ksft_setup(env) with open((src_dir / "net.config").as_posix(), 'r') as fp: for line in fp.readlines(): @@ -30,7 +31,7 @@ def _load_env_file(src_path): if len(pair) != 2: raise Exception("Can't parse configuration line:", full_file) env[pair[0]] = pair[1] - return env + return ksft_setup(env) class NetDrvEnv: diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh index 877cd6df94a1..fe905a7f34b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 lib_dir=$(dirname $0)/../../../net/forwarding +ethtool_lib_dir=$(dirname $0)/../hw ALL_TESTS=" autoneg @@ -11,7 +12,7 @@ ALL_TESTS=" NUM_NETIFS=2 : ${TIMEOUT:=30000} # ms source $lib_dir/lib.sh -source $lib_dir/ethtool_lib.sh +source $ethtool_lib_dir/ethtool_lib.sh setup_prepare() { diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh new file mode 100755 index 000000000000..06021b2059b7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test creates two netdevsim virtual interfaces, assigns one of them (the +# "destination interface") to a new namespace, and assigns IP addresses to both +# interfaces. +# +# It listens on the destination interface using socat and configures a dynamic +# target on netconsole, pointing to the destination IP address. +# +# Finally, it checks whether the message was received properly on the +# destination interface. Note that this test may pollute the kernel log buffer +# (dmesg) and relies on dynamic configuration and namespaces being configured. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +# Simple script to test dynamic targets in netconsole +SRCIF="" # to be populated later +SRCIP=192.168.1.1 +DSTIF="" # to be populated later +DSTIP=192.168.1.2 + +PORT="6666" +MSG="netconsole selftest" +TARGET=$(mktemp -u netcons_XXXXX) +DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) +NETCONS_CONFIGFS="/sys/kernel/config/netconsole" +NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +# NAMESPACE will be populated by setup_ns with a random value +NAMESPACE="" + +# IDs for netdevsim +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) + +# Used to create and delete namespaces +source "${SCRIPTDIR}"/../../net/lib.sh +source "${SCRIPTDIR}"/../../net/net_helper.sh + +# Create netdevsim interfaces +create_ifaces() { + local NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device + + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" + udevadm settle 2> /dev/null || true + + local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID" + local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID" + + # These are global variables + SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \ + -path "$NSIM1"/net -exec basename {} \;) + DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \ + -path "$NSIM2"/net -exec basename {} \;) +} + +link_ifaces() { + local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" + local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex) + local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex) + + exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}" + exec {INITNS_FD}</proc/self/ns/net + + # Bind the dst interface to namespace + ip link set "${DSTIF}" netns "${NAMESPACE}" + + # Linking one device to the other one (on the other namespace} + if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK + then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup + exit "${ksft_skip}" + fi +} + +function configure_ip() { + # Configure the IPs for both interfaces + ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}" + ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up + + ip addr add "${SRCIP}"/24 dev "${SRCIF}" + ip link set "${SRCIF}" up +} + +function set_network() { + # setup_ns function is coming from lib.sh + setup_ns NAMESPACE + + # Create both interfaces, and assign the destination to a different + # namespace + create_ifaces + + # Link both interfaces back to back + link_ifaces + + configure_ip +} + +function create_dynamic_target() { + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + + # Create a dynamic target + mkdir "${NETCONS_PATH}" + + echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip + echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip + echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac + echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + + echo 1 > "${NETCONS_PATH}"/enabled +} + +function cleanup() { + local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" + + # delete netconsole dynamic reconfiguration + echo 0 > "${NETCONS_PATH}"/enabled + # Remove the configfs entry + rmdir "${NETCONS_PATH}" + + # Delete netdevsim devices + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" + + # this is coming from lib.sh + cleanup_all_ns + + # Restoring printk configurations + echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk +} + +function listen_port_and_save_to() { + local OUTPUT=${1} + # Just wait for 2 seconds + timeout 2 ip netns exec "${NAMESPACE}" \ + socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" +} + +function validate_result() { + local TMPFILENAME="$1" + + # Check if the file exists + if [ ! -f "$TMPFILENAME" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${TMPFILENAME}"; then + echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + + # Delete the file once it is validated, otherwise keep it + # for debugging purposes + rm "${TMPFILENAME}" + exit "${ksft_pass}" +} + +function check_for_dependencies() { + if [ "$(id -u)" -ne 0 ]; then + echo "This test must be run as root" >&2 + exit "${ksft_skip}" + fi + + if ! which socat > /dev/null ; then + echo "SKIP: socat(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which ip > /dev/null ; then + echo "SKIP: ip(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which udevadm > /dev/null ; then + echo "SKIP: udevadm(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if [ ! -d "${NETCONS_CONFIGFS}" ]; then + echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 + exit "${ksft_skip}" + fi + + if ip link show "${DSTIF}" 2> /dev/null; then + echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then + echo "SKIP: IPs already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi +} + +# ========== # +# Start here # +# ========== # +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Listed for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + +# Make sure the message was received in the dst part +# and exit +validate_result "${OUTPUT_FILE}" diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 820b8e0a22c6..63e3c045a3b2 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import errno from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv +from lib.py import ip, defer ethnl = EthtoolFamily() netfam = NetdevFamily() @@ -17,7 +20,7 @@ def check_pause(cfg) -> None: try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("pause not supported by the device") raise @@ -32,7 +35,7 @@ def check_fec(cfg) -> None: try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("FEC not supported by the device") raise @@ -117,7 +120,7 @@ def qstat_by_ifindex(cfg) -> None: # loopback has no stats with ksft_raises(NlError) as cm: netfam.qstats_get({"ifindex": 1}, dump=True) - ksft_eq(cm.exception.nl_msg.error, -95) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') # Try to get stats for lowest unused ifindex but not 0 @@ -133,9 +136,31 @@ def qstat_by_ifindex(cfg) -> None: ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') +@ksft_disruptive +def check_down(cfg) -> None: + try: + qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("qstats not supported by the device") + raise + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + for k, v in qstat.items(): + ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") + + # exercise per-queue API to make sure that "device down" state + # is handled correctly and doesn't crash + netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) + + def main() -> None: with NetDrvEnv(__file__) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex], + ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, + check_down], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c index ea0cdc37b44f..7ee7492138c6 100644 --- a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c +++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c @@ -257,12 +257,6 @@ TEST_F(attest_fixture, att_inval_addr) att_inval_addr_test(&self->uvio_attest.meas_addr, _metadata, self); } -static void __attribute__((constructor)) __constructor_order_last(void) -{ - if (!__constructor_order) - __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; -} - int main(int argc, char **argv) { int fd = open(UV_PATH, O_ACCMODE); diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh index 2d7e70c5ad2d..5e3f42ef249e 100755 --- a/tools/testing/selftests/dt/test_unprobed_devices.sh +++ b/tools/testing/selftests/dt/test_unprobed_devices.sh @@ -34,8 +34,21 @@ nodes_compatible=$( # Check if node is available if [[ -e "${node}"/status ]]; then status=$(tr -d '\000' < "${node}"/status) - [[ "${status}" != "okay" && "${status}" != "ok" ]] && continue + if [[ "${status}" != "okay" && "${status}" != "ok" ]]; then + if [ -n "${disabled_nodes_regex}" ]; then + disabled_nodes_regex="${disabled_nodes_regex}|${node}" + else + disabled_nodes_regex="${node}" + fi + continue + fi fi + + # Ignore this node if one of its ancestors was disabled + if [ -n "${disabled_nodes_regex}" ]; then + echo "${node}" | grep -q -E "${disabled_nodes_regex}" && continue + fi + echo "${node}" | sed -e 's|\/proc\/device-tree||' done | sort ) diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 6418ded40bdd..071e03532cba 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -117,7 +117,7 @@ static int check_execveat_invoked_rc(int fd, const char *path, int flags, } if ((WEXITSTATUS(status) != expected_rc) && (WEXITSTATUS(status) != expected_rc2)) { - ksft_print_msg("child %d exited with %d not %d nor %d\n", + ksft_print_msg("child %d exited with %d neither %d nor %d\n", child, WEXITSTATUS(status), expected_rc, expected_rc2); ksft_test_result_fail("%s\n", test_name); diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c index e044f5fc57fd..70cb0c8b21cf 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -319,8 +319,11 @@ static void test_listmount_ns(void) * Tell our parent how many mounts we have, and then wait for it * to tell us we're done. */ - write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)); - read(parent_ready_pipe[0], &cval, sizeof(cval)); + if (write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts)) != + sizeof(nr_mounts)) + ret = NSID_ERROR; + if (read(parent_ready_pipe[0], &cval, sizeof(cval)) != sizeof(cval)) + ret = NSID_ERROR; exit(NSID_PASS); } diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc index c45094d1e1d2..094419e190c2 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc @@ -6,6 +6,18 @@ original_group=`stat -c "%g" .` original_owner=`stat -c "%u" .` mount_point=`stat -c '%m' .` + +# If stat -c '%m' does not work (e.g. busybox) or failed, try to use the +# current working directory (which should be a tracefs) as the mount point. +if [ ! -d "$mount_point" ]; then + if mount | grep -qw $PWD ; then + mount_point=$PWD + else + # If PWD doesn't work, that is an environmental problem. + exit_unresolved + fi +fi + mount_options=`mount | grep "$mount_point" | sed -e 's/.*(\(.*\)).*/\1/'` # find another owner and group that is not the original @@ -83,32 +95,38 @@ run_tests() { done } -mount -o remount,"$new_options" . +# Run the tests twice as leftovers can cause issues +for loop in 1 2 ; do -run_tests + echo "Running iteration $loop" -mount -o remount,"$mount_options" . + mount -o remount,"$new_options" . -for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do - test "$d" $original_group -done + run_tests + + mount -o remount,"$mount_options" . + + for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do + test "$d" $original_group + done # check instances as well -chgrp $other_group instances + chgrp $other_group instances -instance="$(mktemp -u test-XXXXXX)" + instance="$(mktemp -u test-XXXXXX)" -mkdir instances/$instance + mkdir instances/$instance -cd instances/$instance + cd instances/$instance -run_tests + run_tests -cd ../.. + cd ../.. -rmdir instances/$instance + rmdir instances/$instance -chgrp $original_group instances + chgrp $original_group instances +done exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc new file mode 100644 index 000000000000..a275decdc880 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_uprobe.tc @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - add/remove/test uprobe events +# requires: uprobe_events + +echo 0 > events/enable +echo > dynamic_events + +echo 'cat /proc/$$/maps' | /bin/sh | \ + grep "r-xp .*/bin/.*sh$" | \ + awk '{printf "p:myevent %s:0x%s\n", $6,$3 }' >> uprobe_events + +grep -q myevent uprobe_events +test -d events/uprobes/myevent + +echo 1 > events/uprobes/myevent/enable +echo 'ls' | /bin/sh > /dev/null +echo 0 > events/uprobes/myevent/enable +grep -q myevent trace + +echo "-:myevent" >> uprobe_events +! grep -q myevent uprobe_events + +echo > uprobe_events + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc index 073a748b9380..263f6b798c85 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc @@ -19,7 +19,14 @@ fail() { # mesg FILTER=set_ftrace_filter FUNC1="schedule" -FUNC2="sched_tick" +if grep '^sched_tick\b' available_filter_functions; then + FUNC2="sched_tick" +elif grep '^scheduler_tick\b' available_filter_functions; then + FUNC2="scheduler_tick" +else + exit_unresolved +fi + ALL_FUNCS="#### all functions enabled ####" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc index e21c9c27ece4..77f4c07cdcb8 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event char type argument -# requires: kprobe_events +# requires: kprobe_events available_filter_functions case `uname -m` in x86_64) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index 93217d459556..39001073f7ed 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Kprobe event string type argument -# requires: kprobe_events +# requires: kprobe_events available_filter_functions case `uname -m` in x86_64) diff --git a/tools/testing/selftests/hid/.gitignore b/tools/testing/selftests/hid/.gitignore index 995af0670f69..746c62361f77 100644 --- a/tools/testing/selftests/hid/.gitignore +++ b/tools/testing/selftests/hid/.gitignore @@ -2,4 +2,5 @@ bpftool *.skel.h /tools hid_bpf +hidraw results diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 2b5ea18bde38..72be55ac4bdf 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -32,7 +32,7 @@ CFLAGS += -Wno-unused-command-line-argument endif # Order correspond to 'make run_tests' order -TEST_GEN_PROGS = hid_bpf +TEST_GEN_PROGS = hid_bpf hidraw # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index 75b7b4ef6cfa..86f4d66379f7 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -1,93 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2022 Red Hat */ +/* Copyright (c) 2022-2024 Red Hat */ #include "hid.skel.h" - -#include "../kselftest_harness.h" - +#include "hid_common.h" #include <bpf/bpf.h> -#include <fcntl.h> -#include <fnmatch.h> -#include <dirent.h> -#include <poll.h> -#include <pthread.h> -#include <stdbool.h> -#include <linux/hidraw.h> -#include <linux/uhid.h> - -#define SHOW_UHID_DEBUG 0 - -#define min(a, b) \ - ({ __typeof__(a) _a = (a); \ - __typeof__(b) _b = (b); \ - _a < _b ? _a : _b; }) - -static unsigned char rdesc[] = { - 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ - 0x09, 0x21, /* Usage (Vendor Usage 0x21) */ - 0xa1, 0x01, /* COLLECTION (Application) */ - 0x09, 0x01, /* Usage (Vendor Usage 0x01) */ - 0xa1, 0x00, /* COLLECTION (Physical) */ - 0x85, 0x02, /* REPORT_ID (2) */ - 0x19, 0x01, /* USAGE_MINIMUM (1) */ - 0x29, 0x08, /* USAGE_MAXIMUM (3) */ - 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ - 0x25, 0xff, /* LOGICAL_MAXIMUM (255) */ - 0x95, 0x08, /* REPORT_COUNT (8) */ - 0x75, 0x08, /* REPORT_SIZE (8) */ - 0x81, 0x02, /* INPUT (Data,Var,Abs) */ - 0xc0, /* END_COLLECTION */ - 0x09, 0x01, /* Usage (Vendor Usage 0x01) */ - 0xa1, 0x00, /* COLLECTION (Physical) */ - 0x85, 0x01, /* REPORT_ID (1) */ - 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ - 0x19, 0x01, /* USAGE_MINIMUM (1) */ - 0x29, 0x03, /* USAGE_MAXIMUM (3) */ - 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ - 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ - 0x95, 0x03, /* REPORT_COUNT (3) */ - 0x75, 0x01, /* REPORT_SIZE (1) */ - 0x81, 0x02, /* INPUT (Data,Var,Abs) */ - 0x95, 0x01, /* REPORT_COUNT (1) */ - 0x75, 0x05, /* REPORT_SIZE (5) */ - 0x81, 0x01, /* INPUT (Cnst,Var,Abs) */ - 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ - 0x09, 0x30, /* USAGE (X) */ - 0x09, 0x31, /* USAGE (Y) */ - 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */ - 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */ - 0x75, 0x10, /* REPORT_SIZE (16) */ - 0x95, 0x02, /* REPORT_COUNT (2) */ - 0x81, 0x06, /* INPUT (Data,Var,Rel) */ - - 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ - 0x19, 0x01, /* USAGE_MINIMUM (1) */ - 0x29, 0x03, /* USAGE_MAXIMUM (3) */ - 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ - 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ - 0x95, 0x03, /* REPORT_COUNT (3) */ - 0x75, 0x01, /* REPORT_SIZE (1) */ - 0x91, 0x02, /* Output (Data,Var,Abs) */ - 0x95, 0x01, /* REPORT_COUNT (1) */ - 0x75, 0x05, /* REPORT_SIZE (5) */ - 0x91, 0x01, /* Output (Cnst,Var,Abs) */ - - 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ - 0x19, 0x06, /* USAGE_MINIMUM (6) */ - 0x29, 0x08, /* USAGE_MAXIMUM (8) */ - 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ - 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ - 0x95, 0x03, /* REPORT_COUNT (3) */ - 0x75, 0x01, /* REPORT_SIZE (1) */ - 0xb1, 0x02, /* Feature (Data,Var,Abs) */ - 0x95, 0x01, /* REPORT_COUNT (1) */ - 0x75, 0x05, /* REPORT_SIZE (5) */ - 0x91, 0x01, /* Output (Cnst,Var,Abs) */ - - 0xc0, /* END_COLLECTION */ - 0xc0, /* END_COLLECTION */ -}; - -static __u8 feature_data[] = { 1, 2 }; struct attach_prog_args { int prog_fd; @@ -105,354 +20,6 @@ struct hid_hw_request_syscall_args { __u8 request_type; }; -#define ASSERT_OK(data) ASSERT_FALSE(data) -#define ASSERT_OK_PTR(ptr) ASSERT_NE(NULL, ptr) - -#define UHID_LOG(fmt, ...) do { \ - if (SHOW_UHID_DEBUG) \ - TH_LOG(fmt, ##__VA_ARGS__); \ -} while (0) - -static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER; - -static pthread_mutex_t uhid_output_mtx = PTHREAD_MUTEX_INITIALIZER; -static pthread_cond_t uhid_output_cond = PTHREAD_COND_INITIALIZER; -static unsigned char output_report[10]; - -/* no need to protect uhid_stopped, only one thread accesses it */ -static bool uhid_stopped; - -static int uhid_write(struct __test_metadata *_metadata, int fd, const struct uhid_event *ev) -{ - ssize_t ret; - - ret = write(fd, ev, sizeof(*ev)); - if (ret < 0) { - TH_LOG("Cannot write to uhid: %m"); - return -errno; - } else if (ret != sizeof(*ev)) { - TH_LOG("Wrong size written to uhid: %zd != %zu", - ret, sizeof(ev)); - return -EFAULT; - } else { - return 0; - } -} - -static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) -{ - struct uhid_event ev; - char buf[25]; - - sprintf(buf, "test-uhid-device-%d", rand_nb); - - memset(&ev, 0, sizeof(ev)); - ev.type = UHID_CREATE; - strcpy((char *)ev.u.create.name, buf); - ev.u.create.rd_data = rdesc; - ev.u.create.rd_size = sizeof(rdesc); - ev.u.create.bus = BUS_USB; - ev.u.create.vendor = 0x0001; - ev.u.create.product = 0x0a37; - ev.u.create.version = 0; - ev.u.create.country = 0; - - sprintf(buf, "%d", rand_nb); - strcpy((char *)ev.u.create.phys, buf); - - return uhid_write(_metadata, fd, &ev); -} - -static void uhid_destroy(struct __test_metadata *_metadata, int fd) -{ - struct uhid_event ev; - - memset(&ev, 0, sizeof(ev)); - ev.type = UHID_DESTROY; - - uhid_write(_metadata, fd, &ev); -} - -static int uhid_event(struct __test_metadata *_metadata, int fd) -{ - struct uhid_event ev, answer; - ssize_t ret; - - memset(&ev, 0, sizeof(ev)); - ret = read(fd, &ev, sizeof(ev)); - if (ret == 0) { - UHID_LOG("Read HUP on uhid-cdev"); - return -EFAULT; - } else if (ret < 0) { - UHID_LOG("Cannot read uhid-cdev: %m"); - return -errno; - } else if (ret != sizeof(ev)) { - UHID_LOG("Invalid size read from uhid-dev: %zd != %zu", - ret, sizeof(ev)); - return -EFAULT; - } - - switch (ev.type) { - case UHID_START: - pthread_mutex_lock(&uhid_started_mtx); - pthread_cond_signal(&uhid_started); - pthread_mutex_unlock(&uhid_started_mtx); - - UHID_LOG("UHID_START from uhid-dev"); - break; - case UHID_STOP: - uhid_stopped = true; - - UHID_LOG("UHID_STOP from uhid-dev"); - break; - case UHID_OPEN: - UHID_LOG("UHID_OPEN from uhid-dev"); - break; - case UHID_CLOSE: - UHID_LOG("UHID_CLOSE from uhid-dev"); - break; - case UHID_OUTPUT: - UHID_LOG("UHID_OUTPUT from uhid-dev"); - - pthread_mutex_lock(&uhid_output_mtx); - memcpy(output_report, - ev.u.output.data, - min(ev.u.output.size, sizeof(output_report))); - pthread_cond_signal(&uhid_output_cond); - pthread_mutex_unlock(&uhid_output_mtx); - break; - case UHID_GET_REPORT: - UHID_LOG("UHID_GET_REPORT from uhid-dev"); - - answer.type = UHID_GET_REPORT_REPLY; - answer.u.get_report_reply.id = ev.u.get_report.id; - answer.u.get_report_reply.err = ev.u.get_report.rnum == 1 ? 0 : -EIO; - answer.u.get_report_reply.size = sizeof(feature_data); - memcpy(answer.u.get_report_reply.data, feature_data, sizeof(feature_data)); - - uhid_write(_metadata, fd, &answer); - - break; - case UHID_SET_REPORT: - UHID_LOG("UHID_SET_REPORT from uhid-dev"); - break; - default: - TH_LOG("Invalid event from uhid-dev: %u", ev.type); - } - - return 0; -} - -struct uhid_thread_args { - int fd; - struct __test_metadata *_metadata; -}; -static void *uhid_read_events_thread(void *arg) -{ - struct uhid_thread_args *args = (struct uhid_thread_args *)arg; - struct __test_metadata *_metadata = args->_metadata; - struct pollfd pfds[1]; - int fd = args->fd; - int ret = 0; - - pfds[0].fd = fd; - pfds[0].events = POLLIN; - - uhid_stopped = false; - - while (!uhid_stopped) { - ret = poll(pfds, 1, 100); - if (ret < 0) { - TH_LOG("Cannot poll for fds: %m"); - break; - } - if (pfds[0].revents & POLLIN) { - ret = uhid_event(_metadata, fd); - if (ret) - break; - } - } - - return (void *)(long)ret; -} - -static int uhid_start_listener(struct __test_metadata *_metadata, pthread_t *tid, int uhid_fd) -{ - struct uhid_thread_args args = { - .fd = uhid_fd, - ._metadata = _metadata, - }; - int err; - - pthread_mutex_lock(&uhid_started_mtx); - err = pthread_create(tid, NULL, uhid_read_events_thread, (void *)&args); - ASSERT_EQ(0, err) { - TH_LOG("Could not start the uhid thread: %d", err); - pthread_mutex_unlock(&uhid_started_mtx); - close(uhid_fd); - return -EIO; - } - pthread_cond_wait(&uhid_started, &uhid_started_mtx); - pthread_mutex_unlock(&uhid_started_mtx); - - return 0; -} - -static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, size_t size) -{ - struct uhid_event ev; - - if (size > sizeof(ev.u.input.data)) - return -E2BIG; - - memset(&ev, 0, sizeof(ev)); - ev.type = UHID_INPUT2; - ev.u.input2.size = size; - - memcpy(ev.u.input2.data, buf, size); - - return uhid_write(_metadata, fd, &ev); -} - -static int setup_uhid(struct __test_metadata *_metadata, int rand_nb) -{ - int fd; - const char *path = "/dev/uhid"; - int ret; - - fd = open(path, O_RDWR | O_CLOEXEC); - ASSERT_GE(fd, 0) TH_LOG("open uhid-cdev failed; %d", fd); - - ret = uhid_create(_metadata, fd, rand_nb); - ASSERT_EQ(0, ret) { - TH_LOG("create uhid device failed: %d", ret); - close(fd); - } - - return fd; -} - -static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *dir) -{ - const char *target = "0003:0001:0A37.*"; - char phys[512]; - char uevent[1024]; - char temp[512]; - int fd, nread; - bool found = false; - - if (fnmatch(target, dir->d_name, 0)) - return false; - - /* we found the correct VID/PID, now check for phys */ - sprintf(uevent, "%s/%s/uevent", workdir, dir->d_name); - - fd = open(uevent, O_RDONLY | O_NONBLOCK); - if (fd < 0) - return false; - - sprintf(phys, "PHYS=%d", dev_id); - - nread = read(fd, temp, ARRAY_SIZE(temp)); - if (nread > 0 && (strstr(temp, phys)) != NULL) - found = true; - - close(fd); - - return found; -} - -static int get_hid_id(int dev_id) -{ - const char *workdir = "/sys/devices/virtual/misc/uhid"; - const char *str_id; - DIR *d; - struct dirent *dir; - int found = -1, attempts = 3; - - /* it would be nice to be able to use nftw, but the no_alu32 target doesn't support it */ - - while (found < 0 && attempts > 0) { - attempts--; - d = opendir(workdir); - if (d) { - while ((dir = readdir(d)) != NULL) { - if (!match_sysfs_device(dev_id, workdir, dir)) - continue; - - str_id = dir->d_name + sizeof("0003:0001:0A37."); - found = (int)strtol(str_id, NULL, 16); - - break; - } - closedir(d); - } - if (found < 0) - usleep(100000); - } - - return found; -} - -static int get_hidraw(int dev_id) -{ - const char *workdir = "/sys/devices/virtual/misc/uhid"; - char sysfs[1024]; - DIR *d, *subd; - struct dirent *dir, *subdir; - int i, found = -1; - - /* retry 5 times in case the system is loaded */ - for (i = 5; i > 0; i--) { - usleep(10); - d = opendir(workdir); - - if (!d) - continue; - - while ((dir = readdir(d)) != NULL) { - if (!match_sysfs_device(dev_id, workdir, dir)) - continue; - - sprintf(sysfs, "%s/%s/hidraw", workdir, dir->d_name); - - subd = opendir(sysfs); - if (!subd) - continue; - - while ((subdir = readdir(subd)) != NULL) { - if (fnmatch("hidraw*", subdir->d_name, 0)) - continue; - - found = atoi(subdir->d_name + strlen("hidraw")); - } - - closedir(subd); - - if (found > 0) - break; - } - closedir(d); - } - - return found; -} - -static int open_hidraw(int dev_id) -{ - int hidraw_number; - char hidraw_path[64] = { 0 }; - - hidraw_number = get_hidraw(dev_id); - if (hidraw_number < 0) - return hidraw_number; - - /* open hidraw node to check the other side of the pipe */ - sprintf(hidraw_path, "/dev/hidraw%d", hidraw_number); - return open(hidraw_path, O_RDWR | O_NONBLOCK); -} - FIXTURE(hid_bpf) { int dev_id; int uhid_fd; @@ -1357,12 +924,6 @@ static int libbpf_print_fn(enum libbpf_print_level level, return 0; } -static void __attribute__((constructor)) __constructor_order_last(void) -{ - if (!__constructor_order) - __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; -} - int main(int argc, char **argv) { /* Use libbpf 1.0 API mode */ diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h new file mode 100644 index 000000000000..f151f151a1ed --- /dev/null +++ b/tools/testing/selftests/hid/hid_common.h @@ -0,0 +1,436 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022-2024 Red Hat */ + +#include "../kselftest_harness.h" + +#include <fcntl.h> +#include <fnmatch.h> +#include <dirent.h> +#include <poll.h> +#include <pthread.h> +#include <stdbool.h> +#include <linux/hidraw.h> +#include <linux/uhid.h> + +#define SHOW_UHID_DEBUG 0 + +#define min(a, b) \ + ({ __typeof__(a) _a = (a); \ + __typeof__(b) _b = (b); \ + _a < _b ? _a : _b; }) + +static unsigned char rdesc[] = { + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x09, 0x21, /* Usage (Vendor Usage 0x21) */ + 0xa1, 0x01, /* COLLECTION (Application) */ + 0x09, 0x01, /* Usage (Vendor Usage 0x01) */ + 0xa1, 0x00, /* COLLECTION (Physical) */ + 0x85, 0x02, /* REPORT_ID (2) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x08, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0xff, /* LOGICAL_MAXIMUM (255) */ + 0x95, 0x08, /* REPORT_COUNT (8) */ + 0x75, 0x08, /* REPORT_SIZE (8) */ + 0x81, 0x02, /* INPUT (Data,Var,Abs) */ + 0xc0, /* END_COLLECTION */ + 0x09, 0x01, /* Usage (Vendor Usage 0x01) */ + 0xa1, 0x00, /* COLLECTION (Physical) */ + 0x85, 0x01, /* REPORT_ID (1) */ + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x03, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0x81, 0x02, /* INPUT (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x81, 0x01, /* INPUT (Cnst,Var,Abs) */ + 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */ + 0x09, 0x30, /* USAGE (X) */ + 0x09, 0x31, /* USAGE (Y) */ + 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */ + 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */ + 0x75, 0x10, /* REPORT_SIZE (16) */ + 0x95, 0x02, /* REPORT_COUNT (2) */ + 0x81, 0x06, /* INPUT (Data,Var,Rel) */ + + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x01, /* USAGE_MINIMUM (1) */ + 0x29, 0x03, /* USAGE_MAXIMUM (3) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0x91, 0x02, /* Output (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x91, 0x01, /* Output (Cnst,Var,Abs) */ + + 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ + 0x19, 0x06, /* USAGE_MINIMUM (6) */ + 0x29, 0x08, /* USAGE_MAXIMUM (8) */ + 0x15, 0x00, /* LOGICAL_MINIMUM (0) */ + 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */ + 0x95, 0x03, /* REPORT_COUNT (3) */ + 0x75, 0x01, /* REPORT_SIZE (1) */ + 0xb1, 0x02, /* Feature (Data,Var,Abs) */ + 0x95, 0x01, /* REPORT_COUNT (1) */ + 0x75, 0x05, /* REPORT_SIZE (5) */ + 0x91, 0x01, /* Output (Cnst,Var,Abs) */ + + 0xc0, /* END_COLLECTION */ + 0xc0, /* END_COLLECTION */ +}; + +static __u8 feature_data[] = { 1, 2 }; + +#define ASSERT_OK(data) ASSERT_FALSE(data) +#define ASSERT_OK_PTR(ptr) ASSERT_NE(NULL, ptr) + +#define UHID_LOG(fmt, ...) do { \ + if (SHOW_UHID_DEBUG) \ + TH_LOG(fmt, ##__VA_ARGS__); \ +} while (0) + +static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER; + +static pthread_mutex_t uhid_output_mtx = PTHREAD_MUTEX_INITIALIZER; +static pthread_cond_t uhid_output_cond = PTHREAD_COND_INITIALIZER; +static unsigned char output_report[10]; + +/* no need to protect uhid_stopped, only one thread accesses it */ +static bool uhid_stopped; + +static int uhid_write(struct __test_metadata *_metadata, int fd, const struct uhid_event *ev) +{ + ssize_t ret; + + ret = write(fd, ev, sizeof(*ev)); + if (ret < 0) { + TH_LOG("Cannot write to uhid: %m"); + return -errno; + } else if (ret != sizeof(*ev)) { + TH_LOG("Wrong size written to uhid: %zd != %zu", + ret, sizeof(ev)); + return -EFAULT; + } else { + return 0; + } +} + +static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) +{ + struct uhid_event ev; + char buf[25]; + + sprintf(buf, "test-uhid-device-%d", rand_nb); + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_CREATE; + strcpy((char *)ev.u.create.name, buf); + ev.u.create.rd_data = rdesc; + ev.u.create.rd_size = sizeof(rdesc); + ev.u.create.bus = BUS_USB; + ev.u.create.vendor = 0x0001; + ev.u.create.product = 0x0a37; + ev.u.create.version = 0; + ev.u.create.country = 0; + + sprintf(buf, "%d", rand_nb); + strcpy((char *)ev.u.create.phys, buf); + + return uhid_write(_metadata, fd, &ev); +} + +static void uhid_destroy(struct __test_metadata *_metadata, int fd) +{ + struct uhid_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_DESTROY; + + uhid_write(_metadata, fd, &ev); +} + +static int uhid_event(struct __test_metadata *_metadata, int fd) +{ + struct uhid_event ev, answer; + ssize_t ret; + + memset(&ev, 0, sizeof(ev)); + ret = read(fd, &ev, sizeof(ev)); + if (ret == 0) { + UHID_LOG("Read HUP on uhid-cdev"); + return -EFAULT; + } else if (ret < 0) { + UHID_LOG("Cannot read uhid-cdev: %m"); + return -errno; + } else if (ret != sizeof(ev)) { + UHID_LOG("Invalid size read from uhid-dev: %zd != %zu", + ret, sizeof(ev)); + return -EFAULT; + } + + switch (ev.type) { + case UHID_START: + pthread_mutex_lock(&uhid_started_mtx); + pthread_cond_signal(&uhid_started); + pthread_mutex_unlock(&uhid_started_mtx); + + UHID_LOG("UHID_START from uhid-dev"); + break; + case UHID_STOP: + uhid_stopped = true; + + UHID_LOG("UHID_STOP from uhid-dev"); + break; + case UHID_OPEN: + UHID_LOG("UHID_OPEN from uhid-dev"); + break; + case UHID_CLOSE: + UHID_LOG("UHID_CLOSE from uhid-dev"); + break; + case UHID_OUTPUT: + UHID_LOG("UHID_OUTPUT from uhid-dev"); + + pthread_mutex_lock(&uhid_output_mtx); + memcpy(output_report, + ev.u.output.data, + min(ev.u.output.size, sizeof(output_report))); + pthread_cond_signal(&uhid_output_cond); + pthread_mutex_unlock(&uhid_output_mtx); + break; + case UHID_GET_REPORT: + UHID_LOG("UHID_GET_REPORT from uhid-dev"); + + answer.type = UHID_GET_REPORT_REPLY; + answer.u.get_report_reply.id = ev.u.get_report.id; + answer.u.get_report_reply.err = ev.u.get_report.rnum == 1 ? 0 : -EIO; + answer.u.get_report_reply.size = sizeof(feature_data); + memcpy(answer.u.get_report_reply.data, feature_data, sizeof(feature_data)); + + uhid_write(_metadata, fd, &answer); + + break; + case UHID_SET_REPORT: + UHID_LOG("UHID_SET_REPORT from uhid-dev"); + break; + default: + TH_LOG("Invalid event from uhid-dev: %u", ev.type); + } + + return 0; +} + +struct uhid_thread_args { + int fd; + struct __test_metadata *_metadata; +}; +static void *uhid_read_events_thread(void *arg) +{ + struct uhid_thread_args *args = (struct uhid_thread_args *)arg; + struct __test_metadata *_metadata = args->_metadata; + struct pollfd pfds[1]; + int fd = args->fd; + int ret = 0; + + pfds[0].fd = fd; + pfds[0].events = POLLIN; + + uhid_stopped = false; + + while (!uhid_stopped) { + ret = poll(pfds, 1, 100); + if (ret < 0) { + TH_LOG("Cannot poll for fds: %m"); + break; + } + if (pfds[0].revents & POLLIN) { + ret = uhid_event(_metadata, fd); + if (ret) + break; + } + } + + return (void *)(long)ret; +} + +static int uhid_start_listener(struct __test_metadata *_metadata, pthread_t *tid, int uhid_fd) +{ + struct uhid_thread_args args = { + .fd = uhid_fd, + ._metadata = _metadata, + }; + int err; + + pthread_mutex_lock(&uhid_started_mtx); + err = pthread_create(tid, NULL, uhid_read_events_thread, (void *)&args); + ASSERT_EQ(0, err) { + TH_LOG("Could not start the uhid thread: %d", err); + pthread_mutex_unlock(&uhid_started_mtx); + close(uhid_fd); + return -EIO; + } + pthread_cond_wait(&uhid_started, &uhid_started_mtx); + pthread_mutex_unlock(&uhid_started_mtx); + + return 0; +} + +static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, size_t size) +{ + struct uhid_event ev; + + if (size > sizeof(ev.u.input.data)) + return -E2BIG; + + memset(&ev, 0, sizeof(ev)); + ev.type = UHID_INPUT2; + ev.u.input2.size = size; + + memcpy(ev.u.input2.data, buf, size); + + return uhid_write(_metadata, fd, &ev); +} + +static int setup_uhid(struct __test_metadata *_metadata, int rand_nb) +{ + int fd; + const char *path = "/dev/uhid"; + int ret; + + fd = open(path, O_RDWR | O_CLOEXEC); + ASSERT_GE(fd, 0) TH_LOG("open uhid-cdev failed; %d", fd); + + ret = uhid_create(_metadata, fd, rand_nb); + ASSERT_EQ(0, ret) { + TH_LOG("create uhid device failed: %d", ret); + close(fd); + } + + return fd; +} + +static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *dir) +{ + const char *target = "0003:0001:0A37.*"; + char phys[512]; + char uevent[1024]; + char temp[512]; + int fd, nread; + bool found = false; + + if (fnmatch(target, dir->d_name, 0)) + return false; + + /* we found the correct VID/PID, now check for phys */ + sprintf(uevent, "%s/%s/uevent", workdir, dir->d_name); + + fd = open(uevent, O_RDONLY | O_NONBLOCK); + if (fd < 0) + return false; + + sprintf(phys, "PHYS=%d", dev_id); + + nread = read(fd, temp, ARRAY_SIZE(temp)); + if (nread > 0 && (strstr(temp, phys)) != NULL) + found = true; + + close(fd); + + return found; +} + +static int get_hid_id(int dev_id) +{ + const char *workdir = "/sys/devices/virtual/misc/uhid"; + const char *str_id; + DIR *d; + struct dirent *dir; + int found = -1, attempts = 3; + + /* it would be nice to be able to use nftw, but the no_alu32 target doesn't support it */ + + while (found < 0 && attempts > 0) { + attempts--; + d = opendir(workdir); + if (d) { + while ((dir = readdir(d)) != NULL) { + if (!match_sysfs_device(dev_id, workdir, dir)) + continue; + + str_id = dir->d_name + sizeof("0003:0001:0A37."); + found = (int)strtol(str_id, NULL, 16); + + break; + } + closedir(d); + } + if (found < 0) + usleep(100000); + } + + return found; +} + +static int get_hidraw(int dev_id) +{ + const char *workdir = "/sys/devices/virtual/misc/uhid"; + char sysfs[1024]; + DIR *d, *subd; + struct dirent *dir, *subdir; + int i, found = -1; + + /* retry 5 times in case the system is loaded */ + for (i = 5; i > 0; i--) { + usleep(10); + d = opendir(workdir); + + if (!d) + continue; + + while ((dir = readdir(d)) != NULL) { + if (!match_sysfs_device(dev_id, workdir, dir)) + continue; + + sprintf(sysfs, "%s/%s/hidraw", workdir, dir->d_name); + + subd = opendir(sysfs); + if (!subd) + continue; + + while ((subdir = readdir(subd)) != NULL) { + if (fnmatch("hidraw*", subdir->d_name, 0)) + continue; + + found = atoi(subdir->d_name + strlen("hidraw")); + } + + closedir(subd); + + if (found > 0) + break; + } + closedir(d); + } + + return found; +} + +static int open_hidraw(int dev_id) +{ + int hidraw_number; + char hidraw_path[64] = { 0 }; + + hidraw_number = get_hidraw(dev_id); + if (hidraw_number < 0) + return hidraw_number; + + /* open hidraw node to check the other side of the pipe */ + sprintf(hidraw_path, "/dev/hidraw%d", hidraw_number); + return open(hidraw_path, O_RDWR | O_NONBLOCK); +} diff --git a/tools/testing/selftests/hid/hidraw.c b/tools/testing/selftests/hid/hidraw.c new file mode 100644 index 000000000000..f8b4f7ff292c --- /dev/null +++ b/tools/testing/selftests/hid/hidraw.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022-2024 Red Hat */ + +#include "hid_common.h" + +/* for older kernels */ +#ifndef HIDIOCREVOKE +#define HIDIOCREVOKE _IOW('H', 0x0D, int) /* Revoke device access */ +#endif /* HIDIOCREVOKE */ + +FIXTURE(hidraw) { + int dev_id; + int uhid_fd; + int hidraw_fd; + int hid_id; + pthread_t tid; +}; +static void close_hidraw(FIXTURE_DATA(hidraw) * self) +{ + if (self->hidraw_fd) + close(self->hidraw_fd); + self->hidraw_fd = 0; +} + +FIXTURE_TEARDOWN(hidraw) { + void *uhid_err; + + uhid_destroy(_metadata, self->uhid_fd); + + close_hidraw(self); + pthread_join(self->tid, &uhid_err); +} +#define TEARDOWN_LOG(fmt, ...) do { \ + TH_LOG(fmt, ##__VA_ARGS__); \ + hidraw_teardown(_metadata, self, variant); \ +} while (0) + +FIXTURE_SETUP(hidraw) +{ + time_t t; + int err; + + /* initialize random number generator */ + srand((unsigned int)time(&t)); + + self->dev_id = rand() % 1024; + + self->uhid_fd = setup_uhid(_metadata, self->dev_id); + + /* locate the uev, self, variant);ent file of the created device */ + self->hid_id = get_hid_id(self->dev_id); + ASSERT_GT(self->hid_id, 0) + TEARDOWN_LOG("Could not locate uhid device id: %d", self->hid_id); + + err = uhid_start_listener(_metadata, &self->tid, self->uhid_fd); + ASSERT_EQ(0, err) TEARDOWN_LOG("could not start udev listener: %d", err); + + self->hidraw_fd = open_hidraw(self->dev_id); + ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); +} + +/* + * A simple test to see if the fixture is working fine. + * If this fails, none of the other tests will pass. + */ +TEST_F(hidraw, test_create_uhid) +{ +} + +/* + * Inject one event in the uhid device, + * check that we get the same data through hidraw + */ +TEST_F(hidraw, raw_event) +{ + __u8 buf[10] = {0}; + int err; + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 42); +} + +/* + * After initial opening/checks of hidraw, revoke the hidraw + * node and check that we can not read any more data. + */ +TEST_F(hidraw, raw_event_revoked) +{ + __u8 buf[10] = {0}; + int err; + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 42); + + /* call the revoke ioctl */ + err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL); + ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd"); + + /* inject one other event */ + buf[0] = 1; + buf[1] = 43; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + /* read the data from hidraw */ + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, -1) TH_LOG("read_hidraw"); + ASSERT_EQ(errno, ENODEV) TH_LOG("unexpected error code while reading the hidraw node: %d", + errno); +} + +/* + * Revoke the hidraw node and check that we can not do any ioctl. + */ +TEST_F(hidraw, ioctl_revoked) +{ + int err, desc_size = 0; + + /* call the revoke ioctl */ + err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL); + ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd"); + + /* do an ioctl */ + err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size); + ASSERT_EQ(err, -1) TH_LOG("ioctl_hidraw"); + ASSERT_EQ(errno, ENODEV) TH_LOG("unexpected error code while doing an ioctl: %d", + errno); +} + +/* + * Setup polling of the fd, and check that revoke works properly. + */ +TEST_F(hidraw, poll_revoked) +{ + struct pollfd pfds[1]; + __u8 buf[10] = {0}; + int err, ready; + + /* setup polling */ + pfds[0].fd = self->hidraw_fd; + pfds[0].events = POLLIN; + + /* inject one event */ + buf[0] = 1; + buf[1] = 42; + uhid_send_event(_metadata, self->uhid_fd, buf, 6); + + while (true) { + ready = poll(pfds, 1, 5000); + ASSERT_EQ(ready, 1) TH_LOG("poll return value"); + + if (pfds[0].revents & POLLIN) { + memset(buf, 0, sizeof(buf)); + err = read(self->hidraw_fd, buf, sizeof(buf)); + ASSERT_EQ(err, 6) TH_LOG("read_hidraw"); + ASSERT_EQ(buf[0], 1); + ASSERT_EQ(buf[1], 42); + + /* call the revoke ioctl */ + err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL); + ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd"); + } else { + break; + } + } + + ASSERT_TRUE(pfds[0].revents & POLLHUP); +} + +/* + * After initial opening/checks of hidraw, revoke the hidraw + * node and check that we can not read any more data. + */ +TEST_F(hidraw, write_event_revoked) +{ + struct timespec time_to_wait; + __u8 buf[10] = {0}; + int err; + + /* inject one event from hidraw */ + buf[0] = 1; /* report ID */ + buf[1] = 2; + buf[2] = 42; + + pthread_mutex_lock(&uhid_output_mtx); + + memset(output_report, 0, sizeof(output_report)); + clock_gettime(CLOCK_REALTIME, &time_to_wait); + time_to_wait.tv_sec += 2; + + err = write(self->hidraw_fd, buf, 3); + ASSERT_EQ(err, 3) TH_LOG("unexpected error while writing to hidraw node: %d", err); + + err = pthread_cond_timedwait(&uhid_output_cond, &uhid_output_mtx, &time_to_wait); + ASSERT_OK(err) TH_LOG("error while calling waiting for the condition"); + + ASSERT_EQ(output_report[0], 1); + ASSERT_EQ(output_report[1], 2); + ASSERT_EQ(output_report[2], 42); + + /* call the revoke ioctl */ + err = ioctl(self->hidraw_fd, HIDIOCREVOKE, NULL); + ASSERT_OK(err) TH_LOG("couldn't revoke the hidraw fd"); + + /* inject one other event */ + buf[0] = 1; + buf[1] = 43; + err = write(self->hidraw_fd, buf, 3); + ASSERT_LT(err, 0) TH_LOG("unexpected success while writing to hidraw node: %d", err); + ASSERT_EQ(errno, ENODEV) TH_LOG("unexpected error code while writing to hidraw node: %d", + errno); + + pthread_mutex_unlock(&uhid_output_mtx); +} + +int main(int argc, char **argv) +{ + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 6343f4053bd4..4927b9add5ad 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -825,7 +825,7 @@ TEST_F(iommufd_ioas, copy_area) { struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .dst_ioas_id = self->ioas_id, .src_ioas_id = self->ioas_id, .length = PAGE_SIZE, @@ -1318,7 +1318,7 @@ TEST_F(iommufd_ioas, copy_sweep) { struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .src_ioas_id = self->ioas_id, .dst_iova = MOCK_APERTURE_START, .length = MOCK_PAGE_SIZE, @@ -1608,7 +1608,7 @@ TEST_F(iommufd_mock_domain, user_copy) }; struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .dst_ioas_id = self->ioas_id, .dst_iova = MOCK_APERTURE_START, .length = BUFFER_SIZE, diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index b8967b6e29d5..29fedf609611 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -61,6 +61,7 @@ #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif +#if defined(__i386__) || defined(__x86_64__) /* arch */ /* * gcc cpuid.h provides __cpuid_count() since v4.4. * Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0. @@ -75,6 +76,7 @@ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \ : "0" (level), "2" (count)) #endif +#endif /* end arch */ /* define kselftest exit codes */ #define KSFT_PASS 0 @@ -371,15 +373,7 @@ static inline __noreturn __printf(1, 2) void ksft_exit_fail_msg(const char *msg, static inline __noreturn void ksft_exit_fail_perror(const char *msg) { -#ifndef NOLIBC ksft_exit_fail_msg("%s: %s (%d)\n", msg, strerror(errno), errno); -#else - /* - * nolibc doesn't provide strerror() and it seems - * inappropriate to add one, just print the errno. - */ - ksft_exit_fail_msg("%s: %d)\n", msg, errno); -#endif } static inline __noreturn void ksft_exit_xfail(void) diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 74954f6a8f94..2c3c58e65a41 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -111,8 +111,11 @@ run_one() stdbuf="/usr/bin/stdbuf --output=L " fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" - cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" - if [ ! -x "$TEST" ]; then + if [ -x "$TEST" ]; then + cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" + elif [ -x "./ksft_runner.sh" ]; then + cmd="$stdbuf ./ksft_runner.sh ./$BASENAME_TEST" + else echo "# Warning: file $TEST is not executable" if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 40723a6a083f..a5a72415e37b 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -488,12 +488,6 @@ * Use once to append a main() to the test file. */ #define TEST_HARNESS_MAIN \ - static void __attribute__((constructor)) \ - __constructor_order_last(void) \ - { \ - if (!__constructor_order) \ - __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \ - } \ int main(int argc, char **argv) { \ return test_harness_run(argc, argv); \ } @@ -824,7 +818,7 @@ item->prev = item; \ return; \ } \ - if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \ + if (__constructor_order_forward) { \ item->next = NULL; \ item->prev = head->prev; \ item->prev->next = item; \ @@ -888,10 +882,7 @@ struct __test_xfail { } static struct __fixture_metadata *__fixture_list = &_fixture_global; -static int __constructor_order; - -#define _CONSTRUCTOR_ORDER_FORWARD 1 -#define _CONSTRUCTOR_ORDER_BACKWARD -1 +static bool __constructor_order_forward; static inline void __register_fixture(struct __fixture_metadata *f) { @@ -942,7 +933,7 @@ static inline bool __test_passed(struct __test_metadata *metadata) * list so tests are run in source declaration order. * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html * However, it seems not all toolchains do this correctly, so use - * __constructor_order to detect which direction is called first + * __constructor_order_foward to detect which direction is called first * and adjust list building logic to get things running in the right * direction. */ @@ -1337,8 +1328,7 @@ static int test_harness_run(int argc, char **argv) static void __attribute__((constructor)) __constructor_order_first(void) { - if (!__constructor_order) - __constructor_order = _CONSTRUCTOR_ORDER_FORWARD; + __constructor_order_forward = true; } #endif /* __KSELFTEST_HARNESS_H */ diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 48d32c5aa3eb..0c4b254ab56b 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -152,6 +152,7 @@ TEST_GEN_PROGS_x86_64 += pre_fault_memory_test TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs +TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/hypercalls TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test @@ -163,6 +164,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access +TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += arch_timer TEST_GEN_PROGS_aarch64 += demand_paging_test diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c new file mode 100644 index 000000000000..a36a7e2db434 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c @@ -0,0 +1,1062 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality. + * + * The test validates some edge cases related to the arch-timer: + * - timers above the max TVAL value. + * - timers in the past + * - moving counters ahead and behind pending timers. + * - reprograming timers. + * - timers fired multiple times. + * - masking/unmasking using the timer control mask. + * + * Copyright (c) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +#include <pthread.h> +#include <sys/sysinfo.h> + +#include "arch_timer.h" +#include "gic.h" +#include "vgic.h" + +static const uint64_t CVAL_MAX = ~0ULL; +/* tval is a signed 32-bit int. */ +static const int32_t TVAL_MAX = INT32_MAX; +static const int32_t TVAL_MIN = INT32_MIN; + +/* After how much time we say there is no IRQ. */ +static const uint32_t TIMEOUT_NO_IRQ_US = 50000; + +/* A nice counter value to use as the starting one for most tests. */ +static const uint64_t DEF_CNT = (CVAL_MAX / 2); + +/* Number of runs. */ +static const uint32_t NR_TEST_ITERS_DEF = 5; + +/* Default wait test time in ms. */ +static const uint32_t WAIT_TEST_MS = 10; + +/* Default "long" wait test time in ms. */ +static const uint32_t LONG_WAIT_TEST_MS = 100; + +/* Shared with IRQ handler. */ +struct test_vcpu_shared_data { + atomic_t handled; + atomic_t spurious; +} shared_data; + +struct test_args { + /* Virtual or physical timer and counter tests. */ + enum arch_timer timer; + /* Delay used for most timer tests. */ + uint64_t wait_ms; + /* Delay used in the test_long_timer_delays test. */ + uint64_t long_wait_ms; + /* Number of iterations. */ + int iterations; + /* Whether to test the physical timer. */ + bool test_physical; + /* Whether to test the virtual timer. */ + bool test_virtual; +}; + +struct test_args test_args = { + .wait_ms = WAIT_TEST_MS, + .long_wait_ms = LONG_WAIT_TEST_MS, + .iterations = NR_TEST_ITERS_DEF, + .test_physical = true, + .test_virtual = true, +}; + +static int vtimer_irq, ptimer_irq; + +enum sync_cmd { + SET_COUNTER_VALUE, + USERSPACE_USLEEP, + USERSPACE_SCHED_YIELD, + USERSPACE_MIGRATE_SELF, + NO_USERSPACE_CMD, +}; + +typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); + +static void sleep_poll(enum arch_timer timer, uint64_t usec); +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); +static void sleep_migrate(enum arch_timer timer, uint64_t usec); + +sleep_method_t sleep_method[] = { + sleep_poll, + sleep_sched_poll, + sleep_migrate, + sleep_in_userspace, +}; + +typedef void (*irq_wait_method_t)(void); + +static void wait_for_non_spurious_irq(void); +static void wait_poll_for_irq(void); +static void wait_sched_poll_for_irq(void); +static void wait_migrate_poll_for_irq(void); + +irq_wait_method_t irq_wait_method[] = { + wait_for_non_spurious_irq, + wait_poll_for_irq, + wait_sched_poll_for_irq, + wait_migrate_poll_for_irq, +}; + +enum timer_view { + TIMER_CVAL, + TIMER_TVAL, +}; + +static void assert_irqs_handled(uint32_t n) +{ + int h = atomic_read(&shared_data.handled); + + __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); +} + +static void userspace_cmd(uint64_t cmd) +{ + GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); +} + +static void userspace_migrate_vcpu(void) +{ + userspace_cmd(USERSPACE_MIGRATE_SELF); +} + +static void userspace_sleep(uint64_t usecs) +{ + GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); +} + +static void set_counter(enum arch_timer timer, uint64_t counter) +{ + GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + enum arch_timer timer; + uint64_t cnt, cval; + uint32_t ctl; + bool timer_condition, istatus; + + if (intid == IAR_SPURIOUS) { + atomic_inc(&shared_data.spurious); + goto out; + } + + if (intid == ptimer_irq) + timer = PHYSICAL; + else if (intid == vtimer_irq) + timer = VIRTUAL; + else + goto out; + + ctl = timer_get_ctl(timer); + cval = timer_get_cval(timer); + cnt = timer_get_cntct(timer); + timer_condition = cnt >= cval; + istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE); + GUEST_ASSERT_EQ(timer_condition, istatus); + + /* Disable and mask the timer. */ + timer_set_ctl(timer, CTL_IMASK); + + atomic_inc(&shared_data.handled); + +out: + gic_set_eoi(intid); +} + +static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_cval(timer, cval_cycles); + timer_set_ctl(timer, ctl); +} + +static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_ctl(timer, ctl); + timer_set_tval(timer, tval_cycles); +} + +static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, + enum timer_view tv) +{ + switch (tv) { + case TIMER_CVAL: + set_cval_irq(timer, xval, ctl); + break; + case TIMER_TVAL: + set_tval_irq(timer, xval, ctl); + break; + default: + GUEST_FAIL("Could not get timer %d", timer); + } +} + +/* + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void wait_for_non_spurious_irq(void) +{ + int h; + + local_irq_disable(); + + for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) { + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); + } +} + +/* + * Wait for an non-spurious IRQ by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD). + * + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd) +{ + int h; + + local_irq_disable(); + + h = atomic_read(&shared_data.handled); + + local_irq_enable(); + while (h == atomic_read(&shared_data.handled)) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } + local_irq_disable(); +} + +static void wait_poll_for_irq(void) +{ + poll_for_non_spurious_irq(NO_USERSPACE_CMD); +} + +static void wait_sched_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD); +} + +static void wait_migrate_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF); +} + +/* + * Sleep for usec microseconds by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE). + */ +static void guest_poll(enum arch_timer test_timer, uint64_t usec, + enum sync_cmd usp_cmd) +{ + uint64_t cycles = usec_to_cycles(usec); + /* Whichever timer we are testing with, sleep with the other. */ + enum arch_timer sleep_timer = 1 - test_timer; + uint64_t start = timer_get_cntct(sleep_timer); + + while ((timer_get_cntct(sleep_timer) - start) < cycles) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } +} + +static void sleep_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, NO_USERSPACE_CMD); +} + +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_SCHED_YIELD); +} + +static void sleep_migrate(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); +} + +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) +{ + userspace_sleep(usec); +} + +/* + * Reset the timer state to some nice values like the counter not being close + * to the edge, and the control register masked and disabled. + */ +static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +{ + set_counter(timer, cnt); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timer_xval(enum arch_timer timer, uint64_t xval, + enum timer_view tv, irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + local_irq_disable(); + + if (reset_state) + reset_timer_state(timer, reset_cnt); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * The test_timer_* functions will program the timer, wait for it, and assert + * the firing of the correct IRQ. + * + * These functions don't have a timeout and return as soon as they receive an + * IRQ. They can hang (forever), so we rely on having a timeout mechanism in + * the "runner", like: tools/testing/selftests/kselftest/runner.sh. + */ + +static void test_timer_cval(enum arch_timer timer, uint64_t cval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); +} + +static void test_timer_tval(enum arch_timer timer, int32_t tval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + reset_cnt); +} + +static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, + uint64_t usec, enum timer_view timer_view, + sleep_method_t guest_sleep) +{ + local_irq_disable(); + + set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view); + guest_sleep(timer, usec); + + local_irq_enable(); + isb(); + + /* Assume success (no IRQ) after waiting usec microseconds */ + assert_irqs_handled(0); +} + +static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, + uint64_t usec, sleep_method_t wm) +{ + test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); +} + +static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, + sleep_method_t wm) +{ + /* tval will be cast to an int32_t in test_xval_check_no_irq */ + test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); +} + +/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ +static void test_timer_control_mask_then_unmask(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Unmask the timer, and then get an IRQ. */ + local_irq_disable(); + timer_set_ctl(timer, CTL_ENABLE); + /* This method re-enables IRQs to handle the one we're looking for. */ + wait_for_non_spurious_irq(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Check that timer control masks actually mask a timer being fired. */ +static void test_timer_control_masks(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + + /* Local IRQs are not masked at this point. */ + + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + sleep_poll(timer, TIMEOUT_NO_IRQ_US); + + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_fire_a_timer_multiple_times(enum arch_timer timer, + irq_wait_method_t wm, int num) +{ + int i; + + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_tval_irq(timer, 0, CTL_ENABLE); + + for (i = 1; i <= num; i++) { + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ handler masked and disabled the timer. + * Enable and unmmask it again. + */ + timer_set_ctl(timer, CTL_ENABLE); + + assert_irqs_handled(i); + } + + local_irq_enable(); +} + +static void test_timers_fired_multiple_times(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) + test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10); +} + +/* + * Set a timer for tval=delta_1_ms then reprogram it to + * tval=delta_2_ms. Check that we get the timer fired. There is no + * timeout for the wait: we use the wfi instruction. + */ +static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, + int32_t delta_1_ms, int32_t delta_2_ms) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + /* Program the timer to DEF_CNT + delta_1_ms. */ + set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE); + + /* Reprogram the timer to DEF_CNT + delta_2_ms. */ + timer_set_tval(timer, msec_to_cycles(delta_2_ms)); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */ + GUEST_ASSERT(timer_get_cntct(timer) >= + DEF_CNT + msec_to_cycles(delta_2_ms)); + + local_irq_enable(); + assert_irqs_handled(1); +}; + +static void test_reprogram_timers(enum arch_timer timer) +{ + int i; + uint64_t base_wait = test_args.wait_ms; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* + * Ensure reprogramming works whether going from a + * longer time to a shorter or vice versa. + */ + test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait, + base_wait); + test_reprogramming_timer(timer, irq_wait_method[i], base_wait, + 2 * base_wait); + } +} + +static void test_basic_functionality(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +/* + * This test checks basic timer behavior without actually firing timers, things + * like: the relationship between cval and tval, tval down-counting. + */ +static void timers_sanity_checks(enum arch_timer timer, bool use_sched) +{ + reset_timer_state(timer, DEF_CNT); + + local_irq_disable(); + + /* cval in the past */ + timer_set_cval(timer, + timer_get_cntct(timer) - + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + /* tval in the past */ + timer_set_tval(timer, -1); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer)); + + /* tval larger than TVAL_MAX. This requires programming with + * timer_set_cval instead so the value is expressible + */ + timer_set_cval(timer, + timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= 0); + + /* + * tval larger than 2 * TVAL_MAX. + * Twice the TVAL_MAX completely loops around the TVAL. + */ + timer_set_cval(timer, + timer_get_cntct(timer) + 2ULL * TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= + msec_to_cycles(test_args.wait_ms)); + + /* negative tval that rollovers from 0. */ + set_counter(timer, msec_to_cycles(1)); + timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms))); + + /* tval should keep down-counting from 0 to -1. */ + timer_set_tval(timer, 0); + sleep_poll(timer, 1); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + local_irq_enable(); + + /* Mask and disable any pending timer. */ + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timers_sanity_checks(enum arch_timer timer) +{ + timers_sanity_checks(timer, false); + /* Check how KVM saves/restores these edge-case values. */ + timers_sanity_checks(timer, true); +} + +static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_cval_irq(timer, + (uint64_t) TVAL_MAX + + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); + + set_counter(timer, TVAL_MAX); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ +static void test_timers_above_tval_max(enum arch_timer timer) +{ + uint64_t cval; + int i; + + /* + * Test that the system is not implementing cval in terms of + * tval. If that was the case, setting a cval to "cval = now + * + TVAL_MAX + wait_ms" would wrap to "cval = now + + * wait_ms", and the timer would fire immediately. Test that it + * doesn't. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + reset_timer_state(timer, DEF_CNT); + cval = timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms); + test_cval_no_irq(timer, cval, + msecs_to_usecs(test_args.wait_ms) + + TIMEOUT_NO_IRQ_US, sleep_method[i]); + } + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* Get the IRQ by moving the counter forward. */ + test_set_cnt_after_tval_max(timer, irq_wait_method[i]); + } +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, + uint64_t xval, uint64_t cnt_2, + irq_wait_method_t wm, enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t xval, + uint64_t cnt_2, + sleep_method_t guest_sleep, + enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + guest_sleep(timer, TIMEOUT_NO_IRQ_US); + + local_irq_enable(); + isb(); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, + int32_t tval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); +} + +static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, + uint64_t cval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); +} + +static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, + uint64_t cnt_1, int32_t tval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, + TIMER_TVAL); +} + +static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t cval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, + TIMER_CVAL); +} + +/* Set a timer and then move the counter ahead of it. */ +static void test_move_counters_ahead_of_timers(enum arch_timer timer) +{ + int i; + int32_t tval; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm); + test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm); + + /* Move counter ahead of negative tval. */ + test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); + test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); + tval = TVAL_MAX; + test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, + wm); + } + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); + } +} + +/* + * Program a timer, mask it, and then change the tval or counter to cancel it. + * Unmask it and check that nothing fires. + */ +static void test_move_counters_behind_timers(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0, + sm); + test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm); + } +} + +static void test_timers_in_the_past(enum arch_timer timer) +{ + int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval; + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + /* set a timer wait_ms the past. */ + cval = DEF_CNT - msec_to_cycles(test_args.wait_ms); + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + + /* Set a timer to counter=0 (in the past) */ + test_timer_cval(timer, 0, wm, true, DEF_CNT); + + /* Set a time for tval=0 (now) */ + test_timer_tval(timer, 0, wm, true, DEF_CNT); + + /* Set a timer to as far in the past as possible */ + test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT); + } + + /* + * Set the counter to wait_ms, and a tval to -wait_ms. There should be no + * IRQ as that tval means cval=CVAL_MAX-wait_ms. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + set_counter(timer, msec_to_cycles(test_args.wait_ms)); + test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm); + } +} + +static void test_long_timer_delays(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +static void guest_run_iteration(enum arch_timer timer) +{ + test_basic_functionality(timer); + test_timers_sanity_checks(timer); + + test_timers_above_tval_max(timer); + test_timers_in_the_past(timer); + + test_move_counters_ahead_of_timers(timer); + test_move_counters_behind_timers(timer); + test_reprogram_timers(timer); + + test_timers_fired_multiple_times(timer); + + test_timer_control_mask_then_unmask(timer); + test_timer_control_masks(timer); +} + +static void guest_code(enum arch_timer timer) +{ + int i; + + local_irq_disable(); + + gic_init(GIC_V3, 1); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + for (i = 0; i < test_args.iterations; i++) { + GUEST_SYNC(i); + guest_run_iteration(timer); + } + + test_long_timer_delays(timer); + GUEST_DONE(); +} + +static uint32_t next_pcpu(void) +{ + uint32_t max = get_nprocs(); + uint32_t cur = sched_getcpu(); + uint32_t next = cur; + cpu_set_t cpuset; + + TEST_ASSERT(max > 1, "Need at least two physical cpus"); + + sched_getaffinity(0, sizeof(cpuset), &cpuset); + + do { + next = (next + 1) % CPU_SETSIZE; + } while (!CPU_ISSET(next, &cpuset)); + + return next; +} + +static void migrate_self(uint32_t new_pcpu) +{ + int ret; + cpu_set_t cpuset; + pthread_t thread; + + thread = pthread_self(); + + CPU_ZERO(&cpuset); + CPU_SET(new_pcpu, &cpuset); + + pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); + + ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); + + TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", + new_pcpu, ret); +} + +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, + enum arch_timer timer) +{ + if (timer == PHYSICAL) + vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt); + else + vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + enum sync_cmd cmd = uc->args[1]; + uint64_t val = uc->args[2]; + enum arch_timer timer = uc->args[3]; + + switch (cmd) { + case SET_COUNTER_VALUE: + kvm_set_cntxct(vcpu, val, timer); + break; + case USERSPACE_USLEEP: + usleep(val); + break; + case USERSPACE_SCHED_YIELD: + sched_yield(); + break; + case USERSPACE_MIGRATE_SELF: + migrate_self(next_pcpu()); + break; + default: + break; + } +} + +static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + /* Start on CPU 0 */ + migrate_self(0); + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + handle_sync(vcpu, &uc); + break; + case UCALL_DONE: + goto out; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto out; + default: + TEST_FAIL("Unexpected guest exit\n"); + } + } + + out: + return; +} + +static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, + enum arch_timer timer) +{ + *vm = vm_create_with_one_vcpu(vcpu, guest_code); + TEST_ASSERT(*vm, "Failed to create the test VM\n"); + + vm_init_descriptor_tables(*vm); + vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT, + guest_irq_handler); + + vcpu_init_descriptor_tables(*vcpu); + vcpu_args_set(*vcpu, 1, timer); + + test_init_timer_irq(*vm, *vcpu); + vgic_v3_setup(*vm, 1, 64); + sync_global_to_guest(*vm, test_args); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" + , name); + pr_info("\t-i: Number of iterations (default: %u)\n", + NR_TEST_ITERS_DEF); + pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); + pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", + LONG_WAIT_TEST_MS); + pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + WAIT_TEST_MS); + pr_info("\t-p: Test physical timer (default: true)\n"); + pr_info("\t-v: Test virtual timer (default: true)\n"); + pr_info("\t-h: Print this help message\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) { + switch (opt) { + case 'b': + test_args.test_physical = true; + test_args.test_virtual = true; + break; + case 'i': + test_args.iterations = + atoi_positive("Number of iterations", optarg); + break; + case 'l': + test_args.long_wait_ms = + atoi_positive("Long wait time", optarg); + break; + case 'p': + test_args.test_physical = true; + test_args.test_virtual = false; + break; + case 'v': + test_args.test_virtual = true; + test_args.test_physical = false; + break; + case 'w': + test_args.wait_ms = atoi_positive("Wait time", optarg); + break; + case 'h': + default: + goto err; + } + } + + return true; + + err: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + if (test_args.test_virtual) { + test_vm_create(&vm, &vcpu, VIRTUAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + if (test_args.test_physical) { + test_vm_create(&vm, &vcpu, PHYSICAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 4abebde78187..d43fb3f49050 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -40,6 +40,18 @@ static struct feature_id_reg feat_id_regs[] = { ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ 8, 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, + 1 + }, + { + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, + 1 } }; @@ -468,6 +480,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ @@ -475,6 +488,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c new file mode 100644 index 000000000000..943d65fc6b0b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that, on a GICv3 system, not configuring GICv3 correctly +// results in all of the sysregs generating an UNDEF exception. + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +static volatile bool handled; + +#define __check_sr_read(r) \ + ({ \ + uint64_t val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +#define __check_sr_write(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, SYS_ ## r); \ + isb(); \ + } while(0) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while(0) + +#define check_sr_write(r) \ + do { \ + __check_sr_write(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while(0) + +#define check_sr_rw(r) \ + do { \ + check_sr_read(r); \ + check_sr_write(r); \ + } while(0) + +static void guest_code(void) +{ + uint64_t val; + + /* + * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), + read_sysreg(id_aa64pfr0_el1)) == 0, + "GICv3 wrongly advertised"); + + /* + * Access all GICv3 registers, and fail if we don't get an UNDEF. + * Note that we happily access all the APxRn registers without + * checking their existance, as all we want to see is a failure. + */ + check_sr_rw(ICC_PMR_EL1); + check_sr_read(ICC_IAR0_EL1); + check_sr_write(ICC_EOIR0_EL1); + check_sr_rw(ICC_HPPIR0_EL1); + check_sr_rw(ICC_BPR0_EL1); + check_sr_rw(ICC_AP0R0_EL1); + check_sr_rw(ICC_AP0R1_EL1); + check_sr_rw(ICC_AP0R2_EL1); + check_sr_rw(ICC_AP0R3_EL1); + check_sr_rw(ICC_AP1R0_EL1); + check_sr_rw(ICC_AP1R1_EL1); + check_sr_rw(ICC_AP1R2_EL1); + check_sr_rw(ICC_AP1R3_EL1); + check_sr_write(ICC_DIR_EL1); + check_sr_read(ICC_RPR_EL1); + check_sr_write(ICC_SGI1R_EL1); + check_sr_write(ICC_ASGI1R_EL1); + check_sr_write(ICC_SGI0R_EL1); + check_sr_read(ICC_IAR1_EL1); + check_sr_write(ICC_EOIR1_EL1); + check_sr_rw(ICC_HPPIR1_EL1); + check_sr_rw(ICC_BPR1_EL1); + check_sr_rw(ICC_CTLR_EL1); + check_sr_rw(ICC_IGRPEN0_EL1); + check_sr_rw(ICC_IGRPEN1_EL1); + + /* + * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can + * be RAO/WI. Engage in non-fatal accesses, starting with a + * write of 0 to try and disable SRE, and let's see if it + * sticks. + */ + __check_sr_write(ICC_SRE_EL1); + if (!handled) + GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); + + val = __check_sr_read(ICC_SRE_EL1); + if (!handled) { + __GUEST_ASSERT((val & BIT(0)), + "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); + GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); + } + + GUEST_DONE(); +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + /* Success, we've gracefully exploded! */ + handled = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_no_gicv3(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Create a VM without a GICv3 */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t pfr0; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0); + __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), + "GICv3 not supported."); + kvm_vm_free(vm); + + test_guest_no_gicv3(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index d20981663831..2a3fe7914b72 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -126,6 +126,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index a51dbd2a5f84..f4ac28d53747 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -269,13 +269,12 @@ static void guest_inject(struct test_args *args, KVM_INJECT_MULTI(cmd, first_intid, num); while (irq_handled < num) { - asm volatile("wfi\n" - "msr daifclr, #2\n" - /* handle IRQ */ - "msr daifset, #2\n" - : : : "memory"); + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); } - asm volatile("msr daifclr, #2" : : : "memory"); + local_irq_enable(); GUEST_ASSERT_EQ(irq_handled, num); for (i = first_intid; i < num + first_intid; i++) diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h index b3e97525cb55..bf461de34785 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h @@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) return 0; } -static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) +static inline void timer_set_tval(enum arch_timer timer, int32_t tval) { switch (timer) { case VIRTUAL: @@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) isb(); } +static inline int32_t timer_get_tval(enum arch_timer timer) +{ + isb(); + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_tval_el0); + case PHYSICAL: + return read_sysreg(cntp_tval_el0); + default: + GUEST_FAIL("Could not get timer %d\n", timer); + } + + /* We should not reach here */ + return 0; +} + static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) { switch (timer) { diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 9b20a355d81a..de977d131082 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -243,4 +243,7 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res); +/* Execute a Wait For Interrupt instruction. */ +void wfi(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 0ac7cc89f38c..fe4dc3693112 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -639,3 +639,9 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm) sparsebit_set_num(vm->vpages_valid, 0, (1ULL << vm->va_bits) >> vm->page_shift); } + +/* Helper to call wfi instruction. */ +void wfi(void) +{ + asm volatile("wfi"); +} diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index ee71fc99d5b5..c52fe3ad8e98 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -4,6 +4,5 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh - +TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index 645839b50b0a..dc15aba8d0a3 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -2,5 +2,4 @@ CONFIG_TEST_PRINTF=m CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_PRIME_NUMBERS=m -CONFIG_TEST_STRSCPY=m CONFIG_TEST_BITOPS=m diff --git a/tools/testing/selftests/lib/strscpy.sh b/tools/testing/selftests/lib/strscpy.sh deleted file mode 100755 index be60ef6e1a7f..000000000000 --- a/tools/testing/selftests/lib/strscpy.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0+ -$(dirname $0)/../kselftest/module.sh "strscpy*" test_strscpy diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index 65c9c058458d..bd13257bfdfe 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -139,11 +139,8 @@ load_lp $MOD_REPLACE replace=1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -mods=(/sys/kernel/livepatch/*) -nmods=${#mods[@]} -if [ "$nmods" -ne 1 ]; then - die "Expecting only one moduled listed, found $nmods" -fi +loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' || + die "Expecting only one moduled listed, found $nmods" # These modules were disabled by the atomic replace for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c index 06d24d4679a6..1cc8a977c711 100644 --- a/tools/testing/selftests/lsm/lsm_list_modules_test.c +++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c @@ -128,6 +128,9 @@ TEST(correct_lsm_list_modules) case LSM_ID_EVM: name = "evm"; break; + case LSM_ID_IPE: + name = "ipe"; + break; default: name = "INVALID"; break; diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index cfad627e8d94..02e1204971b0 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -90,6 +90,7 @@ CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_pr CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie) VMTARGETS := protection_keys +VMTARGETS += pkey_sighandler_tests BINARIES_32 := $(VMTARGETS:%=%_32) BINARIES_64 := $(VMTARGETS:%=%_64) @@ -106,13 +107,13 @@ TEST_GEN_FILES += $(BINARIES_64) endif else -ifneq (,$(findstring $(ARCH),powerpc)) +ifneq (,$(filter $(ARCH),arm64 powerpc)) TEST_GEN_FILES += protection_keys endif endif -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) +ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index d680c00d2853..67df7b47087f 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -254,7 +254,7 @@ function cleanup_hugetlb_memory() { local cgroup="$1" if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then echo killing write_to_hugetlbfs - killall -2 write_to_hugetlbfs + killall -2 --wait write_to_hugetlbfs wait_for_hugetlb_memory_to_get_depleted $cgroup fi set -e diff --git a/tools/testing/selftests/mm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c index 267eea2e0e0b..3b1b532f1cbb 100644 --- a/tools/testing/selftests/mm/hugepage-mmap.c +++ b/tools/testing/selftests/mm/hugepage-mmap.c @@ -8,13 +8,6 @@ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this * example, the app is requesting memory of size 256MB that is backed by * huge pages. - * - * For the ia64 architecture, the Linux kernel reserves Region number 4 for - * huge pages. That means that if one requires a fixed address, a huge page - * aligned address starting with 0x800000... will be required. If a fixed - * address is not required, the kernel will select an address in the proper - * range. - * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. */ #define _GNU_SOURCE #include <stdlib.h> @@ -27,15 +20,6 @@ #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define FLAGS (MAP_SHARED | MAP_FIXED) -#else -#define ADDR (void *)(0x0UL) -#define FLAGS (MAP_SHARED) -#endif - static void check_bytes(char *addr) { ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); @@ -74,7 +58,7 @@ int main(void) if (fd < 0) ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno)); - addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0); + addr = mmap(NULL, LENGTH, PROTECTION, MAP_SHARED, fd, 0); if (addr == MAP_FAILED) { close(fd); ksft_exit_fail_msg("mmap(): %s\n", strerror(errno)); diff --git a/tools/testing/selftests/mm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c index 478bb1e989e9..ef06260802b5 100644 --- a/tools/testing/selftests/mm/hugepage-shm.c +++ b/tools/testing/selftests/mm/hugepage-shm.c @@ -8,13 +8,6 @@ * SHM_HUGETLB in the shmget system call to inform the kernel that it is * requesting huge pages. * - * For the ia64 architecture, the Linux kernel reserves Region number 4 for - * huge pages. That means that if one requires a fixed address, a huge page - * aligned address starting with 0x800000... will be required. If a fixed - * address is not required, the kernel will select an address in the proper - * range. - * Other architectures, such as ppc64, i386 or x86_64 are not so constrained. - * * Note: The default shared memory limit is quite low on many kernels, * you may need to increase it via: * @@ -39,15 +32,6 @@ #define dprintf(x) printf(x) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define SHMAT_FLAGS (SHM_RND) -#else -#define ADDR (void *)(0x0UL) -#define SHMAT_FLAGS (0) -#endif - int main(void) { int shmid; @@ -61,7 +45,7 @@ int main(void) } printf("shmid: 0x%x\n", shmid); - shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS); + shmaddr = shmat(shmid, NULL, 0); if (shmaddr == (char *)-1) { perror("Shared memory attach failure"); shmctl(shmid, IPC_RMID, NULL); diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c index 894d28c3dd47..df366a4d1b92 100644 --- a/tools/testing/selftests/mm/hugepage-vmemmap.c +++ b/tools/testing/selftests/mm/hugepage-vmemmap.c @@ -22,20 +22,6 @@ #define PM_PFRAME_BITS 55 #define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1) -/* - * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. - * That means the addresses starting with 0x800000... will need to be - * specified. Specifying a fixed address is not required on ppc64, i386 - * or x86_64. - */ -#ifdef __ia64__ -#define MAP_ADDR (void *)(0x8000000000000000UL) -#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) -#else -#define MAP_ADDR NULL -#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) -#endif - static size_t pagesize; static size_t maplength; @@ -113,7 +99,8 @@ int main(int argc, char **argv) exit(1); } - addr = mmap(MAP_ADDR, maplength, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0); + addr = mmap(NULL, maplength, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); exit(1); diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 829320a519e7..56d4480e8d3c 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1095,7 +1095,7 @@ static void usage(void) fprintf(stderr, "\n\tSupported Options:\n"); fprintf(stderr, "\t\t-h: This help message.\n"); fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); - fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon or shmem allocations.\n"); exit(1); } @@ -1209,6 +1209,8 @@ int main(int argc, char **argv) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; default_settings.hugepages[anon_order].enabled = THP_ALWAYS; + default_settings.shmem_hugepages[hpage_pmd_order].enabled = SHMEM_INHERIT; + default_settings.shmem_hugepages[anon_order].enabled = SHMEM_ALWAYS; save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index a1f005a90a4f..b47399feab53 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -4,11 +4,6 @@ * system call with MAP_HUGETLB flag. Before running this program make * sure the administrator has allocated enough default sized huge pages * to cover the 256 MB allocation. - * - * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages. - * That means the addresses starting with 0x800000... will need to be - * specified. Specifying a fixed address is not required on ppc64, i386 - * or x86_64. */ #include <stdlib.h> #include <stdio.h> @@ -21,15 +16,6 @@ #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) -/* Only ia64 requires this */ -#ifdef __ia64__ -#define ADDR (void *)(0x8000000000000000UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED) -#else -#define ADDR (void *)(0x0UL) -#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB) -#endif - static void check_bytes(char *addr) { ksft_print_msg("First hex is %x\n", *((unsigned int *)addr)); @@ -60,7 +46,7 @@ int main(int argc, char **argv) void *addr; size_t hugepage_size; size_t length = LENGTH; - int flags = FLAGS; + int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB; int shift = 0; hugepage_size = default_huge_page_size(); @@ -85,7 +71,7 @@ int main(int argc, char **argv) ksft_print_msg("Default size hugepages\n"); ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20); - addr = mmap(ADDR, length, PROTECTION, flags, -1, 0); + addr = mmap(NULL, length, PROTECTION, flags, -1, 0); if (addr == MAP_FAILED) ksft_exit_fail_msg("mmap: %s\n", strerror(errno)); diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 6908569ef406..64bcbb7151cf 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -15,10 +15,10 @@ #include <signal.h> #include <time.h> -#define TWOMEG (2<<20) -#define RUNTIME (20) - -#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) +#define TWOMEG (2<<20) +#define RUNTIME (20) +#define MAX_RETRIES 100 +#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) FIXTURE(migration) { @@ -65,6 +65,7 @@ int migrate(uint64_t *ptr, int n1, int n2) int ret, tmp; int status = 0; struct timespec ts1, ts2; + int failures = 0; if (clock_gettime(CLOCK_MONOTONIC, &ts1)) return -1; @@ -79,13 +80,17 @@ int migrate(uint64_t *ptr, int n1, int n2) ret = move_pages(0, 1, (void **) &ptr, &n2, &status, MPOL_MF_MOVE_ALL); if (ret) { - if (ret > 0) + if (ret > 0) { + /* Migration is best effort; try again */ + if (++failures < MAX_RETRIES) + continue; printf("Didn't migrate %d pages\n", ret); + } else perror("Couldn't migrate pages"); return -2; } - + failures = 0; tmp = n2; n2 = n1; n1 = tmp; diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index a818f010de47..01675c412b2a 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -81,32 +81,31 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, return sret; } -static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long offset) +static int sys_munmap(void *ptr, size_t size) { - void *sret; + int sret; errno = 0; - sret = (void *) syscall(__NR_mmap, addr, len, prot, - flags, fd, offset); + sret = syscall(__NR_munmap, ptr, size); return sret; } -static int sys_munmap(void *ptr, size_t size) +static int sys_madvise(void *start, size_t len, int types) { int sret; errno = 0; - sret = syscall(__NR_munmap, ptr, size); + sret = syscall(__NR_madvise, start, len, types); return sret; } -static int sys_madvise(void *start, size_t len, int types) +static void *sys_mremap(void *addr, size_t old_len, size_t new_len, + unsigned long flags, void *new_addr) { - int sret; + void *sret; errno = 0; - sret = syscall(__NR_madvise, start, len, types); + sret = (void *) syscall(__NR_mremap, addr, old_len, new_len, flags, new_addr); return sret; } @@ -172,7 +171,7 @@ static void setup_single_address(int size, void **ptrOut) { void *ptr; - ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); *ptrOut = ptr; } @@ -181,7 +180,7 @@ static void setup_single_address_rw(int size, void **ptrOut) void *ptr; unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE; - ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); *ptrOut = ptr; } @@ -205,7 +204,7 @@ bool seal_support(void) void *ptr; unsigned long page_size = getpagesize(); - ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (ptr == (void *) -1) return false; @@ -481,8 +480,8 @@ static void test_seal_zero_address(void) int prot; /* use mmap to change protection. */ - ptr = sys_mmap(0, size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ptr = mmap(0, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); FAIL_TEST_IF_FALSE(ptr == 0); size = get_vma_size(ptr, &prot); @@ -767,6 +766,42 @@ static void test_seal_mprotect_partial_mprotect(bool seal) REPORT_TEST_PASS(); } +static void test_seal_mprotect_partial_mprotect_tail(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + int prot; + + /* + * Check if a partial mseal (that results in two vmas) works correctly. + * It might mprotect the first, but it'll never touch the second (msealed) vma. + */ + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_mprotect(ptr, size, PROT_EXEC); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0); + FAIL_TEST_IF_FALSE(prot == 0x4); + } + + REPORT_TEST_PASS(); +} + + static void test_seal_mprotect_two_vma_with_gap(bool seal) { void *ptr; @@ -984,6 +1019,36 @@ static void test_seal_munmap_vma_with_gap(bool seal) REPORT_TEST_PASS(); } +static void test_seal_munmap_partial_across_vmas(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + int prot; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = sys_mseal(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + if (seal) { + FAIL_TEST_IF_FALSE(get_vma_size(ptr + page_size, &prot) > 0); + FAIL_TEST_IF_FALSE(prot == 0x4); + } + + REPORT_TEST_PASS(); +} + static void test_munmap_start_freed(bool seal) { void *ptr; @@ -1115,12 +1180,12 @@ static void test_seal_mremap_shrink(bool seal) } /* shrink from 4 pages to 2 pages. */ - ret2 = mremap(ptr, size, 2 * page_size, 0, 0); + ret2 = sys_mremap(ptr, size, 2 * page_size, 0, 0); if (seal) { - FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); + FAIL_TEST_IF_FALSE(ret2 == (void *) MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { - FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); + FAIL_TEST_IF_FALSE(ret2 != (void *) MAP_FAILED); } @@ -1147,7 +1212,7 @@ static void test_seal_mremap_expand(bool seal) } /* expand from 2 page to 4 pages. */ - ret2 = mremap(ptr, 2 * page_size, 4 * page_size, 0, 0); + ret2 = sys_mremap(ptr, 2 * page_size, 4 * page_size, 0, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1180,7 +1245,7 @@ static void test_seal_mremap_move(bool seal) } /* move from ptr to fixed address. */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr); + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1209,8 +1274,8 @@ static void test_seal_mmap_overwrite_prot(bool seal) } /* use mmap to change protection. */ - ret2 = sys_mmap(ptr, size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1240,8 +1305,8 @@ static void test_seal_mmap_expand(bool seal) } /* use mmap to expand. */ - ret2 = sys_mmap(ptr, size, PROT_READ, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1268,8 +1333,8 @@ static void test_seal_mmap_shrink(bool seal) } /* use mmap to shrink. */ - ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, 8 * page_size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1299,7 +1364,7 @@ static void test_seal_mremap_shrink_fixed(bool seal) } /* mremap to move and shrink to fixed address */ - ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); @@ -1330,7 +1395,7 @@ static void test_seal_mremap_expand_fixed(bool seal) } /* mremap to move and expand to fixed address */ - ret2 = mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED, + ret2 = sys_mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); @@ -1361,7 +1426,7 @@ static void test_seal_mremap_move_fixed(bool seal) } /* mremap to move to fixed address */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1390,14 +1455,13 @@ static void test_seal_mremap_move_fixed_zero(bool seal) /* * MREMAP_FIXED can move the mapping to zero address */ - ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, + ret2 = sys_mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { FAIL_TEST_IF_FALSE(ret2 == 0); - } REPORT_TEST_PASS(); @@ -1420,13 +1484,13 @@ static void test_seal_mremap_move_dontunmap(bool seal) } /* mremap to move, and don't unmap src addr. */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0); + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { + /* kernel will allocate a new address */ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); - } REPORT_TEST_PASS(); @@ -1434,7 +1498,7 @@ static void test_seal_mremap_move_dontunmap(bool seal) static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) { - void *ptr; + void *ptr, *ptr2; unsigned long page_size = getpagesize(); unsigned long size = 4 * page_size; int ret; @@ -1449,24 +1513,30 @@ static void test_seal_mremap_move_dontunmap_anyaddr(bool seal) } /* - * The 0xdeaddead should not have effect on dest addr - * when MREMAP_DONTUNMAP is set. + * The new address is any address that not allocated. + * use allocate/free to similate that. */ - ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, - 0xdeaddead); + setup_single_address(size, &ptr2); + FAIL_TEST_IF_FALSE(ptr2 != (void *)-1); + ret = sys_munmap(ptr2, size); + FAIL_TEST_IF_FALSE(!ret); + + /* + * remap to any address. + */ + ret2 = sys_mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, + (void *) ptr2); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); } else { - FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED); - FAIL_TEST_IF_FALSE((long)ret2 != 0xdeaddead); - + /* remap success and return ptr2 */ + FAIL_TEST_IF_FALSE(ret2 == ptr2); } REPORT_TEST_PASS(); } - static void test_seal_merge_and_split(void) { void *ptr; @@ -1650,7 +1720,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal) ret = fallocate(fd, 0, 0, size); FAIL_TEST_IF_FALSE(!ret); - ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0); + ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0); FAIL_TEST_IF_FALSE(ptr != MAP_FAILED); if (seal) { @@ -1680,7 +1750,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal) int ret; unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED; - ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0); + ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0); FAIL_TEST_IF_FALSE(ptr != (void *)-1); if (seal) { @@ -1731,6 +1801,69 @@ static void test_seal_discard_ro_anon(bool seal) REPORT_TEST_PASS(); } +static void test_seal_discard_across_vmas(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 2 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr + page_size, page_size); + FAIL_TEST_IF_FALSE(!ret); + } + + ret = sys_madvise(ptr, size, MADV_DONTNEED); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + + +static void test_seal_madvise_nodiscard(bool seal) +{ + void *ptr; + unsigned long page_size = getpagesize(); + unsigned long size = 4 * page_size; + int ret; + + setup_single_address(size, &ptr); + FAIL_TEST_IF_FALSE(ptr != (void *)-1); + + if (seal) { + ret = seal_single_address(ptr, size); + FAIL_TEST_IF_FALSE(!ret); + } + + /* + * Test a random madvise flag like MADV_RANDOM that does not touch page + * contents (and thus should work for msealed VMAs). RANDOM also happens to + * share bits with other discard-ish flags like REMOVE. + */ + ret = sys_madvise(ptr, size, MADV_RANDOM); + FAIL_TEST_IF_FALSE(!ret); + + ret = sys_munmap(ptr, size); + if (seal) + FAIL_TEST_IF_FALSE(ret < 0); + else + FAIL_TEST_IF_FALSE(!ret); + + REPORT_TEST_PASS(); +} + int main(int argc, char **argv) { bool test_seal = seal_support(); @@ -1743,7 +1876,7 @@ int main(int argc, char **argv) if (!pkey_supported()) ksft_print_msg("PKEY not supported\n"); - ksft_set_plan(80); + ksft_set_plan(88); test_seal_addseal(); test_seal_unmapped_start(); @@ -1789,12 +1922,17 @@ int main(int argc, char **argv) test_seal_mprotect_split(false); test_seal_mprotect_split(true); + test_seal_mprotect_partial_mprotect_tail(false); + test_seal_mprotect_partial_mprotect_tail(true); + test_seal_munmap(false); test_seal_munmap(true); test_seal_munmap_two_vma(false); test_seal_munmap_two_vma(true); test_seal_munmap_vma_with_gap(false); test_seal_munmap_vma_with_gap(true); + test_seal_munmap_partial_across_vmas(false); + test_seal_munmap_partial_across_vmas(true); test_munmap_start_freed(false); test_munmap_start_freed(true); @@ -1822,8 +1960,12 @@ int main(int argc, char **argv) test_seal_mremap_move_fixed_zero(true); test_seal_mremap_move_dontunmap_anyaddr(false); test_seal_mremap_move_dontunmap_anyaddr(true); + test_seal_madvise_nodiscard(false); + test_seal_madvise_nodiscard(true); test_seal_discard_ro_anon(false); test_seal_discard_ro_anon(true); + test_seal_discard_across_vmas(false); + test_seal_discard_across_vmas(true); test_seal_discard_ro_anon_on_rw(false); test_seal_discard_ro_anon_on_rw(true); test_seal_discard_ro_anon_on_shared(false); diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h new file mode 100644 index 000000000000..580e1b0bb38e --- /dev/null +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. + */ + +#ifndef _PKEYS_ARM64_H +#define _PKEYS_ARM64_H + +#include "vm_util.h" +/* for signal frame parsing */ +#include "../arm64/signal/testcases/testcases.h" + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 288 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 289 +# define SYS_pkey_free 290 +#endif +#define MCONTEXT_IP(mc) mc.pc +#define MCONTEXT_TRAPNO(mc) -1 + +#define PKEY_MASK 0xf + +#define POE_NONE 0x0 +#define POE_X 0x2 +#define POE_RX 0x3 +#define POE_RWX 0x7 + +#define NR_PKEYS 8 +#define NR_RESERVED_PKEYS 1 /* pkey-0 */ + +#define PKEY_ALLOW_ALL 0x77777777 + +#define PKEY_BITS_PER_PKEY 4 +#define PAGE_SIZE sysconf(_SC_PAGESIZE) +#undef HPAGE_SIZE +#define HPAGE_SIZE default_huge_page_size() + +/* 4-byte instructions * 16384 = 64K page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +static inline u64 __read_pkey_reg(void) +{ + u64 pkey_reg = 0; + + // POR_EL0 + asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg)); + + return pkey_reg; +} + +static inline void __write_pkey_reg(u64 pkey_reg) +{ + u64 por = pkey_reg; + + dprintf4("%s() changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); + + // POR_EL0 + asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :); + + dprintf4("%s() pkey register after changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); +} + +static inline int cpu_has_pkeys(void) +{ + /* No simple way to determine this */ + return 1; +} + +static inline u32 pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static inline int get_arch_reserved_keys(void) +{ + return NR_RESERVED_PKEYS; +} + +void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ +} + +void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + return PTR_ERR_ENOTSUP; +} + +#define set_pkey_bits set_pkey_bits +static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) +{ + u32 shift = pkey_bit_position(pkey); + u64 new_val = POE_RWX; + + /* mask out bits from pkey in old value */ + reg &= ~((u64)PKEY_MASK << shift); + + if (flags & PKEY_DISABLE_ACCESS) + new_val = POE_X; + else if (flags & PKEY_DISABLE_WRITE) + new_val = POE_RX; + + /* OR in new bits for pkey */ + reg |= new_val << shift; + + return reg; +} + +#define get_pkey_bits get_pkey_bits +static inline u64 get_pkey_bits(u64 reg, int pkey) +{ + u32 shift = pkey_bit_position(pkey); + /* + * shift down the relevant bits to the lowest four, then + * mask off all the other higher bits + */ + u32 perm = (reg >> shift) & PKEY_MASK; + + if (perm == POE_X) + return PKEY_DISABLE_ACCESS; + if (perm == POE_RX) + return PKEY_DISABLE_WRITE; + return 0; +} + +static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) +{ + struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); + struct poe_context *poe_ctx = + (struct poe_context *) get_header(ctx, POE_MAGIC, + sizeof(uctxt->uc_mcontext), NULL); + if (poe_ctx) + poe_ctx->por_el0 = pkey; +} + +#endif /* _PKEYS_ARM64_H */ diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 1af3156a9db8..9ab6a3ee153b 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -79,7 +79,18 @@ extern void abort_hooks(void); } \ } while (0) -__attribute__((noinline)) int read_ptr(int *ptr); +#define barrier() __asm__ __volatile__("": : :"memory") +#ifndef noinline +# define noinline __attribute__((noinline)) +#endif + +noinline int read_ptr(int *ptr) +{ + /* Keep GCC from optimizing this away somehow */ + barrier(); + return *ptr; +} + void expected_pkey_fault(int pkey); int sys_pkey_alloc(unsigned long flags, unsigned long init_val); int sys_pkey_free(unsigned long pkey); @@ -91,12 +102,17 @@ void record_pkey_malloc(void *ptr, long size, int prot); #include "pkey-x86.h" #elif defined(__powerpc64__) /* arch */ #include "pkey-powerpc.h" +#elif defined(__aarch64__) /* arch */ +#include "pkey-arm64.h" #else /* arch */ #error Architecture not supported #endif /* arch */ +#ifndef PKEY_MASK #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif +#ifndef set_pkey_bits static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) { u32 shift = pkey_bit_position(pkey); @@ -106,7 +122,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) reg |= (flags & PKEY_MASK) << shift; return reg; } +#endif +#ifndef get_pkey_bits static inline u64 get_pkey_bits(u64 reg, int pkey) { u32 shift = pkey_bit_position(pkey); @@ -116,6 +134,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey) */ return ((reg >> shift) & PKEY_MASK); } +#endif extern u64 shadow_pkey_reg; diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index ae5df26104e5..3d0c0bdae5bc 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -8,7 +8,10 @@ # define SYS_pkey_free 385 #endif #define REG_IP_IDX PT_NIP +#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO] #define REG_TRAPNO PT_TRAP +#define MCONTEXT_FPREGS #define gregs gp_regs #define fpregs fp_regs #define si_pkey_offset 0x20 diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 814758e109c0..5f28e26a2511 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -15,6 +15,10 @@ #endif +#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO] +#define MCONTEXT_FPREGS + #ifndef PKEY_DISABLE_ACCESS # define PKEY_DISABLE_ACCESS 0x1 #endif diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c new file mode 100644 index 000000000000..a8088b645ad6 --- /dev/null +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst) + * + * The testcases in this file exercise various flows related to signal handling, + * using an alternate signal stack, with the default pkey (pkey 0) disabled. + * + * Compile with: + * gcc -mxsave -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm + * gcc -mxsave -m32 -o pkey_sighandler_tests -O2 -g -std=gnu99 -pthread -Wall pkey_sighandler_tests.c -I../../../../tools/include -lrt -ldl -lm + */ +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <errno.h> +#include <sys/syscall.h> +#include <string.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <signal.h> +#include <assert.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <pthread.h> +#include <limits.h> + +#include "pkey-helpers.h" + +#define STACK_SIZE PTHREAD_STACK_MIN + +void expected_pkey_fault(int pkey) {} + +pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond = PTHREAD_COND_INITIALIZER; +siginfo_t siginfo = {0}; + +/* + * We need to use inline assembly instead of glibc's syscall because glibc's + * syscall will attempt to access the PLT in order to call a library function + * which is protected by MPK 0 which we don't have access to. + */ +static inline __always_inline +long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) +{ + unsigned long ret; +#ifdef __x86_64__ + register long r10 asm("r10") = a4; + register long r8 asm("r8") = a5; + register long r9 asm("r9") = a6; + asm volatile ("syscall" + : "=a"(ret) + : "a"(n), "D"(a1), "S"(a2), "d"(a3), "r"(r10), "r"(r8), "r"(r9) + : "rcx", "r11", "memory"); +#elif defined __i386__ + asm volatile ("int $0x80" + : "=a"(ret) + : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) + : "memory"); +#else +# error syscall_raw() not implemented +#endif + return ret; +} + +static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext) +{ + pthread_mutex_lock(&mutex); + + memcpy(&siginfo, info, sizeof(siginfo_t)); + + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); + + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); +} + +static void sigusr1_handler(int signo, siginfo_t *info, void *ucontext) +{ + pthread_mutex_lock(&mutex); + + memcpy(&siginfo, info, sizeof(siginfo_t)); + + pthread_cond_signal(&cond); + pthread_mutex_unlock(&mutex); +} + +static void sigusr2_handler(int signo, siginfo_t *info, void *ucontext) +{ + /* + * pkru should be the init_pkru value which enabled MPK 0 so + * we can use library functions. + */ + printf("%s invoked.\n", __func__); +} + +static void raise_sigusr2(void) +{ + pid_t tid = 0; + + tid = syscall_raw(SYS_gettid, 0, 0, 0, 0, 0, 0); + + syscall_raw(SYS_tkill, tid, SIGUSR2, 0, 0, 0, 0); + + /* + * We should return from the signal handler here and be able to + * return to the interrupted thread. + */ +} + +static void *thread_segv_with_pkey0_disabled(void *ptr) +{ + /* Disable MPK 0 (and all others too) */ + __write_pkey_reg(0x55555555); + + /* Segfault (with SEGV_MAPERR) */ + *(int *) (0x1) = 1; + return NULL; +} + +static void *thread_segv_pkuerr_stack(void *ptr) +{ + /* Disable MPK 0 (and all others too) */ + __write_pkey_reg(0x55555555); + + /* After we disable MPK 0, we can't access the stack to return */ + return NULL; +} + +static void *thread_segv_maperr_ptr(void *ptr) +{ + stack_t *stack = ptr; + int *bad = (int *)1; + + /* + * Setup alternate signal stack, which should be pkey_mprotect()ed by + * MPK 0. The thread's stack cannot be used for signals because it is + * not accessible by the default init_pkru value of 0x55555554. + */ + syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); + + /* Disable MPK 0. Only MPK 1 is enabled. */ + __write_pkey_reg(0x55555551); + + /* Segfault */ + *bad = 1; + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + return NULL; +} + +/* + * Verify that the sigsegv handler is invoked when pkey 0 is disabled. + * Note that the new thread stack and the alternate signal stack is + * protected by MPK 0. + */ +static void test_sigsegv_handler_with_pkey0_disabled(void) +{ + struct sigaction sa; + pthread_attr_t attr; + pthread_t thr; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigsegv_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&thr, &attr, thread_segv_with_pkey0_disabled, NULL); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_MAPERR && + siginfo.si_addr == (void *)1, + "%s\n", __func__); +} + +/* + * Verify that the sigsegv handler is invoked when pkey 0 is disabled. + * Note that the new thread stack and the alternate signal stack is + * protected by MPK 0, which renders them inaccessible when MPK 0 + * is disabled. So just the return from the thread should cause a + * segfault with SEGV_PKUERR. + */ +static void test_sigsegv_handler_cannot_access_stack(void) +{ + struct sigaction sa; + pthread_attr_t attr; + pthread_t thr; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigsegv_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + pthread_attr_init(&attr); + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + + pthread_create(&thr, &attr, thread_segv_pkuerr_stack, NULL); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_PKUERR, + "%s\n", __func__); +} + +/* + * Verify that the sigsegv handler that uses an alternate signal stack + * is correctly invoked for a thread which uses a non-zero MPK to protect + * its own stack, and disables all other MPKs (including 0). + */ +static void test_sigsegv_handler_with_different_pkey_for_stack(void) +{ + struct sigaction sa; + static stack_t sigstack; + void *stack; + int pkey; + int parent_pid = 0; + int child_pid = 0; + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + + sa.sa_sigaction = sigsegv_handler; + + sigemptyset(&sa.sa_mask); + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(stack != MAP_FAILED); + + /* Allow access to MPK 0 and MPK 1 */ + __write_pkey_reg(0x55555550); + + /* Protect the new stack with MPK 1 */ + pkey = pkey_alloc(0, 0); + pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + + /* Set up alternate signal stack that will use the default MPK */ + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + sigstack.ss_flags = 0; + sigstack.ss_size = STACK_SIZE; + + memset(&siginfo, 0, sizeof(siginfo)); + + /* Use clone to avoid newer glibcs using rseq on new threads */ + long ret = syscall_raw(SYS_clone, + CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + (long) ((char *)(stack) + STACK_SIZE), + (long) &parent_pid, + (long) &child_pid, 0, 0); + + if (ret < 0) { + errno = -ret; + perror("clone"); + } else if (ret == 0) { + thread_segv_maperr_ptr(&sigstack); + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + } + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + ksft_test_result(siginfo.si_signo == SIGSEGV && + siginfo.si_code == SEGV_MAPERR && + siginfo.si_addr == (void *)1, + "%s\n", __func__); +} + +/* + * Verify that the PKRU value set by the application is correctly + * restored upon return from signal handling. + */ +static void test_pkru_preserved_after_sigusr1(void) +{ + struct sigaction sa; + unsigned long pkru = 0x45454544; + + sa.sa_flags = SA_SIGINFO; + + sa.sa_sigaction = sigusr1_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + memset(&siginfo, 0, sizeof(siginfo)); + + __write_pkey_reg(pkru); + + raise(SIGUSR1); + + pthread_mutex_lock(&mutex); + while (siginfo.si_signo == 0) + pthread_cond_wait(&cond, &mutex); + pthread_mutex_unlock(&mutex); + + /* Ensure the pkru value is the same after returning from signal. */ + ksft_test_result(pkru == __read_pkey_reg() && + siginfo.si_signo == SIGUSR1, + "%s\n", __func__); +} + +static noinline void *thread_sigusr2_self(void *ptr) +{ + /* + * A const char array like "Resuming after SIGUSR2" won't be stored on + * the stack and the code could access it via an offset from the program + * counter. This makes sure it's on the function's stack frame. + */ + char str[] = {'R', 'e', 's', 'u', 'm', 'i', 'n', 'g', ' ', + 'a', 'f', 't', 'e', 'r', ' ', + 'S', 'I', 'G', 'U', 'S', 'R', '2', + '.', '.', '.', '\n', '\0'}; + stack_t *stack = ptr; + + /* + * Setup alternate signal stack, which should be pkey_mprotect()ed by + * MPK 0. The thread's stack cannot be used for signals because it is + * not accessible by the default init_pkru value of 0x55555554. + */ + syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); + + /* Disable MPK 0. Only MPK 2 is enabled. */ + __write_pkey_reg(0x55555545); + + raise_sigusr2(); + + /* Do something, to show the thread resumed execution after the signal */ + syscall_raw(SYS_write, 1, (long) str, sizeof(str) - 1, 0, 0, 0); + + /* + * We can't return to test_pkru_sigreturn because it + * will attempt to use a %rbp value which is on the stack + * of the main thread. + */ + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + return NULL; +} + +/* + * Verify that sigreturn is able to restore altstack even if the thread had + * disabled pkey 0. + */ +static void test_pkru_sigreturn(void) +{ + struct sigaction sa = {0}; + static stack_t sigstack; + void *stack; + int pkey; + int parent_pid = 0; + int child_pid = 0; + + sa.sa_handler = SIG_DFL; + sa.sa_flags = 0; + sigemptyset(&sa.sa_mask); + + /* + * For this testcase, we do not want to handle SIGSEGV. Reset handler + * to default so that the application can crash if it receives SIGSEGV. + */ + if (sigaction(SIGSEGV, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + sa.sa_flags = SA_SIGINFO | SA_ONSTACK; + sa.sa_sigaction = sigusr2_handler; + sigemptyset(&sa.sa_mask); + + if (sigaction(SIGUSR2, &sa, NULL) == -1) { + perror("sigaction"); + exit(EXIT_FAILURE); + } + + stack = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + assert(stack != MAP_FAILED); + + /* + * Allow access to MPK 0 and MPK 2. The child thread (to be created + * later in this flow) will have its stack protected by MPK 2, whereas + * the current thread's stack is protected by the default MPK 0. Hence + * both need to be enabled. + */ + __write_pkey_reg(0x55555544); + + /* Protect the stack with MPK 2 */ + pkey = pkey_alloc(0, 0); + pkey_mprotect(stack, STACK_SIZE, PROT_READ | PROT_WRITE, pkey); + + /* Set up alternate signal stack that will use the default MPK */ + sigstack.ss_sp = mmap(0, STACK_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + sigstack.ss_flags = 0; + sigstack.ss_size = STACK_SIZE; + + /* Use clone to avoid newer glibcs using rseq on new threads */ + long ret = syscall_raw(SYS_clone, + CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + (long) ((char *)(stack) + STACK_SIZE), + (long) &parent_pid, + (long) &child_pid, 0, 0); + + if (ret < 0) { + errno = -ret; + perror("clone"); + } else if (ret == 0) { + thread_sigusr2_self(&sigstack); + syscall_raw(SYS_exit, 0, 0, 0, 0, 0, 0); + } + + child_pid = ret; + /* Check that thread exited */ + do { + sched_yield(); + ret = syscall_raw(SYS_tkill, child_pid, 0, 0, 0, 0, 0); + } while (ret != -ESRCH && ret != -EINVAL); + + ksft_test_result_pass("%s\n", __func__); +} + +static void (*pkey_tests[])(void) = { + test_sigsegv_handler_with_pkey0_disabled, + test_sigsegv_handler_cannot_access_stack, + test_sigsegv_handler_with_different_pkey_for_stack, + test_pkru_preserved_after_sigusr1, + test_pkru_sigreturn +}; + +int main(int argc, char *argv[]) +{ + int i; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(pkey_tests)); + + for (i = 0; i < ARRAY_SIZE(pkey_tests); i++) + (*pkey_tests[i])(); + + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index eaa6d1fc5328..4990f7ab4cb7 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -147,7 +147,7 @@ void abort_hooks(void) * will then fault, which makes sure that the fault code handles * execute-only memory properly. */ -#ifdef __powerpc64__ +#if defined(__powerpc64__) || defined(__aarch64__) /* This way, both 4K and 64K alignment are maintained */ __attribute__((__aligned__(65536))) #else @@ -212,7 +212,6 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u64 orig_pkey_reg = read_pkey_reg(); dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -242,8 +241,6 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - pkey_assert(read_pkey_reg() >= orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); } @@ -253,7 +250,6 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u64 orig_pkey_reg = read_pkey_reg(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); @@ -273,8 +269,6 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - assert(read_pkey_reg() <= orig_pkey_reg); } void pkey_write_allow(int pkey) @@ -314,7 +308,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; +#ifdef MCONTEXT_FPREGS char *fpregs; +#endif #if defined(__i386__) || defined(__x86_64__) /* arch */ u32 *pkey_reg_ptr; int pkey_reg_offset; @@ -328,9 +324,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext); + ip = MCONTEXT_IP(uctxt->uc_mcontext); +#ifdef MCONTEXT_FPREGS fpregs = (char *) uctxt->uc_mcontext.fpregs; +#endif dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), @@ -359,7 +357,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #endif /* arch */ dprintf1("siginfo: %p\n", si); +#ifdef MCONTEXT_FPREGS dprintf1(" fpregs: %p\n", fpregs); +#endif if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -389,6 +389,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #elif defined(__powerpc64__) /* arch */ /* restore access and let the faulting instruction continue */ pkey_access_allow(siginfo_pkey); +#elif defined(__aarch64__) + aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL); #endif /* arch */ pkey_faults++; dprintf1("<<<<==================================================\n"); @@ -902,7 +904,9 @@ void expected_pkey_fault(int pkey) * test program continue. We now have to restore it. */ if (__read_pkey_reg() != 0) -#else /* arch */ +#elif defined(__aarch64__) + if (__read_pkey_reg() != PKEY_ALLOW_ALL) +#else if (__read_pkey_reg() != shadow_pkey_reg) #endif /* arch */ pkey_assert(0); @@ -950,16 +954,6 @@ void close_test_fds(void) nr_test_fds = 0; } -#define barrier() __asm__ __volatile__("": : :"memory") -__attribute__((noinline)) int read_ptr(int *ptr) -{ - /* - * Keep GCC from optimizing this away somehow - */ - barrier(); - return *ptr; -} - void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey) { int i, err; @@ -1492,6 +1486,11 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) lots_o_noops_around_write(&scratch); do_not_expect_pkey_fault("executing on PROT_EXEC memory"); expect_fault_on_read_execonly_key(p1, pkey); + + // Reset back to PROT_EXEC | PROT_READ for architectures that support + // non-PKEY execute-only permissions. + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey); + pkey_assert(!ret); } void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) @@ -1665,6 +1664,84 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) } #endif +#if defined(__aarch64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + pid_t child; + int status, ret; + struct iovec iov; + u64 trace_pkey; + /* Just a random pkey value.. */ + u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) | + (POE_NONE << PKEY_BITS_PER_PKEY) | + POE_RWX; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkey) + exit(1); + + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + iov.iov_base = &trace_pkey; + iov.iov_len = 8; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == read_pkey_reg()); + + trace_pkey = new_pkey; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1700,7 +1777,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) test_ptrace_modifies_pkru, #endif }; diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 36045edb10de..c5797ad1d37b 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -189,7 +189,7 @@ else fi # filter 64bit architectures -ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64" +ARCH64STR="arm64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64" if [ -z "$ARCH" ]; then ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/') fi diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c index 7aa1366063e4..d9f8ba8d5050 100644 --- a/tools/testing/selftests/mm/seal_elf.c +++ b/tools/testing/selftests/mm/seal_elf.c @@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len) return sret; } -static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long offset) -{ - void *sret; - - errno = 0; - sret = (void *) syscall(__NR_mmap, addr, len, prot, - flags, fd, offset); - return sret; -} - static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot) { int sret; @@ -56,7 +45,7 @@ static bool seal_support(void) void *ptr; unsigned long page_size = getpagesize(); - ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (ptr == (void *) -1) return false; diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index e5e8dafc9d94..eb6d1b9fc362 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -84,6 +84,76 @@ static void write_debugfs(const char *fmt, ...) write_file(SPLIT_DEBUGFS, input, ret + 1); } +static char *allocate_zero_filled_hugepage(size_t len) +{ + char *result; + size_t i; + + result = memalign(pmd_pagesize, len); + if (!result) { + printf("Fail to allocate memory\n"); + exit(EXIT_FAILURE); + } + + madvise(result, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + result[i] = (char)0; + + return result; +} + +static void verify_rss_anon_split_huge_page_all_zeroes(char *one_page, int nr_hpages, size_t len) +{ + unsigned long rss_anon_before, rss_anon_after; + size_t i; + + if (!check_huge_anon(one_page, 4, pmd_pagesize)) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + rss_anon_before = rss_anon(); + if (!rss_anon_before) { + printf("No RssAnon is allocated before split\n"); + exit(EXIT_FAILURE); + } + + /* split all THPs */ + write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, + (uint64_t)one_page + len, 0); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)0) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + + if (!check_huge_anon(one_page, 0, pmd_pagesize)) { + printf("Still AnonHugePages not split\n"); + exit(EXIT_FAILURE); + } + + rss_anon_after = rss_anon(); + if (rss_anon_after >= rss_anon_before) { + printf("Incorrect RssAnon value. Before: %ld After: %ld\n", + rss_anon_before, rss_anon_after); + exit(EXIT_FAILURE); + } +} + +void split_pmd_zero_pages(void) +{ + char *one_page; + int nr_hpages = 4; + size_t len = nr_hpages * pmd_pagesize; + + one_page = allocate_zero_filled_hugepage(len); + verify_rss_anon_split_huge_page_all_zeroes(one_page, nr_hpages, len); + printf("Split zero filled huge pages successful\n"); + free(one_page); +} + void split_pmd_thp(void) { char *one_page; @@ -431,6 +501,7 @@ int main(int argc, char **argv) fd_size = 2 * pmd_pagesize; + split_pmd_zero_pages(); split_pmd_thp(); split_pte_mapped_thp(); split_file_backed_thp(); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index a4163438108e..577eaab6266f 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -33,10 +33,11 @@ static const char * const thp_defrag_strings[] = { }; static const char * const shmem_enabled_strings[] = { + "never", "always", "within_size", "advise", - "never", + "inherit", "deny", "force", NULL @@ -200,6 +201,7 @@ void thp_write_num(const char *name, unsigned long num) void thp_read_settings(struct thp_settings *settings) { unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int i; @@ -234,12 +236,24 @@ void thp_read_settings(struct thp_settings *settings) settings->hugepages[i].enabled = thp_read_string(path, thp_enabled_strings); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) { + settings->shmem_hugepages[i].enabled = SHMEM_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + settings->shmem_hugepages[i].enabled = + thp_read_string(path, shmem_enabled_strings); + } } void thp_write_settings(struct thp_settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int enabled; int i; @@ -271,6 +285,15 @@ void thp_write_settings(struct thp_settings *settings) enabled = settings->hugepages[i].enabled; thp_write_string(path, thp_enabled_strings[enabled]); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + enabled = settings->shmem_hugepages[i].enabled; + thp_write_string(path, shmem_enabled_strings[enabled]); + } } struct thp_settings *thp_current_settings(void) @@ -324,17 +347,18 @@ void thp_set_read_ahead_path(char *path) dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; } -unsigned long thp_supported_orders(void) +static unsigned long __thp_supported_orders(bool is_shmem) { unsigned long orders = 0; char path[PATH_MAX]; char buf[256]; - int ret; - int i; + int ret, i; + char anon_dir[] = "enabled"; + char shmem_dir[] = "shmem_enabled"; for (i = 0; i < NR_ORDERS; i++) { - ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", - (getpagesize() >> 10) << i); + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/%s", + (getpagesize() >> 10) << i, is_shmem ? shmem_dir : anon_dir); if (ret >= PATH_MAX) { printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); @@ -347,3 +371,13 @@ unsigned long thp_supported_orders(void) return orders; } + +unsigned long thp_supported_orders(void) +{ + return __thp_supported_orders(false); +} + +unsigned long thp_shmem_supported_orders(void) +{ + return __thp_supported_orders(true); +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 71cbff05f4c7..876235a23460 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -22,10 +22,11 @@ enum thp_defrag { }; enum shmem_enabled { + SHMEM_NEVER, SHMEM_ALWAYS, SHMEM_WITHIN_SIZE, SHMEM_ADVISE, - SHMEM_NEVER, + SHMEM_INHERIT, SHMEM_DENY, SHMEM_FORCE, }; @@ -46,6 +47,10 @@ struct khugepaged_settings { unsigned long pages_to_scan; }; +struct shmem_hugepages_settings { + enum shmem_enabled enabled; +}; + struct thp_settings { enum thp_enabled thp_enabled; enum thp_defrag thp_defrag; @@ -54,6 +59,7 @@ struct thp_settings { struct khugepaged_settings khugepaged; unsigned long read_ahead_kb; struct hugepages_settings hugepages[NR_ORDERS]; + struct shmem_hugepages_settings shmem_hugepages[NR_ORDERS]; }; int read_file(const char *path, char *buf, size_t buflen); @@ -76,5 +82,6 @@ void thp_save_settings(void); void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); +unsigned long thp_shmem_supported_orders(void); #endif /* __THP_SETTINGS_H__ */ diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 5a62530da3b5..d8d0cf04bb57 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -12,6 +12,7 @@ #define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" #define SMAP_FILE_PATH "/proc/self/smaps" +#define STATUS_FILE_PATH "/proc/self/status" #define MAX_LINE_LENGTH 500 unsigned int __page_size; @@ -171,6 +172,27 @@ uint64_t read_pmd_pagesize(void) return strtoul(buf, NULL, 10); } +unsigned long rss_anon(void) +{ + unsigned long rss_anon = 0; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + + fp = fopen(STATUS_FILE_PATH, "r"); + if (!fp) + ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, STATUS_FILE_PATH); + + if (!check_for_pattern(fp, "RssAnon:", buffer, sizeof(buffer))) + goto err_out; + + if (sscanf(buffer, "RssAnon:%10lu kB", &rss_anon) != 1) + ksft_exit_fail_msg("Reading status error\n"); + +err_out: + fclose(fp); + return rss_anon; +} + bool __check_huge(void *addr, char *pattern, int nr_hpages, uint64_t hpage_size) { diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index 9007c420d52c..2eaed8209925 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -39,6 +39,7 @@ unsigned long pagemap_get_pfn(int fd, char *start); void clear_softdirty(void); bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len); uint64_t read_pmd_pagesize(void); +unsigned long rss_anon(void); bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size); bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size); bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); diff --git a/tools/testing/selftests/mm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c index 6a2caba19ee1..1289d311efd7 100644 --- a/tools/testing/selftests/mm/write_to_hugetlbfs.c +++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c @@ -28,7 +28,7 @@ enum method { /* Global variables. */ static const char *self; -static char *shmaddr; +static int *shmaddr; static int shmid; /* @@ -47,15 +47,17 @@ void sig_handler(int signo) { printf("Received %d.\n", signo); if (signo == SIGINT) { - printf("Deleting the memory\n"); - if (shmdt((const void *)shmaddr) != 0) { - perror("Detach failure"); + if (shmaddr) { + printf("Deleting the memory\n"); + if (shmdt((const void *)shmaddr) != 0) { + perror("Detach failure"); + shmctl(shmid, IPC_RMID, NULL); + exit(4); + } + shmctl(shmid, IPC_RMID, NULL); - exit(4); + printf("Done deleting the memory\n"); } - - shmctl(shmid, IPC_RMID, NULL); - printf("Done deleting the memory\n"); } exit(2); } @@ -211,7 +213,8 @@ int main(int argc, char **argv) shmctl(shmid, IPC_RMID, NULL); exit(2); } - printf("shmaddr: %p\n", ptr); + shmaddr = ptr; + printf("shmaddr: %p\n", shmaddr); break; default: diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 666ab7d9390b..1c04c780db66 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -17,6 +17,7 @@ ipv6_flowlabel ipv6_flowlabel_mgr log.txt msg_zerocopy +ncdevmem nettest psock_fanout psock_snd @@ -34,6 +35,7 @@ scm_pidfd scm_rights sk_bind_sendto_listen sk_connect_zero_addr +sk_so_peek_off socket so_incoming_cpu so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8eaffd7a641c..649f1fe0dc46 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -80,12 +80,14 @@ TEST_PROGS += io_uring_zerocopy_tx.sh TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr +TEST_GEN_PROGS += sk_so_peek_off TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += ip_local_port_range -TEST_GEN_FILES += bind_wildcard +TEST_GEN_PROGS += bind_wildcard +TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh @@ -95,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_PROGS += bpf_offload.py +# YNL files, must be before "include ..lib.mk" +EXTRA_CLEAN += $(OUTPUT)/libynl.a +YNL_GEN_FILES := ncdevmem +TEST_GEN_FILES += $(YNL_GEN_FILES) + TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh @@ -104,6 +111,10 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk +# YNL build +YNL_GENS := netdev +include ynl.mk + $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 535eb2c3d7d1..3ed3882a93b8 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -525,6 +525,29 @@ TEST_F(msg_oob, ex_oob_drop_2) } } +TEST_F(msg_oob, ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + TEST_F(msg_oob, ex_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 386ebd829df5..899dbad0104b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4304,14 +4304,7 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" declare -i nfail=0 declare -i nsuccess=0 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ac0b2c6a5761..77c83d9508d3 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -78,7 +78,12 @@ log_test() else ret=1 nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" + if [[ $rc -eq $ksft_skip ]]; then + printf "TEST: %-60s [SKIP]\n" "${msg}" + else + printf "TEST: %-60s [FAIL]\n" "${msg}" + fi + if [ "$VERBOSE" = "1" ]; then echo " rc=$rc, expected $expected" fi @@ -923,6 +928,29 @@ ipv6_grp_fcnal() ipv6_grp_refs log_test $? 0 "Nexthop group replace refcounts" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP" + rc=$? + else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_res_grp_fcnal() @@ -987,6 +1015,31 @@ ipv6_res_grp_fcnal() check_nexthop_bucket "list id 102" \ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" log_test $? 0 "Nexthop buckets updated after replace - nECMP" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(: + )type resilient buckets 32 idle_timer 0 $(: + )unbalanced_timer 0" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP unbalanced_time 0" + rc=$? + else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_fcnal_runtime() diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 7c01f58a20de..1d58b3b87465 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -35,18 +35,13 @@ log_test() local expected=$2 local msg="$3" - $IP rule show | grep -q l3mdev - if [ $? -eq 0 ]; then - msg="$msg (VRF)" - fi - if [ ${rc} -eq ${expected} ]; then nsuccess=$((nsuccess+1)) - printf "\n TEST: %-60s [ OK ]\n" "${msg}" + printf " TEST: %-60s [ OK ]\n" "${msg}" else ret=1 nfail=$((nfail+1)) - printf "\n TEST: %-60s [FAIL]\n" "${msg}" + printf " TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" @@ -56,39 +51,6 @@ log_test() fi } -log_section() -{ - echo - echo "######################################################################" - echo "TEST SECTION: $*" - echo "######################################################################" -} - -check_nettest() -{ - if which nettest > /dev/null 2>&1; then - return 0 - fi - - # Add the selftest directory to PATH if not already done - if [ "${SELFTEST_PATH}" = "" ]; then - SELFTEST_PATH="$(dirname $0)" - PATH="${PATH}:${SELFTEST_PATH}" - - # Now retry with the new path - if which nettest > /dev/null 2>&1; then - return 0 - fi - - if [ "${ret}" -eq 0 ]; then - ret="${ksft_skip}" - fi - echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" - fi - - return 1 -} - setup() { set -e @@ -187,12 +149,17 @@ fib_rule6_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local nomatch_description="$5" $IP -6 rule add $match table $RTABLE $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule6 check: $description" + $IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule6 check: $nomatch_description" + fib_rule6_del_by_pref "$match" log_test $? 0 "rule6 del by pref: $description" } @@ -213,18 +180,27 @@ fib_rule6_test_reject() fib_rule6_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv6 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink match="oif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" match="from $SRC_IP6 iif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -238,44 +214,89 @@ fib_rule6_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule6_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. + match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule6_test_match_n_redirect "$match" \ + "from $SRC_IP6 iif $DEV $getmatch" \ + "from $SRC_IP6 iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? -eq 0 ]; then match="sport 666 dport 777" - fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + getnomatch="ipproto udp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto ipv6-icmp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match" + getnomatch="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto ipv6-icmp match" \ + "ipproto ipv6-tcp no match" + fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP6 iif $DEV tos 0xfc" + getnomatch="from $SRC_IP6 iif $DEV tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" fi } fib_rule6_vrf_test() { setup_vrf - fib_rule6_test + fib_rule6_test "- with VRF" cleanup_vrf } @@ -285,10 +306,8 @@ fib_rule6_connect_test() { local dsfield - if ! check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv6 FIB rule connect tests" setup_peer $IP -6 rule add dsfield 0x04 table $RTABLE_PEER @@ -306,7 +325,45 @@ fib_rule6_connect_test() log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ + -Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)" + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -6 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp udp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp tcp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp udp no connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp tcp no connect" + + $IP -6 rule del dscp 0x3f table $RTABLE_PEER + cleanup_peer } @@ -326,12 +383,17 @@ fib_rule4_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local nomatch_description="$5" $IP rule add $match table $RTABLE $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule4 check: $description" + $IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule4 check: $nomatch_description" + fib_rule4_del_by_pref "$match" log_test $? 0 "rule4 del by pref: $description" } @@ -352,23 +414,31 @@ fib_rule4_test_reject() fib_rule4_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv4 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink match="oif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" - # need enable forwarding and disable rp_filter temporarily as all the - # addresses are in the same subnet and egress device == ingress device. + # Enable forwarding and disable rp_filter as all the addresses are in + # the same subnet and egress device == ingress device. ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -382,44 +452,90 @@ fib_rule4_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule4_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. + match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule4_test_match_n_redirect "$match" \ + "from $SRC_IP iif $DEV $getmatch" \ + "from $SRC_IP iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? -eq 0 ]; then match="sport 666 dport 777" - fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + getnomatch="ipproto udp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" \ + "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto icmp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + getnomatch="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto icmp match" \ + "ipproto tcp no match" + fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP iif $DEV tos 0xfc" + getnomatch="from $SRC_IP iif $DEV tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" fi } fib_rule4_vrf_test() { setup_vrf - fib_rule4_test + fib_rule4_test "- with VRF" cleanup_vrf } @@ -429,10 +545,8 @@ fib_rule4_connect_test() { local dsfield - if ! check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv4 FIB rule connect tests" setup_peer $IP -4 rule add dsfield 0x04 table $RTABLE_PEER @@ -450,16 +564,46 @@ fib_rule4_connect_test() log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)" + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER - cleanup_peer -} -run_fibrule_tests() -{ - log_section "IPv4 fib rule" - fib_rule4_test - log_section "IPv6 fib rule" - fib_rule6_test + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -4 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp udp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp tcp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp udp no connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp tcp no connect" + + $IP -4 rule del dscp 0x3f table $RTABLE_PEER + + cleanup_peer } ################################################################################ # usage @@ -495,6 +639,8 @@ if [ ! -x "$(command -v ip)" ]; then exit $ksft_skip fi +check_gen_prog "nettest" + # start clean cleanup &> /dev/null setup diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index 7fdb6a9ca543..a652429bfd53 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -6,7 +6,7 @@ to easily create and test complex environments. Unfortunately, these namespaces can not be used with actual switching ASICs, as their ports can not be migrated to other network namespaces -(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +(dev->netns_local) and most of them probably do not support the L1-separation provided by namespaces. However, a similar kind of flexibility can be achieved by using VRFs and diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f..90f8a244ea90 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,58 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. + RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 1783c10215e5..7d531f7091e6 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -224,10 +224,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index 9788bd0f6e8b..dda11a4a9450 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -319,10 +319,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index 2ab9eaaa5532..e28b4a079e52 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -321,10 +321,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ff96bb7535ff..c992e385159c 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -500,6 +500,11 @@ check_err_fail() fi } +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + xfail_on_slow() { if [[ $KSFT_MACHINE_SLOW = yes ]]; then @@ -509,6 +514,13 @@ xfail_on_slow() fi } +omit_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW != yes ]]; then + "$@" + fi +} + xfail_on_veth() { local dev=$1; shift @@ -1113,6 +1125,39 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ether_addr_to_u64() +{ + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done + + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i + + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done +} + ipv6_lladdr_get() { local if_name=$1 @@ -2229,3 +2274,22 @@ absval() echo $((v > 0 ? v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 4b364cdf3ef0..c35548767756 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -37,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -47,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) + + pkt="${pkt/00:00:de:ad:be:ef/$smac}" - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -68,10 +131,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +145,7 @@ check_rcv() check_err_fail "$should_fail" "$?" "reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -104,44 +168,78 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local skip_ptp=$1; shift + local no_unicast_flt=$1; shift + local test_name="$1"; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name + + if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi sleep 1 @@ -149,61 +247,99 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name \ - "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" + + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi tcpdump_cleanup $rcv_if_name } @@ -228,62 +364,217 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } -standalone() +macvlan_create() { - h1_create - h2_create + local lower=$1 - ip link add link $h2 name macvlan0 type macvlan mode private + ip link add link $lower name macvlan0 type macvlan mode private ip link set macvlan0 address $MACVLAN_ADDR ip link set macvlan0 up +} - run_test $h2 - +macvlan_destroy() +{ ip link del macvlan0 +} + +standalone() +{ + local no_unicast_flt=true + local skip_ptp=false + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_create + h2_create + macvlan_create $h2 + + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" + + macvlan_destroy h2_destroy h1_destroy } -bridge() +test_bridge() { + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + h1_create - bridge_create + bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0 - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" - run_test br0 + macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + bridge_destroy + h1_destroy +} - ip link del macvlan0 +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + +test_vlan() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" + + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=false + + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. + if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy bridge_destroy - h1_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 } cleanup() { pre_cleanup + + ip link set $h2 down + ip link set $h1 down + vrf_cleanup } diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index af3b398d13f0..9e677aa64a06 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -233,6 +233,9 @@ cleanup() { pre_cleanup + ip link set dev $swp2 down + ip link set dev $swp1 down + h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 2ba44247c60a..a7d8399c8d4f 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -40,6 +40,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test ping_ipv4_blackhole ping_ipv6_blackhole nh_stats_test_v4 @@ -226,9 +227,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -242,7 +245,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -258,9 +262,11 @@ multipath4_test() multipath6_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -275,7 +281,8 @@ multipath6_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -313,6 +320,23 @@ multipath_test() multipath6_test "Weighted MP 11:45" 11 45 } +multipath16_test() +{ + check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + multipath4_test "65535:65535" 65535 65535 + multipath4_test "128:512" 128 512 + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + log_info "Running 16-bit IPv6 multipath tests" + multipath6_test "65535:65535" 65535 65535 + multipath6_test "128:512" 128 512 + omit_on_slow \ + multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 +} + ping_ipv4_blackhole() { RET=0 diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh index 2903294d8bca..507b2852dabe 100644 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh @@ -117,3 +117,16 @@ __nh_stats_test_v6() $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2 sysctl_restore net.ipv6.fib_multipath_hash_policy } + +check_nhgw16() +{ + local nhid=$1; shift + + ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null + if (( $? )); then + log_test_skip "16-bit multipath tests" \ + "iproute2 or the kernel do not support 16-bit next hop weights" + return 1 + fi + ip nexthop del id 9999 ||: +} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index cd9e346436fc..88ddae05b39d 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -40,6 +40,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test nh_stats_test_v4 nh_stats_test_v6 " @@ -228,9 +229,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -243,7 +246,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -258,9 +262,11 @@ multipath4_test() multipath6_l4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -273,7 +279,8 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -371,6 +378,41 @@ multipath_test() ip nexthop replace id 106 group 104,1/105,1 type resilient } +multipath16_test() +{ + check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 0 + + ip nexthop replace id 103 group 101,65535/102,65535 type resilient + multipath4_test "65535:65535" 65535 65535 + + ip nexthop replace id 103 group 101,128/102,512 type resilient + multipath4_test "128:512" 128 512 + + ip nexthop replace id 103 group 101,255/102,65535 type resilient + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running 16-bit IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104/105 type resilient idle_timer 0 + + ip nexthop replace id 106 group 104,65535/105,65535 type resilient + multipath6_l4_test "65535:65535" 65535 65535 + + ip nexthop replace id 106 group 104,128/105,512 type resilient + multipath6_l4_test "128:512" 128 512 + + ip nexthop replace id 106 group 104,255/105,65535 type resilient + omit_on_slow \ + multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 106 group 104,1/105,1 type resilient +} + nh_stats_test_v4() { __nh_stats_test_v4 resilient diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index e2be354167a1..46f365b557b7 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -180,6 +180,7 @@ multipath4_test() ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -217,6 +218,7 @@ multipath6_test() $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 589629636502..ea89e558672d 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -4,7 +4,8 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ gact_trap_test mirred_egress_to_ingress_test \ - mirred_egress_to_ingress_tcp_test" + mirred_egress_to_ingress_tcp_test \ + ingress_2nd_vlan_push egress_2nd_vlan_push" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test() log_test "mirred_egress_to_ingress_tcp ($tcflags)" } +ingress_2nd_vlan_push() +{ + tc filter add dev $swp1 ingress pref 20 chain 0 handle 20 flower \ + $tcflags num_of_vlans 1 \ + action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -Q 10 -q + + tc_check_packets "dev $swp1 ingress" 30 1 + check_err $? "No double-vlan packets received" + + tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower + tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower + + log_test "ingress_2nd_vlan_push ($tcflags)" +} + +egress_2nd_vlan_push() +{ + tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \ + $tcflags num_of_vlans 0 \ + action vlan push id 10 protocol 0x8100 \ + pipe action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h1 egress" 30 1 + check_err $? "No double-vlan packets received" + + tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower + tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower + + log_test "egress_2nd_vlan_push ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 8ee4489238ca..be8707bfb46e 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -125,6 +125,21 @@ slowwait_for_counter() slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +# Check for existence of tools which are built as part of selftests +# but may also already exist in $PATH +check_gen_prog() +{ + local prog_name=$1; shift + + if ! which $prog_name >/dev/null 2>/dev/null; then + PATH=$PWD:$PATH + if ! which $prog_name >/dev/null; then + echo "'$prog_name' command not found; skipping tests" + exit $ksft_skip + fi + fi +} + remove_ns_list() { local item=$1 diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c index b9f3fc3c3426..e0a34e5e8dd5 100644 --- a/tools/testing/selftests/net/lib/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len) { struct iphdr *iph = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + uint16_t ip_len; if (len < sizeof(*iph) || iph->protocol != proto) return -1; + ip_len = ntohs(iph->tot_len); + if (ip_len > len || ip_len < sizeof(*iph)) + return -1; + + len = ip_len; iph_addr_p = &iph->saddr; if (proto == IPPROTO_TCP) return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); @@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len) { struct ipv6hdr *ip6h = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + uint16_t ip_len; if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) return -1; + ip_len = ntohs(ip6h->payload_len); + if (ip_len > len - sizeof(*ip6h)) + return -1; + + len = ip_len; iph_addr_p = &ip6h->saddr; if (proto == IPPROTO_TCP) - return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_tcp(ip6h + 1, len); else - return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_udp(ip6h + 1, len); } /* return whether auxdata includes TP_STATUS_CSUM_VALID */ diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index f26c20df9db4..477ae76de93d 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 import builtins +import functools import inspect import sys import time @@ -10,6 +11,7 @@ from .utils import global_defer_queue KSFT_RESULT = None KSFT_RESULT_ALL = True +KSFT_DISRUPTIVE = True class KsftFailEx(Exception): @@ -32,8 +34,18 @@ def _fail(*args): global KSFT_RESULT KSFT_RESULT = False - frame = inspect.stack()[2] - ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":") + stack = inspect.stack() + started = False + for frame in reversed(stack[2:]): + # Start printing from the test case function + if not started: + if frame.function == 'ksft_run': + started = True + continue + + ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) + + ", in " + frame.function + ":") + ksft_pr("Check| " + frame.code_context[0].strip()) ksft_pr(*args) @@ -43,6 +55,12 @@ def ksft_eq(a, b, comment=""): _fail("Check failed", a, "!=", b, comment) +def ksft_ne(a, b, comment=""): + global KSFT_RESULT + if a == b: + _fail("Check failed", a, "==", b, comment) + + def ksft_true(a, comment=""): if not a: _fail("Check failed", a, "does not eval to True", comment) @@ -127,6 +145,44 @@ def ksft_flush_defer(): KSFT_RESULT = False +def ksft_disruptive(func): + """ + Decorator that marks the test as disruptive (e.g. the test + that can down the interface). Disruptive tests can be skipped + by passing DISRUPTIVE=False environment variable. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not KSFT_DISRUPTIVE: + raise KsftSkipEx(f"marked as disruptive") + return func(*args, **kwargs) + return wrapper + + +def ksft_setup(env): + """ + Setup test framework global state from the environment. + """ + + def get_bool(env, name): + value = env.get(name, "").lower() + if value in ["yes", "true"]: + return True + if value in ["no", "false"]: + return False + try: + return bool(int(value)) + except: + raise Exception(f"failed to parse {name}") + + if "DISRUPTIVE" in env: + global KSFT_DISRUPTIVE + KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE") + + return env + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 7b936a926859..5d796622e730 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -11,6 +11,8 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq TEST_FILES := mptcp_lib.sh settings +TEST_INCLUDES := ../lib.sh ../net_helper.sh + EXTRA_CLEAN := *.pcap include ../../lib.mk diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 776d43a6922d..2bd0c1eb70c5 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -284,7 +284,7 @@ echo "b" | \ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 -chk_msk_nr 2 "after MPC handshake " +chk_msk_nr 2 "after MPC handshake" chk_last_time_info 10000 chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b77fb7065bfb..57325d57e4c6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -345,9 +345,11 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - local result_msg - result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" - mptcp_lib_print_title "${result_msg}" + local pretty_title + pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + mptcp_lib_print_title "${pretty_title}" + + local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}" if $capture; then local capuser @@ -431,7 +433,6 @@ do_transfer() local duration duration=$((stop-start)) - result_msg+=" # time=${duration}ms" printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" @@ -444,7 +445,7 @@ do_transfer() echo cat "$capout" - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" return 1 fi @@ -544,12 +545,12 @@ do_transfer() if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then mptcp_lib_pr_ok "${extra:1}" - mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}" else if [ -n "${extra}" ]; then mptcp_lib_print_warn "${extra:1}" fi - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" fi cat "$capout" @@ -848,6 +849,8 @@ stop_if_error() make_file "$cin" "client" make_file "$sin" "server" +mptcp_lib_subtests_last_ts_reset + check_mptcp_disabled stop_if_error "The kernel configuration is not valid for MPTCP" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9ea6d698e9d3..e8d0a01b4144 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -61,6 +61,16 @@ unset sflags unset fastclose unset fullmesh unset speed +unset join_csum_ns1 +unset join_csum_ns2 +unset join_fail_nr +unset join_rst_nr +unset join_infi_nr +unset join_corrupted_pkts +unset join_syn_tx +unset join_create_err +unset join_bind_err +unset join_connect_err # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" @@ -196,6 +206,22 @@ print_skip() mptcp_lib_pr_skip "${@}" } +# $1: check name; $2: rc +print_results() +{ + local check="${1}" + local rc=${2} + + print_check "${check}" + if [ ${rc} = ${KSFT_PASS} ]; then + print_ok + elif [ ${rc} = ${KSFT_SKIP} ]; then + print_skip + else + fail_test "see above" + fi +} + # [ $1: fail msg ] mark_as_skipped() { @@ -337,7 +363,7 @@ reset_with_checksum() local ns1_enable=$1 local ns2_enable=$2 - reset "checksum test ${1} ${2}" || return 1 + reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1 ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable @@ -420,12 +446,17 @@ reset_with_fail() fi } +start_events() +{ + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid +} + reset_with_events() { reset "${1}" || return 1 - mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid - mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid + start_events } reset_with_tcp_filter() @@ -436,9 +467,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -833,7 +865,7 @@ chk_cestab_nr() local cestab=$2 local count - print_check "cestab $cestab" + print_check "currently established: $cestab" count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") if [ -z "$count" ]; then print_skip @@ -1109,28 +1141,29 @@ chk_csum_nr() csum_ns2=${csum_ns2:1} fi - print_check "sum" + print_check "checksum server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns1" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then extra_msg+=" ns1=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || - { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns1" else print_ok fi - print_check "csum" + + print_check "checksum client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns2" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then extra_msg+=" ns2=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || - { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns2" else print_ok @@ -1147,6 +1180,8 @@ chk_fail_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 + local tx="server" + local rx="client" local extra_msg="" local allow_tx_lost=0 local allow_rx_lost=0 @@ -1154,7 +1189,8 @@ chk_fail_nr() if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi if [[ "${fail_tx}" = "-"* ]]; then @@ -1166,29 +1202,29 @@ chk_fail_nr() fail_rx=${fail_rx:1} fi - print_check "ftx" + print_check "fail tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") - if [ "$count" != "$fail_tx" ]; then - extra_msg+=",tx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then + extra_msg+=" tx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] TX expected $fail_tx" else print_ok fi - print_check "failrx" + print_check "fail rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") - if [ "$count" != "$fail_rx" ]; then - extra_msg+=",rx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then + extra_msg+=" rx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] RX expected $fail_rx" else print_ok @@ -1205,37 +1241,35 @@ chk_fclose_nr() local count local ns_tx=$ns2 local ns_rx=$ns1 - local extra_msg="" + local tx="client" + local rx="server" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns1 ns_rx=$ns2 - extra_msg="invert" + tx="server" + rx="client" fi - print_check "ctx" + print_check "fast close tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then - extra_msg+=",tx=$count" fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" else print_ok fi - print_check "fclzrx" + print_check "fast close rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then - extra_msg+=",rx=$count" fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" else print_ok fi - - print_info "$extra_msg" } chk_rst_nr() @@ -1246,15 +1280,17 @@ chk_rst_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 - local extra_msg="" + local tx="server" + local rx="client" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi - print_check "rtx" + print_check "reset tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip @@ -1266,7 +1302,7 @@ chk_rst_nr() print_ok fi - print_check "rstrx" + print_check "reset rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip @@ -1277,8 +1313,6 @@ chk_rst_nr() else print_ok fi - - print_info "$extra_msg" } chk_infi_nr() @@ -1287,7 +1321,7 @@ chk_infi_nr() local infi_rx=$2 local count - print_check "itx" + print_check "infi tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip @@ -1297,7 +1331,7 @@ chk_infi_nr() print_ok fi - print_check "infirx" + print_check "infi rx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip @@ -1308,17 +1342,66 @@ chk_infi_nr() fi } +chk_join_tx_nr() +{ + local syn_tx=${join_syn_tx:-0} + local create=${join_create_err:-0} + local bind=${join_bind_err:-0} + local connect=${join_connect_err:-0} + local rc=${KSFT_PASS} + local count + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_tx" ]; then + rc=${KSFT_FAIL} + print_check "syn tx" + fail_test "got $count JOIN[s] syn tx expected $syn_tx" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$create" ]; then + rc=${KSFT_FAIL} + print_check "syn tx create socket error" + fail_test "got $count JOIN[s] syn tx create socket error expected $create" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$bind" ]; then + rc=${KSFT_FAIL} + print_check "syn tx bind error" + fail_test "got $count JOIN[s] syn tx bind error expected $bind" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$connect" ]; then + rc=${KSFT_FAIL} + print_check "syn tx connect error" + fail_test "got $count JOIN[s] syn tx connect error expected $connect" + fi + + print_results "join Tx" ${rc} +} + chk_join_nr() { local syn_nr=$1 local syn_ack_nr=$2 local ack_nr=$3 - local csum_ns1=${4:-0} - local csum_ns2=${5:-0} - local fail_nr=${6:-0} - local rst_nr=${7:-0} - local infi_nr=${8:-0} - local corrupted_pkts=${9:-0} + local csum_ns1=${join_csum_ns1:-0} + local csum_ns2=${join_csum_ns2:-0} + local fail_nr=${join_fail_nr:-0} + local rst_nr=${join_rst_nr:-0} + local infi_nr=${join_infi_nr:-0} + local corrupted_pkts=${join_corrupted_pkts:-0} + local rc=${KSFT_PASS} local count local with_cookie @@ -1326,43 +1409,44 @@ chk_join_nr() print_info "${corrupted_pkts} corrupted pkts" fi - print_check "syn" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_nr" ]; then - fail_test "got $count JOIN[s] syn expected $syn_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "syn rx" + fail_test "got $count JOIN[s] syn rx expected $syn_nr" fi - print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_ack_nr" ]; then # simult connections exceeding the limit with cookie enabled could go up to # synack validation as the conn limit can be enforced reliably only after # the subflow creation - if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then - print_ok - else - fail_test "got $count JOIN[s] synack expected $syn_ack_nr" + if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then + rc=${KSFT_FAIL} + print_check "synack rx" + fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr" fi - else - print_ok fi - print_check "ack" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$ack_nr" ]; then - fail_test "got $count JOIN[s] ack expected $ack_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "ack rx" + fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + + print_results "join Rx" ${rc} + + join_syn_tx="${join_syn_tx:-${syn_nr}}" \ + chk_join_tx_nr + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1423,19 +1507,21 @@ chk_add_nr() local mis_ack_nr=0 local ns_tx=$ns1 local ns_rx=$ns2 - local extra_msg="" + local tx="" + local rx="" local count local timeout if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx=" client" + rx=" server" fi timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) - print_check "add" + print_check "add addr rx${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip @@ -1447,7 +1533,7 @@ chk_add_nr() print_ok fi - print_check "echo" + print_check "add addr echo rx${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip @@ -1458,7 +1544,7 @@ chk_add_nr() fi if [ $port_nr -gt 0 ]; then - print_check "pt" + print_check "add addr rx with port${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip @@ -1468,7 +1554,7 @@ chk_add_nr() print_ok fi - print_check "syn" + print_check "syn rx port${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip @@ -1479,7 +1565,7 @@ chk_add_nr() print_ok fi - print_check "synack" + print_check "synack rx port${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip @@ -1490,7 +1576,7 @@ chk_add_nr() print_ok fi - print_check "ack" + print_check "ack rx port${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip @@ -1501,7 +1587,7 @@ chk_add_nr() print_ok fi - print_check "syn" + print_check "syn rx port mismatch${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip @@ -1512,7 +1598,7 @@ chk_add_nr() print_ok fi - print_check "ack" + print_check "ack rx port mismatch${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip @@ -1523,8 +1609,6 @@ chk_add_nr() print_ok fi fi - - print_info "$extra_msg" } chk_add_tx_nr() @@ -1536,7 +1620,7 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - print_check "add TX" + print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip @@ -1548,7 +1632,7 @@ chk_add_tx_nr() print_ok fi - print_check "echo TX" + print_check "add addr echo tx" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip @@ -1568,6 +1652,8 @@ chk_rm_nr() local count local addr_ns=$ns1 local subflow_ns=$ns2 + local addr="server" + local subflow="client" local extra_msg="" shift 2 @@ -1577,16 +1663,14 @@ chk_rm_nr() shift done - if [ -z $invert ]; then - addr_ns=$ns1 - subflow_ns=$ns2 - elif [ $invert = "true" ]; then + if [ "$invert" = "true" ]; then addr_ns=$ns2 subflow_ns=$ns1 - extra_msg="invert" + addr="client" + subflow="server" fi - print_check "rm" + print_check "rm addr rx ${addr}" count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip @@ -1596,7 +1680,7 @@ chk_rm_nr() print_ok fi - print_check "rmsf" + print_check "rm subflow ${subflow}" count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip @@ -1610,7 +1694,7 @@ chk_rm_nr() count=$((count + cnt)) if [ "$count" != "$rm_subflow_nr" ]; then suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" - extra_msg+=" simult" + extra_msg="simult" fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then @@ -1631,7 +1715,7 @@ chk_rm_tx_nr() { local rm_addr_tx_nr=$1 - print_check "rm TX" + print_check "rm addr tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip @@ -1650,7 +1734,7 @@ chk_prio_nr() local mpj_syn_ack=$4 local count - print_check "ptx" + print_check "mp_prio tx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip @@ -1660,7 +1744,7 @@ chk_prio_nr() print_ok fi - print_check "prx" + print_check "mp_prio rx client" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip @@ -1903,9 +1987,11 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + join_bind_err=1 \ + chk_join_nr 0 0 0 fi # multiple subflows, with subflow creation error @@ -1917,7 +2003,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, with subflow timeout on MPJ @@ -1929,7 +2016,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, check that the endpoint corresponding to @@ -1950,7 +2038,8 @@ subflows_error_tests() # additional subflow could be created only if the PM select # the later endpoint, skipping the already used one - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi } @@ -2036,7 +2125,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 fi @@ -2204,7 +2294,8 @@ add_addr_timeout_tests() pm_nl_set_limits $ns2 2 2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 chk_add_nr 8 0 fi } @@ -2304,7 +2395,8 @@ remove_tests() pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 join_connect_err=1 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2369,7 +2461,8 @@ remove_tests() pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2939,37 +3032,16 @@ syncookies_tests() checksum_tests() { - # checksum test 0 0 - if reset_with_checksum 0 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 1 - if reset_with_checksum 1 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 0 1 - if reset_with_checksum 0 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 0 - if reset_with_checksum 1 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi + local checksum_enable + for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do + # checksum test 0 0, 1 1, 0 1, 1 0 + if reset_with_checksum ${checksum_enable}; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + done } deny_join_id0_tests() @@ -3058,6 +3130,9 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh + fi fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -3157,7 +3232,8 @@ fastclose_tests() MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 0 0 0 1 + join_rst_nr=1 \ + chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi @@ -3176,7 +3252,10 @@ fail_tests() MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=128 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" + join_csum_ns1=+1 join_csum_ns2=+0 \ + join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi @@ -3189,7 +3268,10 @@ fail_tests() pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow test_linkfail=1024 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" + join_csum_ns1=1 join_csum_ns2=0 \ + join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + chk_join_nr 1 1 1 fi } @@ -3331,6 +3413,36 @@ userspace_pm_chk_get_addr() fi } +# $1: ns ; $2: event type ; $3: count +chk_evt_nr() +{ + local ns=${1} + local evt_name="${2}" + local exp="${3}" + + local evts="${evts_ns1}" + local evt="${!evt_name}" + local count + + evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_ + [ "${ns}" == "ns2" ] && evts="${evts_ns2}" + + print_check "event ${ns} ${evt_name} (${exp})" + + if [[ "${evt_name}" = "LISTENER_"* ]] && + ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_skip "event not supported" + return + fi + + count=$(grep -cw "type:${evt}" "${evts}") + if [ "${count}" != "${exp}" ]; then + fail_test "got ${count} events, expected ${exp}" + else + print_ok + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3411,8 +3523,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3427,14 +3539,12 @@ userspace_tests() "signal" userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" - userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" \ - "id 20 flags signal 10.0.3.1" "after rm_addr 10" + "id 20 flags signal 10.0.3.1" "after rm_sf 10" userspace_pm_rm_addr $ns1 20 - userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" - chk_rm_nr 2 2 invert + chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3446,8 +3556,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3458,12 +3568,11 @@ userspace_tests() "id 20 flags subflow 10.0.3.2" \ "subflow" userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" - userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns2}" \ "" \ - "after rm_addr 20" - chk_rm_nr 1 1 + "after rm_sf 20" + chk_rm_nr 0 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3475,8 +3584,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 chk_mptcp_info subflows 0 subflows 0 @@ -3496,8 +3605,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3520,8 +3629,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3551,8 +3660,8 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3571,41 +3680,92 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid fi - if reset "delete and re-add" && + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + start_events + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 0 3 + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 & + { test_linkfail=4 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 - chk_subflow_nr "before delete" 2 + chk_subflow_nr "before delete id 2" 2 chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr "after delete" 1 + chk_subflow_nr "after delete id 2" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 + chk_subflow_nr "after re-add id 2" 2 chk_mptcp_info subflows 1 subflows 1 + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 + chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + + local i + for i in $(seq 3); do + pm_nl_del_endpoint $ns2 1 10.0.1.2 + sleep 0.5 + chk_subflow_nr "after delete id 0 ($i)" 2 + chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf + + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add id 0 ($i)" 3 + chk_mptcp_info subflows 3 subflows 3 + done + mptcp_lib_kill_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab + + join_syn_tx=7 \ + chk_join_nr 6 6 6 + chk_rm_nr 4 4 fi # remove and re-add - if reset "delete re-add signal" && + if reset_with_events "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal - test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 & + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal + { test_linkfail=4 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3615,17 +3775,91 @@ endpoint_tests() chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 sleep 0.5 chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_del_endpoint $ns1 42 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + + pm_nl_del_endpoint $ns1 99 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after re-delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + mptcp_lib_kill_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + join_connect_err=1 \ + chk_join_nr 5 5 5 + chk_add_nr 6 6 + chk_rm_nr 4 3 invert + fi + + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + { test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 - chk_mptcp_info subflows 1 subflows 1 mptcp_lib_kill_wait $tests_pid - fi + join_syn_tx=3 join_connect_err=1 \ + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert + fi } # [$1: error message] @@ -3727,9 +3961,11 @@ if [ ${#tests[@]} -eq 0 ]; then tests=("${all_tests_names[@]}") fi +mptcp_lib_subtests_last_ts_reset for subtests in "${tests[@]}"; do "${subtests}" done +append_prev_results if [ ${ret} -ne 0 ]; then echo @@ -3740,7 +3976,6 @@ if [ ${ret} -ne 0 ]; then echo fi -append_prev_results mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 438280e68434..975d4d4c862a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -12,10 +12,14 @@ readonly KSFT_SKIP=4 readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" # These variables are used in some selftests, read-only +declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED +declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED +declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED @@ -25,6 +29,7 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 MPTCP_LIB_SUBTEST_FLAKY=0 +MPTCP_LIB_SUBTESTS_LAST_TS_MS= MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" MPTCP_LIB_IP_MPTCP=0 @@ -201,6 +206,11 @@ mptcp_lib_kversion_ge() { mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" } +mptcp_lib_subtests_last_ts_reset() { + MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" +} +mptcp_lib_subtests_last_ts_reset + __mptcp_lib_result_check_duplicated() { local subtest @@ -215,13 +225,22 @@ __mptcp_lib_result_check_duplicated() { __mptcp_lib_result_add() { local result="${1}" + local time="time=" + local ts_prev_ms shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) __mptcp_lib_result_check_duplicated "${*}" - MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") + # not to add two '#' + [[ "${*}" != *"#"* ]] && time="# ${time}" + + ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" + mptcp_lib_subtests_last_ts_reset + time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") } # $1: test name diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 68899a303a1a..5e8d5b83e2d0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -349,6 +349,7 @@ init make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT +mptcp_lib_subtests_last_ts_reset run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 dead:beef:1::1 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2757378b1b13..2e6648a2b2c0 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -137,6 +137,8 @@ check() fi } +mptcp_lib_subtests_last_ts_reset + check "show_endpoints" "" "defaults addr list" default_limits="$(get_limits)" diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 7ad5a59adff2..994a556f46c1 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -19,12 +19,6 @@ #include "linux/mptcp.h" -#ifndef MPTCP_PM_NAME -#define MPTCP_PM_NAME "mptcp_pm" -#endif -#ifndef MPTCP_PM_EVENTS -#define MPTCP_PM_EVENTS "mptcp_pm_events" -#endif #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 #endif @@ -116,7 +110,7 @@ static int capture_events(int fd, int event_group) if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group, sizeof(event_group)) < 0) - error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group"); + error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { FD_ZERO(&rfds); @@ -288,7 +282,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family, if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID) *events_mcast_grp = *(__u32 *)RTA_DATA(grp); else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME && - !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS)) + !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME)) got_events_grp = 1; grp = RTA_NEXT(grp, grp_len); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f74e1c3c126d..8fa77c8e9b65 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -286,6 +286,7 @@ while getopts "bcdhi" option;do done setup +mptcp_lib_subtests_last_ts_reset run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 25 "balanced bwidth with unbalanced delay" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9cb05978269d..3651f73451cf 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -150,6 +150,7 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid server_evts=$(mktemp) mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 +mptcp_lib_subtests_last_ts_reset print_title "Init" print_test "Created network namespaces ns1, ns2" diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c new file mode 100644 index 000000000000..64d6805381c5 --- /dev/null +++ b/tools/testing/selftests/net/ncdevmem.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <linux/uio.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#define __iovec_defined +#include <fcntl.h> +#include <malloc.h> +#include <error.h> + +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#include <linux/memfd.h> +#include <linux/dma-buf.h> +#include <linux/udmabuf.h> +#include <libmnl/libmnl.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/netdev.h> +#include <time.h> +#include <net/if.h> + +#include "netdev-user.h" +#include <ynl.h> + +#define PAGE_SHIFT 12 +#define TEST_PREFIX "ncdevmem" +#define NUM_PAGES 16000 + +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif + +/* + * tcpdevmem netcat. Works similarly to netcat but does device memory TCP + * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. + * + * Usage: + * + * On server: + * ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ + * tr \\n \\0 | \ + * head -c 5G | \ + * nc <server IP> 5201 -p 5201 + * + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + */ + +static char *server_ip = "192.168.1.4"; +static char *client_ip = "192.168.1.2"; +static char *port = "5201"; +static size_t do_validation; +static int start_queue = 8; +static int num_queues = 8; +static char *ifname = "eth1"; +static unsigned int ifindex; +static unsigned int dmabuf_id; + +void print_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + int i; + + for (i = 0; i < size; i++) + printf("%02hhX ", p[i]); + printf("\n"); +} + +void print_nonzero_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + unsigned int i; + + for (i = 0; i < size; i++) + putchar(p[i]); + printf("\n"); +} + +void validate_buffer(void *line, size_t size) +{ + static unsigned char seed = 1; + unsigned char *ptr = line; + int errors = 0; + size_t i; + + for (i = 0; i < size; i++) { + if (ptr[i] != seed) { + fprintf(stderr, + "Failed validation: expected=%u, actual=%u, index=%lu\n", + seed, ptr[i], i); + errors++; + if (errors > 20) + error(1, 0, "validation failed."); + } + seed++; + if (seed == do_validation) + seed = 0; + } + + fprintf(stdout, "Validated buffer\n"); +} + +#define run_command(cmd, ...) \ + ({ \ + char command[256]; \ + memset(command, 0, sizeof(command)); \ + snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ + printf("Running: %s\n", command); \ + system(command); \ + }) + +static int reset_flow_steering(void) +{ + int ret = 0; + + ret = run_command("sudo ethtool -K %s ntuple off", ifname); + if (ret) + return ret; + + return run_command("sudo ethtool -K %s ntuple on", ifname); +} + +static int configure_headersplit(bool on) +{ + return run_command("sudo ethtool -G %s tcp-data-split %s", ifname, + on ? "on" : "off"); +} + +static int configure_rss(void) +{ + return run_command("sudo ethtool -X %s equal %d", ifname, start_queue); +} + +static int configure_channels(unsigned int rx, unsigned int tx) +{ + return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); +} + +static int configure_flow_steering(void) +{ + return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d", + ifname, client_ip, server_ip, port, port, start_queue); +} + +static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct netdev_queue_id *queues, + unsigned int n_queue_index, struct ynl_sock **ys) +{ + struct netdev_bind_rx_req *req = NULL; + struct netdev_bind_rx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_rx_req_alloc(); + netdev_bind_rx_req_set_ifindex(req, ifindex); + netdev_bind_rx_req_set_fd(req, dmabuf_fd); + __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); + + rsp = netdev_bind_rx(*ys, req); + if (!rsp) { + perror("netdev_bind_rx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + printf("got dmabuf id=%d\n", rsp->id); + dmabuf_id = rsp->id; + + netdev_bind_rx_req_free(req); + netdev_bind_rx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_rx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size) +{ + struct udmabuf_create create; + int ret; + + *devfd = open("/dev/udmabuf", O_RDWR); + if (*devfd < 0) { + error(70, 0, + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + } + + *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (*memfd < 0) + error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX); + + /* Required for udmabuf */ + ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) + error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + + ret = ftruncate(*memfd, dmabuf_size); + if (ret == -1) + error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + + memset(&create, 0, sizeof(create)); + + create.memfd = *memfd; + create.offset = 0; + create.size = dmabuf_size; + *buf = ioctl(*devfd, UDMABUF_CREATE, &create); + if (*buf < 0) + error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); +} + +int do_server(void) +{ + char ctrl_data[sizeof(int) * 20000]; + struct netdev_queue_id *queues; + size_t non_page_aligned_frags = 0; + struct sockaddr_in client_addr; + struct sockaddr_in server_sin; + size_t page_aligned_frags = 0; + int devfd, memfd, buf, ret; + size_t total_received = 0; + socklen_t client_addr_len; + bool is_devmem = false; + char *buf_mem = NULL; + struct ynl_sock *ys; + size_t dmabuf_size; + char iobuf[819200]; + char buffer[256]; + int socket_fd; + int client_fd; + size_t i = 0; + int opt = 1; + + dmabuf_size = getpagesize() * NUM_PAGES; + + create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + + if (reset_flow_steering()) + error(1, 0, "Failed to reset flow steering\n"); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "Failed to configure rss\n"); + + /* Flow steer our devmem flows to start_queue */ + if (configure_flow_steering()) + error(1, 0, "Failed to configure flow steering\n"); + + sleep(1); + + queues = malloc(sizeof(*queues) * num_queues); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED, + buf, 0); + if (buf_mem == MAP_FAILED) + error(1, 0, "mmap()"); + + server_sin.sin_family = AF_INET; + server_sin.sin_port = htons(atoi(port)); + + ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr); + if (socket < 0) + error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX); + + socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0); + if (socket < 0) + error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt)); + if (ret) + error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, + sizeof(opt)); + if (ret) + error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); + + printf("binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin_port)); + + ret = bind(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) + error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + + ret = listen(socket_fd, 1); + if (ret) + error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + + client_addr_len = sizeof(client_addr); + + inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer, + sizeof(buffer)); + printf("Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin_port)); + client_fd = accept(socket_fd, &client_addr, &client_addr_len); + + inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer, + sizeof(buffer)); + printf("Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin_port)); + + while (1) { + struct iovec iov = { .iov_base = iobuf, + .iov_len = sizeof(iobuf) }; + struct dmabuf_cmsg *dmabuf_cmsg = NULL; + struct dma_buf_sync sync = { 0 }; + struct cmsghdr *cm = NULL; + struct msghdr msg = { 0 }; + struct dmabuf_token token; + ssize_t ret; + + is_devmem = false; + printf("\n\n"); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); + printf("recvmsg ret=%ld\n", ret); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + continue; + if (ret < 0) { + perror("recvmsg"); + continue; + } + if (ret == 0) { + printf("client exited\n"); + goto cleanup; + } + + i++; + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_SOCKET || + (cm->cmsg_type != SCM_DEVMEM_DMABUF && + cm->cmsg_type != SCM_DEVMEM_LINEAR)) { + fprintf(stdout, "skipping non-devmem cmsg\n"); + continue; + } + + dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); + is_devmem = true; + + if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { + /* TODO: process data copied from skb's linear + * buffer. + */ + fprintf(stdout, + "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", + dmabuf_cmsg->frag_size); + + continue; + } + + token.token_start = dmabuf_cmsg->frag_token; + token.token_count = 1; + + total_received += dmabuf_cmsg->frag_size; + printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size, + dmabuf_cmsg->frag_token, total_received, + dmabuf_cmsg->dmabuf_id); + + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) + error(1, 0, + "received on wrong dmabuf_id: flow steering error\n"); + + if (dmabuf_cmsg->frag_size % getpagesize()) + non_page_aligned_frags++; + else + page_aligned_frags++; + + sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START; + ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + + if (do_validation) + validate_buffer( + ((unsigned char *)buf_mem) + + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + else + print_nonzero_bytes( + ((unsigned char *)buf_mem) + + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + + sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END; + ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + + ret = setsockopt(client_fd, SOL_SOCKET, + SO_DEVMEM_DONTNEED, &token, + sizeof(token)); + if (ret != 1) + error(1, 0, + "SO_DEVMEM_DONTNEED not enough tokens"); + } + if (!is_devmem) + error(1, 0, "flow steering error\n"); + + printf("total_received=%lu\n", total_received); + } + + fprintf(stdout, "%s: ok\n", TEST_PREFIX); + + fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + + fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + +cleanup: + + munmap(buf_mem, dmabuf_size); + close(client_fd); + close(socket_fd); + close(buf); + close(memfd); + close(devfd); + ynl_sock_destroy(ys); + + return 0; +} + +void run_devmem_tests(void) +{ + struct netdev_queue_id *queues; + int devfd, memfd, buf; + struct ynl_sock *ys; + size_t dmabuf_size; + size_t i = 0; + + dmabuf_size = getpagesize() * NUM_PAGES; + + create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "rss error\n"); + + queues = calloc(num_queues, sizeof(*queues)); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Binding empty queues array should have failed\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (configure_headersplit(0)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Configure dmabuf with header split off should have failed\n"); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + /* Deactivating a bound queue should not be legal */ + if (!configure_channels(num_queues, num_queues - 1)) + error(1, 0, "Deactivating a bound queue should be illegal.\n"); + + /* Closing the netlink socket does an implicit unbind */ + ynl_sock_destroy(ys); +} + +int main(int argc, char *argv[]) +{ + int is_server = 0, opt; + + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { + switch (opt) { + case 'l': + is_server = 1; + break; + case 's': + server_ip = optarg; + break; + case 'c': + client_ip = optarg; + break; + case 'p': + port = optarg; + break; + case 'v': + do_validation = atoll(optarg); + break; + case 'q': + num_queues = atoi(optarg); + break; + case 't': + start_queue = atoi(optarg); + break; + case 'f': + ifname = optarg; + break; + case '?': + printf("unknown option: %c\n", optopt); + break; + } + } + + ifindex = if_nametoindex(ifname); + + for (; optind < argc; optind++) + printf("extra arguments: %s\n", argv[optind]); + + run_devmem_tests(); + + if (is_server) + return do_server(); + + return 0; +} diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh index e3afcb424710..438f7b2acc5f 100755 --- a/tools/testing/selftests/net/netdevice.sh +++ b/tools/testing/selftests/net/netdevice.sh @@ -67,8 +67,12 @@ kci_net_setup() return $ksft_skip fi - # TODO what ipaddr to set ? DHCP ? - echo "SKIP: $netdev: set IP address" + if [ "$veth_created" ]; then + echo "XFAIL: $netdev: set IP address unsupported for veth*" + else + # TODO what ipaddr to set ? DHCP ? + echo "SKIP: $netdev: set IP address" + fi return $ksft_skip } @@ -86,7 +90,7 @@ kci_netdev_ethtool_test() ret=$? if [ $ret -ne 0 ];then if [ $ret -eq "$1" ];then - echo "SKIP: $netdev: ethtool $2 not supported" + echo "XFAIL: $netdev: ethtool $2 not supported" return $ksft_skip else echo "FAIL: $netdev: ethtool $2" @@ -124,11 +128,45 @@ kci_netdev_ethtool() return 1 fi echo "PASS: $netdev: ethtool list features" - #TODO for each non fixed features, try to turn them on/off + + while read -r FEATURE VALUE FIXED; do + [ "$FEATURE" != "Features" ] || continue # Skip "Features" + [ "$FIXED" != "[fixed]" ] || continue # Skip fixed features + feature="${FEATURE%:*}" + + ethtool --offload "$netdev" "$feature" off + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned off feature: $feature" + else + echo "FAIL: $netdev: Failed to turn off feature:" \ + "$feature" + fi + + ethtool --offload "$netdev" "$feature" on + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned on feature: $feature" + else + echo "FAIL: $netdev: Failed to turn on feature:" \ + "$feature" + fi + + #restore the feature to its initial state + ethtool --offload "$netdev" "$feature" "$VALUE" + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Restore feature $feature" \ + "to initial state $VALUE" + else + echo "FAIL: $netdev: Failed to restore feature" \ + "$feature to initial state $VALUE" + fi + + done < "$TMP_ETHTOOL_FEATURES" + rm "$TMP_ETHTOOL_FEATURES" kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev" kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev" + return 0 } @@ -196,10 +234,24 @@ if [ ! -e "$TMP_LIST_NETDEV" ];then fi ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV" + +if [ ! -s "$TMP_LIST_NETDEV" ]; then + echo "No valid network device found, creating veth pair" + ip link add veth0 type veth peer name veth1 + echo "veth0" > "$TMP_LIST_NETDEV" + veth_created=1 +fi + while read netdev do kci_test_netdev "$netdev" done < "$TMP_LIST_NETDEV" +#clean up veth interface pair if it was created +if [ "$veth_created" ]; then + ip link delete veth0 + echo "Removed veth pair" +fi + rm "$TMP_LIST_NETDEV" exit 0 diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 63ef80ef47a4..b2dd4db45215 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -87,3 +87,5 @@ CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m CONFIG_TUN=m +CONFIG_INET_DIAG=m +CONFIG_SCTP_DIAG=m diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index c61d23a8c88d..d66e3c4dfec6 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -8,7 +8,7 @@ source lib.sh ret=0 -timeout=2 +timeout=5 cleanup() { @@ -25,6 +25,9 @@ cleanup() } checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +modprobe -q sctp trap cleanup EXIT @@ -36,7 +39,9 @@ TMPFILE2=$(mktemp) TMPFILE3=$(mktemp) TMPINPUT=$(mktemp) -dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" +COUNT=200 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25 +dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT" if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: No virtual ethernet pair device support in kernel" @@ -250,45 +255,49 @@ listener_ready() test_tcp_forward() { - ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 2 & local nfqpid=$! timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & local rpid=$! busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2 ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain" + kill "$nfqpid" } test_tcp_localhost() { - dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & local rpid=$! - ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 3 & local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null wait "$rpid" && echo "PASS: tcp via loopback" - wait 2>/dev/null + kill "$nfqpid" } test_tcp_localhost_connectclose() { - ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" & - ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 3 & + local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 + timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3 + + kill "$nfqpid" wait && echo "PASS: tcp via loopback with connect/close" - wait 2>/dev/null } test_tcp_localhost_requeue() @@ -353,7 +362,7 @@ table inet filter { } } EOF - ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" & + ip netns exec "$ns1" ./nf_queue -q 1 & local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1 @@ -363,6 +372,7 @@ EOF for n in output post; do for d in tvrf eth0; do if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then + kill "$nfqpid" echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 ip netns exec "$ns1" nft list ruleset ret=1 @@ -371,8 +381,96 @@ EOF done done - wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf" - wait 2>/dev/null + kill "$nfqpid" + echo "PASS: icmp+nfqueue via vrf" +} + +sctp_listener_ready() +{ + ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345 +} + +check_output_files() +{ + local f1="$1" + local f2="$2" + local err="$3" + + if ! cmp "$f1" "$f2" ; then + echo "FAIL: $err: input and output file differ" 1>&2 + echo -n " Input file" 1>&2 + ls -l "$f1" 1>&2 + echo -n "Output file" 1>&2 + ls -l "$f2" 1>&2 + ret=1 + fi +} + +test_sctp_forward() +{ + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet sctpq { + chain forward { + type filter hook forward priority 0; policy accept; + sctp dport 12345 queue num 10 + } +} +EOF + timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$nsrouter" ./nf_queue -q 10 -G & + local nfqpid=$! + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward" +} + +test_sctp_output() +{ + ip netns exec "$ns1" nft -f /dev/stdin <<EOF +table inet sctpq { + chain output { + type filter hook output priority 0; policy accept; + sctp dport 12345 queue num 11 + } +} +EOF + # reduce test file size, software segmentation causes sk wmem increase. + dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT" + + timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$ns1" ./nf_queue -q 11 & + local nfqpid=$! + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$ns1" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + # must wait before checking completeness of output file. + wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output" } test_queue_removal() @@ -388,7 +486,7 @@ table ip filter { } } EOF - ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 & local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 @@ -443,11 +541,16 @@ test_queue 10 # same. We queue to a second program as well. load_ruleset "filter2" 20 test_queue 20 +ip netns exec "$ns1" nft flush ruleset test_tcp_forward test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue +test_sctp_forward +test_sctp_output + +# should be last, adds vrf device in ns1 and changes routes test_icmp_vrf test_queue_removal diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile new file mode 100644 index 000000000000..31cfb666ba8b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := ksft_runner.sh \ + defaults.sh \ + set_sysctls.py \ + ../../kselftest/ktap_helpers.sh + +TEST_PROGS := $(wildcard *.pkt) + +include ../../lib.mk diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config new file mode 100644 index 000000000000..0237ed98f3c0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/config @@ -0,0 +1,11 @@ +CONFIG_IPV6=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_NET_NS=y +CONFIG_NET_SCH_FIFO=y +CONFIG_NET_SCH_FQ=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYN_COOKIES=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_MD5SIG=y +CONFIG_TUN=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh new file mode 100755 index 000000000000..1095a7b22f44 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Set standard production config values that relate to TCP behavior. + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP timestamps. +sysctl -q net.ipv4.tcp_timestamps=1 + +# TCP SYN(ACK) retry thresholds +sysctl -q net.ipv4.tcp_syn_retries=5 +sysctl -q net.ipv4.tcp_synack_retries=5 + +# TCP Forward RTO-Recovery, RFC 5682. +sysctl -q net.ipv4.tcp_frto=2 + +# TCP Selective Acknowledgements (SACK) +sysctl -q net.ipv4.tcp_sack=1 + +# TCP Duplicate Selective Acknowledgements (DSACK) +sysctl -q net.ipv4.tcp_dsack=1 + +# TCP FACK (Forward Acknowldgement) +sysctl -q net.ipv4.tcp_fack=0 + +# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery). +sysctl -q net.ipv4.tcp_reordering=3 + +# TCP congestion control. +sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP RACK and TLP. +sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1 + +# TCP method for deciding when to defer sending to accumulate big TSO packets. +sysctl -q net.ipv4.tcp_tso_win_divisor=3 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 +sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 + +sysctl -q net.ipv4.tcp_syncookies=1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. +tc qdisc add dev tun0 root pfifo + diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh new file mode 100755 index 000000000000..7478c0c0c9aa --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" + +readonly ipv4_args=('--ip_version=ipv4 ' + '--local_ip=192.168.0.1 ' + '--gateway_ip=192.168.0.1 ' + '--netmask_ip=255.255.0.0 ' + '--remote_ip=192.0.2.1 ' + '-D CMSG_LEVEL_IP=SOL_IP ' + '-D CMSG_TYPE_RECVERR=IP_RECVERR ') + +readonly ipv6_args=('--ip_version=ipv6 ' + '--mtu=1520 ' + '--local_ip=fd3d:0a0b:17d6::1 ' + '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' + '--remote_ip=fd3d:fa7b:d17d::1 ' + '-D CMSG_LEVEL_IP=SOL_IPV6 ' + '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') + +if [ $# -ne 1 ]; then + ktap_exit_fail_msg "usage: $0 <script>" + exit "$KSFT_FAIL" +fi +script="$1" + +if [ -z "$(which packetdrill)" ]; then + ktap_skip_all "packetdrill not found in PATH" + exit "$KSFT_SKIP" +fi + +ktap_print_header +ktap_set_plan 2 + +unshare -n packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \ + && ktap_test_pass "ipv4" || ktap_test_fail "ipv4" +unshare -n packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \ + && ktap_test_pass "ipv6" || ktap_test_fail "ipv6" + +ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py new file mode 100755 index 000000000000..5ddf456ae973 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Sets sysctl values and writes a file that restores them. + +The arguments are of the form "<proc-file>=<val>" separated by spaces. +The program first reads the current value of the proc-file and creates +a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which +restores the values when executed. It then sets the new values. + +PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt +file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh` +at the end. +""" + +import os +import subprocess +import sys + +filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID'] + +# Open file for restoring sysctl values +restore_file = open(filename, 'w') +print('#!/bin/bash', file=restore_file) + +for a in sys.argv[1:]: + sysctl = a.split('=') + # sysctl[0] contains the proc-file name, sysctl[1] the new value + + # read current value and add restore command to file + cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True) + print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file) + + # set new value + cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0]) + os.system(cmd) + +os.system('chmod u+x %s' % filename) diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt new file mode 100644 index 000000000000..df49c67645ac --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the client side. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Connect to the server and enable TCP_INQ. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700> + +// We read 1K and we should have 9K ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 1000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=9000}]}, 0) = 1000 +// We read 9K and we should have no further data ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 9000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 9000 + +// Server sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700> + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive EOF. + +0 read(3, ..., 2000) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt new file mode 100644 index 000000000000..04a5e2590c62 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the server side. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + +// Accept the connection and enable TCP_INQ. + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0 + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 + +// We read 2K and we should have 8K ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=8000}]}, 0) = 2000 +// We read 8K and we should have no further data ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 8000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 8000 +// Client sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive error. + +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected) diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt new file mode 100644 index 000000000000..25dfef95d3f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test what happens when client does not provide MD5 on SYN, +// but then does on the ACK that completes the three-way handshake. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Ooh, weird: client provides MD5 option on the ACK: + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +// The TCP listener refcount should be 2, but on buggy kernels it can be 0: + +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"` + +// Now here comes the legit ACK: + +.01 < . 1:1(0) ack 1 win 514 + +// Make sure the connection is OK: + +0 accept(3, ..., ...) = 4 + + +.01 write(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt new file mode 100644 index 000000000000..795c476d222d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every packet. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 1001 win 257 + +0 > P. 10001:12001(2000) ack 1 + + +0 < . 1:1(0) ack 2001 win 257 + +0 > P. 12001:14001(2000) ack 1 + ++.005 < . 1:1(0) ack 3001 win 257 + +0 > P. 14001:16001(2000) ack 1 + + +0 < . 1:1(0) ack 4001 win 257 + +0 > P. 16001:18001(2000) ack 1 + ++.005 < . 1:1(0) ack 5001 win 257 + +0 > P. 18001:20001(2000) ack 1 + + +0 < . 1:1(0) ack 6001 win 257 + +0 > P. 20001:22001(2000) ack 1 + ++.005 < . 1:1(0) ack 7001 win 257 + +0 > P. 22001:24001(2000) ack 1 + + +0 < . 1:1(0) ack 8001 win 257 + +0 > P. 24001:26001(2000) ack 1 + ++.005 < . 1:1(0) ack 9001 win 257 + +0 > P. 26001:28001(2000) ack 1 + + +0 < . 1:1(0) ack 10001 win 257 + +0 > P. 28001:30001(2000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt new file mode 100644 index 000000000000..9212ae1fd0f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, and not big enough to bump up cwnd. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 5 packets. + +0 write(4, ..., 5000) = 5000 + +0 > P. 1:5001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 5001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt new file mode 100644 index 000000000000..416c901ddf51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, but still big enough that in slow +// start we want to increase our cwnd a little. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 6 packets. + +0 write(4, ..., 6000) = 6000 + +0 > P. 1:6001(6000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt new file mode 100644 index 000000000000..a894b7d4559c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 > P. 10001:14001(4000) ack 1 + ++.005 < . 1:1(0) ack 4001 win 257 + +0 > P. 14001:18001(4000) ack 1 + ++.005 < . 1:1(0) ack 6001 win 257 + +0 > P. 18001:22001(4000) ack 1 + ++.005 < . 1:1(0) ack 8001 win 257 + +0 > P. 22001:26001(4000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt new file mode 100644 index 000000000000..065fae9e9abd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver sends one ACK per 4 packets. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.11 < . 1:1(0) ack 4001 win 257 + +0 > P. 10001:18001(8000) ack 1 + + +.01 < . 1:1(0) ack 8001 win 257 + +0 > P. 18001:26001(8000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt new file mode 100644 index 000000000000..11b213be1138 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after idle +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 511 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +2 write(4, ..., 20000) = 20000 +// If slow start after idle works properly, we should send 5 MSS here (cwnd/2) + +0 > P. 26001:31001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt new file mode 100644 index 000000000000..577ed8c8852c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after window update +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 0 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +0 write(4, ..., 20000) = 20000 +// 1st win0 probe ++.3~+.310 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 2nd win0 probe ++.6~+.620 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 3rd win0 probe ++1.2~+1.240 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +.9 < . 1:1(0) ack 26001 win 511 + +0 > P. 26001:31001(5000) ack 1 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt new file mode 100644 index 000000000000..869f32c35a2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we don't fully use our cwnd but instead +// send just 9 packets, then cwnd should grow to twice that +// value, or 18 packets. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 9000) = 9000 + +0 > P. 1:9001(9000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 9001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt new file mode 100644 index 000000000000..0f77b7955db6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we send exactly 10 packets then cwnd should grow to 20. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 10001 win 257 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt new file mode 100644 index 000000000000..7e9c83d617c2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow, even if TSQ triggers. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Note we use FQ/pacing to check if TCP Small Queues is not hurting + +`./defaults.sh +tc qdisc replace dev tun0 root fq +sysctl -q net/ipv4/tcp_pacing_ss_ratio=200 +sysctl -e -q net.ipv4.tcp_min_tso_segs=2` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 500 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 40000) = 40000 +// This might change if we cook the initial packet with 10 MSS. + +0 > P. 1:2921(2920) ack 1 + +0 > P. 2921:5841(2920) ack 1 + +0 > P. 5841:8761(2920) ack 1 + +0 > P. 8761:11681(2920) ack 1 + +0 > P. 11681:14601(2920) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2921 win 500 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + +// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts" +// FQ notices that this packet missed the 'time to send next packet' computed +// when prior packet (11681:14601(2920)) was sent. +// So FQ will allow following packet to be sent a bit earlier (quantum/2) +// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit) + +0 > P. 14601:17521(2920) ack 1 + ++.003 < . 1:1(0) ack 5841 win 500 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.001 > P. 17521:20441(2920) ack 1 + ++.001 < . 1:1(0) ack 8761 win 500 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + +// remaining packets are delivered at a constant rate. ++.007 > P. 20441:23361(2920) ack 1 + ++.002 < . 1:1(0) ack 11681 win 500 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% ++.001 < . 1:1(0) ack 14601 win 500 + ++.004 > P. 23361:26281(2920) ack 1 + ++.007 > P. 26281:29201(2920) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt new file mode 100644 index 000000000000..a82c8899d36b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +// basic zerocopy test: +// +// send a packet with MSG_ZEROCOPY and receive the notification ID +// repeat and verify IDs are consecutive + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt new file mode 100644 index 000000000000..c01915e7f4a1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// batch zerocopy test: +// +// send multiple packets, then read one range of all notifications. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt new file mode 100644 index 000000000000..6509882932e9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Minimal client-side zerocopy test + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt new file mode 100644 index 000000000000..2cd78755cb2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// send with MSG_ZEROCOPY on a non-established socket +// +// verify that a send in state TCP_CLOSE correctly aborts the zerocopy +// operation, specifically it does not increment the zerocopy counter. +// +// First send on a closed socket and wait for (absent) notification. +// Then connect and send and verify that notification nr. is zero. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe) + + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt new file mode 100644 index 000000000000..7671c20e01cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt new file mode 100644 index 000000000000..fadc480fdb7f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. this tests verify that the same behavior is +// maintained when we have EPOLLEXCLUSIVE. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt new file mode 100644 index 000000000000..5bfa0d1d2f4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// This is a test to confirm that EPOLLERR is only fired once for an FD when +// EPOLLONESHOT is set. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLONESHOT is set. send another packet +// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and +// confirm that EPOLLERR is correctly set. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive no EPOLLERR for the third send above. + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + +// rearm the FD and verify the EPOLLERR is fired again. + +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt new file mode 100644 index 000000000000..4a73bbf46961 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen client zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. +// +// Fastopen requires a stored cookie. Create two sockets. The first +// one will have no data in the initial send. On return 0 the +// zerocopy notification counter is not incremented. Verify this too. + +`./defaults.sh` + +// Send a FastOpen request, no cookie yet so no data in SYN + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop> + +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000> + +// Read from error queue: no zerocopy notification + +1 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +.01 close(3) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000> + +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001> + +// Send another Fastopen request, now SYN will have data + +.07 `sysctl -q net.ipv4.tcp_timestamps=0` + +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 + +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500 + +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop> + +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6> + +0 > . 501:501(0) ack 1 + +// Read from error queue: now has first zerocopy notification + +0.5 recvmsg(5, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt new file mode 100644 index 000000000000..36086c5877ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen server zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` + +// Set up a TFO server listening socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +// Client sends a SYN with data. + +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +// Server accepts and replies with data. ++.005 accept(3, ..., ...) = 4 + +0 read(4, ..., 1024) = 1000 + +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000 + +0 > P. 1:1001(1000) ack 1001 + +.05 < . 1001:1001(0) ack 1001 win 32792 + +// Read from error queue: now has first zerocopy notification + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt new file mode 100644 index 000000000000..672f817faca0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +// tcp_MAX_SKB_FRAGS test +// +// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will +// 1) fit in a single packet without zerocopy +// 2) spill over into a second packet with zerocopy, +// because each iovec element becomes a frag +// 3) the PSH bit is set on an skb when it runs out of fragments + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + // Each pinned zerocopy page is fully accounted to skb->truesize. + // This test generates a worst case packet with each frag storing + // one byte, but increasing truesize with a page (64KB on PPC). + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send an iov of 18 elements: just becomes a linear skb + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, 0) = 18 + + +0 > P. 1:19(18) ack 1 + +0 < . 1:1(0) ack 19 win 257 + + // send a zerocopy iov of 18 elements: + +1 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 18 + + // verify that it is split in one skb of 17 frags + 1 of 1 frag + // verify that both have the PSH bit set + +0 > P. 19:36(17) ack 1 + +0 < . 1:1(0) ack 36 win 257 + + +0 > P. 36:37(1) ack 1 + +0 < . 1:1(0) ack 37 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send a zerocopy iov of 64 elements: + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 64 + + // verify that it is split in skbs with 17 frags + +0 > P. 37:54(17) ack 1 + +0 < . 1:1(0) ack 54 win 257 + + +0 > P. 54:71(17) ack 1 + +0 < . 1:1(0) ack 71 win 257 + + +0 > P. 71:88(17) ack 1 + +0 < . 1:1(0) ack 88 win 257 + + +0 > P. 88:101(13) ack 1 + +0 < . 1:1(0) ack 101 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt new file mode 100644 index 000000000000..a9a1ac0aea4f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// small packet zerocopy test: +// +// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy +// packets of all sizes, including the smallest payload, 1B. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send 1B + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 1:2(1) ack 1 + +0 < . 1:1(0) ack 2 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send 1B again + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 2:3(1) ack 1 + +0 < . 1:1(0) ack 3 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 5175c0c83a23..569bce8b6383 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -681,13 +681,7 @@ setup_xfrm() { } setup_nettest_xfrm() { - if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - return 1 - fi - fi + check_gen_prog "nettest" [ ${1} -eq 6 ] && proto="-6" || proto="" port=${2} @@ -1447,7 +1441,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} - [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 + [ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1 done rm -f "$tmpoutfile" diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 1a736f700be4..4f31e92ebd96 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -165,9 +165,9 @@ static void sock_fanout_set_ebpf(int fd) attr.insns = (unsigned long) prog; attr.insn_cnt = ARRAY_SIZE(prog); attr.license = (unsigned long) "GPL"; - attr.log_buf = (unsigned long) log_buf, - attr.log_size = sizeof(log_buf), - attr.log_level = 1, + attr.log_buf = (unsigned long) log_buf; + attr.log_size = sizeof(log_buf); + attr.log_level = 1; pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); if (pfd < 0) { diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile new file mode 100644 index 000000000000..da9714bc7aad --- /dev/null +++ b/tools/testing/selftests/net/rds/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +all: + @echo mk_build_dir="$(shell pwd)" > include.sh + +TEST_PROGS := run.sh \ + include.sh \ + test.py + +EXTRA_CLEAN := /tmp/rds_logs + +include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt new file mode 100644 index 000000000000..cbde2951ab13 --- /dev/null +++ b/tools/testing/selftests/net/rds/README.txt @@ -0,0 +1,41 @@ +RDS self-tests +============== + +These scripts provide a coverage test for RDS-TCP by creating two +network namespaces and running rds packets between them. A loopback +network is provisioned with optional probability of packet loss or +corruption. A workload of 50000 hashes, each 64 characters in size, +are passed over an RDS socket on this test network. A passing test means +the RDS-TCP stack was able to recover properly. The provided config.sh +can be used to compile the kernel with the necessary gcov options. The +kernel may optionally be configured to omit the coverage report as well. + +USAGE: + run.sh [-d logdir] [-l packet_loss] [-c packet_corruption] + [-u packet_duplcate] + +OPTIONS: + -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs + + -l Simulates a percentage of packet loss + + -c Simulates a percentage of packet corruption + + -u Simulates a percentage of packet duplication. + +EXAMPLE: + + # Create a suitable gcov enabled .config + tools/testing/selftests/net/rds/config.sh -g + + # Alternatly create a gcov disabled .config + tools/testing/selftests/net/rds/config.sh + + # build the kernel + vng --build --config tools/testing/selftests/net/config + + # launch the tests in a VM + vng -v --rwdir ./ --run . --user root --cpus 4 -- \ + "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh" + +An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/. diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh new file mode 100755 index 000000000000..791c8dbe1095 --- /dev/null +++ b/tools/testing/selftests/net/rds/config.sh @@ -0,0 +1,53 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u +set -x + +unset KBUILD_OUTPUT + +GENERATE_GCOV_REPORT=0 +while getopts "g" opt; do + case ${opt} in + g) + GENERATE_GCOV_REPORT=1 + ;; + :) + echo "USAGE: config.sh [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + +CONF_FILE="tools/testing/selftests/net/config" + +# no modules +scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES + +# enable RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + # instrument RDS and only RDS + scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS +else + scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS +fi + +# need network namespaces to run tests with veth network interfaces +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS +scripts/config --file "$CONF_FILE" --enable CONFIG_VETH + +# simulate packet loss +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM + diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh new file mode 100755 index 000000000000..8aee244f582a --- /dev/null +++ b/tools/testing/selftests/net/rds/run.sh @@ -0,0 +1,224 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u + +unset KBUILD_OUTPUT + +current_dir="$(realpath "$(dirname "$0")")" +build_dir="$current_dir" + +build_include="$current_dir/include.sh" +if test -f "$build_include"; then + # this include will define "$mk_build_dir" as the location the test was + # built. We will need this if the tests are installed in a location + # other than the kernel source + + source "$build_include" + build_dir="$mk_build_dir" +fi + +# This test requires kernel source and the *.gcda data therein +# Locate the top level of the kernel source, and the net/rds +# subfolder with the appropriate *.gcno object files +ksrc_dir="$(realpath "$build_dir"/../../../../../)" +kconfig="$ksrc_dir/.config" +obj_dir="$ksrc_dir/net/rds" + +GCOV_CMD=gcov + +#check to see if the host has the required packages to generate a gcov report +check_gcov_env() +{ + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find gcov. " + GENERATE_GCOV_REPORT=0 + return + fi + + # the gcov version must match the gcc version + GCC_VER=$(gcc -dumpfullversion) + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + #attempt to find a matching gcov version + GCOV_CMD=gcov-$(gcc -dumpversion) + + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + return + fi + + #recheck version number of found gcov executable + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \ + awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + else + echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD" + fi + fi +} + +# Check to see if the kconfig has the required configs to generate a coverage report +check_gcov_conf() +{ + if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + + if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then + echo "To enable gcov reports, please run "\ + "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel" + else + # if we have the required kernel configs, proceed to check the environment to + # ensure we have the required gcov packages + check_gcov_env + fi +} + +# Kselftest framework requirement - SKIP code is 4. +check_conf_enabled() { + if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 enabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf_disabled() { + if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 disabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf() { + check_conf_enabled CONFIG_NET_SCH_NETEM + check_conf_enabled CONFIG_VETH + check_conf_enabled CONFIG_NET_NS + check_conf_enabled CONFIG_RDS_TCP + check_conf_enabled CONFIG_RDS + check_conf_disabled CONFIG_MODULES +} + +check_env() +{ + if ! test -d "$obj_dir"; then + echo "selftests: [SKIP] This test requires a kernel source tree" + exit 4 + fi + if ! test -e "$kconfig"; then + echo "selftests: [SKIP] This test requires a configured kernel source tree" + exit 4 + fi + if ! which strace > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without strace" + exit 4 + fi + if ! which tcpdump > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without tcpdump" + exit 4 + fi + + if ! which python3 > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without python3" + exit 4 + fi + + python_major=$(python3 -c "import sys; print(sys.version_info[0])") + python_minor=$(python3 -c "import sys; print(sys.version_info[1])") + if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then + echo "selftests: [SKIP] Could not run test without at least python3.9" + python3 -V + exit 4 + fi +} + +LOG_DIR="$current_dir"/rds_logs +PLOSS=0 +PCORRUPT=0 +PDUP=0 +GENERATE_GCOV_REPORT=1 +while getopts "d:l:c:u:" opt; do + case ${opt} in + d) + LOG_DIR=${OPTARG} + ;; + l) + PLOSS=${OPTARG} + ;; + c) + PCORRUPT=${OPTARG} + ;; + u) + PDUP=${OPTARG} + ;; + :) + echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \ + "[-u packet_duplcate] [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + + +check_env +check_conf +check_gcov_conf + + +rm -fr "$LOG_DIR" +TRACE_FILE="${LOG_DIR}/rds-strace.txt" +COVR_DIR="${LOG_DIR}/coverage/" +mkdir -p "$LOG_DIR" +mkdir -p "$COVR_DIR" + +set +e +echo running RDS tests... +echo Traces will be logged to "$TRACE_FILE" +rm -f "$TRACE_FILE" +strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \ + -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" + +test_rc=$? +dmesg > "${LOG_DIR}/dmesg.out" + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + echo saving coverage data... + (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \ + while read -r f + do + cat < "/sys/kernel/debug/gcov/$f" > "/$f" + done) + + echo running gcovr... + gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \ + -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/" +else + echo "Coverage report will be skipped" +fi + +if [ "$test_rc" -eq 0 ]; then + echo "PASS: Test completed successfully" +else + echo "FAIL: Test failed" +fi + +exit "$test_rc" diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py new file mode 100644 index 000000000000..e6bb109bcead --- /dev/null +++ b/tools/testing/selftests/net/rds/test.py @@ -0,0 +1,262 @@ +#! /usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import argparse +import ctypes +import errno +import hashlib +import os +import select +import signal +import socket +import subprocess +import sys +import atexit +from pwd import getpwuid +from os import stat +from lib.py import ip + + +libc = ctypes.cdll.LoadLibrary('libc.so.6') +setns = libc.setns + +net0 = 'net0' +net1 = 'net1' + +veth0 = 'veth0' +veth1 = 'veth1' + +# Helper function for creating a socket inside a network namespace. +# We need this because otherwise RDS will detect that the two TCP +# sockets are on the same interface and use the loop transport instead +# of the TCP transport. +def netns_socket(netns, *args): + u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + + child = os.fork() + if child == 0: + # change network namespace + with open(f'/var/run/netns/{netns}') as f: + try: + ret = setns(f.fileno(), 0) + except IOError as e: + print(e.errno) + print(e) + + # create socket in target namespace + s = socket.socket(*args) + + # send resulting socket to parent + socket.send_fds(u0, [], [s.fileno()]) + + sys.exit(0) + + # receive socket from child + _, s, _, _ = socket.recv_fds(u1, 0, 1) + os.waitpid(child, 0) + u0.close() + u1.close() + return socket.fromfd(s[0], *args) + +def signal_handler(sig, frame): + print('Test timed out') + sys.exit(1) + +#Parse out command line arguments. We take an optional +# timeout parameter and an optional log output folder +parser = argparse.ArgumentParser(description="init script args", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-d", "--logdir", action="store", + help="directory to store logs", default="/tmp") +parser.add_argument('--timeout', help="timeout to terminate hung test", + type=int, default=0) +parser.add_argument('-l', '--loss', help="Simulate tcp packet loss", + type=int, default=0) +parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption", + type=int, default=0) +parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", + type=int, default=0) +args = parser.parse_args() +logdir=args.logdir +packet_loss=str(args.loss)+'%' +packet_corruption=str(args.corruption)+'%' +packet_duplicate=str(args.duplicate)+'%' + +ip(f"netns add {net0}") +ip(f"netns add {net1}") +ip(f"link add type veth") + +addrs = [ + # we technically don't need different port numbers, but this will + # help identify traffic in the network analyzer + ('10.0.0.1', 10000), + ('10.0.0.2', 20000), +] + +# move interfaces to separate namespaces so they can no longer be +# bound directly; this prevents rds from switching over from the tcp +# transport to the loop transport. +ip(f"link set {veth0} netns {net0} up") +ip(f"link set {veth1} netns {net1} up") + + + +# add addresses +ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}") +ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}") + +# add routes +ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}") +ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}") + +# sanity check that our two interfaces/addresses are correctly set up +# and communicating by doing a single ping +ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}") + +# Start a packet capture on each network +for net in [net0, net1]: + tcpdump_pid = os.fork() + if tcpdump_pid == 0: + pcap = logdir+'/'+net+'.pcap' + subprocess.check_call(['touch', pcap]) + user = getpwuid(stat(pcap).st_uid).pw_name + ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}") + sys.exit(0) + +# simulate packet loss, duplication and corruption +for net, iface in [(net0, veth0), (net1, veth1)]: + ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ + corrupt {packet_corruption} loss {packet_loss} duplicate \ + {packet_duplicate}") + +# add a timeout +if args.timeout > 0: + signal.alarm(args.timeout) + signal.signal(signal.SIGALRM, signal_handler) + +sockets = [ + netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET), +] + +for s, addr in zip(sockets, addrs): + s.bind(addr) + s.setblocking(0) + +fileno_to_socket = { + s.fileno(): s for s in sockets +} + +addr_to_socket = { + addr: s for addr, s in zip(addrs, sockets) +} + +socket_to_addr = { + s: addr for addr, s in zip(addrs, sockets) +} + +send_hashes = {} +recv_hashes = {} + +ep = select.epoll() + +for s in sockets: + ep.register(s, select.EPOLLRDNORM) + +n = 50000 +nr_send = 0 +nr_recv = 0 + +while nr_send < n: + # Send as much as we can without blocking + print("sending...", nr_send, nr_recv) + while nr_send < n: + send_data = hashlib.sha256( + f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') + + # pseudo-random send/receive pattern + sender = sockets[nr_send % 2] + receiver = sockets[1 - (nr_send % 3) % 2] + + try: + sender.sendto(send_data, socket_to_addr[receiver]) + send_hashes.setdefault((sender.fileno(), receiver.fileno()), + hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8')) + nr_send = nr_send + 1 + except BlockingIOError as e: + break + except OSError as e: + if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]: + break + raise + + # Receive as much as we can without blocking + print("receiving...", nr_send, nr_recv) + while nr_recv < nr_send: + for fileno, eventmask in ep.poll(): + receiver = fileno_to_socket[fileno] + + if eventmask & select.EPOLLRDNORM: + while True: + try: + recv_data, address = receiver.recvfrom(1024) + sender = addr_to_socket[address] + recv_hashes.setdefault((sender.fileno(), + receiver.fileno()), hashlib.sha256()).update( + f'<{recv_data}>'.encode('utf-8')) + nr_recv = nr_recv + 1 + except BlockingIOError as e: + break + + # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() + for net in [net0, net1]: + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") + +print("done", nr_send, nr_recv) + +# the Python socket module doesn't know these +RDS_INFO_FIRST = 10000 +RDS_INFO_LAST = 10017 + +nr_success = 0 +nr_error = 0 + +for s in sockets: + for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1): + # Sigh, the Python socket module doesn't allow us to pass + # buffer lengths greater than 1024 for some reason. RDS + # wants multiple pages. + try: + s.getsockopt(socket.SOL_RDS, optname, 1024) + nr_success = nr_success + 1 + except OSError as e: + nr_error = nr_error + 1 + if e.errno == errno.ENOSPC: + # ignore + pass + +print(f"getsockopt(): {nr_success}/{nr_error}") + +print("Stopping network packet captures") +subprocess.check_call(['killall', '-q', 'tcpdump']) + +# We're done sending and receiving stuff, now let's check if what +# we received is what we sent. +for (sender, receiver), send_hash in send_hashes.items(): + recv_hash = recv_hashes.get((sender, receiver)) + + if recv_hash is None: + print("FAIL: No data received") + sys.exit(1) + + if send_hash.hexdigest() != recv_hash.hexdigest(): + print("FAIL: Send/recv mismatch") + print("hash expected:", send_hash.hexdigest()) + print("hash received:", recv_hash.hexdigest()) + sys.exit(1) + + print(f"{sender}/{receiver}: ok") + +print("Success") +sys.exit(0) diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 9eb42570294d..16ac4df55fdb 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = { SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE), + SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER), + SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE), }; static struct socket_type socket_types[] = { @@ -98,6 +100,22 @@ static struct test_case test_cases[] = { {} }, { + { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + { .swtstamp = true } + }, + { { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, { .swtstamp = true } diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c new file mode 100644 index 000000000000..d87dd8d8d491 --- /dev/null +++ b/tools/testing/selftests/net/sk_so_peek_off.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include "../kselftest.h" + +static char *afstr(int af, int proto) +{ + if (proto == IPPROTO_TCP) + return af == AF_INET ? "TCP/IPv4" : "TCP/IPv6"; + else + return af == AF_INET ? "UDP/IPv4" : "UDP/IPv6"; +} + +int sk_peek_offset_probe(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM); + int optv = 0; + int ret = 0; + int s; + + s = socket(af, type, proto); + if (s < 0) { + ksft_perror("Temporary TCP socket creation failed"); + } else { + if (!setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(int))) + ret = 1; + else + printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto)); + close(s); + } + return ret; +} + +static void sk_peek_offset_set(int s, int offset) +{ + if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset))) + ksft_perror("Failed to set SO_PEEK_OFF value\n"); +} + +static int sk_peek_offset_get(int s) +{ + int offset; + socklen_t len = sizeof(offset); + + if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len)) + ksft_perror("Failed to get SO_PEEK_OFF value\n"); + return offset; +} + +static int sk_peek_offset_test(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM); + union { + struct sockaddr sa; + struct sockaddr_in a4; + struct sockaddr_in6 a6; + } a; + int res = 0; + int s[2] = {0, 0}; + int recv_sock = 0; + int offset = 0; + ssize_t len; + char buf[2]; + + memset(&a, 0, sizeof(a)); + a.sa.sa_family = af; + + s[0] = recv_sock = socket(af, type, proto); + s[1] = socket(af, type, proto); + + if (s[0] < 0 || s[1] < 0) { + ksft_perror("Temporary socket creation failed\n"); + goto out; + } + if (bind(s[0], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket bind() failed\n"); + goto out; + } + if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) { + ksft_perror("Temporary socket getsockname() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) { + ksft_perror("Temporary socket listen() failed\n"); + goto out; + } + if (connect(s[1], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket connect() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP) { + recv_sock = accept(s[0], NULL, NULL); + if (recv_sock <= 0) { + ksft_perror("Temporary socket accept() failed\n"); + goto out; + } + } + + /* Some basic tests of getting/setting offset */ + offset = sk_peek_offset_get(recv_sock); + if (offset != -1) { + ksft_perror("Initial value of socket offset not -1\n"); + goto out; + } + sk_peek_offset_set(recv_sock, 0); + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Failed to set socket offset to 0\n"); + goto out; + } + + /* Transfer a message */ + if (send(s[1], (char *)("ab"), 2, 0) != 2) { + ksft_perror("Temporary probe socket send() failed\n"); + goto out; + } + /* Read first byte */ + len = recv(recv_sock, buf, 1, MSG_PEEK); + if (len != 1 || buf[0] != 'a') { + ksft_perror("Failed to read first byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 1) { + ksft_perror("Offset not forwarded correctly at first byte\n"); + goto out; + } + /* Try to read beyond last byte */ + len = recv(recv_sock, buf, 2, MSG_PEEK); + if (len != 1 || buf[0] != 'b') { + ksft_perror("Failed to read last byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 2) { + ksft_perror("Offset not forwarded correctly at last byte\n"); + goto out; + } + /* Flush message */ + len = recv(recv_sock, buf, 2, MSG_TRUNC); + if (len != 2) { + ksft_perror("Failed to flush message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Offset not reverted correctly after flush\n"); + goto out; + } + + printf("%s with MSG_PEEK_OFF works correctly\n", afstr(af, proto)); + res = 1; +out: + if (proto == IPPROTO_TCP && recv_sock >= 0) + close(recv_sock); + if (s[1] >= 0) + close(s[1]); + if (s[0] >= 0) + close(s[0]); + return res; +} + +static int do_test(int proto) +{ + int res4, res6; + + res4 = sk_peek_offset_probe(AF_INET, proto); + res6 = sk_peek_offset_probe(AF_INET6, proto); + + if (!res4 && !res6) + return KSFT_SKIP; + + if (res4) + res4 = sk_peek_offset_test(AF_INET, proto); + + if (res6) + res6 = sk_peek_offset_test(AF_INET6, proto); + + if (!res4 || !res6) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int main(void) +{ + int restcp, resudp; + + restcp = do_test(IPPROTO_TCP); + resudp = do_test(IPPROTO_UDP); + if (restcp == KSFT_FAIL || resudp == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index bd88b90b902b..5b0205c70c39 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -31,7 +31,8 @@ CFLAGS += $(KHDR_INCLUDES) CFLAGS += -iquote ./lib/ -I ../../../../include/ # Library -LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBSRC := ftrace.c ftrace-tcp.c kconfig.c netlink.c +LIBSRC += proc.c repair.c setup.c sock.c utils.c LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) EXTRA_CLEAN += $(LIBOBJ) $(LIB) diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c index a1e6e007c291..6736484996a3 100644 --- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -355,6 +355,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(30, server_fn, client_fn); + test_init(31, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config index d3277a9de987..3605e38711cb 100644 --- a/tools/testing/selftests/net/tcp_ao/config +++ b/tools/testing/selftests/net/tcp_ao/config @@ -7,4 +7,5 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_NET_VRF=y CONFIG_TCP_AO=y CONFIG_TCP_MD5SIG=y +CONFIG_TRACEPOINTS=y CONFIG_VETH=m diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index 185a2f6e5ff3..d418162d335f 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -71,10 +71,12 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, } } + synchronize_threads(); /* before counter checks */ if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) test_error("test_get_tcp_ao_counters()"); close(lsk); + if (pwd) test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); @@ -84,10 +86,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -180,6 +182,7 @@ static void try_connect(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + synchronize_threads(); /* before counter checks */ if (ret < 0) { if (fault(KEYREJECT) && ret == -EKEYREJECTED) { test_ok("%s: connect() was prevented", tst_name); @@ -212,30 +215,44 @@ out: static void *client_fn(void *arg) { - union tcp_addr wrong_addr, network_addr; + union tcp_addr wrong_addr, network_addr, addr_any = {}; unsigned int port = test_server_port; if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) test_error("Can't convert ip address %s", TEST_WRONG_IP); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server + Non-AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any, + port, 0, 100, 100); try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); @@ -259,6 +276,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(21, server_fn, client_fn); + test_init(22, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c index 81653b47f303..f1d8d29e393f 100644 --- a/tools/testing/selftests/net/tcp_ao/connect.c +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -67,14 +67,14 @@ static void *client_fn(void *arg) netstat_free(ns_after); if (nr_packets > (after_aogood - before_aogood)) { - test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")", nr_packets, after_aogood, before_aogood); return NULL; } if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) return NULL; - test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu", before_aogood, ao1.ao_info_pkt_good, ao1.key_cnts[0].pkt_good, after_aogood, ao2.ao_info_pkt_good, @@ -85,6 +85,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(1, server_fn, client_fn); + test_init(2, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index d69bcba3c929..a1614f0d8c44 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -444,6 +444,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(3, server_fn, client_fn); + test_init(4, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index 24e62120b792..d4385b52c10b 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, synchronize_threads(); /* 5: counters */ } -static void try_unmatched_keys(int sk, int *rnext_index) +static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port) { struct test_key *key; unsigned int i = 0; @@ -1013,6 +1013,9 @@ static void try_unmatched_keys(int sk, int *rnext_index) test_error("all keys on server match the client"); if (test_set_key(sk, -1, key->server_keyid)) test_error("Can't change the current key"); + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + -1, key->server_keyid, -1); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); *rnext_index = i; @@ -1054,6 +1057,10 @@ static void check_current_back(const char *tst_name, unsigned int port, return; if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) test_error("Can't change the current key"); + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, + collection.keys[rotate_to_index].client_keyid, + collection.keys[current_index].client_keyid, -1); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); /* There is a race here: between setting the current_key with @@ -1085,6 +1092,11 @@ static void roll_over_keys(const char *tst_name, unsigned int port, for (i = rnext_index + 1; rotations > 0; i++, rotations--) { if (i >= collection.nr_keys) i = 0; + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, + this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + i == 0 ? -1 : collection.keys[i - 1].server_keyid, + collection.keys[i].server_keyid, -1); if (test_set_key(sk, -1, collection.keys[i].server_keyid)) test_error("Can't change the Rnext key"); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { @@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port, rnext_index, msg_len, nr_packets); if (sk < 0) return; - try_unmatched_keys(sk, &rnext_index); + try_unmatched_keys(sk, &rnext_index, port); end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); } @@ -1181,6 +1193,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(120, server_fn, client_fn); + test_init(121, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index fbc7f6111815..db44e77428dd 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -37,17 +37,58 @@ extern void __test_xfail(const char *buf); extern void __test_error(const char *buf); extern void __test_skip(const char *buf); -__attribute__((__format__(__printf__, 2, 3))) -static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) +static inline char *test_snprintf(const char *fmt, va_list vargs) { -#define TEST_MSG_BUFFER_SIZE 4096 - char buf[TEST_MSG_BUFFER_SIZE]; - va_list arg; - - va_start(arg, fmt); - vsnprintf(buf, sizeof(buf), fmt, arg); - va_end(arg); - fn(buf); + char *ret = NULL; + size_t size = 0; + va_list tmp; + int n = 0; + + va_copy(tmp, vargs); + n = vsnprintf(ret, size, fmt, tmp); + if (n < 0) + return NULL; + + size = n + 1; + ret = malloc(size); + if (!ret) + return NULL; + + n = vsnprintf(ret, size, fmt, vargs); + if (n < 0 || n > size - 1) { + free(ret); + return NULL; + } + return ret; +} + +static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...) +{ + va_list vargs; + char *ret; + + va_start(vargs, fmt); + ret = test_snprintf(fmt, vargs); + va_end(vargs); + + return ret; +} + +static __printf(2, 3) inline void __test_print(void (*fn)(const char *), + const char *fmt, ...) +{ + va_list vargs; + char *msg; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + + if (!msg) + return; + + fn(msg); + free(msg); } #define test_print(fmt, ...) \ @@ -103,6 +144,7 @@ enum test_needs_kconfig { KCONFIG_TCP_AO, /* required */ KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + KCONFIG_FTRACE, /* optional, for tracepoints checks */ __KCONFIG_LAST__ }; extern bool kernel_config_has(enum test_needs_kconfig k); @@ -142,6 +184,8 @@ static inline void test_init2(unsigned int ntests, __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); } extern void test_add_destructor(void (*d)(void)); +extern void test_init_ftrace(int nsfd1, int nsfd2); +extern int test_setup_tracing(void); /* To adjust optmem socket limit, approximately estimate a number, * that is bigger than sizeof(struct tcp_ao_key). @@ -216,12 +260,17 @@ static inline void test_init(unsigned int ntests, } extern void synchronize_threads(void); extern void switch_ns(int fd); +extern int switch_save_ns(int fd); +extern void switch_close_ns(int fd); extern __thread union tcp_addr this_ip_addr; extern __thread union tcp_addr this_ip_dest; extern int test_family; extern void randomize_buffer(void *buf, size_t buflen); +extern __printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...); + extern int open_netns(void); extern int unshare_open_netns(void); extern const char veth_name[]; @@ -602,4 +651,115 @@ static inline int test_add_repaired_key(int sk, return test_verify_socket_key(sk, &tmp); } +#define DEFAULT_FTRACE_BUFFER_KB 10000 +#define DEFAULT_TRACER_LINES_ARR 200 +struct test_ftracer; +extern uint64_t ns_cookie1, ns_cookie2; + +enum ftracer_op { + FTRACER_LINE_DISCARD = 0, + FTRACER_LINE_PRESERVE, + FTRACER_EXIT, +}; + +extern struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb); +extern int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter); +extern void destroy_ftracer(struct test_ftracer *tracer); +extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer); +extern const char **tracer_get_savedlines(struct test_ftracer *tracer); + +enum trace_events { + /* TCP_HASH_EVENT */ + TCP_HASH_BAD_HEADER = 0, + TCP_HASH_MD5_REQUIRED, + TCP_HASH_MD5_UNEXPECTED, + TCP_HASH_MD5_MISMATCH, + TCP_HASH_AO_REQUIRED, + /* TCP_AO_EVENT */ + TCP_AO_HANDSHAKE_FAILURE, + TCP_AO_WRONG_MACLEN, + TCP_AO_MISMATCH, + TCP_AO_KEY_NOT_FOUND, + TCP_AO_RNEXT_REQUEST, + /* TCP_AO_EVENT_SK */ + TCP_AO_SYNACK_NO_KEY, + /* TCP_AO_EVENT_SNE */ + TCP_AO_SND_SNE_UPDATE, + TCP_AO_RCV_SNE_UPDATE, + __MAX_TRACE_EVENTS +}; + +extern int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne); + +static inline void trace_hash_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + -1, -1, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + keyid, rnext, maclen, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sk_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, + int keyid, int rnext) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + keyid, rnext, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sne_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int sne) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + -1, -1, -1, sne); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +extern int setup_aolib_ftracer(void); + #endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c new file mode 100644 index 000000000000..24380c68fec6 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c @@ -0,0 +1,559 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include "aolib.h" + +static const char *trace_event_names[__MAX_TRACE_EVENTS] = { + /* TCP_HASH_EVENT */ + "tcp_hash_bad_header", + "tcp_hash_md5_required", + "tcp_hash_md5_unexpected", + "tcp_hash_md5_mismatch", + "tcp_hash_ao_required", + /* TCP_AO_EVENT */ + "tcp_ao_handshake_failure", + "tcp_ao_wrong_maclen", + "tcp_ao_mismatch", + "tcp_ao_key_not_found", + "tcp_ao_rnext_request", + /* TCP_AO_EVENT_SK */ + "tcp_ao_synack_no_key", + /* TCP_AO_EVENT_SNE */ + "tcp_ao_snd_sne_update", + "tcp_ao_rcv_sne_update" +}; + +struct expected_trace_point { + /* required */ + enum trace_events type; + int family; + union tcp_addr src; + union tcp_addr dst; + + /* optional */ + int src_port; + int dst_port; + int L3index; + + int fin; + int syn; + int rst; + int psh; + int ack; + + int keyid; + int rnext; + int maclen; + int sne; + + size_t matched; +}; + +static struct expected_trace_point *exp_tps; +static size_t exp_tps_nr; +static size_t exp_tps_size; +static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER; + +int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne) +{ + struct expected_trace_point new_tp = { + .type = type, + .family = family, + .src = src, + .dst = dst, + .src_port = src_port, + .dst_port = dst_port, + .L3index = L3index, + .fin = fin, + .syn = syn, + .rst = rst, + .psh = psh, + .ack = ack, + .keyid = keyid, + .rnext = rnext, + .maclen = maclen, + .sne = sne, + .matched = 0, + }; + int ret = 0; + + if (!kernel_config_has(KCONFIG_FTRACE)) + return 0; + + pthread_mutex_lock(&exp_tps_mutex); + if (exp_tps_nr == exp_tps_size) { + struct expected_trace_point *tmp; + + if (exp_tps_size == 0) + exp_tps_size = 10; + else + exp_tps_size = exp_tps_size * 1.6; + + tmp = reallocarray(exp_tps, exp_tps_size, sizeof(exp_tps[0])); + if (!tmp) { + ret = -ENOMEM; + goto out; + } + exp_tps = tmp; + } + exp_tps[exp_tps_nr] = new_tp; + exp_tps_nr++; +out: + pthread_mutex_unlock(&exp_tps_mutex); + return ret; +} + +static void free_expected_events(void) +{ + /* We're from the process destructor - not taking the mutex */ + exp_tps_size = 0; + exp_tps = NULL; + free(exp_tps); +} + +struct trace_point { + int family; + union tcp_addr src; + union tcp_addr dst; + unsigned int src_port; + unsigned int dst_port; + int L3index; + unsigned int fin:1, + syn:1, + rst:1, + psh:1, + ack:1; + + unsigned int keyid; + unsigned int rnext; + unsigned int maclen; + + unsigned int sne; +}; + +static bool lookup_expected_event(int event_type, struct trace_point *e) +{ + size_t i; + + pthread_mutex_lock(&exp_tps_mutex); + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *p = &exp_tps[i]; + size_t sk_size; + + if (p->type != event_type) + continue; + if (p->family != e->family) + continue; + if (p->family == AF_INET) + sk_size = sizeof(p->src.a4); + else + sk_size = sizeof(p->src.a6); + if (memcmp(&p->src, &e->src, sk_size)) + continue; + if (memcmp(&p->dst, &e->dst, sk_size)) + continue; + if (p->src_port >= 0 && p->src_port != e->src_port) + continue; + if (p->dst_port >= 0 && p->dst_port != e->dst_port) + continue; + if (p->L3index >= 0 && p->L3index != e->L3index) + continue; + + if (p->fin >= 0 && p->fin != e->fin) + continue; + if (p->syn >= 0 && p->syn != e->syn) + continue; + if (p->rst >= 0 && p->rst != e->rst) + continue; + if (p->psh >= 0 && p->psh != e->psh) + continue; + if (p->ack >= 0 && p->ack != e->ack) + continue; + + if (p->keyid >= 0 && p->keyid != e->keyid) + continue; + if (p->rnext >= 0 && p->rnext != e->rnext) + continue; + if (p->maclen >= 0 && p->maclen != e->maclen) + continue; + if (p->sne >= 0 && p->sne != e->sne) + continue; + p->matched++; + pthread_mutex_unlock(&exp_tps_mutex); + return true; + } + pthread_mutex_unlock(&exp_tps_mutex); + return false; +} + +static int check_event_type(const char *line) +{ + size_t i; + + /* + * This should have been a set or hashmap, but it's a selftest, + * so... KISS. + */ + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!strncmp(trace_event_names[i], line, strlen(trace_event_names[i]))) + return i; + } + return -1; +} + +static bool event_has_flags(enum trace_events event) +{ + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: + return true; + default: + return false; + } +} + +static int tracer_ip_split(int family, char *src, char **addr, char **port) +{ + char *p; + + if (family == AF_INET) { + /* fomat is <addr>:port, i.e.: 10.0.254.1:7015 */ + *addr = src; + p = strchr(src, ':'); + if (!p) { + test_print("Couldn't parse trace event addr:port %s", src); + return -EINVAL; + } + *p++ = '\0'; + *port = p; + return 0; + } + if (family != AF_INET6) + return -EAFNOSUPPORT; + + /* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */ + *addr = strchr(src, '['); + p = strchr(src, ']'); + + if (!p || !*addr) { + test_print("Couldn't parse trace event [addr]:port %s", src); + return -EINVAL; + } + + *addr = *addr + 1; /* '[' */ + *p++ = '\0'; /* ']' */ + if (*p != ':') { + test_print("Couldn't parse trace event :port %s", p); + return -EINVAL; + } + *p++ = '\0'; /* ':' */ + *port = p; + return 0; +} + +static int tracer_scan_address(int family, char *src, + union tcp_addr *dst, unsigned int *port) +{ + char *addr, *port_str; + int ret; + + ret = tracer_ip_split(family, src, &addr, &port_str); + if (ret) + return ret; + + if (inet_pton(family, addr, dst) != 1) { + test_print("Couldn't parse trace event addr %s", addr); + return -EINVAL; + } + errno = 0; + *port = (unsigned int)strtoul(port_str, NULL, 10); + if (errno != 0) { + test_print("Couldn't parse trace event port %s", port_str); + return -errno; + } + return 0; +} + +static int tracer_scan_event(const char *line, enum trace_events event, + struct trace_point *out) +{ + char *src = NULL, *dst = NULL, *family = NULL; + char fin, syn, rst, psh, ack; + int nr_matched, ret = 0; + uint64_t netns_cookie; + + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack); + if (nr_matched != 10) + test_print("Couldn't parse trace event, matched = %d/10", + nr_matched); + break; + } + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack, + &out->keyid, &out->rnext, &out->maclen); + if (nr_matched != 13) + test_print("Couldn't parse trace event, matched = %d/13", + nr_matched); + break; + } + case TCP_AO_SYNACK_NO_KEY: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u", + &netns_cookie, &family, + &src, &dst, &out->keyid, &out->rnext); + if (nr_matched != 6) + test_print("Couldn't parse trace event, matched = %d/6", + nr_matched); + break; + } + case TCP_AO_SND_SNE_UPDATE: + case TCP_AO_RCV_SNE_UPDATE: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u", + &netns_cookie, &family, + &src, &dst, &out->sne); + if (nr_matched != 5) + test_print("Couldn't parse trace event, matched = %d/5", + nr_matched); + break; + } + default: + return -1; + } + + if (family) { + if (!strcmp(family, "AF_INET")) { + out->family = AF_INET; + } else if (!strcmp(family, "AF_INET6")) { + out->family = AF_INET6; + } else { + test_print("Couldn't parse trace event family %s", family); + ret = -EINVAL; + goto out_free; + } + } + + if (event_has_flags(event)) { + out->fin = (fin == 'F'); + out->syn = (syn == 'S'); + out->rst = (rst == 'R'); + out->psh = (psh == 'P'); + out->ack = (ack == '.'); + + if ((fin != 'F' && fin != ' ') || + (syn != 'S' && syn != ' ') || + (rst != 'R' && rst != ' ') || + (psh != 'P' && psh != ' ') || + (ack != '.' && ack != ' ')) { + test_print("Couldn't parse trace event flags %c%c%c%c%c", + fin, syn, rst, psh, ack); + ret = -EINVAL; + goto out_free; + } + } + + if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) { + ret = -EINVAL; + goto out_free; + } + + if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) { + ret = -EINVAL; + goto out_free; + } + + if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) { + test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64, + netns_cookie, ns_cookie1, ns_cookie2); + ret = -EINVAL; + } + +out_free: + free(src); + free(dst); + free(family); + return ret; +} + +static enum ftracer_op aolib_tracer_process_event(const char *line) +{ + int event_type = check_event_type(line); + struct trace_point tmp = {}; + + if (event_type < 0) + return FTRACER_LINE_PRESERVE; + + if (tracer_scan_event(line, event_type, &tmp)) + return FTRACER_LINE_PRESERVE; + + return lookup_expected_event(event_type, &tmp) ? + FTRACER_LINE_DISCARD : FTRACER_LINE_PRESERVE; +} + +static void dump_trace_event(struct expected_trace_point *e) +{ + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; + + if (!inet_ntop(e->family, &e->src, src, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + if (!inet_ntop(e->family, &e->dst, dst, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu", + trace_event_names[e->type], + src, e->src_port, dst, e->dst_port, e->L3index, + (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "", + (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "", + (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "", + (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "", + (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "", + e->keyid, e->rnext, e->maclen, e->sne, e->matched); +} + +static void print_match_stats(bool unexpected_events) +{ + size_t matches_per_type[__MAX_TRACE_EVENTS] = {}; + bool expected_but_none = false; + size_t i, total_matched = 0; + char *stat_line = NULL; + + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *e = &exp_tps[i]; + + total_matched += e->matched; + matches_per_type[e->type] += e->matched; + if (!e->matched) + expected_but_none = true; + } + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!matches_per_type[i]) + continue; + stat_line = test_sprintf("%s%s[%zu] ", stat_line ?: "", + trace_event_names[i], + matches_per_type[i]); + if (!stat_line) + test_error("test_sprintf()"); + } + + if (unexpected_events || expected_but_none) { + for (i = 0; i < exp_tps_nr; i++) + dump_trace_event(&exp_tps[i]); + } + + if (unexpected_events) + return; + + if (expected_but_none) + test_fail("Some trace events were expected, but didn't occur"); + else if (total_matched) + test_ok("Trace events matched expectations: %zu %s", + total_matched, stat_line); + else + test_ok("No unexpected trace events during the test run"); +} + +#define dump_events(fmt, ...) \ + __test_print(__test_msg, fmt, ##__VA_ARGS__) +static void check_free_events(struct test_ftracer *tracer) +{ + const char **lines; + size_t nr; + + if (!kernel_config_has(KCONFIG_FTRACE)) { + test_skip("kernel config doesn't have ftrace - no checks"); + return; + } + + nr = tracer_get_savedlines_nr(tracer); + lines = tracer_get_savedlines(tracer); + print_match_stats(!!nr); + if (!nr) + return; + + errno = 0; + test_xfail("Trace events [%zu] were not expected:", nr); + while (nr) + dump_events("\t%s", lines[--nr]); +} + +static int setup_tcp_trace_events(struct test_ftracer *tracer) +{ + char *filter; + size_t i; + int ret; + + filter = test_sprintf("net_cookie == %zu || net_cookie == %zu", + ns_cookie1, ns_cookie2); + if (!filter) + return -ENOMEM; + + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + char *event_name = test_sprintf("tcp/%s", trace_event_names[i]); + + if (!event_name) { + ret = -ENOMEM; + break; + } + ret = setup_trace_event(tracer, event_name, filter); + free(event_name); + if (ret) + break; + } + + free(filter); + return ret; +} + +static void aolib_tracer_destroy(struct test_ftracer *tracer) +{ + check_free_events(tracer); + free_expected_events(); +} + +static bool aolib_tracer_expecting_more(void) +{ + size_t i; + + for (i = 0; i < exp_tps_nr; i++) + if (!exp_tps[i].matched) + return true; + return false; +} + +int setup_aolib_ftracer(void) +{ + struct test_ftracer *f; + + f = create_ftracer("aolib", aolib_tracer_process_event, + aolib_tracer_destroy, aolib_tracer_expecting_more, + DEFAULT_FTRACE_BUFFER_KB, DEFAULT_TRACER_LINES_ARR); + if (!f) + return -1; + + return setup_tcp_trace_events(f); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c new file mode 100644 index 000000000000..e4d0b173bc94 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <unistd.h> +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +static char ftrace_path[] = "ksft-ftrace-XXXXXX"; +static bool ftrace_mounted; +uint64_t ns_cookie1, ns_cookie2; + +struct test_ftracer { + pthread_t tracer_thread; + int error; + char *instance_path; + FILE *trace_pipe; + + enum ftracer_op (*process_line)(const char *line); + void (*destructor)(struct test_ftracer *tracer); + bool (*expecting_more)(void); + + char **saved_lines; + size_t saved_lines_size; + size_t next_line_ind; + + pthread_cond_t met_all_expected; + pthread_mutex_t met_all_expected_lock; + + struct test_ftracer *next; +}; + +static struct test_ftracer *ftracers; +static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER; + +static int mount_ftrace(void) +{ + if (!mkdtemp(ftrace_path)) + test_error("Can't create temp dir"); + + if (mount("tracefs", ftrace_path, "tracefs", 0, "rw")) + return -errno; + + ftrace_mounted = true; + + return 0; +} + +static void unmount_ftrace(void) +{ + if (ftrace_mounted && umount(ftrace_path)) + test_print("Failed on cleanup: can't unmount tracefs: %m"); + + if (rmdir(ftrace_path)) + test_error("Failed on cleanup: can't remove ftrace dir %s", + ftrace_path); +} + +struct opts_list_t { + char *opt_name; + struct opts_list_t *next; +}; + +static int disable_trace_options(const char *ftrace_path) +{ + struct opts_list_t *opts_list = NULL; + char *fopts, *line = NULL; + size_t buf_len = 0; + ssize_t line_len; + int ret = 0; + FILE *opts; + + fopts = test_sprintf("%s/%s", ftrace_path, "trace_options"); + if (!fopts) + return -ENOMEM; + + opts = fopen(fopts, "r+"); + if (!opts) { + ret = -errno; + goto out_free; + } + + while ((line_len = getline(&line, &buf_len, opts)) != -1) { + struct opts_list_t *tmp; + + if (!strncmp(line, "no", 2)) + continue; + + tmp = malloc(sizeof(*tmp)); + if (!tmp) { + ret = -ENOMEM; + goto out_free_opts_list; + } + tmp->next = opts_list; + tmp->opt_name = test_sprintf("no%s", line); + if (!tmp->opt_name) { + ret = -ENOMEM; + free(tmp); + goto out_free_opts_list; + } + opts_list = tmp; + } + + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + fseek(opts, 0, SEEK_SET); + fwrite(tmp->opt_name, 1, strlen(tmp->opt_name), opts); + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + +out_free_opts_list: + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + free(line); + fclose(opts); +out_free: + free(fopts); + return ret; +} + +static int setup_buffer_size(const char *ftrace_path, size_t sz) +{ + char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path); + int ret; + + if (!fbuf_size) + return -1; + + ret = test_echo(fbuf_size, 0, "%zu", sz); + free(fbuf_size); + return ret; +} + +static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name) +{ + char *tmp; + + tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name); + if (!tmp) + return -ENOMEM; + + tracer->instance_path = mkdtemp(tmp); + if (!tracer->instance_path) { + free(tmp); + return -errno; + } + + return 0; +} + +static void remove_ftrace_instance(struct test_ftracer *tracer) +{ + if (rmdir(tracer->instance_path)) + test_print("Failed on cleanup: can't remove ftrace instance %s", + tracer->instance_path); + free(tracer->instance_path); +} + +static void tracer_cleanup(void *arg) +{ + struct test_ftracer *tracer = arg; + + fclose(tracer->trace_pipe); +} + +static void tracer_set_error(struct test_ftracer *tracer, int error) +{ + if (!tracer->error) + tracer->error = error; +} + +const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer) +{ + return tracer->next_line_ind; +} + +const char **tracer_get_savedlines(struct test_ftracer *tracer) +{ + return (const char **)tracer->saved_lines; +} + +static void *tracer_thread_func(void *arg) +{ + struct test_ftracer *tracer = arg; + + pthread_cleanup_push(tracer_cleanup, arg); + + while (tracer->next_line_ind < tracer->saved_lines_size) { + char **lp = &tracer->saved_lines[tracer->next_line_ind]; + enum ftracer_op op; + size_t buf_len = 0; + ssize_t line_len; + + line_len = getline(lp, &buf_len, tracer->trace_pipe); + if (line_len == -1) + break; + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + op = tracer->process_line(*lp); + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + + if (tracer->expecting_more) { + pthread_mutex_lock(&tracer->met_all_expected_lock); + if (!tracer->expecting_more()) + pthread_cond_signal(&tracer->met_all_expected); + pthread_mutex_unlock(&tracer->met_all_expected_lock); + } + + if (op == FTRACER_LINE_DISCARD) + continue; + if (op == FTRACER_EXIT) + break; + if (op != FTRACER_LINE_PRESERVE) + test_error("unexpected tracer command %d", op); + + tracer->next_line_ind++; + buf_len = 0; + } + test_print("too many lines in ftracer buffer %zu, exiting tracer", + tracer->next_line_ind); + + pthread_cleanup_pop(1); + return NULL; +} + +static int setup_trace_thread(struct test_ftracer *tracer) +{ + int ret = 0; + char *path; + + path = test_sprintf("%s/trace_pipe", tracer->instance_path); + if (!path) + return -ENOMEM; + + tracer->trace_pipe = fopen(path, "r"); + if (!tracer->trace_pipe) { + ret = -errno; + goto out_free; + } + + if (pthread_create(&tracer->tracer_thread, NULL, + tracer_thread_func, (void *)tracer)) { + ret = -errno; + fclose(tracer->trace_pipe); + } + +out_free: + free(path); + return ret; +} + +static void stop_trace_thread(struct test_ftracer *tracer) +{ + void *res; + + if (pthread_cancel(tracer->tracer_thread)) { + test_print("Can't stop tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (pthread_join(tracer->tracer_thread, &res)) { + test_print("Can't join tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (res != PTHREAD_CANCELED) { + test_print("Tracer thread wasn't canceled"); + tracer_set_error(tracer, -errno); + } + if (tracer->error) + test_fail("tracer errored by %s", strerror(tracer->error)); +} + +static void final_wait_for_events(struct test_ftracer *tracer, + unsigned timeout_sec) +{ + struct timespec timeout; + struct timeval now; + int ret = 0; + + if (!tracer->expecting_more) + return; + + pthread_mutex_lock(&tracer->met_all_expected_lock); + gettimeofday(&now, NULL); + timeout.tv_sec = now.tv_sec + timeout_sec; + timeout.tv_nsec = now.tv_usec * 1000; + + while (tracer->expecting_more() && ret != ETIMEDOUT) + ret = pthread_cond_timedwait(&tracer->met_all_expected, + &tracer->met_all_expected_lock, &timeout); + pthread_mutex_unlock(&tracer->met_all_expected_lock); +} + +int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter) +{ + char *enable_path, *filter_path, *instance = tracer->instance_path; + int ret; + + enable_path = test_sprintf("%s/events/%s/enable", instance, event); + if (!enable_path) + return -ENOMEM; + + filter_path = test_sprintf("%s/events/%s/filter", instance, event); + if (!filter_path) { + ret = -ENOMEM; + goto out_free; + } + + ret = test_echo(filter_path, 0, "%s", filter); + if (!ret) + ret = test_echo(enable_path, 0, "1"); + +out_free: + free(filter_path); + free(enable_path); + return ret; +} + +struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb) +{ + struct test_ftracer *tracer; + int err; + + /* XXX: separate __create_ftracer() helper and do here + * if (!kernel_config_has(KCONFIG_FTRACE)) + * return NULL; + */ + + tracer = malloc(sizeof(*tracer)); + if (!tracer) { + test_print("malloc()"); + return NULL; + } + + memset(tracer, 0, sizeof(*tracer)); + + err = setup_ftrace_instance(tracer, name); + if (err) { + test_print("setup_ftrace_instance(): %d", err); + goto err_free; + } + + err = disable_trace_options(tracer->instance_path); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + err = setup_buffer_size(tracer->instance_path, buffer_size_kb); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0])); + if (!tracer->saved_lines) { + test_print("calloc()"); + goto err_remove; + } + tracer->saved_lines_size = lines_buf_sz; + + tracer->process_line = process_line; + tracer->destructor = destructor; + tracer->expecting_more = expecting_more; + + err = pthread_cond_init(&tracer->met_all_expected, NULL); + if (err) { + test_print("pthread_cond_init(): %d", err); + goto err_free_lines; + } + + err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL); + if (err) { + test_print("pthread_mutex_init(): %d", err); + goto err_cond_destroy; + } + + err = setup_trace_thread(tracer); + if (err) { + test_print("setup_trace_thread(): %d", err); + goto err_mutex_destroy; + } + + pthread_mutex_lock(&ftracers_lock); + tracer->next = ftracers; + ftracers = tracer; + pthread_mutex_unlock(&ftracers_lock); + + return tracer; + +err_mutex_destroy: + pthread_mutex_destroy(&tracer->met_all_expected_lock); +err_cond_destroy: + pthread_cond_destroy(&tracer->met_all_expected); +err_free_lines: + free(tracer->saved_lines); +err_remove: + remove_ftrace_instance(tracer); +err_free: + free(tracer); + return NULL; +} + +static void __destroy_ftracer(struct test_ftracer *tracer) +{ + size_t i; + + final_wait_for_events(tracer, TEST_TIMEOUT_SEC); + stop_trace_thread(tracer); + remove_ftrace_instance(tracer); + if (tracer->destructor) + tracer->destructor(tracer); + for (i = 0; i < tracer->saved_lines_size; i++) + free(tracer->saved_lines[i]); + pthread_cond_destroy(&tracer->met_all_expected); + pthread_mutex_destroy(&tracer->met_all_expected_lock); + free(tracer); +} + +void destroy_ftracer(struct test_ftracer *tracer) +{ + pthread_mutex_lock(&ftracers_lock); + if (tracer == ftracers) { + ftracers = tracer->next; + } else { + struct test_ftracer *f = ftracers; + + while (f->next != tracer) { + if (!f->next) + test_error("tracers list corruption or double free %p", tracer); + f = f->next; + } + f->next = tracer->next; + } + tracer->next = NULL; + pthread_mutex_unlock(&ftracers_lock); + __destroy_ftracer(tracer); +} + +static void destroy_all_ftracers(void) +{ + struct test_ftracer *f; + + pthread_mutex_lock(&ftracers_lock); + f = ftracers; + ftracers = NULL; + pthread_mutex_unlock(&ftracers_lock); + + while (f) { + struct test_ftracer *n = f->next; + + f->next = NULL; + __destroy_ftracer(f); + f = n; + } +} + +static void test_unset_tracing(void) +{ + destroy_all_ftracers(); + unmount_ftrace(); +} + +int test_setup_tracing(void) +{ + /* + * Just a basic protection - this should be called only once from + * lib/kconfig. Not thread safe, which is fine as it's early, before + * threads are created. + */ + static int already_set; + int err; + + if (already_set) + return -1; + + /* Needs net-namespace cookies for filters */ + if (ns_cookie1 == ns_cookie2) { + test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing", + ns_cookie1, ns_cookie2); + return -1; + } + + already_set = 1; + + test_add_destructor(test_unset_tracing); + + err = mount_ftrace(); + if (err) { + test_print("failed to mount_ftrace(): %d", err); + return err; + } + + return setup_aolib_ftracer(); +} + +static int get_ns_cookie(int nsfd, uint64_t *out) +{ + int old_ns = switch_save_ns(nsfd); + socklen_t size = sizeof(*out); + int sk; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) { + test_print("getsockopt(SO_NETNS_COOKIE): %m"); + close(sk); + return -errno; + } + + close(sk); + switch_close_ns(old_ns); + return 0; +} + +void test_init_ftrace(int nsfd1, int nsfd2) +{ + get_ns_cookie(nsfd1, &ns_cookie1); + get_ns_cookie(nsfd2, &ns_cookie2); + /* Populate kernel config state */ + kernel_config_has(KCONFIG_FTRACE); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c index f279ffc3843b..9f1c175846f8 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -6,7 +6,7 @@ #include "aolib.h" struct kconfig_t { - int _errno; /* the returned error if not supported */ + int _error; /* negative errno if not supported */ int (*check_kconfig)(int *error); }; @@ -62,7 +62,7 @@ static int has_tcp_ao(int *err) memcpy(&tmp.addr, &addr, sizeof(addr)); *err = 0; if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT) ret = -errno; } @@ -87,7 +87,7 @@ static int has_tcp_md5(int *err) */ *err = 0; if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT && errno == ENOMEM) { test_print("setsockopt(TCP_MD5SIG_EXT): %m"); ret = -errno; @@ -116,13 +116,21 @@ static int has_vrfs(int *err) return ret; } +static int has_ftrace(int *err) +{ + *err = test_setup_tracing(); + return 0; +} + +#define KCONFIG_UNKNOWN 1 static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; static struct kconfig_t kconfig[__KCONFIG_LAST__] = { - { -1, has_net_ns }, - { -1, has_veth }, - { -1, has_tcp_ao }, - { -1, has_tcp_md5 }, - { -1, has_vrfs }, + { KCONFIG_UNKNOWN, has_net_ns }, + { KCONFIG_UNKNOWN, has_veth }, + { KCONFIG_UNKNOWN, has_tcp_ao }, + { KCONFIG_UNKNOWN, has_tcp_md5 }, + { KCONFIG_UNKNOWN, has_vrfs }, + { KCONFIG_UNKNOWN, has_ftrace }, }; const char *tests_skip_reason[__KCONFIG_LAST__] = { @@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = { "Tests require TCP-AO support (CONFIG_TCP_AO)", "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", "VRFs are not supported (CONFIG_NET_VRF)", + "Ftrace points are not supported (CONFIG_TRACEPOINTS)", }; bool kernel_config_has(enum test_needs_kconfig k) @@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k) bool ret; pthread_mutex_lock(&kconfig_lock); - if (kconfig[k]._errno == -1) { - if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + if (kconfig[k]._error == KCONFIG_UNKNOWN) { + if (kconfig[k].check_kconfig(&kconfig[k]._error)) test_error("Failed to initialize kconfig %u", k); } - ret = kconfig[k]._errno == 0; + ret = kconfig[k]._error == 0; pthread_mutex_unlock(&kconfig_lock); return ret; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index e408b9243b2c..a27cc03c9fbd 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -111,7 +111,7 @@ static void sig_int(int signo) int open_netns(void) { - const char *netns_path = "/proc/self/ns/net"; + const char *netns_path = "/proc/thread-self/ns/net"; int fd; fd = open(netns_path, O_RDONLY); @@ -142,6 +142,13 @@ int switch_save_ns(int new_ns) return ret; } +void switch_close_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); + close(fd); +} + static int nsfd_outside = -1; static int nsfd_parent = -1; static int nsfd_child = -1; @@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix, test_print("rand seed %u", (unsigned int)seed); srand(seed); - ksft_print_header(); init_namespaces(); + test_init_ftrace(nsfd_parent, nsfd_child); if (add_veth(veth_name, nsfd_parent, nsfd_child)) test_error("Failed to add veth"); @@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void) int old_ns = switch_save_ns(nsfd_child); optmem_ns = !access(optmem_file, F_OK); - switch_ns(old_ns); + switch_close_ns(old_ns); } return !!optmem_ns; } @@ -317,7 +324,7 @@ size_t test_get_optmem(void) test_error("can't read from %s", optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); return ret; } @@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old) test_error("can't write %zu to %s", new, optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); } static void test_revert_optmem(void) diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 15aeb0963058..0ffda966c677 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -379,7 +379,6 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) key_dump[0].nkeys = nr_keys; key_dump[0].get_all = 1; - key_dump[0].get_all = 1; err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, key_dump, &key_dump_sz); if (err) { diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c index 372daca525f5..bdf5522c9213 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/utils.c +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen) } } +__printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...) +{ + size_t len, written; + va_list vargs; + char *msg; + FILE *f; + + f = fopen(fname, append ? "a" : "w"); + if (!f) + return -errno; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + if (!msg) { + fclose(f); + return -1; + } + len = strlen(msg); + written = fwrite(msg, 1, len, f); + fclose(f); + free(msg); + return written == len ? 0 : -1; +} + const struct sockaddr_in6 addr_any6 = { .sin6_family = AF_INET6, }; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c index 8fdc808df325..ecc6f1e3a414 100644 --- a/tools/testing/selftests/net/tcp_ao/restore.c +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -64,6 +64,7 @@ static void try_server_run(const char *tst_name, unsigned int port, else test_ok("%s: server alive", tst_name); } + synchronize_threads(); /* 3: counters checks */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); @@ -71,10 +72,10 @@ static void try_server_run(const char *tst_name, unsigned int port, test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -82,7 +83,7 @@ static void try_server_run(const char *tst_name, unsigned int port, * Before close() as that will send FIN and move the peer in TCP_CLOSE * and that will prevent reading AO counters from the peer's socket. */ - synchronize_threads(); /* 3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ out: close(sk); } @@ -176,6 +177,7 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, else test_ok("%s: post-migrate connection is alive", tst_name); } + synchronize_threads(); /* 3: counters checks */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); @@ -183,13 +185,13 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } - synchronize_threads(); /* 3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ close(sk); } @@ -206,22 +208,36 @@ static void *client_fn(void *arg) test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snt_isn += 1; + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); test_sk_restore("TCP-AO with wrong send ISN", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.rcv_isn += 1; + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); test_sk_restore("TCP-AO with wrong receive ISN", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snd_sne += 1; + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + /* not expecting server => client mismatches as only snd sne is broken */ test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.rcv_sne += 1; + /* not expecting client => server mismatches as only rcv sne is broken */ + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_GOOD | TEST_CNT_BAD); @@ -231,6 +247,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(20, server_fn, client_fn); + test_init(21, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index a2fe88d35ac0..6364facaa63e 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -455,6 +455,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(14, server_fn, client_fn); + test_init(15, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index a5698b0a3718..3ecd2b58de6a 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -87,7 +87,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); close(sk); return; @@ -148,7 +148,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); close(sk); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); return; } @@ -163,17 +163,26 @@ static void *client_fn(void *arg) setup_lo_intf("lo"); tcp_self_connect("self-connect(same keyids)", port++, false, false); + + /* expecting rnext to change based on the first segment RNext != Current */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); tcp_self_connect("self-connect(different keyids)", port++, true, false); tcp_self_connect("self-connect(restore)", port, false, true); - port += 2; + port += 2; /* restore test restores over different port */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); + /* intentionally on restore they are added to the socket in different order */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1); tcp_self_connect("self-connect(restore, different keyids)", port, true, true); - port += 2; + port += 2; /* restore test restores over different port */ return NULL; } int main(int argc, char *argv[]) { - test_init(4, client_fn, NULL); + test_init(5, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index ad4e77d6823e..8901a6785dc8 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -116,7 +116,15 @@ static void *server_fn(void *arg) sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, client_new_port, &ao1); - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { if (bytes > 0) @@ -127,6 +135,7 @@ static void *server_fn(void *arg) test_ok("server alive"); } + synchronize_threads(); /* 6: verify counters after SEQ-number rollover */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); @@ -134,15 +143,15 @@ static void *server_fn(void *arg) test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); test_enable_repair(sk); @@ -206,12 +215,13 @@ static void *client_fn(void *arg) sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, test_server_port + 1, &ao1); - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("post-migrate verify failed"); else test_ok("post-migrate connection alive"); + synchronize_threads(); /* 5: verify counters after SEQ-number rollover */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); @@ -219,15 +229,15 @@ static void *client_fn(void *arg) test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); @@ -240,6 +250,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(7, server_fn, client_fn); + test_init(8, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 517930f9721b..084db4ecdff6 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -30,8 +30,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, #define __cmp_ao(member) \ do { \ if (info->member != tmp.member) { \ - test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \ - tst, (size_t)info->member, (size_t)tmp.member); \ + test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64, \ + tst, (uint64_t)info->member, (uint64_t)tmp.member); \ return; \ } \ } while(0) @@ -830,6 +830,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(120, client_fn, NULL); + test_init(121, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index 6b59a652159f..f779e5892bc1 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -70,6 +70,7 @@ static void try_accept(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; err = test_wait_fd(lsk, timeout, 0); + synchronize_threads(); /* connect()/accept() timeouts */ if (err == -ETIMEDOUT) { if (!fault(TIMEOUT)) test_fail("timed out for accept()"); @@ -100,10 +101,10 @@ static void try_accept(const char *tst_name, unsigned int port, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } if (ao_addr) @@ -283,6 +284,7 @@ static void try_connect(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + synchronize_threads(); /* connect()/accept() timeouts */ if (ret < 0) { if (fault(KEYREJECT) && ret == -EKEYREJECTED) test_ok("%s: connect() was prevented", tst_name); @@ -451,6 +453,7 @@ static void try_to_add(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + synchronize_threads(); /* connect()/accept() timeouts */ if (ret <= 0) { test_error("%s: connect() returned %d", tst_name, ret); goto out; @@ -671,24 +674,38 @@ static void *client_fn(void *arg) try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("no sign server: AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("no sign server: MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("no sign server: no sign client", port++, NULL, 0, @@ -696,25 +713,37 @@ static void *client_fn(void *arg) try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 1, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, non-matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: no sign client (unmatched)", port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); @@ -736,6 +765,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(72, server_fn, client_fn); + test_init(73, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index ec60a16c9307..d626f22f9550 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -356,8 +356,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } } - if (batch > 1) + if (batch > 1) { fprintf(stderr, "batched %d timestamps\n", batch); + } else if (!batch) { + fprintf(stderr, "Failed to report timestamps\n"); + test_failed = true; + } } static int recv_errmsg(int fd) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 11a1ebda564f..d5ffd8c9172e 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,8 +7,6 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -46,17 +44,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +73,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +94,17 @@ run_one_nat() { # ... so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +113,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -196,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index f52aa5f7da52..3e751234ccfe 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -30,14 +30,7 @@ source lib.sh -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" result=0 diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 152171fb1fc8..e9c2f71da207 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -59,7 +59,6 @@ # while it is forwarded between different vrfs. source lib.sh -PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -636,6 +635,8 @@ EOF # Some systems don't have a ping6 binary anymore command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) +check_gen_prog "nettest" + TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym" TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh new file mode 100755 index 000000000000..2fab29d3cb91 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +source lib.sh + +timeout=4m +ret=0 +tmp=$(mktemp) +cleanup() { + cleanup_all_ns + rm -f "$tmp" +} + +trap cleanup EXIT + +maxpolicies=100000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000 + +do_dummies4() { + local dir="$1" + local max="$2" + + local policies + local pfx + pfx=30 + policies=0 + + ip netns exec "$ns" ip xfrm policy flush + + for i in $(seq 1 100);do + local s + local d + for j in $(seq 1 255);do + s=$((i+0)) + d=$((i+100)) + + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block + done + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block + done + done + done +} + +setup_ns ns + +do_bench() +{ + local max="$1" + + start=$(date +%s%3N) + do_dummies4 "out" "$max" > "$tmp" + if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then + echo "WARNING: policy insertion cancelled after $timeout" + ret=1 + fi + stop=$(date +%s%3N) + + result=$((stop-start)) + + policies=$(wc -l < "$tmp") + printf "Inserted %-06s policies in $result ms\n" $policies + + have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l) + if [ "$have" -ne "$policies" ]; then + echo "WARNING: mismatch, have $have policies, expected $policies" + ret=1 + fi +} + +p=100 +while [ $p -le "$maxpolicies" ]; do + do_bench "$p" + p="${p}0" +done + +exit $ret diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 3fbabab46958..8de98ea7af80 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -1,19 +1,21 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for nolibc tests -include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak -# We need this for the "cc-option" macro. -include ../../../build/Build.include +# we're in ".../tools/testing/selftests/nolibc" +ifeq ($(srctree),) +srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) +endif + +include $(srctree)/tools/scripts/utilities.mak +# We need this for the "__cc-option" macro. +include $(srctree)/scripts/Makefile.compiler ifneq ($(O),) ifneq ($(call is-absolute,$(O)),y) $(error Only absolute O= parameters are supported) endif -endif - -# we're in ".../tools/testing/selftests/nolibc" -ifeq ($(srctree),) -srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) +objtree := $(O) +else +objtree ?= $(srctree) endif ifeq ($(ARCH),) @@ -21,7 +23,7 @@ include $(srctree)/scripts/subarch.include ARCH = $(SUBARCH) endif -objtree ?= $(srctree) +cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2)) # XARCH extends the kernel's ARCH with a few variants of the same # architecture that only differ by the configuration, the toolchain @@ -155,9 +157,22 @@ CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wex $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA) LDFLAGS := +LIBGCC := -lgcc + +ifneq ($(LLVM),) +# Not needed for clang +LIBGCC := +endif + +# Modify CFLAGS based on LLVM= +include $(srctree)/tools/scripts/Makefile.include + +# GCC uses "s390", clang "systemz" +CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) + REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ - if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ + if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ printf("\nSee all results in %s\n", ARGV[1]); }' help: @@ -204,11 +219,11 @@ sysroot/$(ARCH)/include: ifneq ($(NOLIBC_SYSROOT),0) nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c -lgcc + -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC) else nolibc-test: nolibc-test.c nolibc-test-linkage.c $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc + -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC) endif libc-test: nolibc-test.c nolibc-test-linkage.c diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 093d0512f4c5..6fba7025c5e3 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -542,7 +542,7 @@ int expect_strzr(const char *expr, int llen) { int ret = 0; - llen += printf(" = <%s> ", expr); + llen += printf(" = <%s> ", expr ? expr : "(null)"); if (expr) { ret = 1; result(llen, FAIL); @@ -561,7 +561,7 @@ int expect_strnz(const char *expr, int llen) { int ret = 0; - llen += printf(" = <%s> ", expr); + llen += printf(" = <%s> ", expr ? expr : "(null)"); if (!expr) { ret = 1; result(llen, FAIL); @@ -686,9 +686,10 @@ static void constructor1(void) } __attribute__((constructor)) -static void constructor2(void) +static void constructor2(int argc, char **argv, char **envp) { - constructor_test_value *= 2; + if (argc && argv && envp) + constructor_test_value *= 2; } int run_startup(int min, int max) diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 0446e6326a40..e7ecda4ae796 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -15,10 +15,11 @@ download_location="${cache_dir}/crosstools/" build_location="$(realpath "${cache_dir}"/nolibc-tests/)" perform_download=0 test_mode=system -CFLAGS_EXTRA="-Werror" +werror=1 +llvm= archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch" -TEMP=$(getopt -o 'j:d:c:b:a:m:peh' -n "$0" -- "$@") +TEMP=$(getopt -o 'j:d:c:b:a:m:pelh' -n "$0" -- "$@") eval set -- "$TEMP" unset TEMP @@ -42,6 +43,7 @@ Options: -b [DIR] Build location (default: ${build_location}) -m [MODE] Test mode user/system (default: ${test_mode}) -e Disable -Werror + -l Build with LLVM/clang EOF } @@ -69,7 +71,10 @@ while true; do test_mode="$2" shift 2; continue ;; '-e') - CFLAGS_EXTRA="" + werror=0 + shift; continue ;; + '-l') + llvm=1 shift; continue ;; '-h') print_usage @@ -140,7 +145,10 @@ test_arch() { ct_abi=$(crosstool_abi "$1") cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-") build_dir="${build_location}/${arch}" - MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" O="${build_dir}") + if [ "$werror" -ne 0 ]; then + CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror" + fi + MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}") mkdir -p "$build_dir" if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index 1321922038d0..ca4483c238b9 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -18,4 +18,4 @@ $(OUTPUT)/context_switch: LDLIBS += -lpthread $(OUTPUT)/fork: LDLIBS += -lpthread -$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles +$(OUTPUT)/exec_target: CFLAGS += -nostartfiles diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c index c14b0fc1edde..a6408d3f26cd 100644 --- a/tools/testing/selftests/powerpc/benchmarks/exec_target.c +++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c @@ -7,10 +7,22 @@ */ #define _GNU_SOURCE -#include <unistd.h> #include <sys/syscall.h> void _start(void) { - syscall(SYS_exit, 0); + asm volatile ( + "li %%r0, %[sys_exit];" + "li %%r3, 0;" + "sc;" + : + : [sys_exit] "i" (SYS_exit) + /* + * "sc" will clobber r0, r3-r13, cr0, ctr, xer and memory. + * Even though sys_exit never returns, handle clobber + * registers. + */ + : "r0", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", + "r11", "r12", "r13", "cr0", "ctr", "xer", "memory" + ); } diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index b33cd8753689..ad79784e552d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -68,6 +68,8 @@ config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG" config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG" config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG" config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG" +config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \ + "`cat $config_dir/CFcommon.$(uname -m) 2> /dev/null`" cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 990d24696fd3..0447c4a00cc4 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -19,10 +19,10 @@ PATH=${RCUTORTURE}/bin:$PATH; export PATH TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`" MAKE_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS*2)) -HALF_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS/2)) -if test "$HALF_ALLOTED_CPUS" -lt 1 +SCALE_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS/2)) +if test "$SCALE_ALLOTED_CPUS" -lt 1 then - HALF_ALLOTED_CPUS=1 + SCALE_ALLOTED_CPUS=1 fi VERBOSE_BATCH_CPUS=$((TORTURE_ALLOTED_CPUS/16)) if test "$VERBOSE_BATCH_CPUS" -lt 2 @@ -90,6 +90,7 @@ usage () { echo " --do-scftorture / --do-no-scftorture / --no-scftorture" echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep" echo " --duration [ <minutes> | <hours>h | <days>d ]" + echo " --guest-cpu-limit N" echo " --kcsan-kmake-arg kernel-make-arguments" exit 1 } @@ -203,6 +204,21 @@ do duration_base=$(($ts*mult)) shift ;; + --guest-cpu-limit|--guest-cpu-lim) + checkarg --guest-cpu-limit "(number)" "$#" "$2" '^[0-9]*$' '^--' + if (("$2" <= "$TORTURE_ALLOTED_CPUS" / 2)) + then + SCALE_ALLOTED_CPUS="$2" + VERBOSE_BATCH_CPUS="$((SCALE_ALLOTED_CPUS/8))" + if (("$VERBOSE_BATCH_CPUS" < 2)) + then + VERBOSE_BATCH_CPUS=0 + fi + else + echo "Ignoring value of $2 for --guest-cpu-limit which is greater than (("$TORTURE_ALLOTED_CPUS" / 2))." + fi + shift + ;; --kcsan-kmake-arg|--kcsan-kmake-args) checkarg --kcsan-kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' kcsan_kmake_args="`echo "$kcsan_kmake_args $2" | sed -e 's/^ *//' -e 's/ *$//'`" @@ -425,9 +441,9 @@ fi if test "$do_scftorture" = "yes" then # Scale memory based on the number of CPUs. - scfmem=$((3+HALF_ALLOTED_CPUS/16)) - torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1" - torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make + scfmem=$((3+SCALE_ALLOTED_CPUS/16)) + torture_bootargs="scftorture.nthreads=$SCALE_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1" + torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --memory ${scfmem}G --trust-make fi if test "$do_rt" = "yes" @@ -471,8 +487,8 @@ for prim in $primlist do if test -n "$firsttime" then - torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make + torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$SCALE_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : if test -f "$T/last-resdir-kasan" then @@ -520,8 +536,8 @@ for prim in $primlist do if test -n "$firsttime" then - torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" - torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make + torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$SCALE_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot" + torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --trust-make mv $T/last-resdir-nodebug $T/first-resdir-nodebug || : if test -f "$T/last-resdir-kasan" then @@ -559,7 +575,7 @@ do_kcsan="$do_kcsan_save" if test "$do_kvfree" = "yes" then torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot" - torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make + torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$SCALE_ALLOTED_CPUS" --memory 2G --trust-make fi if test "$do_clocksourcewd" = "yes" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index 0e92d85313aa..217597e84905 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -1,7 +1,5 @@ CONFIG_RCU_TORTURE_TEST=y CONFIG_PRINTK_TIME=y -CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y -CONFIG_KVM_GUEST=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i686 b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i686 new file mode 100644 index 000000000000..d8b2f555686f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.i686 @@ -0,0 +1,2 @@ +CONFIG_HYPERVISOR_GUEST=y +CONFIG_KVM_GUEST=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le new file mode 100644 index 000000000000..133da04247ee --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.ppc64le @@ -0,0 +1 @@ +CONFIG_KVM_GUEST=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_64 b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_64 new file mode 100644 index 000000000000..d8b2f555686f --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon.x86_64 @@ -0,0 +1,2 @@ +CONFIG_HYPERVISOR_GUEST=y +CONFIG_KVM_GUEST=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot index 979edbf4c820..55ce305b2a3d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot @@ -2,3 +2,4 @@ nohz_full=2-9 rcutorture.stall_cpu=14 rcutorture.stall_cpu_holdoff=90 rcutorture.fwd_progress=0 +rcutree.nohz_full_patience_delay=1000 diff --git a/tools/testing/selftests/rcutorture/configs/refscale/TINY b/tools/testing/selftests/rcutorture/configs/refscale/TINY new file mode 100644 index 000000000000..759343980b80 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/refscale/TINY @@ -0,0 +1,20 @@ +CONFIG_SMP=n +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n +#CHECK#CONFIG_PREEMPT_RCU=n +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_HOTPLUG_CPU=n +CONFIG_SUSPEND=n +CONFIG_HIBERNATION=n +CONFIG_RCU_NOCB_CPU=n +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n +CONFIG_RCU_BOOST=n +CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index 742782438ca3..94cfdba5308d 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -290,12 +290,12 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param static bool arch_supports_noncont_cat(const struct resctrl_test *test) { - unsigned int eax, ebx, ecx, edx; - /* AMD always supports non-contiguous CBM. */ if (get_vendor() == ARCH_AMD) return true; +#if defined(__i386__) || defined(__x86_64__) /* arch */ + unsigned int eax, ebx, ecx, edx; /* Intel support for non-contiguous CBM needs to be discovered. */ if (!strcmp(test->resource, "L3")) __cpuid_count(0x10, 1, eax, ebx, ecx, edx); @@ -305,6 +305,9 @@ static bool arch_supports_noncont_cat(const struct resctrl_test *test) return false; return ((ecx >> 3) & 1); +#endif /* end arch */ + + return false; } static int noncont_cat_run_test(const struct resctrl_test *test, diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c index 7f7d3eb8b9c9..f9ccae50349b 100644 --- a/tools/testing/selftests/riscv/mm/mmap_bottomup.c +++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c @@ -7,8 +7,6 @@ TEST(infinite_rlimit) { EXPECT_EQ(BOTTOM_UP, memory_layout()); - - TEST_MMAPS; } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c index 2ba3ec990006..3f53b6ecc326 100644 --- a/tools/testing/selftests/riscv/mm/mmap_default.c +++ b/tools/testing/selftests/riscv/mm/mmap_default.c @@ -7,8 +7,6 @@ TEST(default_rlimit) { EXPECT_EQ(TOP_DOWN, memory_layout()); - - TEST_MMAPS; } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h index 3b29ca3bb3d4..75918d15919f 100644 --- a/tools/testing/selftests/riscv/mm/mmap_test.h +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -10,76 +10,9 @@ #define TOP_DOWN 0 #define BOTTOM_UP 1 -#if __riscv_xlen == 64 -uint64_t random_addresses[] = { - 0x19764f0d73b3a9f0, 0x016049584cecef59, 0x3580bdd3562f4acd, - 0x1164219f20b17da0, 0x07d97fcb40ff2373, 0x76ec528921272ee7, - 0x4dd48c38a3de3f70, 0x2e11415055f6997d, 0x14b43334ac476c02, - 0x375a60795aff19f6, 0x47f3051725b8ee1a, 0x4e697cf240494a9f, - 0x456b59b5c2f9e9d1, 0x101724379d63cb96, 0x7fe9ad31619528c1, - 0x2f417247c495c2ea, 0x329a5a5b82943a5e, 0x06d7a9d6adcd3827, - 0x327b0b9ee37f62d5, 0x17c7b1851dfd9b76, 0x006ebb6456ec2cd9, - 0x00836cd14146a134, 0x00e5c4dcde7126db, 0x004c29feadf75753, - 0x00d8b20149ed930c, 0x00d71574c269387a, 0x0006ebe4a82acb7a, - 0x0016135df51f471b, 0x00758bdb55455160, 0x00d0bdd949b13b32, - 0x00ecea01e7c5f54b, 0x00e37b071b9948b1, 0x0011fdd00ff57ab3, - 0x00e407294b52f5ea, 0x00567748c200ed20, 0x000d073084651046, - 0x00ac896f4365463c, 0x00eb0d49a0b26216, 0x0066a2564a982a31, - 0x002e0d20237784ae, 0x0000554ff8a77a76, 0x00006ce07a54c012, - 0x000009570516d799, 0x00000954ca15b84d, 0x0000684f0d453379, - 0x00002ae5816302b5, 0x0000042403fb54bf, 0x00004bad7392bf30, - 0x00003e73bfa4b5e3, 0x00005442c29978e0, 0x00002803f11286b6, - 0x000073875d745fc6, 0x00007cede9cb8240, 0x000027df84cc6a4f, - 0x00006d7e0e74242a, 0x00004afd0b836e02, 0x000047d0e837cd82, - 0x00003b42405efeda, 0x00001531bafa4c95, 0x00007172cae34ac4, -}; -#else -uint32_t random_addresses[] = { - 0x8dc302e0, 0x929ab1e0, 0xb47683ba, 0xea519c73, 0xa19f1c90, 0xc49ba213, - 0x8f57c625, 0xadfe5137, 0x874d4d95, 0xaa20f09d, 0xcf21ebfc, 0xda7737f1, - 0xcedf392a, 0x83026c14, 0xccedca52, 0xc6ccf826, 0xe0cd9415, 0x997472ca, - 0xa21a44c1, 0xe82196f5, 0xa23fd66b, 0xc28d5590, 0xd009cdce, 0xcf0be646, - 0x8fc8c7ff, 0xe2a85984, 0xa3d3236b, 0x89a0619d, 0xc03db924, 0xb5d4cc1b, - 0xb96ee04c, 0xd191da48, 0xb432a000, 0xaa2bebbc, 0xa2fcb289, 0xb0cca89b, - 0xb0c18d6a, 0x88f58deb, 0xa4d42d1c, 0xe4d74e86, 0x99902b09, 0x8f786d31, - 0xbec5e381, 0x9a727e65, 0xa9a65040, 0xa880d789, 0x8f1b335e, 0xfc821c1e, - 0x97e34be4, 0xbbef84ed, 0xf447d197, 0xfd7ceee2, 0xe632348d, 0xee4590f4, - 0x958992a5, 0xd57e05d6, 0xfd240970, 0xc5b0dcff, 0xd96da2c2, 0xa7ae041d, -}; -#endif - -// Only works on 64 bit -#if __riscv_xlen == 64 #define PROT (PROT_READ | PROT_WRITE) #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) -/* mmap must return a value that doesn't use more bits than the hint address. */ -static inline unsigned long get_max_value(unsigned long input) -{ - unsigned long max_bit = (1UL << (((sizeof(unsigned long) * 8) - 1 - - __builtin_clzl(input)))); - - return max_bit + (max_bit - 1); -} - -#define TEST_MMAPS \ - ({ \ - void *mmap_addr; \ - for (int i = 0; i < ARRAY_SIZE(random_addresses); i++) { \ - mmap_addr = mmap((void *)random_addresses[i], \ - 5 * sizeof(int), PROT, FLAGS, 0, 0); \ - EXPECT_NE(MAP_FAILED, mmap_addr); \ - EXPECT_GE((void *)get_max_value(random_addresses[i]), \ - mmap_addr); \ - mmap_addr = mmap((void *)random_addresses[i], \ - 5 * sizeof(int), PROT, FLAGS, 0, 0); \ - EXPECT_NE(MAP_FAILED, mmap_addr); \ - EXPECT_GE((void *)get_max_value(random_addresses[i]), \ - mmap_addr); \ - } \ - }) -#endif /* __riscv_xlen == 64 */ - static inline int memory_layout(void) { void *value1 = mmap(NULL, sizeof(int), PROT, FLAGS, 0, 0); diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index 63ce02d1d5cc..9647b14b47c5 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -410,13 +410,6 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { ASSERT_EQ(new, secs); } -static void __attribute__((constructor)) -__constructor_order_last(void) -{ - if (!__constructor_order) - __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; -} - int main(int argc, char **argv) { switch (argc) { diff --git a/tools/testing/selftests/rust/config b/tools/testing/selftests/rust/config index b4002acd40bc..5f942b5c8c17 100644 --- a/tools/testing/selftests/rust/config +++ b/tools/testing/selftests/rust/config @@ -1,5 +1,6 @@ +# CONFIG_GCC_PLUGINS is not set CONFIG_RUST=y CONFIG_SAMPLES=y CONFIG_SAMPLES_RUST=y CONFIG_SAMPLE_RUST_MINIMAL=m -CONFIG_SAMPLE_RUST_PRINT=m
\ No newline at end of file +CONFIG_SAMPLE_RUST_PRINT=m diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee349187636f..4f255cec0c22 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -143,7 +143,6 @@ class PluginMgr: except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) - print('test_ordinal is {}'.format(test_ordinal)) print('testid is {}'.format(caseinfo['id'])) raise diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c index 4421cd562c24..18e794a46c23 100644 --- a/tools/testing/selftests/timers/change_skew.c +++ b/tools/testing/selftests/timers/change_skew.c @@ -30,9 +30,6 @@ #include <time.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000LL - - int change_skew_test(int ppm) { struct timex tx; diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 07c81c0093c0..16bd49492efa 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -6,10 +6,13 @@ * * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com> */ - +#define _GNU_SOURCE #include <sys/time.h> +#include <sys/types.h> #include <stdio.h> #include <signal.h> +#include <stdint.h> +#include <string.h> #include <unistd.h> #include <time.h> #include <pthread.h> @@ -18,6 +21,21 @@ #define DELAY 2 #define USECS_PER_SEC 1000000 +#define NSECS_PER_SEC 1000000000 + +static void __fatal_error(const char *test, const char *name, const char *what) +{ + char buf[64]; + + strerror_r(errno, buf, sizeof(buf)); + + if (name && strlen(name)) + ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, buf); + else + ksft_exit_fail_msg("%s %s %s\n", test, what, buf); +} + +#define fatal_error(name, what) __fatal_error(__func__, name, what) static volatile int done; @@ -74,24 +92,13 @@ static int check_diff(struct timeval start, struct timeval end) return 0; } -static int check_itimer(int which) +static void check_itimer(int which, const char *name) { - const char *name; - int err; struct timeval start, end; struct itimerval val = { .it_value.tv_sec = DELAY, }; - if (which == ITIMER_VIRTUAL) - name = "ITIMER_VIRTUAL"; - else if (which == ITIMER_PROF) - name = "ITIMER_PROF"; - else if (which == ITIMER_REAL) - name = "ITIMER_REAL"; - else - return -1; - done = 0; if (which == ITIMER_VIRTUAL) @@ -101,17 +108,11 @@ static int check_itimer(int which) else if (which == ITIMER_REAL) signal(SIGALRM, sig_handler); - err = gettimeofday(&start, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&start, NULL) < 0) + fatal_error(name, "gettimeofday()"); - err = setitimer(which, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + if (setitimer(which, &val, NULL) < 0) + fatal_error(name, "setitimer()"); if (which == ITIMER_VIRTUAL) user_loop(); @@ -120,68 +121,41 @@ static int check_itimer(int which) else if (which == ITIMER_REAL) idle_loop(); - err = gettimeofday(&end, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&end, NULL) < 0) + fatal_error(name, "gettimeofday()"); ksft_test_result(check_diff(start, end) == 0, "%s\n", name); - - return 0; } -static int check_timer_create(int which) +static void check_timer_create(int which, const char *name) { - const char *type; - int err; - timer_t id; struct timeval start, end; struct itimerspec val = { .it_value.tv_sec = DELAY, }; - - if (which == CLOCK_THREAD_CPUTIME_ID) { - type = "thread"; - } else if (which == CLOCK_PROCESS_CPUTIME_ID) { - type = "process"; - } else { - ksft_print_msg("Unknown timer_create() type %d\n", which); - return -1; - } + timer_t id; done = 0; - err = timer_create(which, NULL, &id); - if (err < 0) { - ksft_perror("Can't create timer"); - return -1; - } - signal(SIGALRM, sig_handler); - err = gettimeofday(&start, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (timer_create(which, NULL, &id) < 0) + fatal_error(name, "timer_create()"); - err = timer_settime(id, 0, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + if (signal(SIGALRM, sig_handler) == SIG_ERR) + fatal_error(name, "signal()"); + + if (gettimeofday(&start, NULL) < 0) + fatal_error(name, "gettimeofday()"); + + if (timer_settime(id, 0, &val, NULL) < 0) + fatal_error(name, "timer_settime()"); user_loop(); - err = gettimeofday(&end, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&end, NULL) < 0) + fatal_error(name, "gettimeofday()"); ksft_test_result(check_diff(start, end) == 0, - "timer_create() per %s\n", type); - - return 0; + "timer_create() per %s\n", name); } static pthread_t ctd_thread; @@ -209,15 +183,14 @@ static void *ctd_thread_func(void *arg) ctd_count = 100; if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id)) - return "Can't create timer\n"; + fatal_error(NULL, "timer_create()"); if (timer_settime(id, 0, &val, NULL)) - return "Can't set timer\n"; - + fatal_error(NULL, "timer_settime()"); while (ctd_count > 0 && !ctd_failed) ; if (timer_delete(id)) - return "Can't delete timer\n"; + fatal_error(NULL, "timer_delete()"); return NULL; } @@ -225,19 +198,16 @@ static void *ctd_thread_func(void *arg) /* * Test that only the running thread receives the timer signal. */ -static int check_timer_distribution(void) +static void check_timer_distribution(void) { - const char *errmsg; + if (signal(SIGALRM, ctd_sighandler) == SIG_ERR) + fatal_error(NULL, "signal()"); - signal(SIGALRM, ctd_sighandler); - - errmsg = "Can't create thread\n"; if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL)) - goto err; + fatal_error(NULL, "pthread_create()"); - errmsg = "Can't join thread\n"; - if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg) - goto err; + if (pthread_join(ctd_thread, NULL)) + fatal_error(NULL, "pthread_join()"); if (!ctd_failed) ksft_test_result_pass("check signal distribution\n"); @@ -245,31 +215,399 @@ static int check_timer_distribution(void) ksft_test_result_fail("check signal distribution\n"); else ksft_test_result_skip("check signal distribution (old kernel)\n"); - return 0; -err: - ksft_print_msg("%s", errmsg); - return -1; +} + +struct tmrsig { + int signals; + int overruns; +}; + +static void siginfo_handler(int sig, siginfo_t *si, void *uc) +{ + struct tmrsig *tsig = si ? si->si_ptr : NULL; + + if (tsig) { + tsig->signals++; + tsig->overruns += si->si_overrun; + } +} + +static void *ignore_thread(void *arg) +{ + unsigned int *tid = arg; + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + *tid = gettid(); + sleep(100); + + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + return NULL; +} + +static void check_sig_ign(int thread) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + unsigned int tid = 0; + struct sigaction sa; + struct sigevent sev; + pthread_t pthread; + timer_t timerid; + sigset_t set; + + if (thread) { + if (pthread_create(&pthread, NULL, ignore_thread, &tid)) + fatal_error(NULL, "pthread_create()"); + sleep(1); + } + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (thread) { + sev.sigev_notify = SIGEV_THREAD_ID; + sev._sigev_un._tid = tid; + } + + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + timer_settime(timerid, 0, &its, NULL); + + sleep(1); + + /* Set the signal to be ignored */ + if (signal(SIGUSR1, SIG_IGN) == SIG_ERR) + fatal_error(NULL, "signal(SIG_IGN)"); + + sleep(1); + + if (thread) { + /* Stop the thread first. No signal should be delivered to it */ + if (pthread_cancel(pthread)) + fatal_error(NULL, "pthread_cancel()"); + if (pthread_join(pthread, NULL)) + fatal_error(NULL, "pthread_join()"); + } + + /* Restore the handler */ + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + sleep(1); + + /* Unblock it, which should deliver the signal in the !thread case*/ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + if (!thread) { + ksft_test_result(tsig.signals == 1 && tsig.overruns == 29, + "check_sig_ign SIGEV_SIGNAL\n"); + } else { + ksft_test_result(tsig.signals == 0 && tsig.overruns == 0, + "check_sig_ign SIGEV_THREAD_ID\n"); + } +} + +static void check_rearm(void) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + sleep(1); + + /* Reprogram the timer to single shot */ + its.it_value.tv_sec = 10; + its.it_value.tv_nsec = 0; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + /* Unblock it, which should not deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + ksft_test_result(!tsig.signals, "check_rearm\n"); +} + +static void check_delete(void) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + sleep(1); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + /* Unblock it, which should not deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + ksft_test_result(!tsig.signals, "check_delete\n"); +} + +static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2) +{ + int64_t diff; + + diff = NSECS_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); + diff += ((int) t1.tv_nsec - (int) t2.tv_nsec); + return diff; +} + +static void check_sigev_none(int which, const char *name) +{ + struct timespec start, now; + struct itimerspec its; + struct sigevent sev; + timer_t timerid; + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_NONE; + + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + timer_settime(timerid, 0, &its, NULL); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + if (timer_gettime(timerid, &its)) + fatal_error(name, "timer_gettime()"); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(its.it_value.tv_sec || its.it_value.tv_nsec, + "check_sigev_none %s\n", name); +} + +static void check_gettime(int which, const char *name) +{ + struct itimerspec its, prev; + struct timespec start, now; + struct sigevent sev; + timer_t timerid; + int wraps = 0; + sigset_t set; + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(name, "timer_settime()"); + + if (timer_gettime(timerid, &prev)) + fatal_error(name, "timer_gettime()"); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + if (timer_gettime(timerid, &its)) + fatal_error(name, "timer_gettime()"); + if (its.it_value.tv_nsec > prev.it_value.tv_nsec) + wraps++; + prev = its; + + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(wraps > 1, "check_gettime %s\n", name); +} + +static void check_overrun(int which, const char *name) +{ + struct timespec start, now; + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(name, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(name, "timer_settime()"); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + /* Unblock it, which should deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(tsig.signals == 1 && tsig.overruns == 9, + "check_overrun %s\n", name); } int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(6); + ksft_set_plan(18); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); - if (check_itimer(ITIMER_VIRTUAL) < 0) - ksft_exit_fail(); - - if (check_itimer(ITIMER_PROF) < 0) - ksft_exit_fail(); - - if (check_itimer(ITIMER_REAL) < 0) - ksft_exit_fail(); - - if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0) - ksft_exit_fail(); + check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL"); + check_itimer(ITIMER_PROF, "ITIMER_PROF"); + check_itimer(ITIMER_REAL, "ITIMER_REAL"); + check_timer_create(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); /* * It's unfortunately hard to reliably test a timer expiration @@ -280,11 +618,21 @@ int main(int argc, char **argv) * to ensure true parallelism. So test only one thread until we * find a better solution. */ - if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0) - ksft_exit_fail(); - - if (check_timer_distribution() < 0) - ksft_exit_fail(); + check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_timer_distribution(); + + check_sig_ign(0); + check_sig_ign(1); + check_rearm(); + check_delete(); + check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); + check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); ksft_finished(); } diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c index c8e6bffe4e0a..83450145fe65 100644 --- a/tools/testing/selftests/timers/skew_consistency.c +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -36,8 +36,6 @@ #include <sys/wait.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000LL - int main(int argc, char **argv) { struct timex tx; diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c index 76b38e41d9c7..d5564bbf0e50 100644 --- a/tools/testing/selftests/timers/threadtest.c +++ b/tools/testing/selftests/timers/threadtest.c @@ -38,10 +38,10 @@ struct timespec global_list[LISTSIZE]; int listcount = 0; -void checklist(struct timespec *list, int size) +void checklist(const struct timespec *list, int size) { int i, j; - struct timespec *a, *b; + const struct timespec *a, *b; /* scan the list */ for (i = 0; i < size-1; i++) { diff --git a/tools/testing/selftests/tpm2/test_async.sh b/tools/testing/selftests/tpm2/test_async.sh index 43bf5bd772fd..cf5a9c826097 100755 --- a/tools/testing/selftests/tpm2/test_async.sh +++ b/tools/testing/selftests/tpm2/test_async.sh @@ -7,4 +7,4 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip [ -e /dev/tpmrm0 ] || exit $ksft_skip -python3 -m unittest -v tpm2_tests.AsyncTest +python3 -m unittest -v tpm2_tests.AsyncTest 2>&1 diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 58af963e5b55..168f4b166234 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -5,5 +5,7 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip +read tpm_version < /sys/class/tpm/tpm0/tpm_version_major +[ "$tpm_version" == 2 ] || exit $ksft_skip -python3 -m unittest -v tpm2_tests.SmokeTest +python3 -m unittest -v tpm2_tests.SmokeTest 2>&1 diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh index 04c47b13fe8a..93894cbc89a8 100755 --- a/tools/testing/selftests/tpm2/test_space.sh +++ b/tools/testing/selftests/tpm2/test_space.sh @@ -6,4 +6,4 @@ ksft_skip=4 [ -e /dev/tpmrm0 ] || exit $ksft_skip -python3 -m unittest -v tpm2_tests.SpaceTest +python3 -m unittest -v tpm2_tests.SpaceTest 2>&1 diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile deleted file mode 100644 index 640a40f9b72b..000000000000 --- a/tools/testing/selftests/user/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# Makefile for user memory selftests - -# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" -all: - -TEST_PROGS := test_user_copy.sh - -include ../lib.mk diff --git a/tools/testing/selftests/user/config b/tools/testing/selftests/user/config deleted file mode 100644 index 784ed8416324..000000000000 --- a/tools/testing/selftests/user/config +++ /dev/null @@ -1 +0,0 @@ -CONFIG_TEST_USER_COPY=m diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh deleted file mode 100755 index f9b31a57439b..000000000000 --- a/tools/testing/selftests/user/test_user_copy.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Runs copy_to/from_user infrastructure using test_user_copy kernel module - -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - -if ! /sbin/modprobe -q -n test_user_copy; then - echo "user: module test_user_copy is not found [SKIP]" - exit $ksft_skip -fi -if /sbin/modprobe -q test_user_copy; then - /sbin/modprobe -q -r test_user_copy - echo "user_copy: ok" -else - echo "user_copy: [FAIL]" - exit 1 -fi diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 3de8e7e052ae..af9cedbf5357 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) +include ../../../scripts/Makefile.arch TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu @@ -11,14 +9,12 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += vdso_test_correctness -ifeq ($(uname_M),x86_64) +ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc s390)) TEST_GEN_PROGS += vdso_test_getrandom -ifneq ($(SODIUM),) TEST_GEN_PROGS += vdso_test_chacha endif -endif -CFLAGS := -std=gnu99 +CFLAGS := -std=gnu99 -O2 ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s @@ -38,11 +34,12 @@ $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl $(OUTPUT)/vdso_test_getrandom: parse_vdso.c $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ + $(KHDR_INCLUDES) \ -isystem $(top_srcdir)/include/uapi -$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/arch/$(ARCH)/entry/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ - -isystem $(top_srcdir)/arch/$(ARCH)/include \ - -isystem $(top_srcdir)/include \ - -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ - -Wa,--noexecstack $(SODIUM) + -idirafter $(top_srcdir)/tools/include/generated \ + -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \ + -idirafter $(top_srcdir)/include \ + -D__ASSEMBLY__ -Wa,--noexecstack diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 4ae417372e9e..7dd5668ea8a6 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -36,6 +36,12 @@ #define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) #define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) +#ifdef __s390x__ +#define ELF_HASH_ENTRY ELF(Xword) +#else +#define ELF_HASH_ENTRY ELF(Word) +#endif + static struct vdso_info { bool valid; @@ -47,8 +53,8 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; - ELF(Word) *bucket, *chain; - ELF(Word) nbucket, nchain; + ELF_HASH_ENTRY *bucket, *chain; + ELF_HASH_ENTRY nbucket, nchain; /* Version table */ ELF(Versym) *versym; @@ -115,7 +121,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* * Fish out the useful bits of the dynamic table. */ - ELF(Word) *hash = 0; + ELF_HASH_ENTRY *hash = 0; vdso_info.symstrings = 0; vdso_info.symtab = 0; vdso_info.versym = 0; @@ -133,7 +139,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) + vdso_info.load_offset); break; case DT_HASH: - hash = (ELF(Word) *) + hash = (ELF_HASH_ENTRY *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; @@ -216,7 +222,8 @@ void *vdso_sym(const char *version, const char *name) ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC && + ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE) continue; if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK) diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h new file mode 100644 index 000000000000..bb237d771051 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_call.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Macro to call vDSO functions + * + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ +#ifndef __VDSO_CALL_H__ +#define __VDSO_CALL_H__ + +#ifdef __powerpc__ + +#define LOADARGS_1(fn, __arg1) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ +} while (0) + +#define LOADARGS_2(fn, __arg1, __arg2) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ + _r4 = (long)__arg2; \ +} while (0) + +#define LOADARGS_3(fn, __arg1, __arg2, __arg3) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ + _r4 = (long)__arg2; \ + _r5 = (long)__arg3; \ +} while (0) + +#define LOADARGS_5(fn, __arg1, __arg2, __arg3, __arg4, __arg5) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ + _r4 = (long)__arg2; \ + _r5 = (long)__arg3; \ + _r6 = (long)__arg4; \ + _r7 = (long)__arg5; \ +} while (0) + +#define VDSO_CALL(fn, nr, args...) ({ \ + register void *_r0 asm ("r0"); \ + register long _r3 asm ("r3"); \ + register long _r4 asm ("r4"); \ + register long _r5 asm ("r5"); \ + register long _r6 asm ("r6"); \ + register long _r7 asm ("r7"); \ + register long _r8 asm ("r8"); \ + register long _rval asm ("r3"); \ + \ + LOADARGS_##nr(fn, args); \ + \ + asm volatile( \ + " mtctr %0\n" \ + " bctrl\n" \ + " bns+ 1f\n" \ + " neg 3, 3\n" \ + "1:" \ + : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \ + "+r" (_r6), "+r" (_r7), "+r" (_r8) \ + : "r" (_rval) \ + : "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \ + "cr6", "cr7", "xer", "lr", "ctr", "memory" \ + ); \ + _rval; \ +}) + +#else +#define VDSO_CALL(fn, nr, args...) fn(args) +#endif + +#endif diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index 7b543e7f04d7..722260f97561 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -18,18 +18,18 @@ #elif defined(__aarch64__) #define VDSO_VERSION 3 #define VDSO_NAMES 0 -#elif defined(__powerpc__) +#elif defined(__powerpc64__) #define VDSO_VERSION 1 #define VDSO_NAMES 0 -#define VDSO_32BIT 1 -#elif defined(__powerpc64__) +#elif defined(__powerpc__) #define VDSO_VERSION 1 #define VDSO_NAMES 0 -#elif defined (__s390__) +#define VDSO_32BIT 1 +#elif defined (__s390__) && !defined(__s390x__) #define VDSO_VERSION 2 #define VDSO_NAMES 0 #define VDSO_32BIT 1 -#elif defined (__s390X__) +#elif defined (__s390x__) #define VDSO_VERSION 2 #define VDSO_NAMES 0 #elif defined(__mips__) @@ -68,16 +68,15 @@ static const char *versions[7] = { "LINUX_5.10" }; -static const char *names[2][6] = { +static const char *names[2][7] = { { "__kernel_gettimeofday", "__kernel_clock_gettime", "__kernel_time", "__kernel_clock_getres", "__kernel_getcpu", -#if defined(VDSO_32BIT) "__kernel_clock_gettime64", -#endif + "__kernel_getrandom", }, { "__vdso_gettimeofday", @@ -85,9 +84,8 @@ static const char *names[2][6] = { "__vdso_time", "__vdso_clock_getres", "__vdso_getcpu", -#if defined(VDSO_32BIT) "__vdso_clock_gettime64", -#endif + "__vdso_getrandom", }, }; diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index 96d32fd65b42..a54424e2336f 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -20,10 +20,8 @@ #include "../kselftest.h" #include "vdso_config.h" - -extern void *vdso_sym(const char *version, const char *name); -extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); -extern void vdso_init_from_auxv(void *auxv); +#include "vdso_call.h" +#include "parse_vdso.h" static const char *version; static const char **name; @@ -61,7 +59,7 @@ static void vdso_test_gettimeofday(void) } struct timeval tv; - long ret = vdso_gettimeofday(&tv, 0); + long ret = VDSO_CALL(vdso_gettimeofday, 2, &tv, 0); if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", @@ -86,7 +84,7 @@ static void vdso_test_clock_gettime(clockid_t clk_id) } struct timespec ts; - long ret = vdso_clock_gettime(clk_id, &ts); + long ret = VDSO_CALL(vdso_clock_gettime, 2, clk_id, &ts); if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", @@ -111,7 +109,7 @@ static void vdso_test_time(void) return; } - long ret = vdso_time(NULL); + long ret = VDSO_CALL(vdso_time, 1, NULL); if (ret > 0) { ksft_print_msg("The time in hours since January 1, 1970 is %lld\n", @@ -138,7 +136,7 @@ static void vdso_test_clock_getres(clockid_t clk_id) } struct timespec ts, sys_ts; - long ret = vdso_clock_getres(clk_id, &ts); + long ret = VDSO_CALL(vdso_clock_getres, 2, clk_id, &ts); if (ret == 0) { ksft_print_msg("The vdso resolution is %lld %lld\n", diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index e38f44e5f803..b1ea532c5996 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -3,23 +3,90 @@ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ -#include <sodium/crypto_stream_chacha20.h> +#include <tools/le_byteshift.h> #include <sys/random.h> +#include <sys/auxv.h> #include <string.h> #include <stdint.h> +#include <stdbool.h> #include "../kselftest.h" -extern void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint8_t *key, uint32_t *counter, size_t nblocks); +#if defined(__aarch64__) +static bool cpu_has_capabilities(void) +{ + return getauxval(AT_HWCAP) & HWCAP_ASIMD; +} +#elif defined(__s390x__) +static bool cpu_has_capabilities(void) +{ + return getauxval(AT_HWCAP) & HWCAP_S390_VXRS; +} +#else +static bool cpu_has_capabilities(void) +{ + return true; +} +#endif + +static uint32_t rol32(uint32_t word, unsigned int shift) +{ + return (word << (shift & 31)) | (word >> ((-shift) & 31)); +} + +static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) +{ + uint32_t s[16] = { + 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U, + key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7], + counter[0], counter[1], 0, 0 + }; + + while (nblocks--) { + uint32_t x[16]; + memcpy(x, s, sizeof(x)); + for (unsigned int r = 0; r < 20; r += 2) { + #define QR(a, b, c, d) ( \ + x[a] += x[b], \ + x[d] = rol32(x[d] ^ x[a], 16), \ + x[c] += x[d], \ + x[b] = rol32(x[b] ^ x[c], 12), \ + x[a] += x[b], \ + x[d] = rol32(x[d] ^ x[a], 8), \ + x[c] += x[d], \ + x[b] = rol32(x[b] ^ x[c], 7)) + + QR(0, 4, 8, 12); + QR(1, 5, 9, 13); + QR(2, 6, 10, 14); + QR(3, 7, 11, 15); + QR(0, 5, 10, 15); + QR(1, 6, 11, 12); + QR(2, 7, 8, 13); + QR(3, 4, 9, 14); + } + for (unsigned int i = 0; i < 16; ++i, dst_bytes += sizeof(uint32_t)) + put_unaligned_le32(x[i] + s[i], dst_bytes); + if (!++s[12]) + ++s[13]; + } + counter[0] = s[12]; + counter[1] = s[13]; +} + +typedef uint8_t u8; +typedef uint32_t u32; +typedef uint64_t u64; +#include <vdso/getrandom.h> int main(int argc, char *argv[]) { enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 }; - static const uint8_t nonce[8] = { 0 }; - uint32_t counter[2]; - uint8_t key[32]; + uint32_t key[8], counter1[2], counter2[2]; uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS]; ksft_print_header(); + if (!cpu_has_capabilities()) + ksft_exit_skip("Required CPU capabilities missing\n"); ksft_set_plan(1); for (unsigned int trial = 0; trial < TRIALS; ++trial) { @@ -27,17 +94,33 @@ int main(int argc, char *argv[]) printf("getrandom() failed!\n"); return KSFT_SKIP; } - crypto_stream_chacha20(output1, sizeof(output1), nonce, key); + memset(counter1, 0, sizeof(counter1)); + reference_chacha20_blocks(output1, key, counter1, BLOCKS); for (unsigned int split = 0; split < BLOCKS; ++split) { memset(output2, 'X', sizeof(output2)); - memset(counter, 0, sizeof(counter)); + memset(counter2, 0, sizeof(counter2)); if (split) - __arch_chacha20_blocks_nostack(output2, key, counter, split); - __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split); - if (memcmp(output1, output2, sizeof(output1))) + __arch_chacha20_blocks_nostack(output2, key, counter2, split); + __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split); + if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) return KSFT_FAIL; } } + memset(counter1, 0, sizeof(counter1)); + counter1[0] = (uint32_t)-BLOCKS + 2; + memset(counter2, 0, sizeof(counter2)); + counter2[0] = (uint32_t)-BLOCKS + 2; + + reference_chacha20_blocks(output1, key, counter1, BLOCKS); + __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); + if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) + return KSFT_FAIL; + + reference_chacha20_blocks(output1, key, counter1, BLOCKS); + __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); + if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) + return KSFT_FAIL; + ksft_test_result_pass("chacha: PASS\n"); return KSFT_PASS; } diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index e691a3cf1491..5fb97ad67eea 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -20,6 +20,7 @@ #include <limits.h> #include "vdso_config.h" +#include "vdso_call.h" #include "../kselftest.h" static const char **name; @@ -114,6 +115,12 @@ static void fill_function_pointers() if (!vdso) vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso32.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso64.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); if (!vdso) { printf("[WARN]\tfailed to find vDSO\n"); return; @@ -180,7 +187,7 @@ static void test_getcpu(void) ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); if (vdso_getcpu) - ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); + ret_vdso = VDSO_CALL(vdso_getcpu, 3, &cpu_vdso, &node_vdso, 0); if (vgetcpu) ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); @@ -263,7 +270,7 @@ static void test_one_clock_gettime(int clock, const char *name) if (sys_clock_gettime(clock, &start) < 0) { if (errno == EINVAL) { - vdso_ret = vdso_clock_gettime(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime, 2, clock, &vdso); if (vdso_ret == -EINVAL) { printf("[OK]\tNo such clock.\n"); } else { @@ -276,7 +283,7 @@ static void test_one_clock_gettime(int clock, const char *name) return; } - vdso_ret = vdso_clock_gettime(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime, 2, clock, &vdso); end_ret = sys_clock_gettime(clock, &end); if (vdso_ret != 0 || end_ret != 0) { @@ -325,7 +332,7 @@ static void test_one_clock_gettime64(int clock, const char *name) if (sys_clock_gettime64(clock, &start) < 0) { if (errno == EINVAL) { - vdso_ret = vdso_clock_gettime64(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime64, 2, clock, &vdso); if (vdso_ret == -EINVAL) { printf("[OK]\tNo such clock.\n"); } else { @@ -338,7 +345,7 @@ static void test_one_clock_gettime64(int clock, const char *name) return; } - vdso_ret = vdso_clock_gettime64(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime64, 2, clock, &vdso); end_ret = sys_clock_gettime64(clock, &end); if (vdso_ret != 0 || end_ret != 0) { @@ -395,7 +402,7 @@ static void test_gettimeofday(void) return; } - vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); + vdso_ret = VDSO_CALL(vdso_gettimeofday, 2, &vdso, &vdso_tz); end_ret = sys_gettimeofday(&end, NULL); if (vdso_ret != 0 || end_ret != 0) { @@ -425,7 +432,7 @@ static void test_gettimeofday(void) } /* And make sure that passing NULL for tz doesn't crash. */ - vdso_gettimeofday(&vdso, NULL); + VDSO_CALL(vdso_gettimeofday, 2, &vdso, NULL); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c index b758f68c6c9c..cdeaed45fb26 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c +++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c @@ -14,6 +14,7 @@ #include "../kselftest.h" #include "parse_vdso.h" #include "vdso_config.h" +#include "vdso_call.h" struct getcpu_cache; typedef long (*getcpu_t)(unsigned int *, unsigned int *, @@ -42,7 +43,7 @@ int main(int argc, char **argv) return KSFT_SKIP; } - ret = get_cpu(&cpu, &node, 0); + ret = VDSO_CALL(get_cpu, 3, &cpu, &node, 0); if (ret == 0) { printf("Running on CPU %u node %u\n", cpu, node); } else { diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 05122425a873..72a1d9b43a84 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -16,11 +16,17 @@ #include <sys/mman.h> #include <sys/random.h> #include <sys/syscall.h> +#include <sys/ptrace.h> +#include <sys/wait.h> #include <sys/types.h> #include <linux/random.h> +#include <linux/compiler.h> +#include <linux/ptrace.h> #include "../kselftest.h" #include "parse_vdso.h" +#include "vdso_config.h" +#include "vdso_call.h" #ifndef timespecsub #define timespecsub(tsp, usp, vsp) \ @@ -38,50 +44,43 @@ static struct { pthread_mutex_t lock; void **states; size_t len, cap; -} grnd_allocator = { - .lock = PTHREAD_MUTEX_INITIALIZER -}; - -static struct { ssize_t(*fn)(void *, size_t, unsigned long, void *, size_t); - pthread_key_t key; - pthread_once_t initialized; struct vgetrandom_opaque_params params; -} grnd_ctx = { - .initialized = PTHREAD_ONCE_INIT +} vgrnd = { + .lock = PTHREAD_MUTEX_INITIALIZER }; static void *vgetrandom_get_state(void) { void *state = NULL; - pthread_mutex_lock(&grnd_allocator.lock); - if (!grnd_allocator.len) { + pthread_mutex_lock(&vgrnd.lock); + if (!vgrnd.len) { size_t page_size = getpagesize(); size_t new_cap; size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */ void *new_block, *new_states; - alloc_size = (num * grnd_ctx.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); - num = (page_size / grnd_ctx.params.size_of_opaque_state) * (alloc_size / page_size); - new_block = mmap(0, alloc_size, grnd_ctx.params.mmap_prot, grnd_ctx.params.mmap_flags, -1, 0); + alloc_size = (num * vgrnd.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); + num = (page_size / vgrnd.params.size_of_opaque_state) * (alloc_size / page_size); + new_block = mmap(0, alloc_size, vgrnd.params.mmap_prot, vgrnd.params.mmap_flags, -1, 0); if (new_block == MAP_FAILED) goto out; - new_cap = grnd_allocator.cap + num; - new_states = reallocarray(grnd_allocator.states, new_cap, sizeof(*grnd_allocator.states)); + new_cap = vgrnd.cap + num; + new_states = reallocarray(vgrnd.states, new_cap, sizeof(*vgrnd.states)); if (!new_states) goto unmap; - grnd_allocator.cap = new_cap; - grnd_allocator.states = new_states; + vgrnd.cap = new_cap; + vgrnd.states = new_states; for (size_t i = 0; i < num; ++i) { - if (((uintptr_t)new_block & (page_size - 1)) + grnd_ctx.params.size_of_opaque_state > page_size) + if (((uintptr_t)new_block & (page_size - 1)) + vgrnd.params.size_of_opaque_state > page_size) new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1))); - grnd_allocator.states[i] = new_block; - new_block += grnd_ctx.params.size_of_opaque_state; + vgrnd.states[i] = new_block; + new_block += vgrnd.params.size_of_opaque_state; } - grnd_allocator.len = num; + vgrnd.len = num; goto success; unmap: @@ -89,10 +88,10 @@ static void *vgetrandom_get_state(void) goto out; } success: - state = grnd_allocator.states[--grnd_allocator.len]; + state = vgrnd.states[--vgrnd.len]; out: - pthread_mutex_unlock(&grnd_allocator.lock); + pthread_mutex_unlock(&vgrnd.lock); return state; } @@ -100,27 +99,33 @@ static void vgetrandom_put_state(void *state) { if (!state) return; - pthread_mutex_lock(&grnd_allocator.lock); - grnd_allocator.states[grnd_allocator.len++] = state; - pthread_mutex_unlock(&grnd_allocator.lock); + pthread_mutex_lock(&vgrnd.lock); + vgrnd.states[vgrnd.len++] = state; + pthread_mutex_unlock(&vgrnd.lock); } static void vgetrandom_init(void) { - if (pthread_key_create(&grnd_ctx.key, vgetrandom_put_state) != 0) - return; + const char *version = versions[VDSO_VERSION]; + const char *name = names[VDSO_NAMES][6]; unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + size_t ret; + if (!sysinfo_ehdr) { printf("AT_SYSINFO_EHDR is not present!\n"); exit(KSFT_SKIP); } vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); - grnd_ctx.fn = (__typeof__(grnd_ctx.fn))vdso_sym("LINUX_2.6", "__vdso_getrandom"); - if (!grnd_ctx.fn) { - printf("__vdso_getrandom is missing!\n"); + vgrnd.fn = (__typeof__(vgrnd.fn))vdso_sym(version, name); + if (!vgrnd.fn) { + printf("%s is missing!\n", name); exit(KSFT_FAIL); } - if (grnd_ctx.fn(NULL, 0, 0, &grnd_ctx.params, ~0UL) != 0) { + ret = VDSO_CALL(vgrnd.fn, 5, NULL, 0, 0, &vgrnd.params, ~0UL); + if (ret == -ENOSYS) { + printf("unsupported architecture\n"); + exit(KSFT_SKIP); + } else if (ret) { printf("failed to fetch vgetrandom params!\n"); exit(KSFT_FAIL); } @@ -128,27 +133,21 @@ static void vgetrandom_init(void) static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) { - void *state; + static __thread void *state; - pthread_once(&grnd_ctx.initialized, vgetrandom_init); - state = pthread_getspecific(grnd_ctx.key); if (!state) { state = vgetrandom_get_state(); - if (pthread_setspecific(grnd_ctx.key, state) != 0) { - vgetrandom_put_state(state); - state = NULL; - } if (!state) { printf("vgetrandom_get_state failed!\n"); exit(KSFT_FAIL); } } - return grnd_ctx.fn(buf, len, flags, state, grnd_ctx.params.size_of_opaque_state); + return VDSO_CALL(vgrnd.fn, 5, buf, len, flags, state, vgrnd.params.size_of_opaque_state); } enum { TRIALS = 25000000, THREADS = 256 }; -static void *test_vdso_getrandom(void *) +static void *test_vdso_getrandom(void *ctx) { for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; @@ -158,7 +157,7 @@ static void *test_vdso_getrandom(void *) return NULL; } -static void *test_libc_getrandom(void *) +static void *test_libc_getrandom(void *ctx) { for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; @@ -168,7 +167,7 @@ static void *test_libc_getrandom(void *) return NULL; } -static void *test_syscall_getrandom(void *) +static void *test_syscall_getrandom(void *ctx) { for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; @@ -244,9 +243,10 @@ static void fill(void) static void kselftest(void) { uint8_t weird_size[1263]; + pid_t child; ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2); for (size_t i = 0; i < 1000; ++i) { ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0); @@ -255,6 +255,42 @@ static void kselftest(void) } ksft_test_result_pass("getrandom: PASS\n"); + + unshare(CLONE_NEWUSER); + assert(unshare(CLONE_NEWTIME) == 0); + child = fork(); + assert(child >= 0); + if (!child) { + vgetrandom_init(); + child = getpid(); + assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); + assert(kill(child, SIGSTOP) == 0); + assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); + _exit(0); + } + for (;;) { + struct ptrace_syscall_info info = { 0 }; + int status, ret; + assert(waitpid(child, &status, 0) >= 0); + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) + exit(KSFT_FAIL); + break; + } + assert(WIFSTOPPED(status)); + if (WSTOPSIG(status) == SIGSTOP) + assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); + else if (WSTOPSIG(status) == (SIGTRAP | 0x80)) { + assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); + if (info.op == PTRACE_SYSCALL_INFO_ENTRY && info.entry.nr == __NR_getrandom && + info.entry.args[0] == (uintptr_t)weird_size && info.entry.args[1] == sizeof(weird_size)) + exit(KSFT_FAIL); + } + assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); + } + + ksft_test_result_pass("getrandom timens: PASS\n"); + exit(KSFT_PASS); } @@ -265,6 +301,8 @@ static void usage(const char *argv0) int main(int argc, char *argv[]) { + vgetrandom_init(); + if (argc == 1) { kselftest(); return 0; diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index ee4f1ca56a71..e31b18ffae33 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ -19,6 +19,7 @@ #include "../kselftest.h" #include "parse_vdso.h" #include "vdso_config.h" +#include "vdso_call.h" int main(int argc, char **argv) { @@ -43,7 +44,7 @@ int main(int argc, char **argv) } struct timeval tv; - long ret = gtod(&tv, 0); + long ret = VDSO_CALL(gtod, 2, &tv, 0); if (ret == 0) { printf("The time is %lld.%06lld\n", diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5c8757a25998..d51249f14e2f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -77,7 +77,7 @@ all_32: $(BINARIES_32) all_64: $(BINARIES_64) -EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) +EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) srso $(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm diff --git a/tools/testing/selftests/x86/srso.c b/tools/testing/selftests/x86/srso.c new file mode 100644 index 000000000000..394ec8bdeb00 --- /dev/null +++ b/tools/testing/selftests/x86/srso.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/perf_event.h> +#include <cpuid.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +int main(void) +{ + struct perf_event_attr ret_attr, mret_attr; + long long count_rets, count_rets_mispred; + int rrets_fd, mrrets_fd; + unsigned int cpuid1_eax, b, c, d; + + __cpuid(1, cpuid1_eax, b, c, d); + + if (cpuid1_eax < 0x00800f00 || + cpuid1_eax > 0x00afffff) { + fprintf(stderr, "This needs to run on a Zen[1-4] machine (CPUID(1).EAX: 0x%x). Exiting...\n", cpuid1_eax); + exit(EXIT_FAILURE); + } + + memset(&ret_attr, 0, sizeof(struct perf_event_attr)); + memset(&mret_attr, 0, sizeof(struct perf_event_attr)); + + ret_attr.type = mret_attr.type = PERF_TYPE_RAW; + ret_attr.size = mret_attr.size = sizeof(struct perf_event_attr); + ret_attr.config = 0xc8; + mret_attr.config = 0xc9; + ret_attr.disabled = mret_attr.disabled = 1; + ret_attr.exclude_user = mret_attr.exclude_user = 1; + ret_attr.exclude_hv = mret_attr.exclude_hv = 1; + + rrets_fd = syscall(SYS_perf_event_open, &ret_attr, 0, -1, -1, 0); + if (rrets_fd == -1) { + perror("opening retired RETs fd"); + exit(EXIT_FAILURE); + } + + mrrets_fd = syscall(SYS_perf_event_open, &mret_attr, 0, -1, -1, 0); + if (mrrets_fd == -1) { + perror("opening retired mispredicted RETs fd"); + exit(EXIT_FAILURE); + } + + ioctl(rrets_fd, PERF_EVENT_IOC_RESET, 0); + ioctl(mrrets_fd, PERF_EVENT_IOC_RESET, 0); + + ioctl(rrets_fd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(mrrets_fd, PERF_EVENT_IOC_ENABLE, 0); + + printf("Sleeping for 10 seconds\n"); + sleep(10); + + ioctl(rrets_fd, PERF_EVENT_IOC_DISABLE, 0); + ioctl(mrrets_fd, PERF_EVENT_IOC_DISABLE, 0); + + read(rrets_fd, &count_rets, sizeof(long long)); + read(mrrets_fd, &count_rets_mispred, sizeof(long long)); + + printf("RETs: (%lld retired <-> %lld mispredicted)\n", + count_rets, count_rets_mispred); + printf("SRSO Safe-RET mitigation works correctly if both counts are almost equal.\n"); + + return 0; +} diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/shared/autoconf.h index 92dc474c349b..92dc474c349b 100644 --- a/tools/testing/radix-tree/generated/autoconf.h +++ b/tools/testing/shared/autoconf.h diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/shared/linux.c index 4eb442206d01..17263696b5d8 100644 --- a/tools/testing/radix-tree/linux.c +++ b/tools/testing/shared/linux.c @@ -26,8 +26,21 @@ struct kmem_cache { unsigned int non_kernel; unsigned long nr_allocated; unsigned long nr_tallocated; + bool exec_callback; + void (*callback)(void *); + void *private; }; +void kmem_cache_set_callback(struct kmem_cache *cachep, void (*callback)(void *)) +{ + cachep->callback = callback; +} + +void kmem_cache_set_private(struct kmem_cache *cachep, void *private) +{ + cachep->private = private; +} + void kmem_cache_set_non_kernel(struct kmem_cache *cachep, unsigned int val) { cachep->non_kernel = val; @@ -58,9 +71,17 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, { void *p; + if (cachep->exec_callback) { + if (cachep->callback) + cachep->callback(cachep->private); + cachep->exec_callback = false; + } + if (!(gfp & __GFP_DIRECT_RECLAIM)) { - if (!cachep->non_kernel) + if (!cachep->non_kernel) { + cachep->exec_callback = true; return NULL; + } cachep->non_kernel--; } @@ -223,6 +244,9 @@ kmem_cache_create(const char *name, unsigned int size, unsigned int align, ret->objs = NULL; ret->ctor = ctor; ret->non_kernel = 0; + ret->exec_callback = false; + ret->callback = NULL; + ret->private = NULL; return ret; } diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/shared/linux/bug.h index 03dc8a57eb99..03dc8a57eb99 100644 --- a/tools/testing/radix-tree/linux/bug.h +++ b/tools/testing/shared/linux/bug.h diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/shared/linux/cpu.h index a45530d78107..a45530d78107 100644 --- a/tools/testing/radix-tree/linux/cpu.h +++ b/tools/testing/shared/linux/cpu.h diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/shared/linux/idr.h index 4e342f2e37cf..4e342f2e37cf 100644 --- a/tools/testing/radix-tree/linux/idr.h +++ b/tools/testing/shared/linux/idr.h diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/shared/linux/init.h index 81563c3dfce7..81563c3dfce7 100644 --- a/tools/testing/radix-tree/linux/init.h +++ b/tools/testing/shared/linux/init.h diff --git a/tools/testing/radix-tree/linux/kconfig.h b/tools/testing/shared/linux/kconfig.h index 6c8675859913..6c8675859913 100644 --- a/tools/testing/radix-tree/linux/kconfig.h +++ b/tools/testing/shared/linux/kconfig.h diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/shared/linux/kernel.h index c0a2bb785b92..c0a2bb785b92 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/shared/linux/kernel.h diff --git a/tools/testing/radix-tree/linux/kmemleak.h b/tools/testing/shared/linux/kmemleak.h index 155f112786c4..155f112786c4 100644 --- a/tools/testing/radix-tree/linux/kmemleak.h +++ b/tools/testing/shared/linux/kmemleak.h diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/shared/linux/local_lock.h index b3cf8b233ca4..b3cf8b233ca4 100644 --- a/tools/testing/radix-tree/linux/local_lock.h +++ b/tools/testing/shared/linux/local_lock.h diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/shared/linux/lockdep.h index 62473ab57f99..62473ab57f99 100644 --- a/tools/testing/radix-tree/linux/lockdep.h +++ b/tools/testing/shared/linux/lockdep.h diff --git a/tools/testing/radix-tree/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h index 06c89bdcc515..06c89bdcc515 100644 --- a/tools/testing/radix-tree/linux/maple_tree.h +++ b/tools/testing/shared/linux/maple_tree.h diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/shared/linux/percpu.h index b2403aa743b2..b2403aa743b2 100644 --- a/tools/testing/radix-tree/linux/percpu.h +++ b/tools/testing/shared/linux/percpu.h diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/shared/linux/preempt.h index edb10302b903..edb10302b903 100644 --- a/tools/testing/radix-tree/linux/preempt.h +++ b/tools/testing/shared/linux/preempt.h diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/shared/linux/radix-tree.h index d1635a5bef02..d1635a5bef02 100644 --- a/tools/testing/radix-tree/linux/radix-tree.h +++ b/tools/testing/shared/linux/radix-tree.h diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/shared/linux/rcupdate.h index fed468fb0c78..fed468fb0c78 100644 --- a/tools/testing/radix-tree/linux/rcupdate.h +++ b/tools/testing/shared/linux/rcupdate.h diff --git a/tools/testing/radix-tree/linux/xarray.h b/tools/testing/shared/linux/xarray.h index df3812cda376..df3812cda376 100644 --- a/tools/testing/radix-tree/linux/xarray.h +++ b/tools/testing/shared/linux/xarray.h diff --git a/tools/testing/shared/maple-shared.h b/tools/testing/shared/maple-shared.h new file mode 100644 index 000000000000..3d847edd149d --- /dev/null +++ b/tools/testing/shared/maple-shared.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#define CONFIG_DEBUG_MAPLE_TREE +#define CONFIG_MAPLE_SEARCH +#define MAPLE_32BIT (MAPLE_NODE_SLOTS > 31) +#include "shared.h" +#include <stdlib.h> +#include <time.h> +#include "linux/init.h" diff --git a/tools/testing/shared/maple-shim.c b/tools/testing/shared/maple-shim.c new file mode 100644 index 000000000000..640df76f483e --- /dev/null +++ b/tools/testing/shared/maple-shim.c @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* Very simple shim around the maple tree. */ + +#include "maple-shared.h" + +#include "../../../lib/maple_tree.c" diff --git a/tools/testing/shared/shared.h b/tools/testing/shared/shared.h new file mode 100644 index 000000000000..f08f683812ad --- /dev/null +++ b/tools/testing/shared/shared.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include <linux/types.h> +#include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/bitops.h> + +#include <linux/gfp.h> +#include <linux/rcupdate.h> + +#ifndef module_init +#define module_init(x) +#endif + +#ifndef module_exit +#define module_exit(x) +#endif + +#ifndef MODULE_AUTHOR +#define MODULE_AUTHOR(x) +#endif + +#ifndef MODULE_LICENSE +#define MODULE_LICENSE(x) +#endif + +#ifndef MODULE_DESCRIPTION +#define MODULE_DESCRIPTION(x) +#endif + +#ifndef dump_stack +#define dump_stack() assert(0) +#endif diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk new file mode 100644 index 000000000000..a05f0588513a --- /dev/null +++ b/tools/testing/shared/shared.mk @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../shared -I. -I../../include -I../../../lib -g -Og -Wall \ + -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined +LDFLAGS += -fsanitize=address -fsanitize=undefined +LDLIBS += -lpthread -lurcu +LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o +SHARED_OFILES = xarray-shared.o radix-tree.o idr.o linux.o $(LIBS) + +SHARED_DEPS = Makefile ../shared/shared.mk ../shared/*.h generated/map-shift.h \ + generated/bit-length.h generated/autoconf.h \ + ../../include/linux/*.h \ + ../../include/asm/*.h \ + ../../../include/linux/xarray.h \ + ../../../include/linux/maple_tree.h \ + ../../../include/linux/radix-tree.h \ + ../../../lib/radix-tree.h \ + ../../../include/linux/idr.h + +ifndef SHIFT + SHIFT=3 +endif + +ifeq ($(BUILD), 32) + CFLAGS += -m32 + LDFLAGS += -m32 +LONG_BIT := 32 +endif + +ifndef LONG_BIT +LONG_BIT := $(shell getconf LONG_BIT) +endif + +%.o: ../shared/%.c + $(CC) -c $(CFLAGS) $< -o $@ + +vpath %.c ../../lib + +$(SHARED_OFILES): $(SHARED_DEPS) + +radix-tree.c: ../../../lib/radix-tree.c + sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ + +idr.c: ../../../lib/idr.c + sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ + +xarray-shared.o: ../shared/xarray-shared.c ../../../lib/xarray.c \ + ../../../lib/test_xarray.c + +maple-shared.o: ../shared/maple-shared.c ../../../lib/maple_tree.c \ + ../../../lib/test_maple_tree.c + +generated/autoconf.h: + @mkdir -p generated + cp ../shared/autoconf.h generated/autoconf.h + +generated/map-shift.h: + @mkdir -p generated + @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ + echo "Generating $@"; \ + echo "#define XA_CHUNK_SHIFT $(SHIFT)" > \ + generated/map-shift.h; \ + fi + +generated/bit-length.h: FORCE + @mkdir -p generated + @if ! grep -qws CONFIG_$(LONG_BIT)BIT generated/bit-length.h; then \ + echo "Generating $@"; \ + echo "#define CONFIG_$(LONG_BIT)BIT 1" > $@; \ + fi + +FORCE: ; diff --git a/tools/testing/radix-tree/trace/events/maple_tree.h b/tools/testing/shared/trace/events/maple_tree.h index 97d0e1ddcf08..97d0e1ddcf08 100644 --- a/tools/testing/radix-tree/trace/events/maple_tree.h +++ b/tools/testing/shared/trace/events/maple_tree.h diff --git a/tools/testing/shared/xarray-shared.c b/tools/testing/shared/xarray-shared.c new file mode 100644 index 000000000000..e90901958dcd --- /dev/null +++ b/tools/testing/shared/xarray-shared.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "xarray-shared.h" + +#include "../../../lib/xarray.c" diff --git a/tools/testing/shared/xarray-shared.h b/tools/testing/shared/xarray-shared.h new file mode 100644 index 000000000000..ac2d16ff53ae --- /dev/null +++ b/tools/testing/shared/xarray-shared.h @@ -0,0 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#define XA_DEBUG +#include "shared.h" diff --git a/tools/testing/vma/.gitignore b/tools/testing/vma/.gitignore new file mode 100644 index 000000000000..b003258eba79 --- /dev/null +++ b/tools/testing/vma/.gitignore @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +generated/bit-length.h +generated/map-shift.h +generated/autoconf.h +idr.c +radix-tree.c +vma diff --git a/tools/testing/vma/Makefile b/tools/testing/vma/Makefile new file mode 100644 index 000000000000..860fd2311dcc --- /dev/null +++ b/tools/testing/vma/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +.PHONY: default clean + +default: vma + +include ../shared/shared.mk + +OFILES = $(SHARED_OFILES) vma.o maple-shim.o +TARGETS = vma + +vma.o: vma.c vma_internal.h ../../../mm/vma.c ../../../mm/vma.h + +vma: $(OFILES) + $(CC) $(CFLAGS) -o $@ $(OFILES) $(LDLIBS) + +clean: + $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h generated/bit-length.h generated/autoconf.h diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h new file mode 100644 index 000000000000..e01f66f98982 --- /dev/null +++ b/tools/testing/vma/linux/atomic.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_ATOMIC_H +#define _LINUX_ATOMIC_H + +#define atomic_t int32_t +#define atomic_inc(x) uatomic_inc(x) +#define atomic_read(x) uatomic_read(x) +#define atomic_set(x, y) do {} while (0) +#define U8_MAX UCHAR_MAX + +#endif /* _LINUX_ATOMIC_H */ diff --git a/tools/testing/vma/linux/mmzone.h b/tools/testing/vma/linux/mmzone.h new file mode 100644 index 000000000000..33cd1517f7a3 --- /dev/null +++ b/tools/testing/vma/linux/mmzone.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _LINUX_MMZONE_H +#define _LINUX_MMZONE_H + +#include <linux/atomic.h> + +struct pglist_data *first_online_pgdat(void); +struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); + +#define for_each_online_pgdat(pgdat) \ + for (pgdat = first_online_pgdat(); \ + pgdat; \ + pgdat = next_online_pgdat(pgdat)) + +enum zone_type { + __MAX_NR_ZONES +}; + +#define MAX_NR_ZONES __MAX_NR_ZONES +#define MAX_PAGE_ORDER 10 +#define MAX_ORDER_NR_PAGES (1 << MAX_PAGE_ORDER) + +#define pageblock_order MAX_PAGE_ORDER +#define pageblock_nr_pages BIT(pageblock_order) +#define pageblock_align(pfn) ALIGN((pfn), pageblock_nr_pages) +#define pageblock_start_pfn(pfn) ALIGN_DOWN((pfn), pageblock_nr_pages) + +struct zone { + atomic_long_t managed_pages; +}; + +typedef struct pglist_data { + struct zone node_zones[MAX_NR_ZONES]; + +} pg_data_t; + +#endif /* _LINUX_MMZONE_H */ diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c new file mode 100644 index 000000000000..c53f220eb6cc --- /dev/null +++ b/tools/testing/vma/vma.c @@ -0,0 +1,1563 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#include "maple-shared.h" +#include "vma_internal.h" + +/* Include so header guard set. */ +#include "../../../mm/vma.h" + +static bool fail_prealloc; + +/* Then override vma_iter_prealloc() so we can choose to fail it. */ +#define vma_iter_prealloc(vmi, vma) \ + (fail_prealloc ? -ENOMEM : mas_preallocate(&(vmi)->mas, (vma), GFP_KERNEL)) + +/* + * Directly import the VMA implementation here. Our vma_internal.h wrapper + * provides userland-equivalent functionality for everything vma.c uses. + */ +#include "../../../mm/vma.c" + +const struct vm_operations_struct vma_dummy_vm_ops; +static struct anon_vma dummy_anon_vma; + +#define ASSERT_TRUE(_expr) \ + do { \ + if (!(_expr)) { \ + fprintf(stderr, \ + "Assert FAILED at %s:%d:%s(): %s is FALSE.\n", \ + __FILE__, __LINE__, __FUNCTION__, #_expr); \ + return false; \ + } \ + } while (0) +#define ASSERT_FALSE(_expr) ASSERT_TRUE(!(_expr)) +#define ASSERT_EQ(_val1, _val2) ASSERT_TRUE((_val1) == (_val2)) +#define ASSERT_NE(_val1, _val2) ASSERT_TRUE((_val1) != (_val2)) + +static struct task_struct __current; + +struct task_struct *get_current(void) +{ + return &__current; +} + +/* Helper function to simply allocate a VMA. */ +static struct vm_area_struct *alloc_vma(struct mm_struct *mm, + unsigned long start, + unsigned long end, + pgoff_t pgoff, + vm_flags_t flags) +{ + struct vm_area_struct *ret = vm_area_alloc(mm); + + if (ret == NULL) + return NULL; + + ret->vm_start = start; + ret->vm_end = end; + ret->vm_pgoff = pgoff; + ret->__vm_flags = flags; + + return ret; +} + +/* Helper function to allocate a VMA and link it to the tree. */ +static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm, + unsigned long start, + unsigned long end, + pgoff_t pgoff, + vm_flags_t flags) +{ + struct vm_area_struct *vma = alloc_vma(mm, start, end, pgoff, flags); + + if (vma == NULL) + return NULL; + + if (vma_link(mm, vma)) { + vm_area_free(vma); + return NULL; + } + + /* + * Reset this counter which we use to track whether writes have + * begun. Linking to the tree will have caused this to be incremented, + * which means we will get a false positive otherwise. + */ + vma->vm_lock_seq = -1; + + return vma; +} + +/* Helper function which provides a wrapper around a merge new VMA operation. */ +static struct vm_area_struct *merge_new(struct vma_merge_struct *vmg) +{ + /* + * For convenience, get prev and next VMAs. Which the new VMA operation + * requires. + */ + vmg->next = vma_next(vmg->vmi); + vmg->prev = vma_prev(vmg->vmi); + vma_iter_next_range(vmg->vmi); + + return vma_merge_new_range(vmg); +} + +/* + * Helper function which provides a wrapper around a merge existing VMA + * operation. + */ +static struct vm_area_struct *merge_existing(struct vma_merge_struct *vmg) +{ + return vma_merge_existing_range(vmg); +} + +/* + * Helper function which provides a wrapper around the expansion of an existing + * VMA. + */ +static int expand_existing(struct vma_merge_struct *vmg) +{ + return vma_expand(vmg); +} + +/* + * Helper function to reset merge state the associated VMA iterator to a + * specified new range. + */ +static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start, + unsigned long end, pgoff_t pgoff, vm_flags_t flags) +{ + vma_iter_set(vmg->vmi, start); + + vmg->prev = NULL; + vmg->next = NULL; + vmg->vma = NULL; + + vmg->start = start; + vmg->end = end; + vmg->pgoff = pgoff; + vmg->flags = flags; +} + +/* + * Helper function to try to merge a new VMA. + * + * Update vmg and the iterator for it and try to merge, otherwise allocate a new + * VMA, link it to the maple tree and return it. + */ +static struct vm_area_struct *try_merge_new_vma(struct mm_struct *mm, + struct vma_merge_struct *vmg, + unsigned long start, unsigned long end, + pgoff_t pgoff, vm_flags_t flags, + bool *was_merged) +{ + struct vm_area_struct *merged; + + vmg_set_range(vmg, start, end, pgoff, flags); + + merged = merge_new(vmg); + if (merged) { + *was_merged = true; + ASSERT_EQ(vmg->state, VMA_MERGE_SUCCESS); + return merged; + } + + *was_merged = false; + + ASSERT_EQ(vmg->state, VMA_MERGE_NOMERGE); + + return alloc_and_link_vma(mm, start, end, pgoff, flags); +} + +/* + * Helper function to reset the dummy anon_vma to indicate it has not been + * duplicated. + */ +static void reset_dummy_anon_vma(void) +{ + dummy_anon_vma.was_cloned = false; + dummy_anon_vma.was_unlinked = false; +} + +/* + * Helper function to remove all VMAs and destroy the maple tree associated with + * a virtual address space. Returns a count of VMAs in the tree. + */ +static int cleanup_mm(struct mm_struct *mm, struct vma_iterator *vmi) +{ + struct vm_area_struct *vma; + int count = 0; + + fail_prealloc = false; + reset_dummy_anon_vma(); + + vma_iter_set(vmi, 0); + for_each_vma(*vmi, vma) { + vm_area_free(vma); + count++; + } + + mtree_destroy(&mm->mm_mt); + mm->map_count = 0; + return count; +} + +/* Helper function to determine if VMA has had vma_start_write() performed. */ +static bool vma_write_started(struct vm_area_struct *vma) +{ + int seq = vma->vm_lock_seq; + + /* We reset after each check. */ + vma->vm_lock_seq = -1; + + /* The vma_start_write() stub simply increments this value. */ + return seq > -1; +} + +/* Helper function providing a dummy vm_ops->close() method.*/ +static void dummy_close(struct vm_area_struct *) +{ +} + +static bool test_simple_merge(void) +{ + struct vm_area_struct *vma; + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *vma_left = alloc_vma(&mm, 0, 0x1000, 0, flags); + struct vm_area_struct *vma_right = alloc_vma(&mm, 0x2000, 0x3000, 2, flags); + VMA_ITERATOR(vmi, &mm, 0x1000); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + .start = 0x1000, + .end = 0x2000, + .flags = flags, + .pgoff = 1, + }; + + ASSERT_FALSE(vma_link(&mm, vma_left)); + ASSERT_FALSE(vma_link(&mm, vma_right)); + + vma = merge_new(&vmg); + ASSERT_NE(vma, NULL); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->vm_flags, flags); + + vm_area_free(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool test_simple_modify(void) +{ + struct vm_area_struct *vma; + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *init_vma = alloc_vma(&mm, 0, 0x3000, 0, flags); + VMA_ITERATOR(vmi, &mm, 0x1000); + + ASSERT_FALSE(vma_link(&mm, init_vma)); + + /* + * The flags will not be changed, the vma_modify_flags() function + * performs the merge/split only. + */ + vma = vma_modify_flags(&vmi, init_vma, init_vma, + 0x1000, 0x2000, VM_READ | VM_MAYREAD); + ASSERT_NE(vma, NULL); + /* We modify the provided VMA, and on split allocate new VMAs. */ + ASSERT_EQ(vma, init_vma); + + ASSERT_EQ(vma->vm_start, 0x1000); + ASSERT_EQ(vma->vm_end, 0x2000); + ASSERT_EQ(vma->vm_pgoff, 1); + + /* + * Now walk through the three split VMAs and make sure they are as + * expected. + */ + + vma_iter_set(&vmi, 0); + vma = vma_iter_load(&vmi); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x1000); + ASSERT_EQ(vma->vm_pgoff, 0); + + vm_area_free(vma); + vma_iter_clear(&vmi); + + vma = vma_next(&vmi); + + ASSERT_EQ(vma->vm_start, 0x1000); + ASSERT_EQ(vma->vm_end, 0x2000); + ASSERT_EQ(vma->vm_pgoff, 1); + + vm_area_free(vma); + vma_iter_clear(&vmi); + + vma = vma_next(&vmi); + + ASSERT_EQ(vma->vm_start, 0x2000); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 2); + + vm_area_free(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool test_simple_expand(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x1000, 0, flags); + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .vmi = &vmi, + .vma = vma, + .start = 0, + .end = 0x3000, + .pgoff = 0, + }; + + ASSERT_FALSE(vma_link(&mm, vma)); + + ASSERT_FALSE(expand_existing(&vmg)); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 0); + + vm_area_free(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool test_simple_shrink(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + struct vm_area_struct *vma = alloc_vma(&mm, 0, 0x3000, 0, flags); + VMA_ITERATOR(vmi, &mm, 0); + + ASSERT_FALSE(vma_link(&mm, vma)); + + ASSERT_FALSE(vma_shrink(&vmi, vma, 0, 0x1000, 0)); + + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x1000); + ASSERT_EQ(vma->vm_pgoff, 0); + + vm_area_free(vma); + mtree_destroy(&mm.mm_mt); + + return true; +} + +static bool test_merge_new(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct anon_vma_chain dummy_anon_vma_chain_a = { + .anon_vma = &dummy_anon_vma, + }; + struct anon_vma_chain dummy_anon_vma_chain_b = { + .anon_vma = &dummy_anon_vma, + }; + struct anon_vma_chain dummy_anon_vma_chain_c = { + .anon_vma = &dummy_anon_vma, + }; + struct anon_vma_chain dummy_anon_vma_chain_d = { + .anon_vma = &dummy_anon_vma, + }; + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + int count; + struct vm_area_struct *vma, *vma_a, *vma_b, *vma_c, *vma_d; + bool merged; + + /* + * 0123456789abc + * AA B CC + */ + vma_a = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); + ASSERT_NE(vma_a, NULL); + /* We give each VMA a single avc so we can test anon_vma duplication. */ + INIT_LIST_HEAD(&vma_a->anon_vma_chain); + list_add(&dummy_anon_vma_chain_a.same_vma, &vma_a->anon_vma_chain); + + vma_b = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); + ASSERT_NE(vma_b, NULL); + INIT_LIST_HEAD(&vma_b->anon_vma_chain); + list_add(&dummy_anon_vma_chain_b.same_vma, &vma_b->anon_vma_chain); + + vma_c = alloc_and_link_vma(&mm, 0xb000, 0xc000, 0xb, flags); + ASSERT_NE(vma_c, NULL); + INIT_LIST_HEAD(&vma_c->anon_vma_chain); + list_add(&dummy_anon_vma_chain_c.same_vma, &vma_c->anon_vma_chain); + + /* + * NO merge. + * + * 0123456789abc + * AA B ** CC + */ + vma_d = try_merge_new_vma(&mm, &vmg, 0x7000, 0x9000, 7, flags, &merged); + ASSERT_NE(vma_d, NULL); + INIT_LIST_HEAD(&vma_d->anon_vma_chain); + list_add(&dummy_anon_vma_chain_d.same_vma, &vma_d->anon_vma_chain); + ASSERT_FALSE(merged); + ASSERT_EQ(mm.map_count, 4); + + /* + * Merge BOTH sides. + * + * 0123456789abc + * AA*B DD CC + */ + vma_a->vm_ops = &vm_ops; /* This should have no impact. */ + vma_b->anon_vma = &dummy_anon_vma; + vma = try_merge_new_vma(&mm, &vmg, 0x2000, 0x3000, 2, flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Merge with A, delete B. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x4000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 3); + + /* + * Merge to PREVIOUS VMA. + * + * 0123456789abc + * AAAA* DD CC + */ + vma = try_merge_new_vma(&mm, &vmg, 0x4000, 0x5000, 4, flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Extend A. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x5000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 3); + + /* + * Merge to NEXT VMA. + * + * 0123456789abc + * AAAAA *DD CC + */ + vma_d->anon_vma = &dummy_anon_vma; + vma_d->vm_ops = &vm_ops; /* This should have no impact. */ + vma = try_merge_new_vma(&mm, &vmg, 0x6000, 0x7000, 6, flags, &merged); + ASSERT_EQ(vma, vma_d); + /* Prepend. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0x6000); + ASSERT_EQ(vma->vm_end, 0x9000); + ASSERT_EQ(vma->vm_pgoff, 6); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 3); + + /* + * Merge BOTH sides. + * + * 0123456789abc + * AAAAA*DDD CC + */ + vma_d->vm_ops = NULL; /* This would otherwise degrade the merge. */ + vma = try_merge_new_vma(&mm, &vmg, 0x5000, 0x6000, 5, flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Merge with A, delete D. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x9000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + + /* + * Merge to NEXT VMA. + * + * 0123456789abc + * AAAAAAAAA *CC + */ + vma_c->anon_vma = &dummy_anon_vma; + vma = try_merge_new_vma(&mm, &vmg, 0xa000, 0xb000, 0xa, flags, &merged); + ASSERT_EQ(vma, vma_c); + /* Prepend C. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0xa000); + ASSERT_EQ(vma->vm_end, 0xc000); + ASSERT_EQ(vma->vm_pgoff, 0xa); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + + /* + * Merge BOTH sides. + * + * 0123456789abc + * AAAAAAAAA*CCC + */ + vma = try_merge_new_vma(&mm, &vmg, 0x9000, 0xa000, 0x9, flags, &merged); + ASSERT_EQ(vma, vma_a); + /* Extend A and delete C. */ + ASSERT_TRUE(merged); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0xc000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 1); + + /* + * Final state. + * + * 0123456789abc + * AAAAAAAAAAAAA + */ + + count = 0; + vma_iter_set(&vmi, 0); + for_each_vma(vmi, vma) { + ASSERT_NE(vma, NULL); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0xc000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->anon_vma, &dummy_anon_vma); + + vm_area_free(vma); + count++; + } + + /* Should only have one VMA left (though freed) after all is done.*/ + ASSERT_EQ(count, 1); + + mtree_destroy(&mm.mm_mt); + return true; +} + +static bool test_vma_merge_special_flags(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + vm_flags_t special_flags[] = { VM_IO, VM_DONTEXPAND, VM_PFNMAP, VM_MIXEDMAP }; + vm_flags_t all_special_flags = 0; + int i; + struct vm_area_struct *vma_left, *vma; + + /* Make sure there aren't new VM_SPECIAL flags. */ + for (i = 0; i < ARRAY_SIZE(special_flags); i++) { + all_special_flags |= special_flags[i]; + } + ASSERT_EQ(all_special_flags, VM_SPECIAL); + + /* + * 01234 + * AAA + */ + vma_left = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + ASSERT_NE(vma_left, NULL); + + /* 1. Set up new VMA with special flag that would otherwise merge. */ + + /* + * 01234 + * AAA* + * + * This should merge if not for the VM_SPECIAL flag. + */ + vmg_set_range(&vmg, 0x3000, 0x4000, 3, flags); + for (i = 0; i < ARRAY_SIZE(special_flags); i++) { + vm_flags_t special_flag = special_flags[i]; + + vma_left->__vm_flags = flags | special_flag; + vmg.flags = flags | special_flag; + vma = merge_new(&vmg); + ASSERT_EQ(vma, NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + } + + /* 2. Modify VMA with special flag that would otherwise merge. */ + + /* + * 01234 + * AAAB + * + * Create a VMA to modify. + */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); + ASSERT_NE(vma, NULL); + vmg.vma = vma; + + for (i = 0; i < ARRAY_SIZE(special_flags); i++) { + vm_flags_t special_flag = special_flags[i]; + + vma_left->__vm_flags = flags | special_flag; + vmg.flags = flags | special_flag; + vma = merge_existing(&vmg); + ASSERT_EQ(vma, NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + } + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_vma_merge_with_close(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + struct vm_area_struct *vma_prev, *vma_next, *vma; + + /* + * When merging VMAs we are not permitted to remove any VMA that has a + * vm_ops->close() hook. + * + * Considering the two possible adjacent VMAs to which a VMA can be + * merged: + * + * [ prev ][ vma ][ next ] + * + * In no case will we need to delete prev. If the operation is + * mergeable, then prev will be extended with one or both of vma and + * next deleted. + * + * As a result, during initial mergeability checks, only + * can_vma_merge_before() (which implies the VMA being merged with is + * 'next' as shown above) bothers to check to see whether the next VMA + * has a vm_ops->close() callback that will need to be called when + * removed. + * + * If it does, then we cannot merge as the resources that the close() + * operation potentially clears down are tied only to the existing VMA + * range and we have no way of extending those to the nearly merged one. + * + * We must consider two scenarios: + * + * A. + * + * vm_ops->close: - - !NULL + * [ prev ][ vma ][ next ] + * + * Where prev may or may not be present/mergeable. + * + * This is picked up by a specific check in can_vma_merge_before(). + * + * B. + * + * vm_ops->close: - !NULL + * [ prev ][ vma ] + * + * Where prev and vma are present and mergeable. + * + * This is picked up by a specific check in the modified VMA merge. + * + * IMPORTANT NOTE: We make the assumption that the following case: + * + * - !NULL NULL + * [ prev ][ vma ][ next ] + * + * Cannot occur, because vma->vm_ops being the same implies the same + * vma->vm_file, and therefore this would mean that next->vm_ops->close + * would be set too, and thus scenario A would pick this up. + */ + + /* + * The only case of a new VMA merge that results in a VMA being deleted + * is one where both the previous and next VMAs are merged - in this + * instance the next VMA is deleted, and the previous VMA is extended. + * + * If we are unable to do so, we reduce the operation to simply + * extending the prev VMA and not merging next. + * + * 0123456789 + * PPP**NNNN + * -> + * 0123456789 + * PPPPPPNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma_next->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + ASSERT_EQ(merge_new(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x5000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * When modifying an existing VMA there are further cases where we + * delete VMAs. + * + * <> + * 0123456789 + * PPPVV + * + * In this instance, if vma has a close hook, the merge simply cannot + * proceed. + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + /* + * The VMA being modified in a way that would otherwise merge should + * also fail. + */ + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * This case is mirrored if merging with next. + * + * <> + * 0123456789 + * VVNNNN + * + * In this instance, if vma has a close hook, the merge simply cannot + * proceed. + */ + + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.vma = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + /* + * Initially this is misapprehended as an out of memory report, as the + * close() check is handled in the same way as anon_vma duplication + * failures, however a subsequent patch resolves this. + */ + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Finally, we consider two variants of the case where we modify a VMA + * to merge with both the previous and next VMAs. + * + * The first variant is where vma has a close hook. In this instance, no + * merge can proceed. + * + * <> + * 0123456789 + * PPPVVNNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 3); + + /* + * The second variant is where next has a close hook. In this instance, + * we reduce the operation to a merge between prev and vma. + * + * <> + * 0123456789 + * PPPVVNNNN + * -> + * 0123456789 + * PPPPPNNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x9000, 5, flags); + vma_next->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x5000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + return true; +} + +static bool test_vma_merge_new_with_close(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct vm_area_struct *vma_prev = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); + struct vm_area_struct *vma_next = alloc_and_link_vma(&mm, 0x5000, 0x7000, 5, flags); + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + struct vm_area_struct *vma; + + /* + * We should allow the partial merge of a proposed new VMA if the + * surrounding VMAs have vm_ops->close() hooks (but are otherwise + * compatible), e.g.: + * + * New VMA + * A v-------v B + * |-----| |-----| + * close close + * + * Since the rule is to not DELETE a VMA with a close operation, this + * should be permitted, only rather than expanding A and deleting B, we + * should simply expand A and leave B intact, e.g.: + * + * New VMA + * A B + * |------------||-----| + * close close + */ + + /* Have prev and next have a vm_ops->close() hook. */ + vma_prev->vm_ops = &vm_ops; + vma_next->vm_ops = &vm_ops; + + vmg_set_range(&vmg, 0x2000, 0x5000, 2, flags); + vma = merge_new(&vmg); + ASSERT_NE(vma, NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x5000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_EQ(vma->vm_ops, &vm_ops); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_merge_existing(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma, *vma_prev, *vma_next; + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + const struct vm_operations_struct vm_ops = { + .close = dummy_close, + }; + + /* + * Merge right case - partial span. + * + * <-> + * 0123456789 + * VVVVNNN + * -> + * 0123456789 + * VNNNNNN + */ + vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags); + vma->vm_ops = &vm_ops; /* This should have no impact. */ + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); + vma_next->vm_ops = &vm_ops; /* This should have no impact. */ + vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); + vmg.vma = vma; + vmg.prev = vma; + vma->anon_vma = &dummy_anon_vma; + ASSERT_EQ(merge_existing(&vmg), vma_next); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_next->vm_start, 0x3000); + ASSERT_EQ(vma_next->vm_end, 0x9000); + ASSERT_EQ(vma_next->vm_pgoff, 3); + ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); + ASSERT_EQ(vma->vm_start, 0x2000); + ASSERT_EQ(vma->vm_end, 0x3000); + ASSERT_EQ(vma->vm_pgoff, 2); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_TRUE(vma_write_started(vma_next)); + ASSERT_EQ(mm.map_count, 2); + + /* Clear down and reset. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Merge right case - full span. + * + * <--> + * 0123456789 + * VVVVNNN + * -> + * 0123456789 + * NNNNNNN + */ + vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags); + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags); + vma_next->vm_ops = &vm_ops; /* This should have no impact. */ + vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags); + vmg.vma = vma; + vma->anon_vma = &dummy_anon_vma; + ASSERT_EQ(merge_existing(&vmg), vma_next); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_next->vm_start, 0x2000); + ASSERT_EQ(vma_next->vm_end, 0x9000); + ASSERT_EQ(vma_next->vm_pgoff, 2); + ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma_next)); + ASSERT_EQ(mm.map_count, 1); + + /* Clear down and reset. We should have deleted vma. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); + + /* + * Merge left case - partial span. + * + * <-> + * 0123456789 + * PPPVVVV + * -> + * 0123456789 + * PPPPPPV + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); + vma->vm_ops = &vm_ops; /* This should have no impact. */ + vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + vma->anon_vma = &dummy_anon_vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x6000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_EQ(vma->vm_start, 0x6000); + ASSERT_EQ(vma->vm_end, 0x7000); + ASSERT_EQ(vma->vm_pgoff, 6); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 2); + + /* Clear down and reset. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Merge left case - full span. + * + * <--> + * 0123456789 + * PPPVVVV + * -> + * 0123456789 + * PPPPPPP + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); + vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + vma->anon_vma = &dummy_anon_vma; + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x7000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_EQ(mm.map_count, 1); + + /* Clear down and reset. We should have deleted vma. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); + + /* + * Merge both case. + * + * <--> + * 0123456789 + * PPPVVVVNNN + * -> + * 0123456789 + * PPPPPPPPPP + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_prev->vm_ops = &vm_ops; /* This should have no impact. */ + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); + vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + vma->anon_vma = &dummy_anon_vma; + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x9000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_EQ(mm.map_count, 1); + + /* Clear down and reset. We should have deleted prev and next. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 1); + + /* + * Non-merge ranges. the modified VMA merge operation assumes that the + * caller always specifies ranges within the input VMA so we need only + * examine these cases. + * + * - + * - + * - + * <-> + * <> + * <> + * 0123456789a + * PPPVVVVVNNN + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x8000, 0xa000, 8, flags); + + vmg_set_range(&vmg, 0x4000, 0x5000, 4, flags); + vmg.prev = vma; + vmg.vma = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); + vmg.prev = vma; + vmg.vma = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x6000, 0x7000, 6, flags); + vmg.prev = vma; + vmg.vma = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x4000, 0x7000, 4, flags); + vmg.prev = vma; + vmg.vma = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x4000, 0x6000, 4, flags); + vmg.prev = vma; + vmg.vma = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + vmg_set_range(&vmg, 0x5000, 0x6000, 5, flags); + vmg.prev = vma; + vmg.vma = vma; + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_NOMERGE); + + ASSERT_EQ(cleanup_mm(&mm, &vmi), 3); + + return true; +} + +static bool test_anon_vma_non_mergeable(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma, *vma_prev, *vma_next; + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct anon_vma_chain dummy_anon_vma_chain1 = { + .anon_vma = &dummy_anon_vma, + }; + struct anon_vma_chain dummy_anon_vma_chain2 = { + .anon_vma = &dummy_anon_vma, + }; + + /* + * In the case of modified VMA merge, merging both left and right VMAs + * but where prev and next have incompatible anon_vma objects, we revert + * to a merge of prev and VMA: + * + * <--> + * 0123456789 + * PPPVVVVNNN + * -> + * 0123456789 + * PPPPPPPNNN + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); + + /* + * Give both prev and next single anon_vma_chain fields, so they will + * merge with the NULL vmg->anon_vma. + * + * However, when prev is compared to next, the merge should fail. + */ + + INIT_LIST_HEAD(&vma_prev->anon_vma_chain); + list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain); + ASSERT_TRUE(list_is_singular(&vma_prev->anon_vma_chain)); + vma_prev->anon_vma = &dummy_anon_vma; + ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_prev->anon_vma, vma_prev)); + + INIT_LIST_HEAD(&vma_next->anon_vma_chain); + list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain); + ASSERT_TRUE(list_is_singular(&vma_next->anon_vma_chain)); + vma_next->anon_vma = (struct anon_vma *)2; + ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_next->anon_vma, vma_next)); + + ASSERT_FALSE(is_mergeable_anon_vma(vma_prev->anon_vma, vma_next->anon_vma, NULL)); + + vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x7000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_FALSE(vma_write_started(vma_next)); + + /* Clear down and reset. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + /* + * Now consider the new VMA case. This is equivalent, only adding a new + * VMA in a gap between prev and next. + * + * <--> + * 0123456789 + * PPP****NNN + * -> + * 0123456789 + * PPPPPPPNNN + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags); + + INIT_LIST_HEAD(&vma_prev->anon_vma_chain); + list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain); + vma_prev->anon_vma = (struct anon_vma *)1; + + INIT_LIST_HEAD(&vma_next->anon_vma_chain); + list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain); + vma_next->anon_vma = (struct anon_vma *)2; + + vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags); + vmg.prev = vma_prev; + + ASSERT_EQ(merge_new(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x7000); + ASSERT_EQ(vma_prev->vm_pgoff, 0); + ASSERT_TRUE(vma_write_started(vma_prev)); + ASSERT_FALSE(vma_write_started(vma_next)); + + /* Final cleanup. */ + ASSERT_EQ(cleanup_mm(&mm, &vmi), 2); + + return true; +} + +static bool test_dup_anon_vma(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct anon_vma_chain dummy_anon_vma_chain = { + .anon_vma = &dummy_anon_vma, + }; + struct vm_area_struct *vma_prev, *vma_next, *vma; + + reset_dummy_anon_vma(); + + /* + * Expanding a VMA delete the next one duplicates next's anon_vma and + * assigns it to the expanded VMA. + * + * This covers new VMA merging, as these operations amount to a VMA + * expand. + */ + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma_next = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_next->anon_vma = &dummy_anon_vma; + + vmg_set_range(&vmg, 0, 0x5000, 0, flags); + vmg.vma = vma_prev; + vmg.next = vma_next; + + ASSERT_EQ(expand_existing(&vmg), 0); + + /* Will have been cloned. */ + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + /* Cleanup ready for next run. */ + cleanup_mm(&mm, &vmi); + + /* + * next has anon_vma, we assign to prev. + * + * |<----->| + * |-------*********-------| + * prev vma next + * extend delete delete + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); + + /* Initialise avc so mergeability check passes. */ + INIT_LIST_HEAD(&vma_next->anon_vma_chain); + list_add(&dummy_anon_vma_chain.same_vma, &vma_next->anon_vma_chain); + + vma_next->anon_vma = &dummy_anon_vma; + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x8000); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + + /* + * vma has anon_vma, we assign to prev. + * + * |<----->| + * |-------*********-------| + * prev vma next + * extend delete delete + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); + + vma->anon_vma = &dummy_anon_vma; + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x8000); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + + /* + * vma has anon_vma, we assign to prev. + * + * |<----->| + * |-------************* + * prev vma + * extend shrink/delete + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags); + + vma->anon_vma = &dummy_anon_vma; + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_prev->vm_start, 0); + ASSERT_EQ(vma_prev->vm_end, 0x5000); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_prev->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + + /* + * vma has anon_vma, we assign to next. + * + * |<----->| + * *************-------| + * vma next + * shrink/delete extend + */ + + vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, flags); + vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags); + + vma->anon_vma = &dummy_anon_vma; + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma; + vmg.vma = vma; + + ASSERT_EQ(merge_existing(&vmg), vma_next); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + + ASSERT_EQ(vma_next->vm_start, 0x3000); + ASSERT_EQ(vma_next->vm_end, 0x8000); + + ASSERT_EQ(vma_next->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(vma_next->anon_vma->was_cloned); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_vmi_prealloc_fail(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vma_merge_struct vmg = { + .mm = &mm, + .vmi = &vmi, + }; + struct vm_area_struct *vma_prev, *vma; + + /* + * We are merging vma into prev, with vma possessing an anon_vma, which + * will be duplicated. We cause the vmi preallocation to fail and assert + * the duplicated anon_vma is unlinked. + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma->anon_vma = &dummy_anon_vma; + + vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.vma = vma; + + fail_prealloc = true; + + /* This will cause the merge to fail. */ + ASSERT_EQ(merge_existing(&vmg), NULL); + ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM); + /* We will already have assigned the anon_vma. */ + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + /* And it was both cloned and unlinked. */ + ASSERT_TRUE(dummy_anon_vma.was_cloned); + ASSERT_TRUE(dummy_anon_vma.was_unlinked); + + cleanup_mm(&mm, &vmi); /* Resets fail_prealloc too. */ + + /* + * We repeat the same operation for expanding a VMA, which is what new + * VMA merging ultimately uses too. This asserts that unlinking is + * performed in this case too. + */ + + vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags); + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma->anon_vma = &dummy_anon_vma; + + vmg_set_range(&vmg, 0, 0x5000, 3, flags); + vmg.vma = vma_prev; + vmg.next = vma; + + fail_prealloc = true; + ASSERT_EQ(expand_existing(&vmg), -ENOMEM); + ASSERT_EQ(vmg.state, VMA_MERGE_ERROR_NOMEM); + + ASSERT_EQ(vma_prev->anon_vma, &dummy_anon_vma); + ASSERT_TRUE(dummy_anon_vma.was_cloned); + ASSERT_TRUE(dummy_anon_vma.was_unlinked); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_merge_extend(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0x1000); + struct vm_area_struct *vma; + + vma = alloc_and_link_vma(&mm, 0, 0x1000, 0, flags); + alloc_and_link_vma(&mm, 0x3000, 0x4000, 3, flags); + + /* + * Extend a VMA into the gap between itself and the following VMA. + * This should result in a merge. + * + * <-> + * * * + * + */ + + ASSERT_EQ(vma_merge_extend(&vmi, vma, 0x2000), vma); + ASSERT_EQ(vma->vm_start, 0); + ASSERT_EQ(vma->vm_end, 0x4000); + ASSERT_EQ(vma->vm_pgoff, 0); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(mm.map_count, 1); + + cleanup_mm(&mm, &vmi); + return true; +} + +static bool test_copy_vma(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + bool need_locks = false; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma, *vma_new, *vma_next; + + /* Move backwards and do not merge. */ + + vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vma_new = copy_vma(&vma, 0, 0x2000, 0, &need_locks); + + ASSERT_NE(vma_new, vma); + ASSERT_EQ(vma_new->vm_start, 0); + ASSERT_EQ(vma_new->vm_end, 0x2000); + ASSERT_EQ(vma_new->vm_pgoff, 0); + + cleanup_mm(&mm, &vmi); + + /* Move a VMA into position next to another and merge the two. */ + + vma = alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); + vma_next = alloc_and_link_vma(&mm, 0x6000, 0x8000, 6, flags); + vma_new = copy_vma(&vma, 0x4000, 0x2000, 4, &need_locks); + + ASSERT_EQ(vma_new, vma_next); + + cleanup_mm(&mm, &vmi); + return true; +} + +int main(void) +{ + int num_tests = 0, num_fail = 0; + + maple_tree_init(); + +#define TEST(name) \ + do { \ + num_tests++; \ + if (!test_##name()) { \ + num_fail++; \ + fprintf(stderr, "Test " #name " FAILED\n"); \ + } \ + } while (0) + + /* Very simple tests to kick the tyres. */ + TEST(simple_merge); + TEST(simple_modify); + TEST(simple_expand); + TEST(simple_shrink); + + TEST(merge_new); + TEST(vma_merge_special_flags); + TEST(vma_merge_with_close); + TEST(vma_merge_new_with_close); + TEST(merge_existing); + TEST(anon_vma_non_mergeable); + TEST(dup_anon_vma); + TEST(vmi_prealloc_fail); + TEST(merge_extend); + TEST(copy_vma); + +#undef TEST + + printf("%d tests run, %d passed, %d failed.\n", + num_tests, num_tests - num_fail, num_fail); + + return num_fail == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h new file mode 100644 index 000000000000..c5b9da034511 --- /dev/null +++ b/tools/testing/vma/vma_internal.h @@ -0,0 +1,923 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * vma_internal.h + * + * Header providing userland wrappers and shims for the functionality provided + * by mm/vma_internal.h. + * + * We make the header guard the same as mm/vma_internal.h, so if this shim + * header is included, it precludes the inclusion of the kernel one. + */ + +#ifndef __MM_VMA_INTERNAL_H +#define __MM_VMA_INTERNAL_H + +#define __private +#define __bitwise +#define __randomize_layout + +#define CONFIG_MMU +#define CONFIG_PER_VMA_LOCK + +#include <stdlib.h> + +#include <linux/list.h> +#include <linux/maple_tree.h> +#include <linux/mm.h> +#include <linux/rbtree.h> +#include <linux/rwsem.h> + +#define VM_WARN_ON(_expr) (WARN_ON(_expr)) +#define VM_WARN_ON_ONCE(_expr) (WARN_ON_ONCE(_expr)) +#define VM_BUG_ON(_expr) (BUG_ON(_expr)) +#define VM_BUG_ON_VMA(_expr, _vma) (BUG_ON(_expr)) + +#define VM_NONE 0x00000000 +#define VM_READ 0x00000001 +#define VM_WRITE 0x00000002 +#define VM_EXEC 0x00000004 +#define VM_SHARED 0x00000008 +#define VM_MAYREAD 0x00000010 +#define VM_MAYWRITE 0x00000020 +#define VM_GROWSDOWN 0x00000100 +#define VM_PFNMAP 0x00000400 +#define VM_LOCKED 0x00002000 +#define VM_IO 0x00004000 +#define VM_DONTEXPAND 0x00040000 +#define VM_ACCOUNT 0x00100000 +#define VM_MIXEDMAP 0x10000000 +#define VM_STACK VM_GROWSDOWN +#define VM_SHADOW_STACK VM_NONE +#define VM_SOFTDIRTY 0 + +#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) +#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) + +#define FIRST_USER_ADDRESS 0UL +#define USER_PGTABLES_CEILING 0UL + +#define vma_policy(vma) NULL + +#define down_write_nest_lock(sem, nest_lock) + +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#define for_each_vma(__vmi, __vma) \ + while (((__vma) = vma_next(&(__vmi))) != NULL) + +/* The MM code likes to work with exclusive end addresses */ +#define for_each_vma_range(__vmi, __vma, __end) \ + while (((__vma) = vma_find(&(__vmi), (__end))) != NULL) + +#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) + +#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) + +#define test_and_set_bit(nr, addr) __test_and_set_bit(nr, addr) +#define test_and_clear_bit(nr, addr) __test_and_clear_bit(nr, addr) + +#define TASK_SIZE ((1ul << 47)-PAGE_SIZE) + +#define AS_MM_ALL_LOCKS 2 + +/* We hardcode this for now. */ +#define sysctl_max_map_count 0x1000000UL + +#define pgoff_t unsigned long +typedef unsigned long pgprotval_t; +typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; +typedef unsigned long vm_flags_t; +typedef __bitwise unsigned int vm_fault_t; + +/* + * The shared stubs do not implement this, it amounts to an fprintf(STDERR,...) + * either way :) + */ +#define pr_warn_once pr_err + +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +struct kref { + refcount_t refcount; +}; + +/* + * Define the task command name length as enum, then it can be visible to + * BPF programs. + */ +enum { + TASK_COMM_LEN = 16, +}; + +struct task_struct { + char comm[TASK_COMM_LEN]; + pid_t pid; + struct mm_struct *mm; +}; + +struct task_struct *get_current(void); +#define current get_current() + +struct anon_vma { + struct anon_vma *root; + struct rb_root_cached rb_root; + + /* Test fields. */ + bool was_cloned; + bool was_unlinked; +}; + +struct anon_vma_chain { + struct anon_vma *anon_vma; + struct list_head same_vma; +}; + +struct anon_vma_name { + struct kref kref; + /* The name needs to be at the end because it is dynamically sized. */ + char name[]; +}; + +struct vma_iterator { + struct ma_state mas; +}; + +#define VMA_ITERATOR(name, __mm, __addr) \ + struct vma_iterator name = { \ + .mas = { \ + .tree = &(__mm)->mm_mt, \ + .index = __addr, \ + .node = NULL, \ + .status = ma_start, \ + }, \ + } + +struct address_space { + struct rb_root_cached i_mmap; + unsigned long flags; + atomic_t i_mmap_writable; +}; + +struct vm_userfaultfd_ctx {}; +struct mempolicy {}; +struct mmu_gather {}; +struct mutex {}; +#define DEFINE_MUTEX(mutexname) \ + struct mutex mutexname = {} + +struct mm_struct { + struct maple_tree mm_mt; + int map_count; /* number of VMAs */ + unsigned long total_vm; /* Total pages mapped */ + unsigned long locked_vm; /* Pages that have PG_mlocked set */ + unsigned long data_vm; /* VM_WRITE & ~VM_SHARED & ~VM_STACK */ + unsigned long exec_vm; /* VM_EXEC & ~VM_WRITE & ~VM_STACK */ + unsigned long stack_vm; /* VM_STACK */ +}; + +struct vma_lock { + struct rw_semaphore lock; +}; + + +struct file { + struct address_space *f_mapping; +}; + +struct vm_area_struct { + /* The first cache line has the info for VMA tree walking. */ + + union { + struct { + /* VMA covers [vm_start; vm_end) addresses within mm */ + unsigned long vm_start; + unsigned long vm_end; + }; +#ifdef CONFIG_PER_VMA_LOCK + struct rcu_head vm_rcu; /* Used for deferred freeing. */ +#endif + }; + + struct mm_struct *vm_mm; /* The address space we belong to. */ + pgprot_t vm_page_prot; /* Access permissions of this VMA. */ + + /* + * Flags, see mm.h. + * To modify use vm_flags_{init|reset|set|clear|mod} functions. + */ + union { + const vm_flags_t vm_flags; + vm_flags_t __private __vm_flags; + }; + +#ifdef CONFIG_PER_VMA_LOCK + /* Flag to indicate areas detached from the mm->mm_mt tree */ + bool detached; + + /* + * Can only be written (using WRITE_ONCE()) while holding both: + * - mmap_lock (in write mode) + * - vm_lock->lock (in write mode) + * Can be read reliably while holding one of: + * - mmap_lock (in read or write mode) + * - vm_lock->lock (in read or write mode) + * Can be read unreliably (using READ_ONCE()) for pessimistic bailout + * while holding nothing (except RCU to keep the VMA struct allocated). + * + * This sequence counter is explicitly allowed to overflow; sequence + * counter reuse can only lead to occasional unnecessary use of the + * slowpath. + */ + int vm_lock_seq; + struct vma_lock *vm_lock; +#endif + + /* + * For areas with an address space and backing store, + * linkage into the address_space->i_mmap interval tree. + * + */ + struct { + struct rb_node rb; + unsigned long rb_subtree_last; + } shared; + + /* + * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma + * list, after a COW of one of the file pages. A MAP_SHARED vma + * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack + * or brk vma (with NULL file) can only be in an anon_vma list. + */ + struct list_head anon_vma_chain; /* Serialized by mmap_lock & + * page_table_lock */ + struct anon_vma *anon_vma; /* Serialized by page_table_lock */ + + /* Function pointers to deal with this struct. */ + const struct vm_operations_struct *vm_ops; + + /* Information about our backing store: */ + unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE + units */ + struct file * vm_file; /* File we map to (can be NULL). */ + void * vm_private_data; /* was vm_pte (shared mem) */ + +#ifdef CONFIG_ANON_VMA_NAME + /* + * For private and shared anonymous mappings, a pointer to a null + * terminated string containing the name given to the vma, or NULL if + * unnamed. Serialized by mmap_lock. Use anon_vma_name to access. + */ + struct anon_vma_name *anon_name; +#endif +#ifdef CONFIG_SWAP + atomic_long_t swap_readahead_info; +#endif +#ifndef CONFIG_MMU + struct vm_region *vm_region; /* NOMMU mapping region */ +#endif +#ifdef CONFIG_NUMA + struct mempolicy *vm_policy; /* NUMA policy for the VMA */ +#endif +#ifdef CONFIG_NUMA_BALANCING + struct vma_numab_state *numab_state; /* NUMA Balancing state */ +#endif + struct vm_userfaultfd_ctx vm_userfaultfd_ctx; +} __randomize_layout; + +struct vm_fault {}; + +struct vm_operations_struct { + void (*open)(struct vm_area_struct * area); + /** + * @close: Called when the VMA is being removed from the MM. + * Context: User context. May sleep. Caller holds mmap_lock. + */ + void (*close)(struct vm_area_struct * area); + /* Called any time before splitting to check if it's allowed */ + int (*may_split)(struct vm_area_struct *area, unsigned long addr); + int (*mremap)(struct vm_area_struct *area); + /* + * Called by mprotect() to make driver-specific permission + * checks before mprotect() is finalised. The VMA must not + * be modified. Returns 0 if mprotect() can proceed. + */ + int (*mprotect)(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long newflags); + vm_fault_t (*fault)(struct vm_fault *vmf); + vm_fault_t (*huge_fault)(struct vm_fault *vmf, unsigned int order); + vm_fault_t (*map_pages)(struct vm_fault *vmf, + pgoff_t start_pgoff, pgoff_t end_pgoff); + unsigned long (*pagesize)(struct vm_area_struct * area); + + /* notification that a previously read-only page is about to become + * writable, if an error is returned it will cause a SIGBUS */ + vm_fault_t (*page_mkwrite)(struct vm_fault *vmf); + + /* same as page_mkwrite when using VM_PFNMAP|VM_MIXEDMAP */ + vm_fault_t (*pfn_mkwrite)(struct vm_fault *vmf); + + /* called by access_process_vm when get_user_pages() fails, typically + * for use by special VMAs. See also generic_access_phys() for a generic + * implementation useful for any iomem mapping. + */ + int (*access)(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write); + + /* Called by the /proc/PID/maps code to ask the vma whether it + * has a special name. Returning non-NULL will also cause this + * vma to be dumped unconditionally. */ + const char *(*name)(struct vm_area_struct *vma); + +#ifdef CONFIG_NUMA + /* + * set_policy() op must add a reference to any non-NULL @new mempolicy + * to hold the policy upon return. Caller should pass NULL @new to + * remove a policy and fall back to surrounding context--i.e. do not + * install a MPOL_DEFAULT policy, nor the task or system default + * mempolicy. + */ + int (*set_policy)(struct vm_area_struct *vma, struct mempolicy *new); + + /* + * get_policy() op must add reference [mpol_get()] to any policy at + * (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure + * in mm/mempolicy.c will do this automatically. + * get_policy() must NOT add a ref if the policy at (vma,addr) is not + * marked as MPOL_SHARED. vma policies are protected by the mmap_lock. + * If no [shared/vma] mempolicy exists at the addr, get_policy() op + * must return NULL--i.e., do not "fallback" to task or system default + * policy. + */ + struct mempolicy *(*get_policy)(struct vm_area_struct *vma, + unsigned long addr, pgoff_t *ilx); +#endif + /* + * Called by vm_normal_page() for special PTEs to find the + * page for @addr. This is useful if the default behavior + * (using pte_page()) would not find the correct page. + */ + struct page *(*find_special_page)(struct vm_area_struct *vma, + unsigned long addr); +}; + +static inline void vma_iter_invalidate(struct vma_iterator *vmi) +{ + mas_pause(&vmi->mas); +} + +static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) +{ + return __pgprot(pgprot_val(oldprot) | pgprot_val(newprot)); +} + +static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) +{ + return __pgprot(vm_flags); +} + +static inline bool is_shared_maywrite(vm_flags_t vm_flags) +{ + return (vm_flags & (VM_SHARED | VM_MAYWRITE)) == + (VM_SHARED | VM_MAYWRITE); +} + +static inline bool vma_is_shared_maywrite(struct vm_area_struct *vma) +{ + return is_shared_maywrite(vma->vm_flags); +} + +static inline struct vm_area_struct *vma_next(struct vma_iterator *vmi) +{ + /* + * Uses mas_find() to get the first VMA when the iterator starts. + * Calling mas_next() could skip the first entry. + */ + return mas_find(&vmi->mas, ULONG_MAX); +} + +static inline bool vma_lock_alloc(struct vm_area_struct *vma) +{ + vma->vm_lock = calloc(1, sizeof(struct vma_lock)); + + if (!vma->vm_lock) + return false; + + init_rwsem(&vma->vm_lock->lock); + vma->vm_lock_seq = -1; + + return true; +} + +static inline void vma_assert_write_locked(struct vm_area_struct *); +static inline void vma_mark_detached(struct vm_area_struct *vma, bool detached) +{ + /* When detaching vma should be write-locked */ + if (detached) + vma_assert_write_locked(vma); + vma->detached = detached; +} + +extern const struct vm_operations_struct vma_dummy_vm_ops; + +static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) +{ + memset(vma, 0, sizeof(*vma)); + vma->vm_mm = mm; + vma->vm_ops = &vma_dummy_vm_ops; + INIT_LIST_HEAD(&vma->anon_vma_chain); + vma_mark_detached(vma, false); +} + +static inline struct vm_area_struct *vm_area_alloc(struct mm_struct *mm) +{ + struct vm_area_struct *vma = calloc(1, sizeof(struct vm_area_struct)); + + if (!vma) + return NULL; + + vma_init(vma, mm); + if (!vma_lock_alloc(vma)) { + free(vma); + return NULL; + } + + return vma; +} + +static inline struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) +{ + struct vm_area_struct *new = calloc(1, sizeof(struct vm_area_struct)); + + if (!new) + return NULL; + + memcpy(new, orig, sizeof(*new)); + if (!vma_lock_alloc(new)) { + free(new); + return NULL; + } + INIT_LIST_HEAD(&new->anon_vma_chain); + + return new; +} + +/* + * These are defined in vma.h, but sadly vm_stat_account() is referenced by + * kernel/fork.c, so we have to these broadly available there, and temporarily + * define them here to resolve the dependency cycle. + */ + +#define is_exec_mapping(flags) \ + ((flags & (VM_EXEC | VM_WRITE | VM_STACK)) == VM_EXEC) + +#define is_stack_mapping(flags) \ + (((flags & VM_STACK) == VM_STACK) || (flags & VM_SHADOW_STACK)) + +#define is_data_mapping(flags) \ + ((flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE) + +static inline void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, + long npages) +{ + WRITE_ONCE(mm->total_vm, READ_ONCE(mm->total_vm)+npages); + + if (is_exec_mapping(flags)) + mm->exec_vm += npages; + else if (is_stack_mapping(flags)) + mm->stack_vm += npages; + else if (is_data_mapping(flags)) + mm->data_vm += npages; +} + +#undef is_exec_mapping +#undef is_stack_mapping +#undef is_data_mapping + +/* Currently stubbed but we may later wish to un-stub. */ +static inline void vm_acct_memory(long pages); +static inline void vm_unacct_memory(long pages) +{ + vm_acct_memory(-pages); +} + +static inline void mapping_allow_writable(struct address_space *mapping) +{ + atomic_inc(&mapping->i_mmap_writable); +} + +static inline void vma_set_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + pgoff_t pgoff) +{ + vma->vm_start = start; + vma->vm_end = end; + vma->vm_pgoff = pgoff; +} + +static inline +struct vm_area_struct *vma_find(struct vma_iterator *vmi, unsigned long max) +{ + return mas_find(&vmi->mas, max - 1); +} + +static inline int vma_iter_clear_gfp(struct vma_iterator *vmi, + unsigned long start, unsigned long end, gfp_t gfp) +{ + __mas_set_range(&vmi->mas, start, end - 1); + mas_store_gfp(&vmi->mas, NULL, gfp); + if (unlikely(mas_is_err(&vmi->mas))) + return -ENOMEM; + + return 0; +} + +static inline void mmap_assert_locked(struct mm_struct *); +static inline struct vm_area_struct *find_vma_intersection(struct mm_struct *mm, + unsigned long start_addr, + unsigned long end_addr) +{ + unsigned long index = start_addr; + + mmap_assert_locked(mm); + return mt_find(&mm->mm_mt, &index, end_addr - 1); +} + +static inline +struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr) +{ + return mtree_load(&mm->mm_mt, addr); +} + +static inline struct vm_area_struct *vma_prev(struct vma_iterator *vmi) +{ + return mas_prev(&vmi->mas, 0); +} + +static inline void vma_iter_set(struct vma_iterator *vmi, unsigned long addr) +{ + mas_set(&vmi->mas, addr); +} + +static inline bool vma_is_anonymous(struct vm_area_struct *vma) +{ + return !vma->vm_ops; +} + +/* Defined in vma.h, so temporarily define here to avoid circular dependency. */ +#define vma_iter_load(vmi) \ + mas_walk(&(vmi)->mas) + +static inline struct vm_area_struct * +find_vma_prev(struct mm_struct *mm, unsigned long addr, + struct vm_area_struct **pprev) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, addr); + + vma = vma_iter_load(&vmi); + *pprev = vma_prev(&vmi); + if (!vma) + vma = vma_next(&vmi); + return vma; +} + +#undef vma_iter_load + +static inline void vma_iter_init(struct vma_iterator *vmi, + struct mm_struct *mm, unsigned long addr) +{ + mas_init(&vmi->mas, &mm->mm_mt, addr); +} + +/* Stubbed functions. */ + +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, + struct vm_userfaultfd_ctx vm_ctx) +{ + return true; +} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) +{ + return true; +} + +static inline void might_sleep(void) +{ +} + +static inline unsigned long vma_pages(struct vm_area_struct *vma) +{ + return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; +} + +static inline void fput(struct file *) +{ +} + +static inline void mpol_put(struct mempolicy *) +{ +} + +static inline void vma_lock_free(struct vm_area_struct *vma) +{ + free(vma->vm_lock); +} + +static inline void __vm_area_free(struct vm_area_struct *vma) +{ + vma_lock_free(vma); + free(vma); +} + +static inline void vm_area_free(struct vm_area_struct *vma) +{ + __vm_area_free(vma); +} + +static inline void lru_add_drain(void) +{ +} + +static inline void tlb_gather_mmu(struct mmu_gather *, struct mm_struct *) +{ +} + +static inline void update_hiwater_rss(struct mm_struct *) +{ +} + +static inline void update_hiwater_vm(struct mm_struct *) +{ +} + +static inline void unmap_vmas(struct mmu_gather *tlb, struct ma_state *mas, + struct vm_area_struct *vma, unsigned long start_addr, + unsigned long end_addr, unsigned long tree_end, + bool mm_wr_locked) +{ + (void)tlb; + (void)mas; + (void)vma; + (void)start_addr; + (void)end_addr; + (void)tree_end; + (void)mm_wr_locked; +} + +static inline void free_pgtables(struct mmu_gather *tlb, struct ma_state *mas, + struct vm_area_struct *vma, unsigned long floor, + unsigned long ceiling, bool mm_wr_locked) +{ + (void)tlb; + (void)mas; + (void)vma; + (void)floor; + (void)ceiling; + (void)mm_wr_locked; +} + +static inline void mapping_unmap_writable(struct address_space *) +{ +} + +static inline void flush_dcache_mmap_lock(struct address_space *) +{ +} + +static inline void tlb_finish_mmu(struct mmu_gather *) +{ +} + +static inline void get_file(struct file *) +{ +} + +static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *) +{ + return 0; +} + +static inline int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) +{ + /* For testing purposes. We indicate that an anon_vma has been cloned. */ + if (src->anon_vma != NULL) { + dst->anon_vma = src->anon_vma; + dst->anon_vma->was_cloned = true; + } + + return 0; +} + +static inline void vma_start_write(struct vm_area_struct *vma) +{ + /* Used to indicate to tests that a write operation has begun. */ + vma->vm_lock_seq++; +} + +static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + long adjust_next) +{ + (void)vma; + (void)start; + (void)end; + (void)adjust_next; +} + +static inline void vma_iter_free(struct vma_iterator *vmi) +{ + mas_destroy(&vmi->mas); +} + +static inline +struct vm_area_struct *vma_iter_next_range(struct vma_iterator *vmi) +{ + return mas_next_range(&vmi->mas, ULONG_MAX); +} + +static inline void vm_acct_memory(long pages) +{ +} + +static inline void vma_interval_tree_insert(struct vm_area_struct *, + struct rb_root_cached *) +{ +} + +static inline void vma_interval_tree_remove(struct vm_area_struct *, + struct rb_root_cached *) +{ +} + +static inline void flush_dcache_mmap_unlock(struct address_space *) +{ +} + +static inline void anon_vma_interval_tree_insert(struct anon_vma_chain*, + struct rb_root_cached *) +{ +} + +static inline void anon_vma_interval_tree_remove(struct anon_vma_chain*, + struct rb_root_cached *) +{ +} + +static inline void uprobe_mmap(struct vm_area_struct *) +{ +} + +static inline void uprobe_munmap(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + (void)vma; + (void)start; + (void)end; +} + +static inline void i_mmap_lock_write(struct address_space *) +{ +} + +static inline void anon_vma_lock_write(struct anon_vma *) +{ +} + +static inline void vma_assert_write_locked(struct vm_area_struct *) +{ +} + +static inline void unlink_anon_vmas(struct vm_area_struct *vma) +{ + /* For testing purposes, indicate that the anon_vma was unlinked. */ + vma->anon_vma->was_unlinked = true; +} + +static inline void anon_vma_unlock_write(struct anon_vma *) +{ +} + +static inline void i_mmap_unlock_write(struct address_space *) +{ +} + +static inline void anon_vma_merge(struct vm_area_struct *, + struct vm_area_struct *) +{ +} + +static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, + unsigned long end, + struct list_head *unmaps) +{ + (void)vma; + (void)start; + (void)end; + (void)unmaps; + + return 0; +} + +static inline void mmap_write_downgrade(struct mm_struct *) +{ +} + +static inline void mmap_read_unlock(struct mm_struct *) +{ +} + +static inline void mmap_write_unlock(struct mm_struct *) +{ +} + +static inline bool can_modify_mm(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + (void)mm; + (void)start; + (void)end; + + return true; +} + +static inline void arch_unmap(struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + (void)mm; + (void)start; + (void)end; +} + +static inline void mmap_assert_locked(struct mm_struct *) +{ +} + +static inline bool mpol_equal(struct mempolicy *, struct mempolicy *) +{ + return true; +} + +static inline void khugepaged_enter_vma(struct vm_area_struct *vma, + unsigned long vm_flags) +{ + (void)vma; + (void)vm_flags; +} + +static inline bool mapping_can_writeback(struct address_space *) +{ + return true; +} + +static inline bool is_vm_hugetlb_page(struct vm_area_struct *) +{ + return false; +} + +static inline bool vma_soft_dirty_enabled(struct vm_area_struct *) +{ + return false; +} + +static inline bool userfaultfd_wp(struct vm_area_struct *) +{ + return false; +} + +static inline void mmap_assert_write_locked(struct mm_struct *) +{ +} + +static inline void mutex_lock(struct mutex *) +{ +} + +static inline void mutex_unlock(struct mutex *) +{ +} + +static inline bool mutex_is_locked(struct mutex *) +{ + return true; +} + +static inline bool signal_pending(void *) +{ + return false; +} + +#endif /* __MM_VMA_INTERNAL_H */ diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 554b290fefdc..a3d448a075e3 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po } /* Connect to <cid, port> and return the file descriptor. */ -static int vsock_connect(unsigned int cid, unsigned int port, int type) +int vsock_connect(unsigned int cid, unsigned int port, int type) { union { struct sockaddr sa; @@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type) /* Listen on <cid, port> and return the first incoming connection. The remote * address is stored to clientaddrp. clientaddrp may be NULL. */ -static int vsock_accept(unsigned int cid, unsigned int port, - struct sockaddr_vm *clientaddrp, int type) +int vsock_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp, int type) { union { struct sockaddr sa; diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e95e62485959..fff22d4a14c0 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -39,6 +39,9 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); unsigned int parse_port(const char *str); +int vsock_connect(unsigned int cid, unsigned int port, int type); +int vsock_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index f851f8961247..8d38dbf8f41f 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -20,6 +20,8 @@ #include <sys/mman.h> #include <poll.h> #include <signal.h> +#include <sys/ioctl.h> +#include <linux/sockios.h> #include "vsock_test_zerocopy.h" #include "timeout.h" @@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts) } } +#define MSG_BUF_IOCTL_LEN 64 +static void test_unsent_bytes_server(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int client_fd; + + client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type); + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf)); + control_writeln("RECEIVED"); + + close(client_fd); +} + +static void test_unsent_bytes_client(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int ret, fd, sock_bytes_unsent; + + fd = vsock_connect(opts->peer_cid, opts->peer_port, type); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + control_expectln("RECEIVED"); + + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + } else { + perror("ioctl"); + exit(EXIT_FAILURE); + } + } else if (ret == 0 && sock_bytes_unsent != 0) { + fprintf(stderr, + "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", + sock_bytes_unsent); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_unsent_bytes_client(const struct test_opts *opts) +{ + test_unsent_bytes_client(opts, SOCK_STREAM); +} + +static void test_stream_unsent_bytes_server(const struct test_opts *opts) +{ + test_unsent_bytes_server(opts, SOCK_STREAM); +} + +static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts) +{ + test_unsent_bytes_client(opts, SOCK_SEQPACKET); +} + +static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts) +{ + test_unsent_bytes_server(opts, SOCK_SEQPACKET); +} + #define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) /* This define is the same as in 'include/linux/virtio_vsock.h': * it is used to decide when to send credit update message during @@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = { .run_client = test_stream_rcvlowat_def_cred_upd_client, .run_server = test_stream_cred_upd_on_low_rx_bytes, }, + { + .name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes", + .run_client = test_stream_unsent_bytes_client, + .run_server = test_stream_unsent_bytes_server, + }, + { + .name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes", + .run_client = test_seqpacket_unsent_bytes_client, + .run_server = test_seqpacket_unsent_bytes_server, + }, {}, }; |