diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2021-12-27 16:35:44 +0100 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2021-12-27 16:35:44 +0100 |
commit | 3f0bb496ee41d0eb99d308768c9f8593cbd3eb9f (patch) | |
tree | c56b697a7e818e91974d951872781bfd9b3af5b5 /tools | |
parent | d7fbdc575b33c374ce88ccfe3ab364f7eb240f8a (diff) | |
parent | 9c33eef84e3110963d6f41d87d63bc40d716eb1f (diff) |
Merge branches 'thermal-tools' and 'thermal-int340x'
Merge tmon fix and int340x driver improvement for 5.17-rc1.
* thermal-tools:
thermal: tools: tmon: remove unneeded local variable
* thermal-int340x:
thermal: int340x: Use struct_group() for memcpy() region
Diffstat (limited to 'tools')
23 files changed, 1340 insertions, 290 deletions
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index a7e54a08fb54..3e8df500cfbd 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -7,6 +7,7 @@ #include <assert.h> #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/math.h> #include <endian.h> #include <byteswap.h> @@ -14,8 +15,6 @@ #define UINT_MAX (~0U) #endif -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) - #define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1) #define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) @@ -52,15 +51,6 @@ _min1 < _min2 ? _min1 : _min2; }) #endif -#ifndef roundup -#define roundup(x, y) ( \ -{ \ - const typeof(y) __y = y; \ - (((x) + (__y - 1)) / __y) * __y; \ -} \ -) -#endif - #ifndef BUG_ON #ifdef NDEBUG #define BUG_ON(cond) do { if (cond) {} } while (0) @@ -104,16 +94,6 @@ int scnprintf_pad(char * buf, size_t size, const char * fmt, ...); #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) -/* - * This looks more complex than it should be. But we need to - * get the type for the ~ right in round_down (it needs to be - * as wide as the result!), and we want to evaluate the macro - * arguments just once each. - */ -#define __round_mask(x, y) ((__typeof__(x))((y)-1)) -#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) -#define round_down(x, y) ((x) & ~__round_mask(x, y)) - #define current_gfp_context(k) 0 #define synchronize_rcu() diff --git a/tools/include/linux/math.h b/tools/include/linux/math.h new file mode 100644 index 000000000000..4e7af99ec9eb --- /dev/null +++ b/tools/include/linux/math.h @@ -0,0 +1,25 @@ +#ifndef _TOOLS_MATH_H +#define _TOOLS_MATH_H + +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#ifndef roundup +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#endif + +#endif diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index b3610fdd1fee..eebd3894fe89 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -7,24 +7,23 @@ /* This struct should be in sync with struct rtnl_link_stats64 */ struct rtnl_link_stats { - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ + __u32 rx_packets; + __u32 tx_packets; + __u32 rx_bytes; + __u32 tx_bytes; + __u32 rx_errors; + __u32 tx_errors; + __u32 rx_dropped; + __u32 tx_dropped; + __u32 multicast; __u32 collisions; - /* detailed rx_errors: */ __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ + __u32 rx_over_errors; + __u32 rx_crc_errors; + __u32 rx_frame_errors; + __u32 rx_fifo_errors; + __u32 rx_missed_errors; /* detailed tx_errors */ __u32 tx_aborted_errors; @@ -37,29 +36,201 @@ struct rtnl_link_stats { __u32 rx_compressed; __u32 tx_compressed; - __u32 rx_nohandler; /* dropped, no handler found */ + __u32 rx_nohandler; }; -/* The main device statistics structure */ +/** + * struct rtnl_link_stats64 - The main device statistics structure. + * + * @rx_packets: Number of good packets received by the interface. + * For hardware interfaces counts all good packets received from the device + * by the host, including packets which host had to drop at various stages + * of processing (even in the driver). + * + * @tx_packets: Number of packets successfully transmitted. + * For hardware interfaces counts packets which host was able to successfully + * hand over to the device, which does not necessarily mean that packets + * had been successfully transmitted out of the device, only that device + * acknowledged it copied them out of host memory. + * + * @rx_bytes: Number of good received bytes, corresponding to @rx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @tx_bytes: Number of good transmitted bytes, corresponding to @tx_packets. + * + * For IEEE 802.3 devices should count the length of Ethernet Frames + * excluding the FCS. + * + * @rx_errors: Total number of bad packets received on this network device. + * This counter must include events counted by @rx_length_errors, + * @rx_crc_errors, @rx_frame_errors and other errors not otherwise + * counted. + * + * @tx_errors: Total number of transmit problems. + * This counter must include events counter by @tx_aborted_errors, + * @tx_carrier_errors, @tx_fifo_errors, @tx_heartbeat_errors, + * @tx_window_errors and other errors not otherwise counted. + * + * @rx_dropped: Number of packets received but not processed, + * e.g. due to lack of resources or unsupported protocol. + * For hardware interfaces this counter may include packets discarded + * due to L2 address filtering but should not include packets dropped + * by the device due to buffer exhaustion which are counted separately in + * @rx_missed_errors (since procfs folds those two counters together). + * + * @tx_dropped: Number of packets dropped on their way to transmission, + * e.g. due to lack of resources. + * + * @multicast: Multicast packets received. + * For hardware interfaces this statistic is commonly calculated + * at the device level (unlike @rx_packets) and therefore may include + * packets which did not reach the host. + * + * For IEEE 802.3 devices this counter may be equivalent to: + * + * - 30.3.1.1.21 aMulticastFramesReceivedOK + * + * @collisions: Number of collisions during packet transmissions. + * + * @rx_length_errors: Number of packets dropped due to invalid length. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to a sum + * of the following attributes: + * + * - 30.3.1.1.23 aInRangeLengthErrors + * - 30.3.1.1.24 aOutOfRangeLengthField + * - 30.3.1.1.25 aFrameTooLongErrors + * + * @rx_over_errors: Receiver FIFO overflow event counter. + * + * Historically the count of overflow events. Such events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * The recommended interpretation for high speed interfaces is - + * number of packets dropped because they did not fit into buffers + * provided by the host, e.g. packets larger than MTU or next buffer + * in the ring was not available for a scatter transfer. + * + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * This statistics was historically used interchangeably with + * @rx_fifo_errors. + * + * This statistic corresponds to hardware events and is not commonly used + * on software devices. + * + * @rx_crc_errors: Number of packets received with a CRC error. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.6 aFrameCheckSequenceErrors + * + * @rx_frame_errors: Receiver frame alignment errors. + * Part of aggregate "frame" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter should be equivalent to: + * + * - 30.3.1.1.7 aAlignmentErrors + * + * @rx_fifo_errors: Receiver FIFO error counter. + * + * Historically the count of overflow events. Those events may be + * reported in the receive descriptors or via interrupts, and may + * not correspond one-to-one with dropped packets. + * + * This statistics was used interchangeably with @rx_over_errors. + * Not recommended for use in drivers for high speed interfaces. + * + * This statistic is used on software devices, e.g. to count software + * packet queue overflow (can) or sequencing errors (GRE). + * + * @rx_missed_errors: Count of packets missed by the host. + * Folded into the "drop" counter in `/proc/net/dev`. + * + * Counts number of packets dropped by the device due to lack + * of buffer space. This usually indicates that the host interface + * is slower than the network interface, or host is not keeping up + * with the receive packet rate. + * + * This statistic corresponds to hardware events and is not used + * on software devices. + * + * @tx_aborted_errors: + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * For IEEE 802.3 devices capable of half-duplex operation this counter + * must be equivalent to: + * + * - 30.3.1.1.11 aFramesAbortedDueToXSColls + * + * High speed interfaces may use this counter as a general device + * discard counter. + * + * @tx_carrier_errors: Number of frame transmission errors due to loss + * of carrier during transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.13 aCarrierSenseErrors + * + * @tx_fifo_errors: Number of frame transmission errors due to device + * FIFO underrun / underflow. This condition occurs when the device + * begins transmission of a frame but is unable to deliver the + * entire frame to the transmitter in time for transmission. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * @tx_heartbeat_errors: Number of Heartbeat / SQE Test errors for + * old half-duplex Ethernet. + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices possibly equivalent to: + * + * - 30.3.2.1.4 aSQETestErrors + * + * @tx_window_errors: Number of frame transmission errors due + * to late collisions (for Ethernet - after the first 64B of transmission). + * Part of aggregate "carrier" errors in `/proc/net/dev`. + * + * For IEEE 802.3 devices this counter must be equivalent to: + * + * - 30.3.1.1.10 aLateCollisions + * + * @rx_compressed: Number of correctly received compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @tx_compressed: Number of transmitted compressed packets. + * This counters is only meaningful for interfaces which support + * packet compression (e.g. CSLIP, PPP). + * + * @rx_nohandler: Number of packets received on the interface + * but dropped by the networking stack because the device is + * not designated to receive packets (e.g. backup link in a bond). + */ struct rtnl_link_stats64 { - __u64 rx_packets; /* total packets received */ - __u64 tx_packets; /* total packets transmitted */ - __u64 rx_bytes; /* total bytes received */ - __u64 tx_bytes; /* total bytes transmitted */ - __u64 rx_errors; /* bad packets received */ - __u64 tx_errors; /* packet transmit problems */ - __u64 rx_dropped; /* no space in linux buffers */ - __u64 tx_dropped; /* no space available in linux */ - __u64 multicast; /* multicast packets received */ + __u64 rx_packets; + __u64 tx_packets; + __u64 rx_bytes; + __u64 tx_bytes; + __u64 rx_errors; + __u64 tx_errors; + __u64 rx_dropped; + __u64 tx_dropped; + __u64 multicast; __u64 collisions; /* detailed rx_errors: */ __u64 rx_length_errors; - __u64 rx_over_errors; /* receiver ring buff overflow */ - __u64 rx_crc_errors; /* recved pkt with crc error */ - __u64 rx_frame_errors; /* recv'd frame alignment error */ - __u64 rx_fifo_errors; /* recv'r fifo overrun */ - __u64 rx_missed_errors; /* receiver missed packet */ + __u64 rx_over_errors; + __u64 rx_crc_errors; + __u64 rx_frame_errors; + __u64 rx_fifo_errors; + __u64 rx_missed_errors; /* detailed tx_errors */ __u64 tx_aborted_errors; @@ -71,8 +242,7 @@ struct rtnl_link_stats64 { /* for cslip etc */ __u64 rx_compressed; __u64 tx_compressed; - - __u64 rx_nohandler; /* dropped, no handler found */ + __u64 rx_nohandler; }; /* The struct should be in sync with struct ifmap */ @@ -170,12 +340,29 @@ enum { IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ IFLA_PERM_ADDRESS, + IFLA_PROTO_DOWN_REASON, + + /* device (sysfs) name as parent, used instead + * of IFLA_LINK where there's no parent netdev + */ + IFLA_PARENT_DEV_NAME, + IFLA_PARENT_DEV_BUS_NAME, + __IFLA_MAX }; #define IFLA_MAX (__IFLA_MAX - 1) +enum { + IFLA_PROTO_DOWN_REASON_UNSPEC, + IFLA_PROTO_DOWN_REASON_MASK, /* u32, mask for reason bits */ + IFLA_PROTO_DOWN_REASON_VALUE, /* u32, reason bit value */ + + __IFLA_PROTO_DOWN_REASON_CNT, + IFLA_PROTO_DOWN_REASON_MAX = __IFLA_PROTO_DOWN_REASON_CNT - 1 +}; + /* backwards compatibility for userspace */ #ifndef __KERNEL__ #define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) @@ -293,6 +480,7 @@ enum { IFLA_BR_MCAST_MLD_VERSION, IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, + IFLA_BR_MCAST_QUERIER_STATE, __IFLA_BR_MAX, }; @@ -346,6 +534,8 @@ enum { IFLA_BRPORT_BACKUP_PORT, IFLA_BRPORT_MRP_RING_OPEN, IFLA_BRPORT_MRP_IN_OPEN, + IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT, + IFLA_BRPORT_MCAST_EHT_HOSTS_CNT, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -433,6 +623,7 @@ enum macvlan_macaddr_mode { }; #define MACVLAN_FLAG_NOPROMISC 1 +#define MACVLAN_FLAG_NODST 2 /* skip dst macvlan if matching src macvlan */ /* VRF section */ enum { @@ -597,6 +788,18 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, @@ -899,7 +1102,14 @@ enum { #define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1) -/* HSR section */ +/* HSR/PRP section, both uses same interface */ + +/* Different redundancy protocols for hsr device */ +enum { + HSR_PROTOCOL_HSR, + HSR_PROTOCOL_PRP, + HSR_PROTOCOL_MAX, +}; enum { IFLA_HSR_UNSPEC, @@ -909,6 +1119,9 @@ enum { IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ IFLA_HSR_SEQ_NR, IFLA_HSR_VERSION, /* HSR version */ + IFLA_HSR_PROTOCOL, /* Indicate different protocol than + * HSR. For example PRP. + */ __IFLA_HSR_MAX, }; @@ -1033,6 +1246,8 @@ enum { #define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1) #define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2) #define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3) +#define RMNET_FLAGS_INGRESS_MAP_CKSUMV5 (1U << 4) +#define RMNET_FLAGS_EGRESS_MAP_CKSUMV5 (1U << 5) enum { IFLA_RMNET_UNSPEC, @@ -1048,4 +1263,14 @@ struct ifla_rmnet_flags { __u32 mask; }; +/* MCTP section */ + +enum { + IFLA_MCTP_UNSPEC, + IFLA_MCTP_NET, + __IFLA_MCTP_MAX, +}; + +#define IFLA_MCTP_MAX (__IFLA_MCTP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 81a4c543ff7e..4b384c907027 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf) return -1; } memset(sym, 0, sizeof(*sym)); + INIT_LIST_HEAD(&sym->pv_target); sym->alias = sym; sym->idx = i; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index c90c7084e45a..bdf699f6552b 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func) !strcmp(func->name, "_paravirt_ident_64")) return; + /* already added this function */ + if (!list_empty(&func->pv_target)) + return; + list_add(&func->pv_target, &f->pv_ops[idx].targets); f->pv_ops[idx].clean = false; } diff --git a/tools/testing/radix-tree/linux/lockdep.h b/tools/testing/radix-tree/linux/lockdep.h index 565fccdfe6e9..016cff473cfc 100644 --- a/tools/testing/radix-tree/linux/lockdep.h +++ b/tools/testing/radix-tree/linux/lockdep.h @@ -1,5 +1,8 @@ #ifndef _LINUX_LOCKDEP_H #define _LINUX_LOCKDEP_H + +#include <linux/spinlock.h> + struct lock_class_key { unsigned int a; }; diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index f968dfd4ee88..aed9dc3ca1e9 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -12,6 +12,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/resource.h> #include "test_util.h" @@ -40,11 +41,40 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + /* + * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + + * an arbitrary number for everything else. + */ + int nr_fds_wanted = kvm_max_vcpus + 100; + struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + rl.rlim_max = nr_fds_wanted; + + int r = setrlimit(RLIMIT_NOFILE, &rl); + if (r < 0) { + printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + old_rlim_max, nr_fds_wanted); + exit(KSFT_SKIP); + } + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + + /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID * in this case. diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 3836322add00..ba1fdc3dcf4a 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -280,7 +280,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg) #ifdef __s390x__ alignment = max(0x100000, alignment); #endif - guest_test_phys_mem = align_down(guest_test_virt_mem, alignment); + guest_test_phys_mem = align_down(guest_test_phys_mem, alignment); /* Set up the shared data structure test_args */ test_args.vm = vm; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 91d88aaa9899..672915ce73d8 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -165,10 +165,10 @@ static void hv_set_cpuid(struct kvm_vm *vm, struct kvm_cpuid2 *cpuid, vcpu_set_cpuid(vm, VCPU_ID, cpuid); } -static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, - struct kvm_cpuid2 *best) +static void guest_test_msrs_access(void) { struct kvm_run *run; + struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -180,11 +180,34 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, struct kvm_cpuid_entry2 dbg = { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - struct kvm_enable_cap cap = {0}; - - run = vcpu_state(vm, VCPU_ID); + struct kvm_cpuid2 *best; + vm_vaddr_t msr_gva; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + struct msr_data *msr; while (true) { + vm = vm_create_default(VCPU_ID, 0, guest_msr); + + msr_gva = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); + msr = addr_gva2hva(vm, msr_gva); + + vcpu_args_set(vm, VCPU_ID, 1, msr_gva); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); + + run = vcpu_state(vm, VCPU_ID); + switch (stage) { case 0: /* @@ -315,6 +338,7 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, * capability enabled and guest visible CPUID bit unset. */ cap.cap = KVM_CAP_HYPERV_SYNIC2; + cap.args[0] = 0; vcpu_enable_cap(vm, VCPU_ID, &cap); break; case 22: @@ -461,9 +485,9 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", - uc.args[1], stage); + TEST_ASSERT(uc.args[1] == 0, + "Unexpected stage: %ld (0 expected)\n", + uc.args[1]); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -474,13 +498,14 @@ static void guest_test_msrs_access(struct kvm_vm *vm, struct msr_data *msr, } stage++; + kvm_vm_free(vm); } } -static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall, - void *input, void *output, struct kvm_cpuid2 *best) +static void guest_test_hcalls_access(void) { struct kvm_run *run; + struct kvm_vm *vm; struct ucall uc; int stage = 0, r; struct kvm_cpuid_entry2 feat = { @@ -493,10 +518,38 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall struct kvm_cpuid_entry2 dbg = { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }; - - run = vcpu_state(vm, VCPU_ID); + struct kvm_enable_cap cap = { + .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, + .args = {1} + }; + vm_vaddr_t hcall_page, hcall_params; + struct hcall_data *hcall; + struct kvm_cpuid2 *best; while (true) { + vm = vm_create_default(VCPU_ID, 0, guest_hcall); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); + + /* Hypercall input/output */ + hcall_page = vm_vaddr_alloc_pages(vm, 2); + hcall = addr_gva2hva(vm, hcall_page); + memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); + + hcall_params = vm_vaddr_alloc_page(vm); + memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); + + vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); + vcpu_enable_cap(vm, VCPU_ID, &cap); + + vcpu_set_hv_cpuid(vm, VCPU_ID); + + best = kvm_get_supported_hv_cpuid(); + + run = vcpu_state(vm, VCPU_ID); + switch (stage) { case 0: hcall->control = 0xdeadbeef; @@ -606,9 +659,9 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_SYNC: - TEST_ASSERT(uc.args[1] == stage, - "Unexpected stage: %ld (%d expected)\n", - uc.args[1], stage); + TEST_ASSERT(uc.args[1] == 0, + "Unexpected stage: %ld (0 expected)\n", + uc.args[1]); break; case UCALL_ABORT: TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], @@ -619,66 +672,15 @@ static void guest_test_hcalls_access(struct kvm_vm *vm, struct hcall_data *hcall } stage++; + kvm_vm_free(vm); } } int main(void) { - struct kvm_cpuid2 *best; - struct kvm_vm *vm; - vm_vaddr_t msr_gva, hcall_page, hcall_params; - struct kvm_enable_cap cap = { - .cap = KVM_CAP_HYPERV_ENFORCE_CPUID, - .args = {1} - }; - - /* Test MSRs */ - vm = vm_create_default(VCPU_ID, 0, guest_msr); - - msr_gva = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize()); - vcpu_args_set(vm, VCPU_ID, 1, msr_gva); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); - pr_info("Testing access to Hyper-V specific MSRs\n"); - guest_test_msrs_access(vm, addr_gva2hva(vm, msr_gva), - best); - kvm_vm_free(vm); - - /* Test hypercalls */ - vm = vm_create_default(VCPU_ID, 0, guest_hcall); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(vm, VCPU_ID); - vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); - - /* Hypercall input/output */ - hcall_page = vm_vaddr_alloc_pages(vm, 2); - memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize()); - - hcall_params = vm_vaddr_alloc_page(vm); - memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize()); - - vcpu_args_set(vm, VCPU_ID, 2, addr_gva2gpa(vm, hcall_page), hcall_params); - vcpu_enable_cap(vm, VCPU_ID, &cap); - - vcpu_set_hv_cpuid(vm, VCPU_ID); - - best = kvm_get_supported_hv_cpuid(); + guest_test_msrs_access(); pr_info("Testing access to Hyper-V hypercalls\n"); - guest_test_hcalls_access(vm, addr_gva2hva(vm, hcall_params), - addr_gva2hva(vm, hcall_page), - addr_gva2hva(vm, hcall_page) + getpagesize(), - best); - - kvm_vm_free(vm); + guest_test_hcalls_access(); } diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c index 5ba325cd64bf..29b18d565cf4 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c +++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c @@ -54,12 +54,15 @@ static struct kvm_vm *sev_vm_create(bool es) return vm; } -static struct kvm_vm *__vm_create(void) +static struct kvm_vm *aux_vm_create(bool with_vcpus) { struct kvm_vm *vm; int i; vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); + if (!with_vcpus) + return vm; + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) vm_vcpu_add(vm, i); @@ -89,11 +92,11 @@ static void test_sev_migrate_from(bool es) { struct kvm_vm *src_vm; struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS]; - int i; + int i, ret; src_vm = sev_vm_create(es); for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) - dst_vms[i] = __vm_create(); + dst_vms[i] = aux_vm_create(true); /* Initial migration from the src to the first dst. */ sev_migrate_from(dst_vms[0]->fd, src_vm->fd); @@ -102,7 +105,10 @@ static void test_sev_migrate_from(bool es) sev_migrate_from(dst_vms[i]->fd, dst_vms[i - 1]->fd); /* Migrate the guest back to the original VM. */ - sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); + ret = __sev_migrate_from(src_vm->fd, dst_vms[NR_MIGRATE_TEST_VMS - 1]->fd); + TEST_ASSERT(ret == -1 && errno == EIO, + "VM that was migrated from should be dead. ret %d, errno: %d\n", ret, + errno); kvm_vm_free(src_vm); for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i) @@ -146,6 +152,8 @@ static void test_sev_migrate_locking(void) for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) pthread_join(pt[i], NULL); + for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) + kvm_vm_free(input[i].vm); } static void test_sev_migrate_parameters(void) @@ -157,12 +165,11 @@ static void test_sev_migrate_parameters(void) sev_vm = sev_vm_create(/* es= */ false); sev_es_vm = sev_vm_create(/* es= */ true); vm_no_vcpu = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); - vm_no_sev = __vm_create(); + vm_no_sev = aux_vm_create(true); sev_es_vm_no_vmsa = vm_create(VM_MODE_DEFAULT, 0, O_RDWR); sev_ioctl(sev_es_vm_no_vmsa->fd, KVM_SEV_ES_INIT, NULL); vm_vcpu_add(sev_es_vm_no_vmsa, 1); - ret = __sev_migrate_from(sev_vm->fd, sev_es_vm->fd); TEST_ASSERT( ret == -1 && errno == EINVAL, @@ -191,13 +198,151 @@ static void test_sev_migrate_parameters(void) TEST_ASSERT(ret == -1 && errno == EINVAL, "Migrations require SEV enabled. ret %d, errno: %d\n", ret, errno); + + kvm_vm_free(sev_vm); + kvm_vm_free(sev_es_vm); + kvm_vm_free(sev_es_vm_no_vmsa); + kvm_vm_free(vm_no_vcpu); + kvm_vm_free(vm_no_sev); +} + +static int __sev_mirror_create(int dst_fd, int src_fd) +{ + struct kvm_enable_cap cap = { + .cap = KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, + .args = { src_fd } + }; + + return ioctl(dst_fd, KVM_ENABLE_CAP, &cap); +} + + +static void sev_mirror_create(int dst_fd, int src_fd) +{ + int ret; + + ret = __sev_mirror_create(dst_fd, src_fd); + TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d\n", ret, errno); +} + +static void test_sev_mirror(bool es) +{ + struct kvm_vm *src_vm, *dst_vm; + struct kvm_sev_launch_start start = { + .policy = es ? SEV_POLICY_ES : 0 + }; + int i; + + src_vm = sev_vm_create(es); + dst_vm = aux_vm_create(false); + + sev_mirror_create(dst_vm->fd, src_vm->fd); + + /* Check that we can complete creation of the mirror VM. */ + for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i) + vm_vcpu_add(dst_vm, i); + sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_START, &start); + if (es) + sev_ioctl(dst_vm->fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL); + + kvm_vm_free(src_vm); + kvm_vm_free(dst_vm); +} + +static void test_sev_mirror_parameters(void) +{ + struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu; + int ret; + + sev_vm = sev_vm_create(/* es= */ false); + sev_es_vm = sev_vm_create(/* es= */ true); + vm_with_vcpu = aux_vm_create(true); + vm_no_vcpu = aux_vm_create(false); + + ret = __sev_mirror_create(sev_vm->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to self. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(sev_vm->fd, sev_es_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(sev_es_vm->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d\n", + ret, errno); + + ret = __sev_mirror_create(vm_no_vcpu->fd, vm_with_vcpu->fd); + TEST_ASSERT(ret == -1 && errno == EINVAL, + "Copy context requires SEV enabled. ret %d, errno: %d\n", ret, + errno); + + ret = __sev_mirror_create(vm_with_vcpu->fd, sev_vm->fd); + TEST_ASSERT( + ret == -1 && errno == EINVAL, + "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d\n", + ret, errno); + + kvm_vm_free(sev_vm); + kvm_vm_free(sev_es_vm); + kvm_vm_free(vm_with_vcpu); + kvm_vm_free(vm_no_vcpu); +} + +static void test_sev_move_copy(void) +{ + struct kvm_vm *dst_vm, *sev_vm, *mirror_vm, *dst_mirror_vm; + int ret; + + sev_vm = sev_vm_create(/* es= */ false); + dst_vm = aux_vm_create(true); + mirror_vm = aux_vm_create(false); + dst_mirror_vm = aux_vm_create(false); + + sev_mirror_create(mirror_vm->fd, sev_vm->fd); + ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); + TEST_ASSERT(ret == -1 && errno == EBUSY, + "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret, + errno); + + /* The mirror itself can be migrated. */ + sev_migrate_from(dst_mirror_vm->fd, mirror_vm->fd); + ret = __sev_migrate_from(dst_vm->fd, sev_vm->fd); + TEST_ASSERT(ret == -1 && errno == EBUSY, + "Cannot migrate VM that has mirrors. ret %d, errno: %d\n", ret, + errno); + + /* + * mirror_vm is not a mirror anymore, dst_mirror_vm is. Thus, + * the owner can be copied as soon as dst_mirror_vm is gone. + */ + kvm_vm_free(dst_mirror_vm); + sev_migrate_from(dst_vm->fd, sev_vm->fd); + + kvm_vm_free(mirror_vm); + kvm_vm_free(dst_vm); + kvm_vm_free(sev_vm); } int main(int argc, char *argv[]) { - test_sev_migrate_from(/* es= */ false); - test_sev_migrate_from(/* es= */ true); - test_sev_migrate_locking(); - test_sev_migrate_parameters(); + if (kvm_check_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) { + test_sev_migrate_from(/* es= */ false); + test_sev_migrate_from(/* es= */ true); + test_sev_migrate_locking(); + test_sev_migrate_parameters(); + if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) + test_sev_move_copy(); + } + if (kvm_check_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) { + test_sev_mirror(/* es= */ false); + test_sev_mirror(/* es= */ true); + test_sev_mirror_parameters(); + } return 0; } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 7615f29831eb..9897fa9ab953 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -34,6 +34,7 @@ TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += vrf_strict_mode_test.sh +TEST_PROGS += arp_ndisc_evict_nocarrier.sh TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh TEST_GEN_FILES = socket nettest diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 3313566ce906..7f5b265fcb90 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4002,8 +4002,8 @@ EOF ################################################################################ # main -TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter" -TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter" +TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" +TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" PAUSE_ON_FAIL=no diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index b5a69ad191b0..d444ee6aa3cb 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -629,6 +629,66 @@ ipv6_fcnal() log_test $? 0 "Nexthops removed on admin down" } +ipv6_grp_refs() +{ + if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test; need mausezahn tool" + return + fi + + run_cmd "$IP link set dev veth1 up" + run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10" + run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20" + run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10" + run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20" + run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10" + run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20" + run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10" + run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20" + run_cmd "$IP nexthop add id 102 group 100" + run_cmd "$IP route add 2001:db8:101::1/128 nhid 102" + + # create per-cpu dsts through nh 100 + run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1" + + # remove nh 100 from the group to delete the route potentially leaving + # a stale per-cpu dst which holds a reference to the nexthop's net + # device and to the IPv6 route + run_cmd "$IP nexthop replace id 102 group 101" + run_cmd "$IP route del 2001:db8:101::1/128" + + # add both nexthops to the group so a reference is taken on them + run_cmd "$IP nexthop replace id 102 group 100/101" + + # if the bug described in commit "net: nexthop: release IPv6 per-cpu + # dsts when replacing a nexthop group" exists at this point we have + # an unlinked IPv6 route (but not freed due to stale dst) with a + # reference over the group so we delete the group which will again + # only unlink it due to the route reference + run_cmd "$IP nexthop del id 102" + + # delete the nexthop with stale dst, since we have an unlinked + # group with a ref to it and an unlinked IPv6 route with ref to the + # group, the nh will only be unlinked and not freed so the stale dst + # remains forever and we get a net device refcount imbalance + run_cmd "$IP nexthop del id 100" + + # if a reference was lost this command will hang because the net device + # cannot be removed + timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1 + + # we can't cleanup if the command is hung trying to delete the netdev + if [ $? -eq 137 ]; then + return 1 + fi + + # cleanup + run_cmd "$IP link del veth1.20" + run_cmd "$IP nexthop flush" + + return 0 +} + ipv6_grp_fcnal() { local rc @@ -734,6 +794,9 @@ ipv6_grp_fcnal() run_cmd "$IP nexthop add id 108 group 31/24" log_test $? 2 "Nexthop group can not have a blackhole and another nexthop" + + ipv6_grp_refs + log_test $? 0 "Nexthop group replace refcounts" } ipv6_res_grp_fcnal() diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index e61fc4c32ba2..8a22db0cca49 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -78,26 +78,21 @@ static void memrnd(void *s, size_t n) *byte++ = rand(); } -FIXTURE(tls_basic) -{ - int fd, cfd; - bool notls; -}; - -FIXTURE_SETUP(tls_basic) +static void ulp_sock_pair(struct __test_metadata *_metadata, + int *fd, int *cfd, bool *notls) { struct sockaddr_in addr; socklen_t len; int sfd, ret; - self->notls = false; + *notls = false; len = sizeof(addr); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); addr.sin_port = 0; - self->fd = socket(AF_INET, SOCK_STREAM, 0); + *fd = socket(AF_INET, SOCK_STREAM, 0); sfd = socket(AF_INET, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); @@ -108,26 +103,96 @@ FIXTURE_SETUP(tls_basic) ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); - ret = connect(self->fd, &addr, sizeof(addr)); + ret = connect(*fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); + *cfd = accept(sfd, &addr, &len); + ASSERT_GE(*cfd, 0); close(sfd); - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); if (ret != 0) { ASSERT_EQ(errno, ENOENT); - self->notls = true; + *notls = true; printf("Failure setting TCP_ULP, testing without tls\n"); return; } - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); ASSERT_EQ(ret, 0); } +/* Produce a basic cmsg */ +static int tls_send_cmsg(int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + int cmsg_len = sizeof(char); + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec vec; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_TLS; + /* test sending non-record types. */ + cmsg->cmsg_type = TLS_SET_RECORD_TYPE; + cmsg->cmsg_len = CMSG_LEN(cmsg_len); + *CMSG_DATA(cmsg) = record_type; + msg.msg_controllen = cmsg->cmsg_len; + + return sendmsg(fd, &msg, flags); +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + char cbuf[CMSG_SPACE(sizeof(char))]; + struct cmsghdr *cmsg; + unsigned char ctype; + struct msghdr msg; + struct iovec vec; + int n; + + vec.iov_base = data; + vec.iov_len = len; + memset(&msg, 0, sizeof(struct msghdr)); + msg.msg_iov = &vec; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + n = recvmsg(fd, &msg, flags); + + cmsg = CMSG_FIRSTHDR(&msg); + EXPECT_NE(cmsg, NULL); + EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); + EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); + ctype = *((unsigned char *)CMSG_DATA(cmsg)); + EXPECT_EQ(ctype, record_type); + + return n; +} + +FIXTURE(tls_basic) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_SETUP(tls_basic) +{ + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); +} + FIXTURE_TEARDOWN(tls_basic) { close(self->fd); @@ -199,60 +264,21 @@ FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm) FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; - struct sockaddr_in addr; - socklen_t len; - int sfd, ret; - - self->notls = false; - len = sizeof(addr); + int ret; tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); - self->fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); - - ret = bind(sfd, &addr, sizeof(addr)); - ASSERT_EQ(ret, 0); - ret = listen(sfd, 10); - ASSERT_EQ(ret, 0); + if (self->notls) + return; - ret = getsockname(sfd, &addr, &len); + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); ASSERT_EQ(ret, 0); - ret = connect(self->fd, &addr, sizeof(addr)); + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); ASSERT_EQ(ret, 0); - - ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - self->notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); - } - - if (!self->notls) { - ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, - tls12.len); - ASSERT_EQ(ret, 0); - } - - self->cfd = accept(sfd, &addr, &len); - ASSERT_GE(self->cfd, 0); - - if (!self->notls) { - ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - ASSERT_EQ(ret, 0); - - ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, - tls12.len); - ASSERT_EQ(ret, 0); - } - - close(sfd); } FIXTURE_TEARDOWN(tls) @@ -613,6 +639,95 @@ TEST_F(tls, splice_to_pipe) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +TEST_F(tls, splice_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, splice_dec_cmsg_to_pipe) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10]; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(errno, EIO); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1); + EXPECT_EQ(errno, EINVAL); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); + EXPECT_EQ(memcmp(test_str, buf, send_len), 0); +} + +TEST_F(tls, recv_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int half = send_len / 2; + int p[2]; + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + /* Recv hald of the record, splice the other half */ + EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half); + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK), + half); + EXPECT_EQ(read(p[0], &mem_recv[half], half), half); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, peek_and_splice) +{ + int send_len = TLS_PAYLOAD_MAX_LEN; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int chunk = TLS_PAYLOAD_MAX_LEN / 4; + int n, i, p[2]; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + for (i = 0; i < 4; i++) + EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0), + chunk); + + EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2, + MSG_WAITALL | MSG_PEEK), + chunk * 5 / 2); + EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0); + + n = 0; + while (n < send_len) { + i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0); + EXPECT_GT(i, 0); + n += i; + } + EXPECT_EQ(n, send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + TEST_F(tls, recvmsg_single) { char const *test_str = "test_recvmsg_single"; @@ -1193,60 +1308,30 @@ TEST_F(tls, mutliproc_sendpage_writers) TEST_F(tls, control_msg) { - if (self->notls) - return; - - char cbuf[CMSG_SPACE(sizeof(char))]; - char const *test_str = "test_read"; - int cmsg_len = sizeof(char); + char *test_str = "test_read"; char record_type = 100; - struct cmsghdr *cmsg; - struct msghdr msg; int send_len = 10; - struct iovec vec; char buf[10]; - vec.iov_base = (char *)test_str; - vec.iov_len = 10; - memset(&msg, 0, sizeof(struct msghdr)); - msg.msg_iov = &vec; - msg.msg_iovlen = 1; - msg.msg_control = cbuf; - msg.msg_controllen = sizeof(cbuf); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_TLS; - /* test sending non-record types. */ - cmsg->cmsg_type = TLS_SET_RECORD_TYPE; - cmsg->cmsg_len = CMSG_LEN(cmsg_len); - *CMSG_DATA(cmsg) = record_type; - msg.msg_controllen = cmsg->cmsg_len; + if (self->notls) + SKIP(return, "no TLS support"); - EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0), + send_len); /* Should fail because we didn't provide a control message */ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); - vec.iov_base = buf; - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len); - - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL | MSG_PEEK), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); /* Recv the message again without MSG_PEEK */ - record_type = 0; memset(buf, 0, sizeof(buf)); - EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len); - cmsg = CMSG_FIRSTHDR(&msg); - EXPECT_NE(cmsg, NULL); - EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); - EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - record_type = *((unsigned char *)CMSG_DATA(cmsg)); - EXPECT_EQ(record_type, 100); + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } @@ -1301,6 +1386,160 @@ TEST_F(tls, shutdown_reuse) EXPECT_EQ(errno, EISCONN); } +FIXTURE(tls_err) +{ + int fd, cfd; + int fd2, cfd2; + bool notls; +}; + +FIXTURE_VARIANT(tls_err) +{ + uint16_t tls_version; +}; + +FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm) +{ + .tls_version = TLS_1_2_VERSION, +}; + +FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm) +{ + .tls_version = TLS_1_3_VERSION, +}; + +FIXTURE_SETUP(tls_err) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, + &tls12); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); + if (self->notls) + return; + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(tls_err) +{ + close(self->fd); + close(self->cfd); + close(self->fd2); + close(self->cfd2); +} + +TEST_F(tls_err, bad_rec) +{ + char buf[64]; + + if (self->notls) + SKIP(return, "no TLS support"); + + memset(buf, 0x55, sizeof(buf)); + EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf)); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EMSGSIZE); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} + +TEST_F(tls_err, bad_auth) +{ + char buf[128]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + memrnd(buf, sizeof(buf) / 2); + EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2); + n = recv(self->cfd, buf, sizeof(buf), 0); + EXPECT_GT(n, sizeof(buf) / 2); + + buf[n - 1]++; + + EXPECT_EQ(send(self->fd2, buf, n, 0), n); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_in_large_read) +{ + char txt[3][64]; + char cip[3][128]; + char buf[3 * 128]; + int i, n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Put 3 records in the sockets */ + for (i = 0; i < 3; i++) { + memrnd(txt[i], sizeof(txt[i])); + EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0), + sizeof(txt[i])); + n = recv(self->cfd, cip[i], sizeof(cip[i]), 0); + EXPECT_GT(n, sizeof(txt[i])); + /* Break the third message */ + if (i == 2) + cip[2][n - 1]++; + EXPECT_EQ(send(self->fd2, cip[i], n, 0), n); + } + + /* We should be able to receive the first two messages */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2); + EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0); + EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0); + /* Third mesasge is bad */ + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + +TEST_F(tls_err, bad_cmsg) +{ + char *test_str = "test_read"; + int send_len = 10; + char cip[128]; + char buf[128]; + char txt[64]; + int n; + + if (self->notls) + SKIP(return, "no TLS support"); + + /* Queue up one data record */ + memrnd(txt, sizeof(txt)); + EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt)); + n = recv(self->cfd, cip, sizeof(cip), 0); + EXPECT_GT(n, sizeof(txt)); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10); + n = recv(self->cfd, cip, sizeof(cip), 0); + cip[n - 1]++; /* Break it */ + EXPECT_GT(n, send_len); + EXPECT_EQ(send(self->fd2, cip, n, 0), n); + + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt)); + EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); + EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -1355,64 +1594,82 @@ TEST(non_established) { TEST(keysizes) { struct tls12_crypto_info_aes_gcm_256 tls12; - struct sockaddr_in addr; - int sfd, ret, fd, cfd; - socklen_t len; + int ret, fd, cfd; bool notls; - notls = false; - len = sizeof(addr); - memset(&tls12, 0, sizeof(tls12)); tls12.info.version = TLS_1_2_VERSION; tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256; - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_ANY); - addr.sin_port = 0; + ulp_sock_pair(_metadata, &fd, &cfd, ¬ls); - fd = socket(AF_INET, SOCK_STREAM, 0); - sfd = socket(AF_INET, SOCK_STREAM, 0); + if (!notls) { + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, + sizeof(tls12)); + EXPECT_EQ(ret, 0); + } + + close(fd); + close(cfd); +} + +TEST(tls_v6ops) { + struct tls_crypto_info_keys tls12; + struct sockaddr_in6 addr, addr2; + int sfd, ret, fd; + socklen_t len, len2; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + + addr.sin6_family = AF_INET6; + addr.sin6_addr = in6addr_any; + addr.sin6_port = 0; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + sfd = socket(AF_INET6, SOCK_STREAM, 0); ret = bind(sfd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); ret = listen(sfd, 10); ASSERT_EQ(ret, 0); + len = sizeof(addr); ret = getsockname(sfd, &addr, &len); ASSERT_EQ(ret, 0); ret = connect(fd, &addr, sizeof(addr)); ASSERT_EQ(ret, 0); + len = sizeof(addr); + ret = getsockname(fd, &addr, &len); + ASSERT_EQ(ret, 0); + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); - if (ret != 0) { - notls = true; - printf("Failure setting TCP_ULP, testing without tls\n"); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); } + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - cfd = accept(sfd, &addr, &len); - ASSERT_GE(cfd, 0); + ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); - if (!notls) { - ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", - sizeof("tls")); - EXPECT_EQ(ret, 0); + len2 = sizeof(addr2); + ret = getsockname(fd, &addr2, &len2); + ASSERT_EQ(ret, 0); - ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, - sizeof(tls12)); - EXPECT_EQ(ret, 0); - } + EXPECT_EQ(len2, len); + EXPECT_EQ(memcmp(&addr, &addr2, len), 0); - close(sfd); close(fd); - close(cfd); + close(sfd); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 8748199ac109..ffca314897c4 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -5,7 +5,8 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ - ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh + ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ + conntrack_vrf.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh new file mode 100755 index 000000000000..91f3ef0f1192 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -0,0 +1,219 @@ +#!/bin/sh + +# This script demonstrates interaction of conntrack and vrf. +# The vrf driver calls the netfilter hooks again, with oif/iif +# pointing at the VRF device. +# +# For ingress, this means first iteration has iifname of lower/real +# device. In this script, thats veth0. +# Second iteration is iifname set to vrf device, tvrf in this script. +# +# For egress, this is reversed: first iteration has the vrf device, +# second iteration is done with the lower/real/veth0 device. +# +# test_ct_zone_in demonstrates unexpected change of nftables +# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack +# connection on VRF rcv" +# +# It was possible to assign conntrack zone to a packet (or mark it for +# `notracking`) in the prerouting chain before conntrack, based on real iif. +# +# After the change, the zone assignment is lost and the zone is assigned based +# on the VRF master interface (in case such a rule exists). +# assignment is lost. Instead, assignment based on the `iif` matching +# Thus it is impossible to distinguish packets based on the original +# interface. +# +# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem +# that was supposed to be fixed by the commit mentioned above to make sure +# that any fix to test case 1 won't break masquerade again. + +ksft_skip=4 + +IP0=172.30.30.1 +IP1=172.30.30.2 +PFXL=30 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" + +cleanup() +{ + ip netns pids $ns0 | xargs kill 2>/dev/null + ip netns pids $ns1 | xargs kill 2>/dev/null + + ip netns del $ns0 $ns1 +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns0" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns0" + exit $ksft_skip +fi +ip netns add "$ns1" + +trap cleanup EXIT + +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + +ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not add veth device" + exit $ksft_skip +fi + +ip -net $ns0 li add tvrf type vrf table 9876 +if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + exit $ksft_skip +fi + +ip -net $ns0 li set lo up + +ip -net $ns0 li set veth0 master tvrf +ip -net $ns0 li set tvrf up +ip -net $ns0 li set veth0 up +ip -net $ns1 li set veth0 up + +ip -net $ns0 addr add $IP0/$PFXL dev veth0 +ip -net $ns1 addr add $IP1/$PFXL dev veth0 + +ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& +if [ $? -ne 0 ];then + echo "SKIP: Could not start iperf3" + exit $ksft_skip +fi + +# test vrf ingress handling. +# The incoming connection should be placed in conntrack zone 1, +# as decided by the first iteration of the ruleset. +test_ct_zone_in() +{ +ip netns exec $ns0 nft -f - <<EOF +table testct { + chain rawpre { + type filter hook prerouting priority raw; + + iif { veth0, tvrf } counter meta nftrace set 1 + iif veth0 counter ct zone set 1 counter return + iif tvrf counter ct zone set 2 counter return + ip protocol icmp counter + notrack counter + } + + chain rawout { + type filter hook output priority raw; + + oif veth0 counter ct zone set 1 counter return + oif tvrf counter ct zone set 2 counter return + notrack counter + } +} +EOF + ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null + + # should be in zone 1, not zone 2 + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "PASS: entry found in conntrack zone 1" + else + echo "FAIL: entry not found in conntrack zone 1" + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "FAIL: entry found in zone 2 instead" + else + echo "FAIL: entry not in zone 1 or 2, dumping table" + ip netns exec $ns0 conntrack -L + ip netns exec $ns0 nft list ruleset + fi + fi +} + +# add masq rule that gets evaluated w. outif set to vrf device. +# This tests the first iteration of the packet through conntrack, +# oifname is the vrf device. +test_masquerade_vrf() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null + +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; + # NB: masquerade should always be combined with 'oif(name) bla', + # lack of this is intentional here, we want to exercise double-snat. + ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +# add masq rule that gets evaluated w. outif set to veth device. +# This tests the 2nd iteration of the packet through conntrack, +# oifname is the lower device (veth0 in this case). +test_masquerade_veth() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null +ip netns exec $ns0 nft -f - <<EOF +flush ruleset +table ip nat { + chain postrouting { + type nat hook postrouting priority 0; + meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random + } +} +EOF + ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +test_ct_zone_in +test_masquerade_vrf +test_masquerade_veth + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index da1c1e4b6c86..d88867d2fed7 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -759,19 +759,21 @@ test_port_shadow() local result="" local logmsg="" - echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & - nc_r=$! + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405 - echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & - nc_c=$! + echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 & + sc_r=$! - # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. - echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null + echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport & + sc_c=$! + + sleep 0.3 # ns1 tries to connect to ns0:1405. With default settings this should connect # to client, it matches the conntrack entry created above. - result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405) + result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404) if [ "$result" = "$expect" ] ;then echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" @@ -780,7 +782,7 @@ test_port_shadow() ret=1 fi - kill $nc_r $nc_c 2>/dev/null + kill $sc_r $sc_c 2>/dev/null # flush udp entries for next test round, if any ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 @@ -816,11 +818,10 @@ table $family raw { chain prerouting { type filter hook prerouting priority -300; policy accept; meta iif veth0 udp dport 1405 notrack - udp dport 1405 notrack } chain output { type filter hook output priority -300; policy accept; - udp sport 1405 notrack + meta oif veth0 udp sport 1405 notrack } } EOF @@ -851,6 +852,18 @@ test_port_shadowing() { local family="ip" + conntrack -h >/dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without conntrack tool" + return + fi + + socat -h > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: Could not run nat port shadowing test without socat tool" + return + fi + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 3d202b90b33d..7d27f1f3bc01 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -16,6 +16,10 @@ timeout=4 cleanup() { + ip netns pids ${ns1} | xargs kill 2>/dev/null + ip netns pids ${ns2} | xargs kill 2>/dev/null + ip netns pids ${nsrouter} | xargs kill 2>/dev/null + ip netns del ${ns1} ip netns del ${ns2} ip netns del ${nsrouter} @@ -332,6 +336,55 @@ EOF echo "PASS: tcp via loopback and re-queueing" } +test_icmp_vrf() { + ip -net $ns1 link add tvrf type vrf table 9876 + if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + return + fi + + ip -net $ns1 li set eth0 master tvrf + ip -net $ns1 li set tvrf up + + ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876 +ip netns exec ${ns1} nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } + chain post { + type filter hook postrouting priority 0; policy accept; + meta oifname "tvrf" icmp type echo-request counter queue num 1 + meta oifname "eth0" icmp type echo-request counter queue num 1 + } +} +EOF + ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout & + local nfqpid=$! + + sleep 1 + ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null + + for n in output post; do + for d in tvrf eth0; do + ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1" + if [ $? -ne 0 ] ; then + echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 + ip netns exec ${ns1} nft list ruleset + ret=1 + return + fi + done + done + + wait $nfqpid + [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf" + wait 2>/dev/null +} + ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -372,5 +425,6 @@ test_queue 20 test_tcp_forward test_tcp_localhost test_tcp_localhost_requeue +test_icmp_vrf exit $ret diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 503982b8f295..91832400ddbd 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -68,7 +68,7 @@ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf" diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json index 88a20c781e49..c6046096d9db 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -15,7 +15,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "4", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -37,7 +37,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-9,a-f][0-9,a-f]{0,2} bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}", "matchCount": "256", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -60,7 +60,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "4", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -82,7 +82,7 @@ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -106,7 +106,7 @@ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -128,7 +128,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq", "expExitCode": "2", "verifyCmd": "$TC qdisc show dev $ETH", - "matchPattern": "qdisc pfifo_fast 0: parent 1:[1-4] bands 3 priomap 1 2 2 2 1 2 0 0 1 1 1 1 1 1 1 1", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]", "matchCount": "0", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index ebc4ee0fe179..8a9461aa0878 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2 n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 ip2 link del wg0 ip2 link del wg1 -! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel +read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false +sleep 1 +read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) +(( tx_bytes_after - tx_bytes_before < 70000 )) ip0 link del wg1 ip1 link del wg0 @@ -609,6 +613,28 @@ ip0 link set wg0 up kill $ncat_pid ip0 link del wg0 +# Ensure that dst_cache references don't outlive netns lifetime +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +ip1 addr add fd00:aa::1/64 dev veth1 +ip2 addr add fd00:aa::2/64 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +ip1 -6 route add default dev veth1 via fd00:aa::2 +ip2 -6 route add default dev veth2 via fd00:aa::1 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n1 ping6 -c 1 fd00::2 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + # Ensure there aren't circular reference loops ip1 link add wg1 type wireguard ip2 link add wg2 type wireguard @@ -627,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do done < /dev/kmsg alldeleted=1 for object in "${!objects[@]}"; do - if [[ ${objects["$object"]} != *createddestroyed ]]; then + if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then echo "Error: $object: merely ${objects["$object"]}" >&3 alldeleted=0 fi diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index fe07d97df9fa..2b321b8a96cf 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -47,7 +47,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_TRACE_IRQFLAGS=y CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 74db83a0aedd..a9b5a520a1d2 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_LOG_BUF_SHIFT=18 CONFIG_PRINTK_TIME=y CONFIG_BLK_DEV_INITRD=y CONFIG_LEGACY_VSYSCALL_NONE=y |