446 files changed, 11820 insertions, 5027 deletions
diff --git a/.clang-format b/.clang-format index 1247d54f9e49..8d01225bfcb7 100644 --- a/.clang-format +++ b/.clang-format @@ -535,6 +535,7 @@ ForEachMacros: - 'perf_hpp_list__for_each_sort_list_safe' - 'perf_pmu__for_each_hybrid_pmu' - 'ping_portaddr_for_each_entry' + - 'ping_portaddr_for_each_entry_rcu' - 'plist_for_each' - 'plist_for_each_continue' - 'plist_for_each_entry' diff --git a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml index 2ab4642679c0..55c4f94a14d1 100644 --- a/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml +++ b/Documentation/devicetree/bindings/clock/samsung,exynosautov9-clock.yaml @@ -148,7 +148,7 @@ allOf: items: - const: oscclk - const: dout_clkcmu_fsys1_bus - - const: dout_clkcmu_fsys1_mmc_card + - const: gout_clkcmu_fsys1_mmc_card - const: dout_clkcmu_fsys1_usbdrd - if: diff --git a/Documentation/loongarch/booting.rst b/Documentation/loongarch/booting.rst new file mode 100644 index 000000000000..91eccd410478 --- /dev/null +++ b/Documentation/loongarch/booting.rst @@ -0,0 +1,42 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================= +Booting Linux/LoongArch +======================= + +:Author: Yanteng Si <[email protected]> +:Date: 18 Nov 2022 + +Information passed from BootLoader to kernel +============================================ + +LoongArch supports ACPI and FDT. The information that needs to be passed +to the kernel includes the memmap, the initrd, the command line, optionally +the ACPI/FDT tables, and so on. + +The kernel is passed the following arguments on `kernel_entry` : + + - a0 = efi_boot: `efi_boot` is a flag indicating whether + this boot environment is fully UEFI-compliant. + + - a1 = cmdline: `cmdline` is a pointer to the kernel command line. + + - a2 = systemtable: `systemtable` points to the EFI system table. + All pointers involved at this stage are in physical addresses. + +Header of Linux/LoongArch kernel images +======================================= + +Linux/LoongArch kernel images are EFI images. Being PE files, they have +a 64-byte header structured like:: + + u32 MZ_MAGIC /* "MZ", MS-DOS header */ + u32 res0 = 0 /* Reserved */ + u64 kernel_entry /* Kernel entry point */ + u64 _end - _text /* Kernel image effective size */ + u64 load_offset /* Kernel image load offset from start of RAM */ + u64 res1 = 0 /* Reserved */ + u64 res2 = 0 /* Reserved */ + u64 res3 = 0 /* Reserved */ + u32 LINUX_PE_MAGIC /* Magic number */ + u32 pe_header - _head /* Offset to the PE header */ diff --git a/Documentation/loongarch/index.rst b/Documentation/loongarch/index.rst index aaba648db907..c779bfa00c05 100644 --- a/Documentation/loongarch/index.rst +++ b/Documentation/loongarch/index.rst @@ -9,6 +9,7 @@ LoongArch Architecture :numbered: introduction + booting irq-chip-model features diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index e8fa7ac9e6b1..6969652f593c 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -351,42 +351,26 @@ driver. MAC address setup ----------------- -mlx5 driver provides mechanism to setup the MAC address of the PCI VF/SF. +mlx5 driver support devlink port function attr mechanism to setup MAC +address. 
(refer to Documentation/networking/devlink/devlink-port.rst) -The configured MAC address of the PCI VF/SF will be used by netdevice and rdma -device created for the PCI VF/SF. +RoCE capability setup +--------------------- +Not all mlx5 PCI devices/SFs require RoCE capability. -- Get the MAC address of the VF identified by its unique devlink port index:: +When RoCE capability is disabled, it saves 1 Mbytes worth of system memory per +PCI devices/SF. - $ devlink port show pci/0000:06:00.0/2 - pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 - function: - hw_addr 00:00:00:00:00:00 - -- Set the MAC address of the VF identified by its unique devlink port index:: - - $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55 - - $ devlink port show pci/0000:06:00.0/2 - pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 - function: - hw_addr 00:11:22:33:44:55 - -- Get the MAC address of the SF identified by its unique devlink port index:: - - $ devlink port show pci/0000:06:00.0/32768 - pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88 - function: - hw_addr 00:00:00:00:00:00 - -- Set the MAC address of the SF identified by its unique devlink port index:: +mlx5 driver support devlink port function attr mechanism to setup RoCE +capability. (refer to Documentation/networking/devlink/devlink-port.rst) - $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88 +migratable capability setup +--------------------------- +User who wants mlx5 PCI VFs to be able to perform live migration need to +explicitly enable the VF migratable capability. - $ devlink port show pci/0000:06:00.0/32768 - pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88 - function: - hw_addr 00:00:00:00:88:88 +mlx5 driver support devlink port function attr mechanism to setup migratable +capability. (refer to Documentation/networking/devlink/devlink-port.rst) SF state setup -------------- diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index 98557c2ab1c1..3da590953ce8 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -110,7 +110,7 @@ devlink ports for both the controllers. Function configuration ====================== -A user can configure the function attribute before enumerating the PCI +Users can configure one or more function attributes before enumerating the PCI function. Usually it means, user should configure function attribute before a bus specific device for the function is created. However, when SRIOV is enabled, virtual function devices are created on the PCI bus. @@ -119,9 +119,127 @@ function device to the driver. For subfunctions, this means user should configure port function attribute before activating the port function. A user may set the hardware address of the function using -'devlink port function set hw_addr' command. For Ethernet port function +`devlink port function set hw_addr` command. For Ethernet port function this means a MAC address. +Users may also set the RoCE capability of the function using +`devlink port function set roce` command. + +Users may also set the function as migratable using +'devlink port function set migratable' command. 
+ +Function attributes +=================== + +MAC address setup +----------------- +The configured MAC address of the PCI VF/SF will be used by netdevice and rdma +device created for the PCI VF/SF. + +- Get the MAC address of the VF identified by its unique devlink port index:: + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 + +- Set the MAC address of the VF identified by its unique devlink port index:: + + $ devlink port function set pci/0000:06:00.0/2 hw_addr 00:11:22:33:44:55 + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:11:22:33:44:55 + +- Get the MAC address of the SF identified by its unique devlink port index:: + + $ devlink port show pci/0000:06:00.0/32768 + pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88 + function: + hw_addr 00:00:00:00:00:00 + +- Set the MAC address of the SF identified by its unique devlink port index:: + + $ devlink port function set pci/0000:06:00.0/32768 hw_addr 00:00:00:00:88:88 + + $ devlink port show pci/0000:06:00.0/32768 + pci/0000:06:00.0/32768: type eth netdev enp6s0pf0sf88 flavour pcisf pfnum 0 sfnum 88 + function: + hw_addr 00:00:00:00:88:88 + +RoCE capability setup +--------------------- +Not all PCI VFs/SFs require RoCE capability. + +When RoCE capability is disabled, it saves system memory per PCI VF/SF. + +When user disables RoCE capability for a VF/SF, user application cannot send or +receive any RoCE packets through this VF/SF and RoCE GID table for this PCI +will be empty. + +When RoCE capability is disabled in the device using port function attribute, +VF/SF driver cannot override it. + +- Get RoCE capability of the VF device:: + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 roce enable + +- Set RoCE capability of the VF device:: + + $ devlink port function set pci/0000:06:00.0/2 roce disable + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 roce disable + +migratable capability setup +--------------------------- +Live migration is the process of transferring a live virtual machine +from one physical host to another without disrupting its normal +operation. + +User who want PCI VFs to be able to perform live migration need to +explicitly enable the VF migratable capability. + +When user enables migratable capability for a VF, and the HV binds the VF to VFIO driver +with migration support, the user can migrate the VM with this VF from one HV to a +different one. + +However, when migratable capability is enable, device will disable features which cannot +be migrated. Thus migratable cap can impose limitations on a VF so let the user decide. 
+ +Example of LM with migratable function configuration: +- Get migratable capability of the VF device:: + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 migratable disable + +- Set migratable capability of the VF device:: + + $ devlink port function set pci/0000:06:00.0/2 migratable enable + + $ devlink port show pci/0000:06:00.0/2 + pci/0000:06:00.0/2: type eth netdev enp6s0pf0vf1 flavour pcivf pfnum 0 vfnum 1 + function: + hw_addr 00:00:00:00:00:00 migratable enable + +- Bind VF to VFIO driver with migration support:: + + $ echo <pci_id> > /sys/bus/pci/devices/0000:08:00.0/driver/unbind + $ echo mlx5_vfio_pci > /sys/bus/pci/devices/0000:08:00.0/driver_override + $ echo <pci_id> > /sys/bus/pci/devices/0000:08:00.0/driver/bind + +Attach VF to the VM. +Start the VM. +Perform live migration. + Subfunction ============ diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index bede24ef44fd..f10f8eb44255 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -222,6 +222,7 @@ Userspace to kernel: ``ETHTOOL_MSG_MODULE_GET`` get transceiver module parameters ``ETHTOOL_MSG_PSE_SET`` set PSE parameters ``ETHTOOL_MSG_PSE_GET`` get PSE parameters + ``ETHTOOL_MSG_RSS_GET`` get RSS settings ===================================== ================================= Kernel to userspace: @@ -263,6 +264,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PHC_VCLOCKS_GET_REPLY`` PHC virtual clocks info ``ETHTOOL_MSG_MODULE_GET_REPLY`` transceiver module parameters ``ETHTOOL_MSG_PSE_GET_REPLY`` PSE parameters + ``ETHTOOL_MSG_RSS_GET_REPLY`` RSS settings ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1687,6 +1689,33 @@ to control PoDL PSE Admin functions. This option is implementing ``IEEE 802.3-2018`` 30.15.1.2.1 acPoDLPSEAdminControl. See ``ETHTOOL_A_PODL_PSE_ADMIN_STATE`` for supported values. +RSS_GET +======= + +Get indirection table, hash key and hash function info associated with a +RSS context of an interface similar to ``ETHTOOL_GRSSH`` ioctl request. + +Request contents: + +===================================== ====== ========================== + ``ETHTOOL_A_RSS_HEADER`` nested request header + ``ETHTOOL_A_RSS_CONTEXT`` u32 context number +===================================== ====== ========================== + +Kernel response contents: + +===================================== ====== ========================== + ``ETHTOOL_A_RSS_HEADER`` nested reply header + ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func + ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes + ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes +===================================== ====== ========================== + +ETHTOOL_A_RSS_HFUNC attribute is bitmap indicating the hash function +being used. Current supported options are toeplitz, xor or crc32. +ETHTOOL_A_RSS_INDIR attribute returns RSS indrection table where each byte +indicates queue number. + Request translation =================== @@ -1768,7 +1797,7 @@ are netlink only. 
``ETHTOOL_GMODULEEEPROM`` ``ETHTOOL_MSG_MODULE_EEPROM_GET`` ``ETHTOOL_GEEE`` ``ETHTOOL_MSG_EEE_GET`` ``ETHTOOL_SEEE`` ``ETHTOOL_MSG_EEE_SET`` - ``ETHTOOL_GRSSH`` n/a + ``ETHTOOL_GRSSH`` ``ETHTOOL_MSG_RSS_GET`` ``ETHTOOL_SRSSH`` n/a ``ETHTOOL_GTUNABLE`` n/a ``ETHTOOL_STUNABLE`` n/a diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index be4eb1242057..f17c01834a12 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -179,7 +179,8 @@ SOF_TIMESTAMPING_OPT_ID: identifier and returns that along with the timestamp. The identifier is derived from a per-socket u32 counter (that wraps). For datagram sockets, the counter increments with each sent packet. For stream - sockets, it increments with every byte. + sockets, it increments with every byte. For stream sockets, also set + SOF_TIMESTAMPING_OPT_ID_TCP, see the section below. The counter starts at zero. It is initialized the first time that the socket option is enabled. It is reset each time the option is @@ -192,6 +193,35 @@ SOF_TIMESTAMPING_OPT_ID: among all possibly concurrently outstanding timestamp requests for that socket. +SOF_TIMESTAMPING_OPT_ID_TCP: + Pass this modifier along with SOF_TIMESTAMPING_OPT_ID for new TCP + timestamping applications. SOF_TIMESTAMPING_OPT_ID defines how the + counter increments for stream sockets, but its starting point is + not entirely trivial. This option fixes that. + + For stream sockets, if SOF_TIMESTAMPING_OPT_ID is set, this should + always be set too. On datagram sockets the option has no effect. + + A reasonable expectation is that the counter is reset to zero with + the system call, so that a subsequent write() of N bytes generates + a timestamp with counter N-1. SOF_TIMESTAMPING_OPT_ID_TCP + implements this behavior under all conditions. + + SOF_TIMESTAMPING_OPT_ID without modifier often reports the same, + especially when the socket option is set when no data is in + transmission. If data is being transmitted, it may be off by the + length of the output queue (SIOCOUTQ). + + The difference is due to being based on snd_una versus write_seq. + snd_una is the offset in the stream acknowledged by the peer. This + depends on factors outside of process control, such as network RTT. + write_seq is the last byte written by the process. This offset is + not affected by external inputs. + + The difference is subtle and unlikely to be noticed when configured + at initial socket creation, when no data is queued or sent. But + SOF_TIMESTAMPING_OPT_ID_TCP behavior is more robust regardless of + when the socket option is set. SOF_TIMESTAMPING_OPT_CMSG: Support recv() cmsg for all timestamped packets. Control messages diff --git a/Documentation/translations/zh_CN/loongarch/booting.rst b/Documentation/translations/zh_CN/loongarch/booting.rst new file mode 100644 index 000000000000..fb6440c438f0 --- /dev/null +++ b/Documentation/translations/zh_CN/loongarch/booting.rst @@ -0,0 +1,48 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. 
include:: ../disclaimer-zh_CN.rst + +:Original: Documentation/loongarch/booting.rst + +:翻译: + + 司延腾 Yanteng Si <[email protected]> + +==================== +启动 Linux/LoongArch +==================== + +:作者: 司延腾 <[email protected]> +:日期: 2022年11月18日 + +BootLoader传递给内核的信息 +========================== + +LoongArch支持ACPI和FDT启动,需要传递给内核的信息包括memmap、initrd、cmdline、可 +选的ACPI/FDT表等。 + +内核在 `kernel_entry` 入口处被传递以下参数: + + - a0 = efi_boot: `efi_boot` 是一个标志,表示这个启动环境是否完全符合UEFI + 的要求。 + + - a1 = cmdline: `cmdline` 是一个指向内核命令行的指针。 + + - a2 = systemtable: `systemtable` 指向EFI的系统表,在这个阶段涉及的所有 + 指针都是物理地址。 + +Linux/LoongArch内核镜像文件头 +============================= + +内核镜像是EFI镜像。作为PE文件,它们有一个64字节的头部结构体,如下所示:: + + u32 MZ_MAGIC /* "MZ", MS-DOS 头 */ + u32 res0 = 0 /* 保留 */ + u64 kernel_entry /* 内核入口点 */ + u64 _end - _text /* 内核镜像有效大小 */ + u64 load_offset /* 加载内核镜像相对内存起始地址的偏移量 */ + u64 res1 = 0 /* 保留 */ + u64 res2 = 0 /* 保留 */ + u64 res3 = 0 /* 保留 */ + u32 LINUX_PE_MAGIC /* 魔术数 */ + u32 pe_header - _head /* 到PE头的偏移量 */ diff --git a/Documentation/translations/zh_CN/loongarch/index.rst b/Documentation/translations/zh_CN/loongarch/index.rst index 7d23eb78379d..0273a08342f7 100644 --- a/Documentation/translations/zh_CN/loongarch/index.rst +++ b/Documentation/translations/zh_CN/loongarch/index.rst @@ -14,6 +14,7 @@ LoongArch体系结构 :numbered: introduction + booting irq-chip-model features diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index eee9f857a986..896914e3a847 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7213,14 +7213,13 @@ veto the transition. :Parameters: args[0] is the maximum poll time in nanoseconds :Returns: 0 on success; -1 on error -This capability overrides the kvm module parameter halt_poll_ns for the -target VM. - -VCPU polling allows a VCPU to poll for wakeup events instead of immediately -scheduling during guest halts. The maximum time a VCPU can spend polling is -controlled by the kvm module parameter halt_poll_ns. This capability allows -the maximum halt time to specified on a per-VM basis, effectively overriding -the module parameter for the target VM. +KVM_CAP_HALT_POLL overrides the kvm.halt_poll_ns module parameter to set the +maximum halt-polling time for all vCPUs in the target VM. This capability can +be invoked at any time and any number of times to dynamically change the +maximum halt-polling time. + +See Documentation/virt/kvm/halt-polling.rst for more information on halt +polling. 7.21 KVM_CAP_X86_USER_SPACE_MSR ------------------------------- diff --git a/Documentation/virt/kvm/x86/halt-polling.rst b/Documentation/virt/kvm/halt-polling.rst index 4922e4a15f18..3fae39b1a5ba 100644 --- a/Documentation/virt/kvm/x86/halt-polling.rst +++ b/Documentation/virt/kvm/halt-polling.rst @@ -119,6 +119,19 @@ These module parameters can be set from the debugfs files in: Note: that these module parameters are system wide values and are not able to be tuned on a per vm basis. +Any changes to these parameters will be picked up by new and existing vCPUs the +next time they halt, with the notable exception of VMs using KVM_CAP_HALT_POLL +(see next section). + +KVM_CAP_HALT_POLL +================= + +KVM_CAP_HALT_POLL is a VM capability that allows userspace to override halt_poll_ns +on a per-VM basis. VMs using KVM_CAP_HALT_POLL ignore halt_poll_ns completely (but +still obey halt_poll_ns_grow, halt_poll_ns_grow_start, and halt_poll_ns_shrink). + +See Documentation/virt/kvm/api.rst for more information on this capability. 
+ Further Notes ============= diff --git a/Documentation/virt/kvm/index.rst b/Documentation/virt/kvm/index.rst index e0a2c74e1043..ad13ec55ddfe 100644 --- a/Documentation/virt/kvm/index.rst +++ b/Documentation/virt/kvm/index.rst @@ -17,4 +17,5 @@ KVM locking vcpu-requests + halt-polling review-checklist diff --git a/Documentation/virt/kvm/x86/index.rst b/Documentation/virt/kvm/x86/index.rst index 7ff588826b9f..9ece6b8dc817 100644 --- a/Documentation/virt/kvm/x86/index.rst +++ b/Documentation/virt/kvm/x86/index.rst @@ -10,7 +10,6 @@ KVM for x86 systems amd-memory-encryption cpuid errata - halt-polling hypercalls mmu msr @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc7 +EXTRAVERSION = -rc8 NAME = Hurr durr I'ma ninja sloth # *DOCUMENTATION* diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi index 7a113325abb9..6f9004ebf424 100644 --- a/arch/arm/boot/dts/at91rm9200.dtsi +++ b/arch/arm/boot/dts/at91rm9200.dtsi @@ -666,7 +666,7 @@ compatible = "atmel,at91rm9200-udc"; reg = <0xfffb0000 0x4000>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 2>; - clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 2>; + clocks = <&pmc PMC_TYPE_PERIPHERAL 11>, <&pmc PMC_TYPE_SYSTEM 1>; clock-names = "pclk", "hclk"; status = "disabled"; }; diff --git a/arch/arm/mach-at91/sama5.c b/arch/arm/mach-at91/sama5.c index 67ed68fbe3a5..bf2b5c6a18c6 100644 --- a/arch/arm/mach-at91/sama5.c +++ b/arch/arm/mach-at91/sama5.c @@ -26,7 +26,7 @@ static void sama5_l2c310_write_sec(unsigned long val, unsigned reg) static void __init sama5_secure_cache_init(void) { sam_secure_init(); - if (sam_linux_is_optee_available()) + if (IS_ENABLED(CONFIG_OUTER_CACHE) && sam_linux_is_optee_available()) outer_cache.write_sec = sama5_l2c310_write_sec; } diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index d6cf535d8352..439e2bc5d5d8 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -14,16 +14,8 @@ #ifdef CONFIG_EFI extern void efi_init(void); - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg); #else #define efi_init() - -static inline -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - return false; -} #endif int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 67babd5f04c2..75691a2641c1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -6,7 +6,7 @@ #include <linux/linkage.h> SYM_FUNC_START(__efi_rt_asm_wrapper) - stp x29, x30, [sp, #-112]! + stp x29, x30, [sp, #-32]! mov x29, sp /* @@ -17,20 +17,6 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) stp x1, x18, [sp, #16] /* - * Preserve all callee saved registers and record the stack pointer - * value in a per-CPU variable so we can recover from synchronous - * exceptions occurring while running the firmware routines. - */ - stp x19, x20, [sp, #32] - stp x21, x22, [sp, #48] - stp x23, x24, [sp, #64] - stp x25, x26, [sp, #80] - stp x27, x28, [sp, #96] - - adr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - str x29, [x8] - - /* * We are lucky enough that no EFI runtime services take more than * 5 arguments, so all are passed in registers rather than via the * stack. 
@@ -45,7 +31,7 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) ldp x1, x2, [sp, #16] cmp x2, x18 - ldp x29, x30, [sp], #112 + ldp x29, x30, [sp], #32 b.ne 0f ret 0: @@ -59,18 +45,3 @@ SYM_FUNC_START(__efi_rt_asm_wrapper) mov x18, x2 b efi_handle_corrupted_x18 // tail call SYM_FUNC_END(__efi_rt_asm_wrapper) - -SYM_FUNC_START(__efi_rt_asm_recover) - ldr_this_cpu x8, __efi_rt_asm_recover_sp, x9 - mov sp, x8 - - ldp x0, x18, [sp, #16] - ldp x19, x20, [sp, #32] - ldp x21, x22, [sp, #48] - ldp x23, x24, [sp, #64] - ldp x25, x26, [sp, #80] - ldp x27, x28, [sp, #96] - ldp x29, x30, [sp], #112 - - b efi_handle_runtime_exception -SYM_FUNC_END(__efi_rt_asm_recover) diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index ee53f2a0aa03..a908a37f0367 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -9,7 +9,6 @@ #include <linux/efi.h> #include <linux/init.h> -#include <linux/percpu.h> #include <asm/efi.h> @@ -145,28 +144,3 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f); return s; } - -asmlinkage DEFINE_PER_CPU(u64, __efi_rt_asm_recover_sp); - -asmlinkage efi_status_t __efi_rt_asm_recover(void); - -asmlinkage efi_status_t efi_handle_runtime_exception(const char *f) -{ - pr_err(FW_BUG "Synchronous exception occurred in EFI runtime service %s()\n", f); - clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); - return EFI_ABORTED; -} - -bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg) -{ - /* Check whether the exception occurred while running the firmware */ - if (current_work() != &efi_rts_work.work || regs->pc >= TASK_SIZE_64) - return false; - - pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg); - add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK); - dump_stack(); - - regs->pc = (u64)__efi_rt_asm_recover; - return true; -} diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3cb101e8cb29..5240f6acad64 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -36,7 +36,22 @@ void arch_dma_prep_coherent(struct page *page, size_t size) { unsigned long start = (unsigned long)page_address(page); - dcache_clean_poc(start, start + size); + /* + * The architecture only requires a clean to the PoC here in order to + * meet the requirements of the DMA API. However, some vendors (i.e. + * Qualcomm) abuse the DMA API for transferring buffers from the + * non-secure to the secure world, resetting the system if a non-secure + * access shows up after the buffer has been transferred: + * + * https://lore.kernel.org/r/[email protected] + * + * Using clean+invalidate appears to make this issue less likely, but + * the drivers themselves still need fixing as the CPU could issue a + * speculative read from the buffer via the linear mapping irrespective + * of the cache maintenance we use. Once the drivers are fixed, we can + * relax this to a clean operation. 
+ */ + dcache_clean_inval_poc(start, start + size); } #ifdef CONFIG_IOMMU_DMA diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3e9cf9826417..5b391490e045 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,7 +30,6 @@ #include <asm/bug.h> #include <asm/cmpxchg.h> #include <asm/cpufeature.h> -#include <asm/efi.h> #include <asm/exception.h> #include <asm/daifflags.h> #include <asm/debug-monitors.h> @@ -392,9 +391,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned long esr, msg = "paging request"; } - if (efi_runtime_fixup_exception(regs, msg)) - return; - die_kernel_fault(msg, addr, esr, regs); } diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index aa0e0e0d4ee5..79d5bfd913e0 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -490,6 +490,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 3dd172d9ffea..d82687390b4a 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -78,16 +78,6 @@ extern void calculate_cpu_foreign_map(void); */ extern void show_ipi_list(struct seq_file *p, int prec); -/* - * This function sends a 'reschedule' IPI to another CPU. - * it goes straight through and wastes no time serializing - * anything. Worst case is that we lose a reschedule ... - */ -static inline void smp_send_reschedule(int cpu) -{ - loongson_send_ipi_single(cpu, SMP_RESCHEDULE); -} - static inline void arch_send_call_function_single_ipi(int cpu) { loongson_send_ipi_single(cpu, SMP_CALL_FUNCTION); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 6ed72f7ff278..14508d429ffa 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -149,6 +149,17 @@ void loongson_send_ipi_mask(const struct cpumask *mask, unsigned int action) ipi_write_action(cpu_logical_map(i), (u32)action); } +/* + * This function sends a 'reschedule' IPI to another CPU. + * it goes straight through and wastes no time serializing + * anything. Worst case is that we lose a reschedule ... 
+ */ +void smp_send_reschedule(int cpu) +{ + loongson_send_ipi_single(cpu, SMP_RESCHEDULE); +} +EXPORT_SYMBOL_GPL(smp_send_reschedule); + irqreturn_t loongson_ipi_interrupt(int irq, void *dev) { unsigned int action; diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index d8ee8fbc8c67..58781c6e4191 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -10,6 +10,8 @@ #include <asm/regdef.h> #include <asm/stackframe.h> +#define INVTLB_ADDR_GFALSE_AND_ASID 5 + #define PTRS_PER_PGD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PUD_BITS (PAGE_SHIFT - 3) #define PTRS_PER_PMD_BITS (PAGE_SHIFT - 3) @@ -136,13 +138,10 @@ tlb_huge_update_load: ori t0, ra, _PAGE_VALID st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr - - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 /* * A huge PTE describes an area the size of the @@ -287,13 +286,11 @@ tlb_huge_update_store: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif - tlbsrch - addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16) - addi.d ra, t1, 0 - csrxchg ra, t1, LOONGARCH_CSR_TLBIDX - tlbwr + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 - csrxchg zero, t1, LOONGARCH_CSR_TLBIDX /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -436,6 +433,11 @@ tlb_huge_update_modify: ori t0, ra, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED) st.d t0, t1, 0 #endif + csrrd ra, LOONGARCH_CSR_ASID + csrrd t1, LOONGARCH_CSR_BADV + andi ra, ra, CSR_ASID_ASID + invtlb INVTLB_ADDR_GFALSE_AND_ASID, ra, t1 + /* * A huge PTE describes an area the size of the * configured huge page size. This is twice the @@ -466,7 +468,7 @@ tlb_huge_update_modify: addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16)) csrxchg t1, t0, LOONGARCH_CSR_TLBIDX - tlbwr + tlbfill /* Reset default page size */ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16) diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 6caec386ad2f..4678627673df 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -622,6 +622,7 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return pmd; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_ACCESSED); diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4745bb9998bd..6d8492b6e2b8 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -602,6 +602,7 @@ ____##func(struct pt_regs *regs) /* kernel/traps.c */ DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); #ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_RAW(machine_check_early_boot); DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception_async); #endif DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43f1c76d48ce..a379b0ce19ff 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -113,23 +113,19 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; - /* First arg comes in as a 32 bits pointer. 
*/ - EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); - EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); + /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ + EMIT(PPC_RAW_LI(_R4, 0)); + +#define BPF_TAILCALL_PROLOGUE_SIZE 4 + EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); - /* - * Initialize tail_call_cnt in stack frame if we do tail calls. - * Otherwise, put in NOPs so that it can be skipped when we are - * invoked through a tail call. - */ if (ctx->seen & SEEN_TAILCALL) - EMIT(PPC_RAW_STW(bpf_to_ppc(BPF_REG_1) - 1, _R1, - bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); - else - EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_STW(_R4, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); -#define BPF_TAILCALL_PROLOGUE_SIZE 16 + /* First arg comes in as a 32 bits pointer. */ + EMIT(PPC_RAW_MR(bpf_to_ppc(BPF_REG_1), _R3)); + EMIT(PPC_RAW_LI(bpf_to_ppc(BPF_REG_1) - 1, 0)); /* * We need a stack frame, but we don't necessarily need to @@ -170,24 +166,24 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx for (i = BPF_PPC_NVR_MIN; i <= 31; i++) if (bpf_is_seen_register(ctx, i)) EMIT(PPC_RAW_LWZ(i, _R1, bpf_jit_stack_offsetof(ctx, i))); -} - -void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) -{ - EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); - - bpf_jit_emit_common_epilogue(image, ctx); - - /* Tear down our stack frame */ if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); + /* Tear down our stack frame */ EMIT(PPC_RAW_ADDI(_R1, _R1, BPF_PPC_STACKFRAME(ctx))); if (ctx->seen & SEEN_FUNC) EMIT(PPC_RAW_MTLR(_R0)); +} + +void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) +{ + EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); + + bpf_jit_emit_common_epilogue(image, ctx); + EMIT(PPC_RAW_BLR()); } @@ -244,7 +240,6 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o EMIT(PPC_RAW_RLWINM(_R3, b2p_index, 2, 0, 29)); EMIT(PPC_RAW_ADD(_R3, _R3, b2p_bpf_array)); EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_array, ptrs))); - EMIT(PPC_RAW_STW(_R0, _R1, bpf_jit_stack_offsetof(ctx, BPF_PPC_TC))); /* * if (prog == NULL) @@ -255,19 +250,14 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o /* goto *(prog->bpf_func + prologue_size); */ EMIT(PPC_RAW_LWZ(_R3, _R3, offsetof(struct bpf_prog, bpf_func))); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_LWZ(_R0, _R1, BPF_PPC_STACKFRAME(ctx) + PPC_LR_STKOFF)); - EMIT(PPC_RAW_ADDIC(_R3, _R3, BPF_TAILCALL_PROLOGUE_SIZE)); - - if (ctx->seen & SEEN_FUNC) - EMIT(PPC_RAW_MTLR(_R0)); - EMIT(PPC_RAW_MTCTR(_R3)); EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_1))); + /* Put tail_call_cnt in r4 */ + EMIT(PPC_RAW_MR(_R4, _R0)); + /* tear restore NVRs, ... 
*/ bpf_jit_emit_common_epilogue(image, ctx); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fa78595a6089..593cf09264d8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -317,9 +317,9 @@ config SMP config NR_CPUS int "Maximum number of CPUs (2-512)" depends on SMP - range 2 512 if !SBI_V01 - range 2 32 if SBI_V01 && 32BIT - range 2 64 if SBI_V01 && 64BIT + range 2 512 if !RISCV_SBI_V01 + range 2 32 if RISCV_SBI_V01 && 32BIT + range 2 64 if RISCV_SBI_V01 && 64BIT default "32" if 32BIT default "64" if 64BIT diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 1b471ff73178..816e753de636 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -23,6 +23,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) #define REG_SC __REG_SEL(sc.d, sc.w) +#define REG_AMOSWAP_AQ __REG_SEL(amoswap.d.aq, amoswap.w.aq) #define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index f74879a8f1ea..e229d7be4b66 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -10,6 +10,7 @@ #include <asm/mmu_context.h> #include <asm/ptrace.h> #include <asm/tlbflush.h> +#include <asm/pgalloc.h> #ifdef CONFIG_EFI extern void efi_init(void); @@ -20,7 +21,10 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); -#define arch_efi_call_virt_setup() efi_virtmap_load() +#define arch_efi_call_virt_setup() ({ \ + sync_kernel_mappings(efi_mm.pgd); \ + efi_virtmap_load(); \ + }) #define arch_efi_call_virt_teardown() efi_virtmap_unload() #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h index 947f23d7b6af..59dc12b5b7e8 100644 --- a/arch/riscv/include/asm/pgalloc.h +++ b/arch/riscv/include/asm/pgalloc.h @@ -127,6 +127,13 @@ static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d) #define __p4d_free_tlb(tlb, p4d, addr) p4d_free((tlb)->mm, p4d) #endif /* __PAGETABLE_PMD_FOLDED */ +static inline void sync_kernel_mappings(pgd_t *pgd) +{ + memcpy(pgd + USER_PTRS_PER_PGD, + init_mm.pgd + USER_PTRS_PER_PGD, + (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); +} + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; @@ -135,9 +142,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) if (likely(pgd != NULL)) { memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); /* Copy kernel mappings */ - memcpy(pgd + USER_PTRS_PER_PGD, - init_mm.pgd + USER_PTRS_PER_PGD, - (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); + sync_kernel_mappings(pgd); } return pgd; } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ec936910a96..92ec2d9d7273 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -600,6 +600,7 @@ static inline int pmd_dirty(pmd_t pmd) return pte_dirty(pmd_pte(pmd)); } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pte_young(pmd_pte(pmd)); diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index d3443be7eedc..3831b638ecab 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -50,6 +50,9 @@ void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); /* Clear IPI for current CPU */ void riscv_clear_ipi(void); +/* Check other CPUs stop or not */ +bool 
smp_crash_stop_failed(void); + /* Secondary hart entry */ asmlinkage void smp_callin(void); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index b9eda3fcbd6d..186abd146eaf 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -404,6 +404,19 @@ handle_syscall_trace_exit: #ifdef CONFIG_VMAP_STACK handle_kernel_stack_overflow: + /* + * Takes the psuedo-spinlock for the shadow stack, in case multiple + * harts are concurrently overflowing their kernel stacks. We could + * store any value here, but since we're overflowing the kernel stack + * already we only have SP to use as a scratch register. So we just + * swap in the address of the spinlock, as that's definately non-zero. + * + * Pairs with a store_release in handle_bad_stack(). + */ +1: la sp, spin_shadow_stack + REG_AMOSWAP_AQ sp, sp, (sp) + bnez sp, 1b + la sp, shadow_stack addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index ee79e6839b86..2d139b724bc8 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -15,6 +15,8 @@ #include <linux/compiler.h> /* For unreachable() */ #include <linux/cpu.h> /* For cpu_down() */ #include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/irq.h> /* * kexec_image_info - Print received image details @@ -138,20 +140,35 @@ void machine_shutdown(void) #endif } -/* Override the weak function in kernel/panic.c */ -void crash_smp_send_stop(void) +static void machine_kexec_mask_interrupts(void) { - static int cpus_stopped; + unsigned int i; + struct irq_desc *desc; - /* - * This function can be called twice in panic path, but obviously - * we execute this only once. - */ - if (cpus_stopped) - return; + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); - smp_send_stop(); - cpus_stopped = 1; + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } } /* @@ -169,6 +186,8 @@ machine_crash_shutdown(struct pt_regs *regs) crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + pr_info("Starting crashdump kernel...\n"); } @@ -195,6 +214,11 @@ machine_kexec(struct kimage *image) void *control_code_buffer = page_address(image->control_code_page); riscv_kexec_method kexec_method = NULL; +#ifdef CONFIG_SMP + WARN(smp_crash_stop_failed(), + "Some CPUs may be stale, kdump will be unreliable.\n"); +#endif + if (image->type != KEXEC_TYPE_CRASH) kexec_method = control_code_buffer; else diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 67ec1fadcfe2..86acd690d529 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -322,10 +322,11 @@ subsys_initcall(topology_init); void free_initmem(void) { - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) - set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), - IS_ENABLED(CONFIG_64BIT) ? 
- set_memory_rw : set_memory_rw_nx); + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); + if (IS_ENABLED(CONFIG_64BIT)) + set_kernel_memory(__init_begin, __init_end, set_memory_nx); + } free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 760a64518c58..8c3b59f1f9b8 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -12,6 +12,7 @@ #include <linux/clockchips.h> #include <linux/interrupt.h> #include <linux/module.h> +#include <linux/kexec.h> #include <linux/profile.h> #include <linux/smp.h> #include <linux/sched.h> @@ -22,11 +23,13 @@ #include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> +#include <asm/cpu_ops.h> enum ipi_message_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, IPI_IRQ_WORK, IPI_TIMER, IPI_MAX @@ -71,6 +74,32 @@ static void ipi_stop(void) wait_for_interrupt(); } +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); + +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_has_hotplug(cpu)) + cpu_ops[cpu]->cpu_stop(); +#endif + + for(;;) + wait_for_interrupt(); +} +#else +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + unreachable(); +} +#endif + static const struct riscv_ipi_ops *ipi_ops __ro_after_init; void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) @@ -124,8 +153,9 @@ void arch_irq_work_raise(void) void handle_IPI(struct pt_regs *regs) { - unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; - unsigned long *stats = ipi_data[smp_processor_id()].stats; + unsigned int cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[cpu].bits; + unsigned long *stats = ipi_data[cpu].stats; riscv_clear_ipi(); @@ -154,6 +184,10 @@ void handle_IPI(struct pt_regs *regs) ipi_stop(); } + if (ops & (1 << IPI_CPU_CRASH_STOP)) { + ipi_cpu_crash_stop(cpu, get_irq_regs()); + } + if (ops & (1 << IPI_IRQ_WORK)) { stats[IPI_IRQ_WORK]++; irq_work_run(); @@ -176,6 +210,7 @@ static const char * const ipi_names[] = { [IPI_RESCHEDULE] = "Rescheduling interrupts", [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", [IPI_TIMER] = "Timer broadcast interrupts", }; @@ -235,6 +270,64 @@ void smp_send_stop(void) cpumask_pr_args(cpu_online_mask)); } +#ifdef CONFIG_KEXEC_CORE +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + cpumask_t mask; + unsigned long timeout; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. 
+ */ + if (num_other_online_cpus() == 0) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + void smp_send_reschedule(int cpu) { send_ipi_single(cpu, IPI_RESCHEDULE); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f3e96d60a2ff..7abd8e4c4df6 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -221,11 +221,29 @@ asmlinkage unsigned long get_overflow_stack(void) OVERFLOW_STACK_SIZE; } +/* + * A pseudo spinlock to protect the shadow stack from being used by multiple + * harts concurrently. This isn't a real spinlock because the lock side must + * be taken without a valid stack and only a single register, it's only taken + * while in the process of panicing anyway so the performance and error + * checking a proper spinlock gives us doesn't matter. + */ +unsigned long spin_shadow_stack; + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + /* + * We're done with the shadow stack by this point, as we're on the + * overflow stack. Tell any other concurrent overflowing harts that + * they can proceed with panicing by releasing the pseudo-spinlock. + * + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. 
+ */ + smp_store_release(&spin_shadow_stack, 0); + console_verbose(); pr_emerg("Insufficient stack space to handle exception!\n"); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index db6548509bb3..06e6b27f3bcc 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -17,6 +17,7 @@ vdso-syms += flush_icache obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index f1cb9391190d..11e901286414 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -763,6 +763,7 @@ static inline int pmd_dirty(pmd_t pmd) return (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 94138f8f0c1c..ace2541ababd 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -546,8 +546,10 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI)) scb_s->eca |= scb_o->eca & ECA_CEI; /* Epoch Extension */ - if (test_kvm_facility(vcpu->kvm, 139)) + if (test_kvm_facility(vcpu->kvm, 139)) { scb_s->ecd |= scb_o->ecd & ECD_MEF; + scb_s->epdx = scb_o->epdx; + } /* etoken */ if (test_kvm_facility(vcpu->kvm, 156)) diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index a779418ceba9..3bc9736bddb1 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -693,6 +693,7 @@ static inline unsigned long pmd_dirty(pmd_t pmd) return pte_dirty(pte); } +#define pmd_young pmd_young static inline unsigned long pmd_young(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c936ce9f0c47..dfdb103ae4f6 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -321,7 +321,7 @@ static inline void indirect_branch_prediction_barrier(void) /* The Intel SPEC CTRL MSR base value cache */ extern u64 x86_spec_ctrl_base; DECLARE_PER_CPU(u64, x86_spec_ctrl_current); -extern void write_spec_ctrl_current(u64 val, bool force); +extern void update_spec_ctrl_cond(u64 val); extern u64 spec_ctrl_current(void); /* diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 5059799bebe3..286a71810f9e 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -139,6 +139,7 @@ static inline int pmd_dirty(pmd_t pmd) return pmd_flags(pmd) & _PAGE_DIRTY; } +#define pmd_young pmd_young static inline int pmd_young(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_ACCESSED; @@ -1438,6 +1439,14 @@ static inline bool arch_has_hw_pte_young(void) return true; } +#ifdef CONFIG_XEN_PV +#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return !cpu_feature_enabled(X86_FEATURE_XENPV); +} +#endif + #ifdef CONFIG_PAGE_TABLE_CHECK static inline bool pte_user_accessible_page(pte_t pte) { diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3e3230cccaa7..6daf84229548 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -60,11 +60,18 @@ 
EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); static DEFINE_MUTEX(spec_ctrl_mutex); +/* Update SPEC_CTRL MSR and its cached copy unconditionally */ +static void update_spec_ctrl(u64 val) +{ + this_cpu_write(x86_spec_ctrl_current, val); + wrmsrl(MSR_IA32_SPEC_CTRL, val); +} + /* * Keep track of the SPEC_CTRL MSR value for the current task, which may differ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update(). */ -void write_spec_ctrl_current(u64 val, bool force) +void update_spec_ctrl_cond(u64 val) { if (this_cpu_read(x86_spec_ctrl_current) == val) return; @@ -75,7 +82,7 @@ void write_spec_ctrl_current(u64 val, bool force) * When KERNEL_IBRS this MSR is written on return-to-user, unless * forced the update can be delayed until that time. */ - if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) + if (!cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS)) wrmsrl(MSR_IA32_SPEC_CTRL, val); } @@ -1328,7 +1335,7 @@ static void __init spec_ctrl_disable_kernel_rrsba(void) if (ia32_cap & ARCH_CAP_RRSBA) { x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -1450,7 +1457,7 @@ static void __init spectre_v2_select_mitigation(void) if (spectre_v2_in_ibrs_mode(mode)) { x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } switch (mode) { @@ -1564,7 +1571,7 @@ static void __init spectre_v2_select_mitigation(void) static void update_stibp_msr(void * __unused) { u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP); - write_spec_ctrl_current(val, true); + update_spec_ctrl(val); } /* Update x86_spec_ctrl_base in case SMT state changed. */ @@ -1797,7 +1804,7 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void) x86_amd_ssb_disable(); } else { x86_spec_ctrl_base |= SPEC_CTRL_SSBD; - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); } } @@ -2048,7 +2055,7 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) void x86_spec_ctrl_setup_ap(void) { if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) - write_spec_ctrl_current(x86_spec_ctrl_base, true); + update_spec_ctrl(x86_spec_ctrl_base); if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) x86_amd_ssb_disable(); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c21b7347a26d..e436c9c1ef3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -600,7 +600,7 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } if (updmsr) - write_spec_ctrl_current(msr, false); + update_spec_ctrl_cond(msr); } static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2835bd796639..69227f77b201 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10574,8 +10574,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN; vcpu->mmio_needed = 0; r = 0; + goto out; } - goto out; } if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { /* Page is swapped out. 
Do synthetic halt */ diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index 23f49a2f4d14..6cceca64a6bc 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -562,17 +562,26 @@ static int initiator_cmp(void *priv, const struct list_head *a, { struct memory_initiator *ia; struct memory_initiator *ib; - unsigned long *p_nodes = priv; ia = list_entry(a, struct memory_initiator, node); ib = list_entry(b, struct memory_initiator, node); - set_bit(ia->processor_pxm, p_nodes); - set_bit(ib->processor_pxm, p_nodes); - return ia->processor_pxm - ib->processor_pxm; } +static int initiators_to_nodemask(unsigned long *p_nodes) +{ + struct memory_initiator *initiator; + + if (list_empty(&initiators)) + return -ENXIO; + + list_for_each_entry(initiator, &initiators, node) + set_bit(initiator->processor_pxm, p_nodes); + + return 0; +} + static void hmat_register_target_initiators(struct memory_target *target) { static DECLARE_BITMAP(p_nodes, MAX_NUMNODES); @@ -609,7 +618,10 @@ static void hmat_register_target_initiators(struct memory_target *target) * initiators. */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); + list_sort(NULL, &initiators, initiator_cmp); + if (initiators_to_nodemask(p_nodes) < 0) + return; + if (!access0done) { for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; @@ -643,8 +655,9 @@ static void hmat_register_target_initiators(struct memory_target *target) /* Access 1 ignores Generic Initiators */ bitmap_zero(p_nodes, MAX_NUMNODES); - list_sort(p_nodes, &initiators, initiator_cmp); - best = 0; + if (initiators_to_nodemask(p_nodes) < 0) + return; + for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) { loc = localities_types[i]; if (!loc) diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index ddf17e2d266c..b9e336bacf17 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -109,7 +109,7 @@ struct clk *ahci_platform_find_clk(struct ahci_host_priv *hpriv, const char *con int i; for (i = 0; i < hpriv->n_clks; i++) { - if (!strcmp(hpriv->clks[i].id, con_id)) + if (hpriv->clks[i].id && !strcmp(hpriv->clks[i].id, con_id)) return hpriv->clks[i].clk; } diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 271963805a38..f05018988a17 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -2056,6 +2056,11 @@ static int btusb_setup_csr(struct hci_dev *hdev) rp = (struct hci_rp_read_local_version *)skb->data; + bt_dev_info(hdev, "CSR: Setting up dongle with HCI ver=%u rev=%04x; LMP ver=%u subver=%04x; manufacturer=%u", + le16_to_cpu(rp->hci_ver), le16_to_cpu(rp->hci_rev), + le16_to_cpu(rp->lmp_ver), le16_to_cpu(rp->lmp_subver), + le16_to_cpu(rp->manufacturer)); + /* Detect a wide host of Chinese controllers that aren't CSR. * * Known fake bcdDevices: 0x0100, 0x0134, 0x1915, 0x2520, 0x7558, 0x8891 @@ -2118,6 +2123,7 @@ static int btusb_setup_csr(struct hci_dev *hdev) * without these the controller will lock up. 
*/ set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + set_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks); set_bit(HCI_QUIRK_BROKEN_FILTER_CLEAR_ALL, &hdev->quirks); set_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks); diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c index 1621ce818705..d69905233aff 100644 --- a/drivers/char/tpm/tpm-interface.c +++ b/drivers/char/tpm/tpm-interface.c @@ -401,13 +401,14 @@ int tpm_pm_suspend(struct device *dev) !pm_suspend_via_firmware()) goto suspended; - if (!tpm_chip_start(chip)) { + rc = tpm_try_get_ops(chip); + if (!rc) { if (chip->flags & TPM_CHIP_FLAG_TPM2) tpm2_shutdown(chip, TPM2_SU_STATE); else rc = tpm1_pm_suspend(chip, tpm_suspend_pcr); - tpm_chip_stop(chip); + tpm_put_ops(chip); } suspended: diff --git a/drivers/clk/at91/at91rm9200.c b/drivers/clk/at91/at91rm9200.c index b174f727a8ef..16870943a13e 100644 --- a/drivers/clk/at91/at91rm9200.c +++ b/drivers/clk/at91/at91rm9200.c @@ -40,7 +40,7 @@ static const struct clk_pll_characteristics rm9200_pll_characteristics = { }; static const struct sck at91rm9200_systemck[] = { - { .n = "udpck", .p = "usbck", .id = 2 }, + { .n = "udpck", .p = "usbck", .id = 1 }, { .n = "uhpck", .p = "usbck", .id = 4 }, { .n = "pck0", .p = "prog0", .id = 8 }, { .n = "pck1", .p = "prog1", .id = 9 }, diff --git a/drivers/clk/qcom/gcc-sc8280xp.c b/drivers/clk/qcom/gcc-sc8280xp.c index a18ed88f3b82..b3198784e1c3 100644 --- a/drivers/clk/qcom/gcc-sc8280xp.c +++ b/drivers/clk/qcom/gcc-sc8280xp.c @@ -5364,6 +5364,8 @@ static struct clk_branch gcc_ufs_1_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_1_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5432,6 +5434,8 @@ static struct clk_branch gcc_ufs_card_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_card_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, @@ -5848,6 +5852,8 @@ static struct clk_branch gcc_ufs_ref_clkref_clk = { .enable_mask = BIT(0), .hw.init = &(const struct clk_init_data) { .name = "gcc_ufs_ref_clkref_clk", + .parent_data = &gcc_parent_data_tcxo, + .num_parents = 1, .ops = &clk_branch2_ops, }, }, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 7cf5e130e92f..0f21a8a767ac 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/ktime.h> #include <linux/pm_domain.h> -#include <linux/pm_runtime.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> #include <linux/reset-controller.h> @@ -56,22 +55,6 @@ enum gdsc_status { GDSC_ON }; -static int gdsc_pm_runtime_get(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_resume_and_get(sc->dev); -} - -static int gdsc_pm_runtime_put(struct gdsc *sc) -{ - if (!sc->dev) - return 0; - - return pm_runtime_put_sync(sc->dev); -} - /* Returns 1 if GDSC status is status, 0 if not, and < 0 on error */ static int gdsc_check_status(struct gdsc *sc, enum gdsc_status status) { @@ -271,8 +254,9 @@ static void gdsc_retain_ff_on(struct gdsc *sc) regmap_update_bits(sc->regmap, sc->gdscr, mask, mask); } -static int _gdsc_enable(struct gdsc *sc) +static int gdsc_enable(struct generic_pm_domain *domain) { + struct gdsc *sc = domain_to_gdsc(domain); int ret; if (sc->pwrsts == PWRSTS_ON) @@ -328,22 +312,11 @@ static int _gdsc_enable(struct gdsc *sc) return 
0; } -static int gdsc_enable(struct generic_pm_domain *domain) +static int gdsc_disable(struct generic_pm_domain *domain) { struct gdsc *sc = domain_to_gdsc(domain); int ret; - ret = gdsc_pm_runtime_get(sc); - if (ret) - return ret; - - return _gdsc_enable(sc); -} - -static int _gdsc_disable(struct gdsc *sc) -{ - int ret; - if (sc->pwrsts == PWRSTS_ON) return gdsc_assert_reset(sc); @@ -388,18 +361,6 @@ static int _gdsc_disable(struct gdsc *sc) return 0; } -static int gdsc_disable(struct generic_pm_domain *domain) -{ - struct gdsc *sc = domain_to_gdsc(domain); - int ret; - - ret = _gdsc_disable(sc); - - gdsc_pm_runtime_put(sc); - - return ret; -} - static int gdsc_init(struct gdsc *sc) { u32 mask, val; @@ -447,11 +408,6 @@ static int gdsc_init(struct gdsc *sc) return ret; } - /* ...and the power-domain */ - ret = gdsc_pm_runtime_get(sc); - if (ret) - goto err_disable_supply; - /* * Votable GDSCs can be ON due to Vote from other masters. * If a Votable GDSC is ON, make sure we have a Vote. @@ -459,14 +415,14 @@ static int gdsc_init(struct gdsc *sc) if (sc->flags & VOTABLE) { ret = gdsc_update_collapse_bit(sc, false); if (ret) - goto err_put_rpm; + goto err_disable_supply; } /* Turn on HW trigger mode if supported */ if (sc->flags & HW_CTRL) { ret = gdsc_hwctrl(sc, true); if (ret < 0) - goto err_put_rpm; + goto err_disable_supply; } /* @@ -496,13 +452,10 @@ static int gdsc_init(struct gdsc *sc) ret = pm_genpd_init(&sc->pd, NULL, !on); if (ret) - goto err_put_rpm; + goto err_disable_supply; return 0; -err_put_rpm: - if (on) - gdsc_pm_runtime_put(sc); err_disable_supply: if (on && sc->rsupply) regulator_disable(sc->rsupply); @@ -541,8 +494,6 @@ int gdsc_register(struct gdsc_desc *desc, for (i = 0; i < num; i++) { if (!scs[i]) continue; - if (pm_runtime_enabled(dev)) - scs[i]->dev = dev; scs[i]->regmap = regmap; scs[i]->rcdev = rcdev; ret = gdsc_init(scs[i]); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 981a12c8502d..803512688336 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -30,7 +30,6 @@ struct reset_controller_dev; * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller - * @dev: the device holding the GDSC, used for pm_runtime calls */ struct gdsc { struct generic_pm_domain pd; @@ -74,7 +73,6 @@ struct gdsc { const char *supply; struct regulator *rsupply; - struct device *dev; }; struct gdsc_desc { diff --git a/drivers/clk/samsung/clk-exynos-clkout.c b/drivers/clk/samsung/clk-exynos-clkout.c index 273f77d54dab..e6d6cbf8c4e6 100644 --- a/drivers/clk/samsung/clk-exynos-clkout.c +++ b/drivers/clk/samsung/clk-exynos-clkout.c @@ -81,17 +81,19 @@ MODULE_DEVICE_TABLE(of, exynos_clkout_ids); static int exynos_clkout_match_parent_dev(struct device *dev, u32 *mux_mask) { const struct exynos_clkout_variant *variant; + const struct of_device_id *match; if (!dev->parent) { dev_err(dev, "not instantiated from MFD\n"); return -EINVAL; } - variant = of_device_get_match_data(dev->parent); - if (!variant) { + match = of_match_device(exynos_clkout_ids, dev->parent); + if (!match) { dev_err(dev, "cannot match parent device\n"); return -EINVAL; } + variant = match->data; *mux_mask = variant->mux_mask; diff --git a/drivers/clk/samsung/clk-exynos7885.c b/drivers/clk/samsung/clk-exynos7885.c index 62ce6814f141..0d2a950ed184 100644 --- a/drivers/clk/samsung/clk-exynos7885.c +++ b/drivers/clk/samsung/clk-exynos7885.c @@ -231,7 +231,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { 
CLK_CON_DIV_PLL_SHARED0_DIV2, 0, 1), DIV(CLK_DOUT_SHARED0_DIV3, "dout_shared0_div3", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "fout_shared0_pll", + DIV(CLK_DOUT_SHARED0_DIV4, "dout_shared0_div4", "dout_shared0_div2", CLK_CON_DIV_PLL_SHARED0_DIV4, 0, 1), DIV(CLK_DOUT_SHARED0_DIV5, "dout_shared0_div5", "fout_shared0_pll", CLK_CON_DIV_PLL_SHARED0_DIV5, 0, 3), @@ -239,7 +239,7 @@ static const struct samsung_div_clock top_div_clks[] __initconst = { CLK_CON_DIV_PLL_SHARED1_DIV2, 0, 1), DIV(CLK_DOUT_SHARED1_DIV3, "dout_shared1_div3", "fout_shared1_pll", CLK_CON_DIV_PLL_SHARED1_DIV3, 0, 2), - DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "fout_shared1_pll", + DIV(CLK_DOUT_SHARED1_DIV4, "dout_shared1_div4", "dout_shared1_div2", CLK_CON_DIV_PLL_SHARED1_DIV4, 0, 1), /* CORE */ diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index 969a552da8d2..a0d66fabf073 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -51,7 +51,7 @@ static int riscv_clock_next_event(unsigned long delta, static unsigned int riscv_clock_event_irq; static DEFINE_PER_CPU(struct clock_event_device, riscv_clock_event) = { .name = "riscv_timer_clockevent", - .features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP, + .features = CLOCK_EVT_FEAT_ONESHOT, .rating = 100, .set_next_event = riscv_clock_next_event, }; diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 97086fab698e..903325aac991 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,13 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static struct resource hmem_active = { + .name = "HMEM devices", + .start = 0, + .end = -1, + .flags = IORESOURCE_MEM, +}; + void hmem_register_device(int target_nid, struct resource *r) { /* define a clean / non-busy resource for the platform device */ @@ -41,6 +48,12 @@ void hmem_register_device(int target_nid, struct resource *r) goto out_pdev; } + if (!__request_region(&hmem_active, res.start, resource_size(&res), + dev_name(&pdev->dev), 0)) { + dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + goto out_active; + } + pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, @@ -66,6 +79,8 @@ void hmem_register_device(int target_nid, struct resource *r) return; out_resource: + __release_region(&hmem_active, res.start, resource_size(&res)); +out_active: platform_device_put(pdev); out_pdev: memregion_free(id); @@ -73,15 +88,6 @@ out_pdev: static __init int hmem_register_one(struct resource *res, void *data) { - /* - * If the resource is not a top-level resource it was already - * assigned to a device by the HMAT parsing. 
- */ - if (res->parent != &iomem_resource) { - pr_info("HMEM: skip %pr, already claimed\n", res); - return 0; - } - hmem_register_device(phys_to_target_node(res->start), res); return 0; diff --git a/drivers/gpio/gpio-amd8111.c b/drivers/gpio/gpio-amd8111.c index 14e6b3e64add..6f3ded619c8b 100644 --- a/drivers/gpio/gpio-amd8111.c +++ b/drivers/gpio/gpio-amd8111.c @@ -226,7 +226,10 @@ found: ioport_unmap(gp.pm); goto out; } + return 0; + out: + pci_dev_put(pdev); return err; } @@ -234,6 +237,7 @@ static void __exit amd_gpio_exit(void) { gpiochip_remove(&gp.chip); ioport_unmap(gp.pm); + pci_dev_put(gp.pdev); } module_init(amd_gpio_init); diff --git a/drivers/gpio/gpio-rockchip.c b/drivers/gpio/gpio-rockchip.c index 870910bb9dd3..200e43a6f4b4 100644 --- a/drivers/gpio/gpio-rockchip.c +++ b/drivers/gpio/gpio-rockchip.c @@ -610,6 +610,7 @@ static int rockchip_gpiolib_register(struct rockchip_pin_bank *bank) return -ENODATA; pctldev = of_pinctrl_get(pctlnp); + of_node_put(pctlnp); if (!pctldev) return -ENODEV; diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 4756ea08894f..a70522aef355 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -526,12 +526,13 @@ static int gpiochip_setup_dev(struct gpio_device *gdev) if (ret) return ret; + /* From this point, the .release() function cleans up gpio_device */ + gdev->dev.release = gpiodevice_release; + ret = gpiochip_sysfs_register(gdev); if (ret) goto err_remove_device; - /* From this point, the .release() function cleans up gpio_device */ - gdev->dev.release = gpiodevice_release; dev_dbg(&gdev->dev, "registered GPIOs %d to %d on %s\n", gdev->base, gdev->base + gdev->ngpio - 1, gdev->chip->label ? : "generic"); @@ -597,10 +598,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct fwnode_handle *fwnode = NULL; struct gpio_device *gdev; unsigned long flags; - int base = gc->base; unsigned int i; + u32 ngpios = 0; + int base = 0; int ret = 0; - u32 ngpios; if (gc->fwnode) fwnode = gc->fwnode; @@ -647,17 +648,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, else gdev->owner = THIS_MODULE; - gdev->descs = kcalloc(gc->ngpio, sizeof(gdev->descs[0]), GFP_KERNEL); - if (!gdev->descs) { - ret = -ENOMEM; - goto err_free_dev_name; - } - /* * Try the device properties if the driver didn't supply the number * of GPIO lines. */ - if (gc->ngpio == 0) { + ngpios = gc->ngpio; + if (ngpios == 0) { ret = device_property_read_u32(&gdev->dev, "ngpios", &ngpios); if (ret == -ENODATA) /* @@ -668,7 +664,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, */ ngpios = 0; else if (ret) - goto err_free_descs; + goto err_free_dev_name; gc->ngpio = ngpios; } @@ -676,13 +672,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, if (gc->ngpio == 0) { chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); ret = -EINVAL; - goto err_free_descs; + goto err_free_dev_name; } if (gc->ngpio > FASTPATH_NGPIO) chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", gc->ngpio, FASTPATH_NGPIO); + gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); + if (!gdev->descs) { + ret = -ENOMEM; + goto err_free_dev_name; + } + gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); if (!gdev->label) { ret = -ENOMEM; @@ -701,11 +703,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * it may be a pipe dream. It will not happen before we get rid * of the sysfs interface anyways. 
*/ + base = gc->base; if (base < 0) { base = gpiochip_find_base(gc->ngpio); if (base < 0) { - ret = base; spin_unlock_irqrestore(&gpio_lock, flags); + ret = base; + base = 0; goto err_free_label; } /* @@ -816,6 +820,11 @@ err_remove_of_chip: err_free_gpiochip_mask: gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); + if (gdev->dev.release) { + /* release() has been registered by gpiochip_setup_dev() */ + put_device(&gdev->dev); + goto err_print_message; + } err_remove_from_list: spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); @@ -829,13 +838,14 @@ err_free_dev_name: err_free_ida: ida_free(&gpio_ida, gdev->id); err_free_gdev: + kfree(gdev); +err_print_message: /* failures here can mean systems won't boot... */ if (ret != -EPROBE_DEFER) { pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__, - gdev->base, gdev->base + gdev->ngpio - 1, + base, base + (int)ngpios - 1, gc->label ? : "generic", ret); } - kfree(gdev); return ret; } EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 0b52af415b28..ce64ca1c6e66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) break; case IP_VERSION(3, 0, 2): fw_name = FIRMWARE_VANGOGH; + if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) + adev->vcn.indirect_sram = true; break; case IP_VERSION(3, 0, 16): fw_name = FIRMWARE_DIMGREY_CAVEFISH; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 6925e0280dbe..f4f3d2665a6b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -5,6 +5,7 @@ menu "Display Engine Configuration" config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y + depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64 select SND_HDA_COMPONENT if SND_HDA_CORE select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) help @@ -12,6 +13,12 @@ config DRM_AMD_DC support for AMDGPU. This adds required support for Vega and Raven ASICs. + calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64) + architectures built with Clang (all released versions), whereby the stack + frame gets blown up to well over 5k. This would cause an immediate kernel + panic on most architectures. We'll revert this when the following bug report + has been resolved: https://github.com/llvm/llvm-project/issues/41896. 
+ config DRM_AMD_DC_DCN def_bool n help diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c index 461c62c88413..de77054195c6 100644 --- a/drivers/gpu/drm/i915/display/intel_display.c +++ b/drivers/gpu/drm/i915/display/intel_display.c @@ -3723,12 +3723,16 @@ out: static u8 bigjoiner_pipes(struct drm_i915_private *i915) { + u8 pipes; + if (DISPLAY_VER(i915) >= 12) - return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); + pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D); else if (DISPLAY_VER(i915) >= 11) - return BIT(PIPE_B) | BIT(PIPE_C); + pipes = BIT(PIPE_B) | BIT(PIPE_C); else - return 0; + pipes = 0; + + return pipes & RUNTIME_INFO(i915)->pipe_mask; } static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index d0b03a928b9a..7caa3412a244 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -625,8 +625,13 @@ int intel_gt_wait_for_idle(struct intel_gt *gt, long timeout) return -EINTR; } - return timeout ? timeout : intel_uc_wait_for_idle(>->uc, - remaining_timeout); + if (timeout) + return timeout; + + if (remaining_timeout < 0) + remaining_timeout = 0; + + return intel_uc_wait_for_idle(>->uc, remaining_timeout); } int intel_gt_init(struct intel_gt *gt) @@ -1017,6 +1022,11 @@ static void mmio_invalidate_full(struct intel_gt *gt) if (!i915_mmio_reg_offset(rb.reg)) continue; + if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS || + engine->class == VIDEO_ENHANCEMENT_CLASS || + engine->class == COMPUTE_CLASS)) + rb.bit = _MASKED_BIT_ENABLE(rb.bit); + intel_uncore_write_fw(uncore, rb.reg, rb.bit); awake |= engine->mask; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index edb881d75630..1dfd01668c79 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -199,7 +199,7 @@ out_active: spin_lock(&timelines->lock); if (remaining_timeout) *remaining_timeout = timeout; - return active_count ? timeout : 0; + return active_count ? 
timeout ?: -ETIME : 0; } static void retire_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c index 2403ccd52c74..bba8cb6e8ae4 100644 --- a/drivers/gpu/drm/i915/intel_dram.c +++ b/drivers/gpu/drm/i915/intel_dram.c @@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915) u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL); struct dram_info *dram_info = &i915->dram_info; - val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val); - switch (val) { + switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) { case 0: dram_info->type = INTEL_DRAM_DDR4; break; diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 9c1d31f63f85..bd47628da6be 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1315,6 +1315,9 @@ static s32 snto32(__u32 value, unsigned n) if (!value || !n) return 0; + if (n > 32) + n = 32; + switch (n) { case 8: return ((__s8)value); case 16: return ((__s16)value); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index dad953f66996..8f58c3c1bec3 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -274,6 +274,7 @@ #define USB_DEVICE_ID_CH_AXIS_295 0x001c #define USB_VENDOR_ID_CHERRY 0x046a +#define USB_DEVICE_ID_CHERRY_MOUSE_000C 0x000c #define USB_DEVICE_ID_CHERRY_CYMOTION 0x0023 #define USB_DEVICE_ID_CHERRY_CYMOTION_SOLAR 0x0027 @@ -917,6 +918,7 @@ #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 +#define USB_DEVICE_ID_MS_MOUSE_0783 0x0783 #define USB_VENDOR_ID_MOJO 0x8282 #define USB_DEVICE_ID_RETRO_ADAPTER 0x3201 @@ -1215,6 +1217,7 @@ #define USB_DEVICE_ID_SYNAPTICS_DELL_K15A 0x6e21 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1002 0x73f4 #define USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003 0x73f5 +#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017 0x73f6 #define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7 #define USB_VENDOR_ID_TEXAS_INSTRUMENTS 0x2047 @@ -1381,6 +1384,7 @@ #define USB_VENDOR_ID_PRIMAX 0x0461 #define USB_DEVICE_ID_PRIMAX_MOUSE_4D22 0x4d22 +#define USB_DEVICE_ID_PRIMAX_MOUSE_4E2A 0x4e2a #define USB_DEVICE_ID_PRIMAX_KEYBOARD 0x4e05 #define USB_DEVICE_ID_PRIMAX_REZEL 0x4e72 #define USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F 0x4d0f diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 430fa4f52ed3..75ebfcf31889 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -121,6 +121,11 @@ static const struct hid_device_id ite_devices[] = { USB_VENDOR_ID_SYNAPTICS, USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, + /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. 
*/ + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_SYNAPTICS, + USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_017), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); diff --git a/drivers/hid/hid-lg4ff.c b/drivers/hid/hid-lg4ff.c index 5e6a0cef2a06..e3fcf1353fb3 100644 --- a/drivers/hid/hid-lg4ff.c +++ b/drivers/hid/hid-lg4ff.c @@ -872,6 +872,12 @@ static ssize_t lg4ff_alternate_modes_store(struct device *dev, struct device_att return -ENOMEM; i = strlen(lbuf); + + if (i == 0) { + kfree(lbuf); + return -EINVAL; + } + if (lbuf[i-1] == '\n') { if (i == 1) { kfree(lbuf); diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 71a9c258a20b..8a2aac18dcc5 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -4269,21 +4269,6 @@ static void hidpp_remove(struct hid_device *hdev) mutex_destroy(&hidpp->send_mutex); } -static const struct hid_device_id unhandled_hidpp_devices[] = { - /* Logitech Harmony Adapter for PS3, handled in hid-sony */ - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_HARMONY_PS3) }, - /* Handled in hid-generic */ - { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_DINOVO_EDGE_KBD) }, - {} -}; - -static bool hidpp_match(struct hid_device *hdev, - bool ignore_special_driver) -{ - /* Refuse to handle devices handled by other HID drivers */ - return !hid_match_id(hdev, unhandled_hidpp_devices); -} - #define LDJ_DEVICE(product) \ HID_DEVICE(BUS_USB, HID_GROUP_LOGITECH_DJ_DEVICE, \ USB_VENDOR_ID_LOGITECH, (product)) @@ -4367,9 +4352,15 @@ static const struct hid_device_id hidpp_devices[] = { { /* MX5500 keyboard over Bluetooth */ HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb30b), .driver_data = HIDPP_QUIRK_HIDPP_CONSUMER_VENDOR_KEYS }, - - { /* And try to enable HID++ for all the Logitech Bluetooth devices */ - HID_DEVICE(BUS_BLUETOOTH, HID_GROUP_ANY, USB_VENDOR_ID_LOGITECH, HID_ANY_ID) }, + { /* M-RCQ142 V470 Cordless Laser Mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb008) }, + { /* MX Master mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb012) }, + { /* MX Ergo trackball over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01d) }, + { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb01e) }, + { /* MX Master 3 mouse over Bluetooth */ + HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_LOGITECH, 0xb023) }, {} }; @@ -4383,7 +4374,6 @@ static const struct hid_usage_id hidpp_usages[] = { static struct hid_driver hidpp_driver = { .name = "logitech-hidpp-device", .id_table = hidpp_devices, - .match = hidpp_match, .report_fixup = hidpp_report_fixup, .probe = hidpp_probe, .remove = hidpp_remove, diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index 50e1c717fc0a..0e9702c7f7d6 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -54,6 +54,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_FLIGHT_SIM_YOKE), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_PEDALS), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_CH, USB_DEVICE_ID_CH_PRO_THROTTLE), HID_QUIRK_NOGET }, + { HID_USB_DEVICE(USB_VENDOR_ID_CHERRY, USB_DEVICE_ID_CHERRY_MOUSE_000C), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K65RGB_RAPIDFIRE), HID_QUIRK_NO_INIT_REPORTS 
| HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_CORSAIR, USB_DEVICE_ID_CORSAIR_K70RGB), HID_QUIRK_NO_INIT_REPORTS }, @@ -122,6 +123,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C05A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_LOGITECH, USB_DEVICE_ID_LOGITECH_MOUSE_C06A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_MOUSE_0783), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, @@ -146,6 +148,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_OPTICAL_TOUCH_SCREEN), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_PIXART, USB_DEVICE_ID_PIXART_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4D22), HID_QUIRK_ALWAYS_POLL }, + { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_MOUSE_4E2A), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D0F), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4D65), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_PRIMAX, USB_DEVICE_ID_PRIMAX_PIXART_MOUSE_4E22), HID_QUIRK_ALWAYS_POLL }, diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 0fbc408c2607..7fa6fe04f1b2 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -192,6 +192,7 @@ static int uclogic_probe(struct hid_device *hdev, * than the pen, so use QUIRK_MULTI_INPUT for all tablets. 
*/ hdev->quirks |= HID_QUIRK_MULTI_INPUT; + hdev->quirks |= HID_QUIRK_HIDINPUT_FORCE; /* Allocate and assign driver data */ drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-rdesc.c b/drivers/hid/hid-uclogic-rdesc.c index 4bd54c4fb5b0..6b73eb0df6bd 100644 --- a/drivers/hid/hid-uclogic-rdesc.c +++ b/drivers/hid/hid-uclogic-rdesc.c @@ -1193,7 +1193,7 @@ __u8 *uclogic_rdesc_template_apply(const __u8 *template_ptr, p[sizeof(btn_head)] < param_num) { v = param_list[p[sizeof(btn_head)]]; put_unaligned((__u8)0x2A, p); /* Usage Maximum */ - put_unaligned_le16((__force u16)cpu_to_le16(v), p + 1); + put_unaligned((__force u16)cpu_to_le16(v), (s16 *)(p + 1)); p += sizeof(btn_head) + 1; } else { p++; diff --git a/drivers/hid/i2c-hid/Kconfig b/drivers/hid/i2c-hid/Kconfig index 5273ee2bb134..d65abe65ce73 100644 --- a/drivers/hid/i2c-hid/Kconfig +++ b/drivers/hid/i2c-hid/Kconfig @@ -66,6 +66,6 @@ endmenu config I2C_HID_CORE tristate - default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_GOODIX=y - default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_GOODIX=m + default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_ELAN=y || I2C_HID_OF_GOODIX=y + default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_ELAN=m || I2C_HID_OF_GOODIX=m select HID diff --git a/drivers/hwmon/asus-ec-sensors.c b/drivers/hwmon/asus-ec-sensors.c index 81e688975c6a..a901e4e33d81 100644 --- a/drivers/hwmon/asus-ec-sensors.c +++ b/drivers/hwmon/asus-ec-sensors.c @@ -938,6 +938,8 @@ static int asus_ec_probe(struct platform_device *pdev) ec_data->nr_sensors = hweight_long(ec_data->board_info->sensors); ec_data->sensors = devm_kcalloc(dev, ec_data->nr_sensors, sizeof(struct ec_sensor), GFP_KERNEL); + if (!ec_data->sensors) + return -ENOMEM; status = setup_lock_data(dev); if (status) { diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c index 8bf32c6c85d9..9bee4d33fbdf 100644 --- a/drivers/hwmon/coretemp.c +++ b/drivers/hwmon/coretemp.c @@ -242,10 +242,13 @@ static int adjust_tjmax(struct cpuinfo_x86 *c, u32 id, struct device *dev) */ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL) { for (i = 0; i < ARRAY_SIZE(tjmax_pci_table); i++) { - if (host_bridge->device == tjmax_pci_table[i].device) + if (host_bridge->device == tjmax_pci_table[i].device) { + pci_dev_put(host_bridge); return tjmax_pci_table[i].tjmax; + } } } + pci_dev_put(host_bridge); for (i = 0; i < ARRAY_SIZE(tjmax_table); i++) { if (strstr(c->x86_model_id, tjmax_table[i].id)) @@ -533,6 +536,10 @@ static void coretemp_remove_core(struct platform_data *pdata, int indx) { struct temp_data *tdata = pdata->core_data[indx]; + /* if we errored on add then this is already gone */ + if (!tdata) + return; + /* Remove the sysfs attributes */ sysfs_remove_group(&pdata->hwmon_dev->kobj, &tdata->attr_group); diff --git a/drivers/hwmon/i5500_temp.c b/drivers/hwmon/i5500_temp.c index 05f68e9c9477..23b9f94fe0a9 100644 --- a/drivers/hwmon/i5500_temp.c +++ b/drivers/hwmon/i5500_temp.c @@ -117,7 +117,7 @@ static int i5500_temp_probe(struct pci_dev *pdev, u32 tstimer; s8 tsfsc; - err = pci_enable_device(pdev); + err = pcim_enable_device(pdev); if (err) { dev_err(&pdev->dev, "Failed to enable device\n"); return err; diff --git a/drivers/hwmon/ibmpex.c b/drivers/hwmon/ibmpex.c index f6ec165c0fa8..1837cccd993c 100644 --- a/drivers/hwmon/ibmpex.c +++ b/drivers/hwmon/ibmpex.c @@ -502,6 +502,7 @@ static void ibmpex_register_bmc(int iface, struct device *dev) return; out_register: + list_del(&data->list); 
hwmon_device_unregister(data->hwmon_dev); out_user: ipmi_destroy_user(data->user); diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c index 2a57f4b60c29..e06186986444 100644 --- a/drivers/hwmon/ina3221.c +++ b/drivers/hwmon/ina3221.c @@ -228,7 +228,7 @@ static int ina3221_read_value(struct ina3221_data *ina, unsigned int reg, * Shunt Voltage Sum register has 14-bit value with 1-bit shift * Other Shunt Voltage registers have 12 bits with 3-bit shift */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) *val = sign_extend32(regval >> 1, 14); else *val = sign_extend32(regval >> 3, 12); @@ -465,7 +465,7 @@ static int ina3221_write_curr(struct device *dev, u32 attr, * SHUNT_SUM: (1 / 40uV) << 1 = 1 / 20uV * SHUNT[1-3]: (1 / 40uV) << 3 = 1 / 5uV */ - if (reg == INA3221_SHUNT_SUM) + if (reg == INA3221_SHUNT_SUM || reg == INA3221_CRIT_SUM) regval = DIV_ROUND_CLOSEST(voltage_uv, 20) & 0xfffe; else regval = DIV_ROUND_CLOSEST(voltage_uv, 5) & 0xfff8; diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index 7404e974762f..2dbbbac9de09 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -396,7 +396,7 @@ static int ltc2947_read_temp(struct device *dev, const u32 attr, long *val, return ret; /* in milidegrees celcius, temp is given by: */ - *val = (__val * 204) + 550; + *val = (__val * 204) + 5500; return 0; } diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index fe0cd205502d..f58943cb1341 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -852,7 +852,8 @@ static int cdns_i2c_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, CDNS_I2C_POLL_US, CDNS_I2C_TIMEOUT_US); if (ret) { ret = -EAGAIN; - i2c_recover_bus(adap); + if (id->adap.bus_recovery_info) + i2c_recover_bus(adap); goto out; } @@ -1263,8 +1264,13 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->rinfo.pinctrl = devm_pinctrl_get(&pdev->dev); if (IS_ERR(id->rinfo.pinctrl)) { + int err = PTR_ERR(id->rinfo.pinctrl); + dev_info(&pdev->dev, "can't get pinctrl, bus recovery not supported\n"); - return PTR_ERR(id->rinfo.pinctrl); + if (err != -ENODEV) + return err; + } else { + id->adap.bus_recovery_info = &id->rinfo; } id->membase = devm_platform_get_and_ioremap_resource(pdev, 0, &r_mem); @@ -1283,7 +1289,6 @@ static int cdns_i2c_probe(struct platform_device *pdev) id->adap.retries = 3; /* Default retry value. 
*/ id->adap.algo_data = id; id->adap.dev.parent = &pdev->dev; - id->adap.bus_recovery_info = &id->rinfo; init_completion(&id->xfer_done); snprintf(id->adap.name, sizeof(id->adap.name), "Cadence I2C at %08lx", (unsigned long)r_mem->start); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index 3082183bd66a..fc70920c4dda 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1132,7 +1132,8 @@ static int i2c_imx_read(struct imx_i2c_struct *i2c_imx, struct i2c_msg *msgs, int i, result; unsigned int temp; int block_data = msgs->flags & I2C_M_RECV_LEN; - int use_dma = i2c_imx->dma && msgs->len >= DMA_THRESHOLD && !block_data; + int use_dma = i2c_imx->dma && msgs->flags & I2C_M_DMA_SAFE && + msgs->len >= DMA_THRESHOLD && !block_data; dev_dbg(&i2c_imx->adapter.dev, "<%s> write slave address: addr=0x%x\n", @@ -1298,7 +1299,8 @@ static int i2c_imx_xfer_common(struct i2c_adapter *adapter, result = i2c_imx_read(i2c_imx, &msgs[i], is_lastmsg, atomic); } else { if (!atomic && - i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD) + i2c_imx->dma && msgs[i].len >= DMA_THRESHOLD && + msgs[i].flags & I2C_M_DMA_SAFE) result = i2c_imx_dma_write(i2c_imx, &msgs[i]); else result = i2c_imx_write(i2c_imx, &msgs[i], atomic); diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 0c365b57d957..83457359ec45 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -2393,8 +2393,17 @@ static struct platform_driver npcm_i2c_bus_driver = { static int __init npcm_i2c_init(void) { + int ret; + npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL); - return platform_driver_register(&npcm_i2c_bus_driver); + + ret = platform_driver_register(&npcm_i2c_bus_driver); + if (ret) { + debugfs_remove_recursive(npcm_i2c_debugfs_dir); + return ret; + } + + return 0; } module_init(npcm_i2c_init); diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 84a77512614d..8fce98bb77ff 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -626,7 +626,6 @@ static int geni_i2c_gpi_xfer(struct geni_i2c_dev *gi2c, struct i2c_msg msgs[], i dev_err(gi2c->se.dev, "I2C timeout gpi flags:%d addr:0x%x\n", gi2c->cur->flags, gi2c->cur->addr); gi2c->err = -ETIMEDOUT; - goto err; } if (gi2c->err) { diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 9aa7b9d9a485..13fafb74bab8 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -467,6 +467,7 @@ static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); struct i2c_driver *driver; + bool do_power_on; int status; if (!client) @@ -545,8 +546,8 @@ static int i2c_device_probe(struct device *dev) if (status < 0) goto err_clear_wakeup_irq; - status = dev_pm_domain_attach(&client->dev, - !i2c_acpi_waive_d0_probe(dev)); + do_power_on = !i2c_acpi_waive_d0_probe(dev); + status = dev_pm_domain_attach(&client->dev, do_power_on); if (status) goto err_clear_wakeup_irq; @@ -585,7 +586,7 @@ static int i2c_device_probe(struct device *dev) err_release_driver_resources: devres_release_group(&client->dev, client->devres_group_id); err_detach_pm_domain: - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, do_power_on); err_clear_wakeup_irq: dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -610,7 +611,7 @@ static void i2c_device_remove(struct device *dev) 
devres_release_group(&client->dev, client->devres_group_id); - dev_pm_domain_detach(&client->dev, !i2c_acpi_waive_d0_probe(dev)); + dev_pm_domain_detach(&client->dev, true); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 3a4952935366..3d9c5758d8a4 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -211,12 +211,14 @@ static int raydium_i2c_send(struct i2c_client *client, error = raydium_i2c_xfer(client, addr, xfer, ARRAY_SIZE(xfer)); if (likely(!error)) - return 0; + goto out; msleep(RM_RETRY_DELAY_MS); } while (++tries < RM_MAX_RETRIES); dev_err(&client->dev, "%s failed: %d\n", __func__, error); +out: + kfree(tx_buf); return error; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..bc94059a5b87 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -820,6 +820,7 @@ int __init dmar_dev_scope_init(void) info = dmar_alloc_pci_notify_info(dev, BUS_NOTIFY_ADD_DEVICE); if (!info) { + pci_dev_put(dev); return dmar_dev_scope_status; } else { dmar_pci_bus_add_dev(info); diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 996a8b5ee5ee..5287efe247b1 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -1396,6 +1396,24 @@ static void domain_update_iotlb(struct dmar_domain *domain) spin_unlock_irqrestore(&domain->lock, flags); } +/* + * The extra devTLB flush quirk impacts those QAT devices with PCI device + * IDs ranging from 0x4940 to 0x4943. It is exempted from risky_device() + * check because it applies only to the built-in QAT devices and it doesn't + * grant additional privileges. 
+ */ +#define BUGGY_QAT_DEVID_MASK 0x494c +static bool dev_needs_extra_dtlb_flush(struct pci_dev *pdev) +{ + if (pdev->vendor != PCI_VENDOR_ID_INTEL) + return false; + + if ((pdev->device & 0xfffc) != BUGGY_QAT_DEVID_MASK) + return false; + + return true; +} + static void iommu_enable_pci_caps(struct device_domain_info *info) { struct pci_dev *pdev; @@ -1478,6 +1496,7 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); + quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -3854,8 +3873,10 @@ static inline bool has_external_pci(void) struct pci_dev *pdev = NULL; for_each_pci_dev(pdev) - if (pdev->external_facing) + if (pdev->external_facing) { + pci_dev_put(pdev); return true; + } return false; } @@ -4490,9 +4511,10 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) if (dev_is_pci(dev)) { if (ecap_dev_iotlb_support(iommu->ecap) && pci_ats_supported(pdev) && - dmar_ats_supported(pdev, iommu)) + dmar_ats_supported(pdev, iommu)) { info->ats_supported = 1; - + info->dtlb_extra_inval = dev_needs_extra_dtlb_flush(pdev); + } if (sm_supported(iommu)) { if (pasid_supported(iommu)) { int features = pci_pasid_features(pdev); @@ -4931,3 +4953,48 @@ static void __init check_tylersburg_isoch(void) pr_warn("Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n", vtisochctrl); } + +/* + * Here we deal with a device TLB defect where device may inadvertently issue ATS + * invalidation completion before posted writes initiated with translated address + * that utilized translations matching the invalidation address range, violating + * the invalidation completion ordering. + * Therefore, any use cases that cannot guarantee DMA is stopped before unmap is + * vulnerable to this defect. In other words, any dTLB invalidation initiated not + * under the control of the trusted/privileged host device driver must use this + * quirk. + * Device TLBs are invalidated under the following six conditions: + * 1. Device driver does DMA API unmap IOVA + * 2. Device driver unbind a PASID from a process, sva_unbind_device() + * 3. PASID is torn down, after PASID cache is flushed. e.g. process + * exit_mmap() due to crash + * 4. Under SVA usage, called by mmu_notifier.invalidate_range() where + * VM has to free pages that were unmapped + * 5. Userspace driver unmaps a DMA buffer + * 6. Cache invalidation in vSVA usage (upcoming) + * + * For #1 and #2, device drivers are responsible for stopping DMA traffic + * before unmap/unbind. For #3, iommu driver gets mmu_notifier to + * invalidate TLB the same way as normal user unmap which will use this quirk. + * The dTLB invalidation after PASID cache flush does not need this quirk. + * + * As a reminder, #6 will *NEED* this quirk as we enable nested translation. 
+ */ +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long mask, + u32 pasid, u16 qdep) +{ + u16 sid; + + if (likely(!info->dtlb_extra_inval)) + return; + + sid = PCI_DEVID(info->bus, info->devfn); + if (pasid == PASID_RID2PASID) { + qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, + qdep, address, mask); + } else { + qi_flush_dev_iotlb_pasid(info->iommu, sid, info->pfsid, + pasid, qdep, address, mask); + } +} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..db9df7c3790c 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -623,6 +623,7 @@ struct device_domain_info { u8 pri_enabled:1; u8 ats_supported:1; u8 ats_enabled:1; + u8 dtlb_extra_inval:1; /* Quirk for devices need extra flush */ u8 ats_qdep; struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ @@ -728,6 +729,9 @@ void qi_flush_piotlb(struct intel_iommu *iommu, u16 did, u32 pasid, u64 addr, void qi_flush_dev_iotlb_pasid(struct intel_iommu *iommu, u16 sid, u16 pfsid, u32 pasid, u16 qdep, u64 addr, unsigned int size_order); +void quirk_extra_dev_tlb_flush(struct device_domain_info *info, + unsigned long address, unsigned long pages, + u32 pasid, u16 qdep); void qi_flush_pasid_cache(struct intel_iommu *iommu, u16 did, u64 granu, u32 pasid); diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 7d08eb034f2d..03b25358946c 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -184,10 +184,13 @@ static void __flush_svm_range_dev(struct intel_svm *svm, return; qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); - if (info->ats_enabled) + if (info->ats_enabled) { qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, svm->pasid, sdev->qdep, address, order_base_2(pages)); + quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), + svm->pasid, sdev->qdep); + } } static void intel_flush_svm_range_dev(struct intel_svm *svm, @@ -745,12 +748,16 @@ bad_req: * If prq is to be handled outside iommu driver via receiver of * the fault notifiers, we skip the page response here. 
*/ - if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req)) - handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + if (!pdev) + goto bad_req; - trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, - req->priv_data[0], req->priv_data[1], - iommu->prq_seq_number++); + if (intel_svm_prq_report(iommu, &pdev->dev, req)) + handle_bad_prq_event(iommu, req, QI_RESP_INVALID); + else + trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, + req->priv_data[0], req->priv_data[1], + iommu->prq_seq_number++); + pci_dev_put(pdev); prq_advance: head = (head + sizeof(*req)) & PRQ_RING_MASK; } diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c index 542dde9d2609..144027035892 100644 --- a/drivers/media/common/videobuf2/frame_vector.c +++ b/drivers/media/common/videobuf2/frame_vector.c @@ -35,11 +35,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, struct frame_vector *vec) { - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int ret_pin_user_pages_fast = 0; - int ret = 0; - int err; + int ret; if (nr_frames == 0) return 0; @@ -52,57 +48,17 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames, ret = pin_user_pages_fast(start, nr_frames, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM, (struct page **)(vec->ptrs)); - if (ret > 0) { - vec->got_ref = true; - vec->is_pfns = false; - goto out_unlocked; - } - ret_pin_user_pages_fast = ret; - - mmap_read_lock(mm); - vec->got_ref = false; - vec->is_pfns = true; - ret = 0; - do { - unsigned long *nums = frame_vector_pfns(vec); - - vma = vma_lookup(mm, start); - if (!vma) - break; - - while (ret < nr_frames && start + PAGE_SIZE <= vma->vm_end) { - err = follow_pfn(vma, start, &nums[ret]); - if (err) { - if (ret) - goto out; - // If follow_pfn() returns -EINVAL, then this - // is not an IO mapping or a raw PFN mapping. - // In that case, return the original error from - // pin_user_pages_fast(). Otherwise this - // function would return -EINVAL when - // pin_user_pages_fast() returned -ENOMEM, - // which makes debugging hard. - if (err == -EINVAL && ret_pin_user_pages_fast) - ret = ret_pin_user_pages_fast; - else - ret = err; - goto out; - } - start += PAGE_SIZE; - ret++; - } - /* Bail out if VMA doesn't completely cover the tail page. */ - if (start < vma->vm_end) - break; - } while (ret < nr_frames); -out: - mmap_read_unlock(mm); -out_unlocked: - if (!ret) - ret = -EFAULT; - if (ret > 0) - vec->nr_frames = ret; - return ret; + vec->got_ref = true; + vec->is_pfns = false; + vec->nr_frames = ret; + + if (likely(ret > 0)) + return ret; + + /* This used to (racily) return non-refcounted pfns. Let people know */ + WARN_ONCE(1, "get_vaddr_frames() cannot follow VM_IO mapping"); + vec->nr_frames = 0; + return ret ? ret : -EFAULT; } EXPORT_SYMBOL(get_vaddr_frames); diff --git a/drivers/media/common/videobuf2/videobuf2-core.c b/drivers/media/common/videobuf2/videobuf2-core.c index ab9697f3b5f1..92efc4676df6 100644 --- a/drivers/media/common/videobuf2/videobuf2-core.c +++ b/drivers/media/common/videobuf2/videobuf2-core.c @@ -813,7 +813,13 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, num_buffers = max_t(unsigned int, *count, q->min_buffers_needed); num_buffers = min_t(unsigned int, num_buffers, VB2_MAX_FRAME); memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory value + * in the queue_setup op. 
+ */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); set_queue_coherency(q, non_coherent_mem); /* @@ -823,22 +829,27 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Check that driver has set sane values */ - if (WARN_ON(!num_planes)) - return -EINVAL; + if (WARN_ON(!num_planes)) { + ret = -EINVAL; + goto error; + } for (i = 0; i < num_planes; i++) - if (WARN_ON(!plane_sizes[i])) - return -EINVAL; + if (WARN_ON(!plane_sizes[i])) { + ret = -EINVAL; + goto error; + } /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -879,7 +890,8 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. + * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. */ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -895,6 +907,12 @@ int vb2_core_reqbufs(struct vb2_queue *q, enum vb2_memory memory, q->waiting_for_buffers = !q->is_output; return 0; + +error: + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_core_reqbufs); @@ -906,6 +924,7 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, unsigned int num_planes = 0, num_buffers, allocated_buffers; unsigned plane_sizes[VB2_MAX_PLANES] = { }; bool non_coherent_mem = flags & V4L2_MEMORY_FLAG_NON_COHERENT; + bool no_previous_buffers = !q->num_buffers; int ret; if (q->num_buffers == VB2_MAX_FRAME) { @@ -913,13 +932,19 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, return -ENOBUFS; } - if (!q->num_buffers) { + if (no_previous_buffers) { if (q->waiting_in_dqbuf && *count) { dprintk(q, 1, "another dup()ped fd is waiting for a buffer\n"); return -EBUSY; } memset(q->alloc_devs, 0, sizeof(q->alloc_devs)); + /* + * Set this now to ensure that drivers see the correct q->memory + * value in the queue_setup op. + */ + mutex_lock(&q->mmap_lock); q->memory = memory; + mutex_unlock(&q->mmap_lock); q->waiting_for_buffers = !q->is_output; set_queue_coherency(q, non_coherent_mem); } else { @@ -945,14 +970,15 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, ret = call_qop(q, queue_setup, q, &num_buffers, &num_planes, plane_sizes, q->alloc_devs); if (ret) - return ret; + goto error; /* Finally, allocate buffers and video memory */ allocated_buffers = __vb2_queue_alloc(q, memory, num_buffers, num_planes, plane_sizes); if (allocated_buffers == 0) { dprintk(q, 1, "memory allocation failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto error; } /* @@ -983,7 +1009,8 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, if (ret < 0) { /* * Note: __vb2_queue_free() will subtract 'allocated_buffers' - * from q->num_buffers. + * from q->num_buffers and it will reset q->memory to + * VB2_MEMORY_UNKNOWN. 
*/ __vb2_queue_free(q, allocated_buffers); mutex_unlock(&q->mmap_lock); @@ -998,6 +1025,14 @@ int vb2_core_create_bufs(struct vb2_queue *q, enum vb2_memory memory, *count = allocated_buffers; return 0; + +error: + if (no_previous_buffers) { + mutex_lock(&q->mmap_lock); + q->memory = VB2_MEMORY_UNKNOWN; + mutex_unlock(&q->mmap_lock); + } + return ret; } EXPORT_SYMBOL_GPL(vb2_core_create_bufs); @@ -2165,6 +2200,22 @@ static int __find_plane_by_offset(struct vb2_queue *q, unsigned long off, unsigned int buffer, plane; /* + * Sanity checks to ensure the lock is held, MEMORY_MMAP is + * used and fileio isn't active. + */ + lockdep_assert_held(&q->mmap_lock); + + if (q->memory != VB2_MEMORY_MMAP) { + dprintk(q, 1, "queue is not currently set up for mmap\n"); + return -EINVAL; + } + + if (vb2_fileio_is_active(q)) { + dprintk(q, 1, "file io in progress\n"); + return -EBUSY; + } + + /* * Go over all buffers and their planes, comparing the given offset * with an offset assigned to each plane. If a match is found, * return its buffer and plane numbers. @@ -2265,11 +2316,6 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) int ret; unsigned long length; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } - /* * Check memory area access mode. */ @@ -2291,14 +2337,9 @@ int vb2_mmap(struct vb2_queue *q, struct vm_area_struct *vma) mutex_lock(&q->mmap_lock); - if (vb2_fileio_is_active(q)) { - dprintk(q, 1, "mmap: file io in progress\n"); - ret = -EBUSY; - goto unlock; - } - /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) @@ -2351,22 +2392,25 @@ unsigned long vb2_get_unmapped_area(struct vb2_queue *q, void *vaddr; int ret; - if (q->memory != VB2_MEMORY_MMAP) { - dprintk(q, 1, "queue is not currently set up for mmap\n"); - return -EINVAL; - } + mutex_lock(&q->mmap_lock); /* - * Find the plane corresponding to the offset passed by userspace. + * Find the plane corresponding to the offset passed by userspace. This + * will return an error if not MEMORY_MMAP or file I/O is in progress. */ ret = __find_plane_by_offset(q, off, &buffer, &plane); if (ret) - return ret; + goto unlock; vb = q->bufs[buffer]; vaddr = vb2_plane_vaddr(vb, plane); + mutex_unlock(&q->mmap_lock); return vaddr ? 
(unsigned long)vaddr : -EINVAL; + +unlock: + mutex_unlock(&q->mmap_lock); + return ret; } EXPORT_SYMBOL_GPL(vb2_get_unmapped_area); #endif diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index c5de202f530a..de1cc9e1ae57 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1484,6 +1484,11 @@ void mmc_init_erase(struct mmc_card *card) card->pref_erase = 0; } +static bool is_trim_arg(unsigned int arg) +{ + return (arg & MMC_TRIM_OR_DISCARD_ARGS) && arg != MMC_DISCARD_ARG; +} + static unsigned int mmc_mmc_erase_timeout(struct mmc_card *card, unsigned int arg, unsigned int qty) { @@ -1766,7 +1771,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN)) return -EOPNOTSUPP; - if (mmc_card_mmc(card) && (arg & MMC_TRIM_ARGS) && + if (mmc_card_mmc(card) && is_trim_arg(arg) && !(card->ext_csd.sec_feature_support & EXT_CSD_SEC_GB_CL_EN)) return -EOPNOTSUPP; @@ -1796,7 +1801,7 @@ int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr, * identified by the card->eg_boundary flag. */ rem = card->erase_size - (from % card->erase_size); - if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) { + if ((arg & MMC_TRIM_OR_DISCARD_ARGS) && card->eg_boundary && nr > rem) { err = mmc_do_erase(card, from, from + rem - 1, arg); from += rem; if ((err) || (to <= from)) diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8d9bceeff986..155ce2bdfe62 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3179,7 +3179,8 @@ static int __mmc_test_register_dbgfs_file(struct mmc_card *card, struct mmc_test_dbgfs_file *df; if (card->debugfs_root) - debugfs_create_file(name, mode, card->debugfs_root, card, fops); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); df = kmalloc(sizeof(*df), GFP_KERNEL); if (!df) { diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index df941438aef5..26bc59b5a7cc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -2588,13 +2588,11 @@ static int msdc_of_clock_parse(struct platform_device *pdev, return PTR_ERR(host->src_clk_cg); } - host->sys_clk_cg = devm_clk_get_optional(&pdev->dev, "sys_cg"); + /* If present, always enable for this clock gate */ + host->sys_clk_cg = devm_clk_get_optional_enabled(&pdev->dev, "sys_cg"); if (IS_ERR(host->sys_clk_cg)) host->sys_clk_cg = NULL; - /* If present, always enable for this clock gate */ - clk_prepare_enable(host->sys_clk_cg); - host->bulk_clks[0].id = "pclk_cg"; host->bulk_clks[1].id = "axi_cg"; host->bulk_clks[2].id = "ahb_cg"; diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 31ea0a2fce35..ffeb5759830f 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1512,7 +1512,7 @@ static void esdhc_cqe_enable(struct mmc_host *mmc) * system resume back. 
*/ cqhci_writel(cq_host, 0, CQHCI_CTL); - if (cqhci_readl(cq_host, CQHCI_CTL) && CQHCI_HALT) + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) dev_err(mmc_dev(host->mmc), "failed to exit halt state when enable CQE\n"); diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index b92a408f138d..bec3f9e3cd3f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -470,7 +470,7 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) } if (IS_ERR(sprd_host->pinctrl)) - return 0; + goto reset; switch (ios->signal_voltage) { case MMC_SIGNAL_VOLTAGE_180: @@ -498,6 +498,8 @@ static int sdhci_sprd_voltage_switch(struct mmc_host *mmc, struct mmc_ios *ios) /* Wait for 300 ~ 500 us for pin state stable */ usleep_range(300, 500); + +reset: sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA); return 0; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index fef03de85b99..c7ad32a75b57 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -373,6 +373,7 @@ static void sdhci_init(struct sdhci_host *host, int soft) if (soft) { /* force clock reconfiguration */ host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } } @@ -2293,11 +2294,46 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing) } EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling); +static bool sdhci_timing_has_preset(unsigned char timing) +{ + switch (timing) { + case MMC_TIMING_UHS_SDR12: + case MMC_TIMING_UHS_SDR25: + case MMC_TIMING_UHS_SDR50: + case MMC_TIMING_UHS_SDR104: + case MMC_TIMING_UHS_DDR50: + case MMC_TIMING_MMC_DDR52: + return true; + }; + return false; +} + +static bool sdhci_preset_needed(struct sdhci_host *host, unsigned char timing) +{ + return !(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && + sdhci_timing_has_preset(timing); +} + +static bool sdhci_presetable_values_change(struct sdhci_host *host, struct mmc_ios *ios) +{ + /* + * Preset Values are: Driver Strength, Clock Generator and SDCLK/RCLK + * Frequency. Check if preset values need to be enabled, or the Driver + * Strength needs updating. Note, clock changes are handled separately. + */ + return !host->preset_enabled && + (sdhci_preset_needed(host, ios->timing) || host->drv_type != ios->drv_type); +} + void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) { struct sdhci_host *host = mmc_priv(mmc); + bool reinit_uhs = host->reinit_uhs; + bool turning_on_clk = false; u8 ctrl; + host->reinit_uhs = false; + if (ios->power_mode == MMC_POWER_UNDEFINED) return; @@ -2323,6 +2359,8 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) sdhci_enable_preset_value(host, false); if (!ios->clock || ios->clock != host->clock) { + turning_on_clk = ios->clock && !host->clock; + host->ops->set_clock(host, ios->clock); host->clock = ios->clock; @@ -2349,6 +2387,17 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_bus_width(host, ios->bus_width); + /* + * Special case to avoid multiple clock changes during voltage + * switching. 
+ */ + if (!reinit_uhs && + turning_on_clk && + host->timing == ios->timing && + host->version >= SDHCI_SPEC_300 && + !sdhci_presetable_values_change(host, ios)) + return; + ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL); if (!(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) { @@ -2392,6 +2441,7 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2); + host->drv_type = ios->drv_type; } else { /* * According to SDHC Spec v3.00, if the Preset Value @@ -2419,19 +2469,14 @@ void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) host->ops->set_uhs_signaling(host, ios->timing); host->timing = ios->timing; - if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) && - ((ios->timing == MMC_TIMING_UHS_SDR12) || - (ios->timing == MMC_TIMING_UHS_SDR25) || - (ios->timing == MMC_TIMING_UHS_SDR50) || - (ios->timing == MMC_TIMING_UHS_SDR104) || - (ios->timing == MMC_TIMING_UHS_DDR50) || - (ios->timing == MMC_TIMING_MMC_DDR52))) { + if (sdhci_preset_needed(host, ios->timing)) { u16 preset; sdhci_enable_preset_value(host, true); preset = sdhci_get_preset_value(host); ios->drv_type = FIELD_GET(SDHCI_PRESET_DRV_MASK, preset); + host->drv_type = ios->drv_type; } /* Re-enable SD Clock */ @@ -3768,6 +3813,7 @@ int sdhci_resume_host(struct sdhci_host *host) sdhci_init(host, 0); host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->set_ios(mmc, &mmc->ios); } else { sdhci_init(host, (mmc->pm_flags & MMC_PM_KEEP_POWER)); @@ -3830,6 +3876,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host, int soft_reset) /* Force clock and power re-program */ host->pwr = 0; host->clock = 0; + host->reinit_uhs = true; mmc->ops->start_signal_voltage_switch(mmc, &mmc->ios); mmc->ops->set_ios(mmc, &mmc->ios); diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index d750c464bd1e..87a3aaa07438 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -524,6 +524,8 @@ struct sdhci_host { unsigned int clock; /* Current clock (MHz) */ u8 pwr; /* Current voltage */ + u8 drv_type; /* Current UHS-I driver type */ + bool reinit_uhs; /* Force UHS-related re-initialization */ bool runtime_suspended; /* Host is runtime suspended */ bool bus_on; /* Bus power prevents runtime suspend */ diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e01bb0412f1c..4048876f842c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3249,7 +3249,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond, goto out; saddr = &combined->ip6.saddr; - daddr = &combined->ip6.saddr; + daddr = &combined->ip6.daddr; slave_dbg(bond->dev, slave->dev, "%s: %s/%d av %d sv %d sip %pI6c tip %pI6c\n", __func__, slave->dev->name, bond_slave_state(slave), diff --git a/drivers/net/can/can327.c b/drivers/net/can/can327.c index ed3d0b8989a0..dc7192ecb001 100644 --- a/drivers/net/can/can327.c +++ b/drivers/net/can/can327.c @@ -796,9 +796,9 @@ static int can327_netdev_close(struct net_device *dev) netif_stop_queue(dev); - /* Give UART one final chance to flush. */ - clear_bit(TTY_DO_WRITE_WAKEUP, &elm->tty->flags); - flush_work(&elm->tx_work); + /* We don't flush the UART TX queue here, as we want final stop + * commands (like the above dummy char) to be flushed out. 
+ */ can_rx_offload_disable(&elm->offload); elm->can.state = CAN_STATE_STOPPED; @@ -1069,12 +1069,15 @@ static void can327_ldisc_close(struct tty_struct *tty) { struct can327 *elm = (struct can327 *)tty->disc_data; - /* unregister_netdev() calls .ndo_stop() so we don't have to. - * Our .ndo_stop() also flushes the TTY write wakeup handler, - * so we can safely set elm->tty = NULL after this. - */ + /* unregister_netdev() calls .ndo_stop() so we don't have to. */ unregister_candev(elm->dev); + /* Give UART one final chance to flush. + * No need to clear TTY_DO_WRITE_WAKEUP since .write_wakeup() is + * serialised against .close() and will not be called once we return. + */ + flush_work(&elm->tx_work); + /* Mark channel as dead */ spin_lock_bh(&elm->lock); tty->disc_data = NULL; diff --git a/drivers/net/can/slcan/slcan-core.c b/drivers/net/can/slcan/slcan-core.c index fbb34139daa1..f4db77007c13 100644 --- a/drivers/net/can/slcan/slcan-core.c +++ b/drivers/net/can/slcan/slcan-core.c @@ -864,12 +864,14 @@ static void slcan_close(struct tty_struct *tty) { struct slcan *sl = (struct slcan *)tty->disc_data; - /* unregister_netdev() calls .ndo_stop() so we don't have to. - * Our .ndo_stop() also flushes the TTY write wakeup handler, - * so we can safely set sl->tty = NULL after this. - */ unregister_candev(sl->dev); + /* + * The netdev needn't be UP (so .ndo_stop() is not called). Hence make + * sure this is not running before freeing it up. + */ + flush_work(&sl->tx_work); + /* Mark channel as dead */ spin_lock_bh(&sl->lock); tty->disc_data = NULL; diff --git a/drivers/net/can/usb/esd_usb.c b/drivers/net/can/usb/esd_usb.c index 81b88e9e5bdc..42323f5e6f3a 100644 --- a/drivers/net/can/usb/esd_usb.c +++ b/drivers/net/can/usb/esd_usb.c @@ -234,6 +234,10 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv, u8 rxerr = msg->msg.rx.data[2]; u8 txerr = msg->msg.rx.data[3]; + netdev_dbg(priv->netdev, + "CAN_ERR_EV_EXT: dlc=%#02x state=%02x ecc=%02x rec=%02x tec=%02x\n", + msg->msg.rx.dlc, state, ecc, rxerr, txerr); + skb = alloc_can_err_skb(priv->netdev, &cf); if (skb == NULL) { stats->rx_dropped++; @@ -260,6 +264,8 @@ static void esd_usb_rx_event(struct esd_usb_net_priv *priv, break; default: priv->can.state = CAN_STATE_ERROR_ACTIVE; + txerr = 0; + rxerr = 0; break; } } else { diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h index 8582b4b67d98..ea05abfbd51d 100644 --- a/drivers/net/dsa/microchip/ksz8.h +++ b/drivers/net/dsa/microchip/ksz8.h @@ -57,5 +57,6 @@ int ksz8_reset_switch(struct ksz_device *dev); int ksz8_switch_detect(struct ksz_device *dev); int ksz8_switch_init(struct ksz_device *dev); void ksz8_switch_exit(struct ksz_device *dev); +int ksz8_change_mtu(struct ksz_device *dev, int port, int mtu); #endif diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c index bd3b133e7085..003b0ac2854c 100644 --- a/drivers/net/dsa/microchip/ksz8795.c +++ b/drivers/net/dsa/microchip/ksz8795.c @@ -76,6 +76,57 @@ int ksz8_reset_switch(struct ksz_device *dev) return 0; } +static int ksz8863_change_mtu(struct ksz_device *dev, int frame_size) +{ + u8 ctrl2 = 0; + + if (frame_size <= KSZ8_LEGAL_PACKET_SIZE) + ctrl2 |= KSZ8863_LEGAL_PACKET_ENABLE; + else if (frame_size > KSZ8863_NORMAL_PACKET_SIZE) + ctrl2 |= KSZ8863_HUGE_PACKET_ENABLE; + + return ksz_rmw8(dev, REG_SW_CTRL_2, KSZ8863_LEGAL_PACKET_ENABLE | + KSZ8863_HUGE_PACKET_ENABLE, ctrl2); +} + +static int ksz8795_change_mtu(struct ksz_device *dev, int frame_size) +{ + u8 ctrl1 = 0, ctrl2 = 
0; + int ret; + + if (frame_size > KSZ8_LEGAL_PACKET_SIZE) + ctrl2 |= SW_LEGAL_PACKET_DISABLE; + else if (frame_size > KSZ8863_NORMAL_PACKET_SIZE) + ctrl1 |= SW_HUGE_PACKET; + + ret = ksz_rmw8(dev, REG_SW_CTRL_1, SW_HUGE_PACKET, ctrl1); + if (ret) + return ret; + + return ksz_rmw8(dev, REG_SW_CTRL_2, SW_LEGAL_PACKET_DISABLE, ctrl2); +} + +int ksz8_change_mtu(struct ksz_device *dev, int port, int mtu) +{ + u16 frame_size; + + if (!dsa_is_cpu_port(dev->ds, port)) + return 0; + + frame_size = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; + + switch (dev->chip_id) { + case KSZ8795_CHIP_ID: + case KSZ8794_CHIP_ID: + case KSZ8765_CHIP_ID: + return ksz8795_change_mtu(dev, frame_size); + case KSZ8830_CHIP_ID: + return ksz8863_change_mtu(dev, frame_size); + } + + return -EOPNOTSUPP; +} + static void ksz8795_set_prio_queue(struct ksz_device *dev, int port, int queue) { u8 hi, lo; @@ -1233,8 +1284,6 @@ void ksz8_config_cpu_port(struct dsa_switch *ds) masks = dev->info->masks; regs = dev->info->regs; - /* Switch marks the maximum frame with extra byte as oversize. */ - ksz_cfg(dev, REG_SW_CTRL_2, SW_LEGAL_PACKET_DISABLE, true); ksz_cfg(dev, regs[S_TAIL_TAG_CTRL], masks[SW_TAIL_TAG_ENABLE], true); p = &dev->ports[dev->cpu_port]; @@ -1308,6 +1357,18 @@ int ksz8_setup(struct dsa_switch *ds) struct ksz_device *dev = ds->priv; int i; + ds->mtu_enforcement_ingress = true; + + /* We rely on software untagging on the CPU port, so that we + * can support both tagged and untagged VLANs + */ + ds->untag_bridge_pvid = true; + + /* VLAN filtering is partly controlled by the global VLAN + * Enable flag + */ + ds->vlan_filtering_is_global = true; + ksz_cfg(dev, S_REPLACE_VID_CTRL, SW_FLOW_CTRL, true); /* Enable automatic fast aging when link changed detected. */ @@ -1367,16 +1428,6 @@ int ksz8_switch_init(struct ksz_device *dev) dev->phy_port_cnt = dev->info->port_cnt - 1; dev->port_mask = (BIT(dev->phy_port_cnt) - 1) | dev->info->cpu_ports; - /* We rely on software untagging on the CPU port, so that we - * can support both tagged and untagged VLANs - */ - dev->ds->untag_bridge_pvid = true; - - /* VLAN filtering is partly controlled by the global VLAN - * Enable flag - */ - dev->ds->vlan_filtering_is_global = true; - return 0; } diff --git a/drivers/net/dsa/microchip/ksz8795_reg.h b/drivers/net/dsa/microchip/ksz8795_reg.h index 77487d611824..7a57c6088f80 100644 --- a/drivers/net/dsa/microchip/ksz8795_reg.h +++ b/drivers/net/dsa/microchip/ksz8795_reg.h @@ -48,6 +48,9 @@ #define NO_EXC_COLLISION_DROP BIT(3) #define SW_LEGAL_PACKET_DISABLE BIT(1) +#define KSZ8863_HUGE_PACKET_ENABLE BIT(2) +#define KSZ8863_LEGAL_PACKET_ENABLE BIT(1) + #define REG_SW_CTRL_3 0x05 #define WEIGHTED_FAIR_QUEUE_ENABLE BIT(3) diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c index 0d6b40968657..47b54ecf2c6f 100644 --- a/drivers/net/dsa/microchip/ksz9477.c +++ b/drivers/net/dsa/microchip/ksz9477.c @@ -45,24 +45,15 @@ static void ksz9477_port_cfg32(struct ksz_device *dev, int port, int offset, int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu) { - u16 frame_size, max_frame = 0; - int i; - - frame_size = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; + u16 frame_size; - /* Cache the per-port MTU setting */ - dev->ports[port].max_frame = frame_size; + if (!dsa_is_cpu_port(dev->ds, port)) + return 0; - for (i = 0; i < dev->info->port_cnt; i++) - max_frame = max(max_frame, dev->ports[i].max_frame); + frame_size = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN; return regmap_update_bits(dev->regmap[1], REG_SW_MTU__2, - 
REG_SW_MTU_MASK, max_frame); -} - -int ksz9477_max_mtu(struct ksz_device *dev, int port) -{ - return KSZ9477_MAX_FRAME_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN; + REG_SW_MTU_MASK, frame_size); } static int ksz9477_wait_vlan_ctrl_ready(struct ksz_device *dev) @@ -1143,6 +1134,8 @@ int ksz9477_setup(struct dsa_switch *ds) struct ksz_device *dev = ds->priv; int ret = 0; + ds->mtu_enforcement_ingress = true; + /* Required for port partitioning. */ ksz9477_cfg32(dev, REG_SW_QM_CTRL__4, UNICAST_VLAN_BOUNDARY, true); diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h index 00862c4cfb7f..7c5bb3032772 100644 --- a/drivers/net/dsa/microchip/ksz9477.h +++ b/drivers/net/dsa/microchip/ksz9477.h @@ -50,7 +50,6 @@ int ksz9477_mdb_add(struct ksz_device *dev, int port, int ksz9477_mdb_del(struct ksz_device *dev, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); int ksz9477_change_mtu(struct ksz_device *dev, int port, int mtu); -int ksz9477_max_mtu(struct ksz_device *dev, int port); void ksz9477_config_cpu_port(struct dsa_switch *ds); int ksz9477_enable_stp_addr(struct ksz_device *dev); int ksz9477_reset_switch(struct ksz_device *dev); diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h index 53c68d286dd3..cc457fa64939 100644 --- a/drivers/net/dsa/microchip/ksz9477_reg.h +++ b/drivers/net/dsa/microchip/ksz9477_reg.h @@ -1615,6 +1615,4 @@ #define PTP_TRIG_UNIT_M (BIT(MAX_TRIG_UNIT) - 1) #define PTP_TS_UNIT_M (BIT(MAX_TIMESTAMP_UNIT) - 1) -#define KSZ9477_MAX_FRAME_SIZE 9000 - #endif /* KSZ9477_REGS_H */ diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 8c8db315317d..423f944cc34c 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -14,6 +14,7 @@ #include <linux/phy.h> #include <linux/etherdevice.h> #include <linux/if_bridge.h> +#include <linux/if_vlan.h> #include <linux/irq.h> #include <linux/irqdomain.h> #include <linux/of_mdio.h> @@ -69,6 +70,43 @@ struct ksz_stats_raw { u64 tx_discards; }; +struct ksz88xx_stats_raw { + u64 rx; + u64 rx_hi; + u64 rx_undersize; + u64 rx_fragments; + u64 rx_oversize; + u64 rx_jabbers; + u64 rx_symbol_err; + u64 rx_crc_err; + u64 rx_align_err; + u64 rx_mac_ctrl; + u64 rx_pause; + u64 rx_bcast; + u64 rx_mcast; + u64 rx_ucast; + u64 rx_64_or_less; + u64 rx_65_127; + u64 rx_128_255; + u64 rx_256_511; + u64 rx_512_1023; + u64 rx_1024_1522; + u64 tx; + u64 tx_hi; + u64 tx_late_col; + u64 tx_pause; + u64 tx_bcast; + u64 tx_mcast; + u64 tx_ucast; + u64 tx_deferred; + u64 tx_total_col; + u64 tx_exc_col; + u64 tx_single_col; + u64 tx_mult_col; + u64 rx_discards; + u64 tx_discards; +}; + static const struct ksz_mib_names ksz88xx_mib_names[] = { { 0x00, "rx" }, { 0x01, "rx_hi" }, @@ -155,6 +193,7 @@ static const struct ksz_dev_ops ksz8_dev_ops = { .w_phy = ksz8_w_phy, .r_mib_cnt = ksz8_r_mib_cnt, .r_mib_pkt = ksz8_r_mib_pkt, + .r_mib_stat64 = ksz88xx_r_mib_stats64, .freeze_mib = ksz8_freeze_mib, .port_init_cnt = ksz8_port_init_cnt, .fdb_dump = ksz8_fdb_dump, @@ -171,6 +210,7 @@ static const struct ksz_dev_ops ksz8_dev_ops = { .reset = ksz8_reset_switch, .init = ksz8_switch_init, .exit = ksz8_switch_exit, + .change_mtu = ksz8_change_mtu, }; static void ksz9477_phylink_mac_link_up(struct ksz_device *dev, int port, @@ -206,7 +246,6 @@ static const struct ksz_dev_ops ksz9477_dev_ops = { .mdb_add = ksz9477_mdb_add, .mdb_del = ksz9477_mdb_del, .change_mtu = ksz9477_change_mtu, - .max_mtu = ksz9477_max_mtu, 
.phylink_mac_link_up = ksz9477_phylink_mac_link_up, .config_cpu_port = ksz9477_config_cpu_port, .enable_stp_addr = ksz9477_enable_stp_addr, @@ -243,7 +282,6 @@ static const struct ksz_dev_ops lan937x_dev_ops = { .mdb_add = ksz9477_mdb_add, .mdb_del = ksz9477_mdb_del, .change_mtu = lan937x_change_mtu, - .max_mtu = ksz9477_max_mtu, .phylink_mac_link_up = ksz9477_phylink_mac_link_up, .config_cpu_port = lan937x_config_cpu_port, .enable_stp_addr = ksz9477_enable_stp_addr, @@ -1583,6 +1621,55 @@ void ksz_r_mib_stats64(struct ksz_device *dev, int port) spin_unlock(&mib->stats64_lock); } +void ksz88xx_r_mib_stats64(struct ksz_device *dev, int port) +{ + struct ethtool_pause_stats *pstats; + struct rtnl_link_stats64 *stats; + struct ksz88xx_stats_raw *raw; + struct ksz_port_mib *mib; + + mib = &dev->ports[port].mib; + stats = &mib->stats64; + pstats = &mib->pause_stats; + raw = (struct ksz88xx_stats_raw *)mib->counters; + + spin_lock(&mib->stats64_lock); + + stats->rx_packets = raw->rx_bcast + raw->rx_mcast + raw->rx_ucast + + raw->rx_pause; + stats->tx_packets = raw->tx_bcast + raw->tx_mcast + raw->tx_ucast + + raw->tx_pause; + + /* HW counters are counting bytes + FCS which is not acceptable + * for rtnl_link_stats64 interface + */ + stats->rx_bytes = raw->rx + raw->rx_hi - stats->rx_packets * ETH_FCS_LEN; + stats->tx_bytes = raw->tx + raw->tx_hi - stats->tx_packets * ETH_FCS_LEN; + + stats->rx_length_errors = raw->rx_undersize + raw->rx_fragments + + raw->rx_oversize; + + stats->rx_crc_errors = raw->rx_crc_err; + stats->rx_frame_errors = raw->rx_align_err; + stats->rx_dropped = raw->rx_discards; + stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors + + stats->rx_frame_errors + stats->rx_dropped; + + stats->tx_window_errors = raw->tx_late_col; + stats->tx_fifo_errors = raw->tx_discards; + stats->tx_aborted_errors = raw->tx_exc_col; + stats->tx_errors = stats->tx_window_errors + stats->tx_fifo_errors + + stats->tx_aborted_errors; + + stats->multicast = raw->rx_mcast; + stats->collisions = raw->tx_total_col; + + pstats->tx_pause_frames = raw->tx_pause; + pstats->rx_pause_frames = raw->rx_pause; + + spin_unlock(&mib->stats64_lock); +} + static void ksz_get_stats64(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s) { @@ -2500,10 +2587,29 @@ static int ksz_max_mtu(struct dsa_switch *ds, int port) { struct ksz_device *dev = ds->priv; - if (!dev->dev_ops->max_mtu) - return -EOPNOTSUPP; + switch (dev->chip_id) { + case KSZ8795_CHIP_ID: + case KSZ8794_CHIP_ID: + case KSZ8765_CHIP_ID: + return KSZ8795_HUGE_PACKET_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN; + case KSZ8830_CHIP_ID: + return KSZ8863_HUGE_PACKET_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN; + case KSZ8563_CHIP_ID: + case KSZ9477_CHIP_ID: + case KSZ9563_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9893_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: + case LAN9370_CHIP_ID: + case LAN9371_CHIP_ID: + case LAN9372_CHIP_ID: + case LAN9373_CHIP_ID: + case LAN9374_CHIP_ID: + return KSZ9477_MAX_FRAME_SIZE - VLAN_ETH_HLEN - ETH_FCS_LEN; + } - return dev->dev_ops->max_mtu(dev, port); + return -EOPNOTSUPP; } static void ksz_set_xmii(struct ksz_device *dev, int port, diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h index c6726cbd5465..055d61ff3fb8 100644 --- a/drivers/net/dsa/microchip/ksz_common.h +++ b/drivers/net/dsa/microchip/ksz_common.h @@ -95,7 +95,6 @@ struct ksz_port { struct ksz_port_mib mib; phy_interface_t interface; - u16 max_frame; u32 rgmii_tx_val; u32 rgmii_rx_val; struct ksz_device 
*ksz_dev; @@ -322,7 +321,6 @@ struct ksz_dev_ops { void (*get_caps)(struct ksz_device *dev, int port, struct phylink_config *config); int (*change_mtu)(struct ksz_device *dev, int port, int mtu); - int (*max_mtu)(struct ksz_device *dev, int port); void (*freeze_mib)(struct ksz_device *dev, int port, bool freeze); void (*port_init_cnt)(struct ksz_device *dev, int port); void (*phylink_mac_config)(struct ksz_device *dev, int port, @@ -347,6 +345,7 @@ void ksz_switch_remove(struct ksz_device *dev); void ksz_init_mib_timer(struct ksz_device *dev); void ksz_r_mib_stats64(struct ksz_device *dev, int port); +void ksz88xx_r_mib_stats64(struct ksz_device *dev, int port); void ksz_port_stp_state_set(struct dsa_switch *ds, int port, u8 state); bool ksz_get_gbit(struct ksz_device *dev, int port); phy_interface_t ksz_get_xmii(struct ksz_device *dev, int port, bool gbit); @@ -456,6 +455,11 @@ static inline int ksz_write64(struct ksz_device *dev, u32 reg, u64 value) return regmap_bulk_write(dev->regmap[2], reg, val, 2); } +static inline int ksz_rmw8(struct ksz_device *dev, int offset, u8 mask, u8 val) +{ + return regmap_update_bits(dev->regmap[0], offset, mask, val); +} + static inline int ksz_pread8(struct ksz_device *dev, int port, int offset, u8 *data) { @@ -588,6 +592,12 @@ static inline int is_lan937x(struct ksz_device *dev) #define PORT_SRC_PHY_INT 1 +#define KSZ8795_HUGE_PACKET_SIZE 2000 +#define KSZ8863_HUGE_PACKET_SIZE 1916 +#define KSZ8863_NORMAL_PACKET_SIZE 1536 +#define KSZ8_LEGAL_PACKET_SIZE 1518 +#define KSZ9477_MAX_FRAME_SIZE 9000 + /* Regmap tables generation */ #define KSZ_SPI_OP_RD 3 #define KSZ_SPI_OP_WR 2 diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index ccfa4751d3b7..ba4fff8690aa 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -833,10 +833,13 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port, chip->info->ops->phylink_get_caps(chip, port, config); - /* Internal ports need GMII for PHYLIB */ - if (mv88e6xxx_phy_is_internal(ds, port)) + if (mv88e6xxx_phy_is_internal(ds, port)) { + __set_bit(PHY_INTERFACE_MODE_INTERNAL, + config->supported_interfaces); + /* Internal ports with no phy-mode need GMII for PHYLIB */ __set_bit(PHY_INTERFACE_MODE_GMII, config->supported_interfaces); + } } static void mv88e6xxx_mac_config(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/sja1105/sja1105_devlink.c b/drivers/net/dsa/sja1105/sja1105_devlink.c index da532614f34a..30b1f1ba762f 100644 --- a/drivers/net/dsa/sja1105/sja1105_devlink.c +++ b/drivers/net/dsa/sja1105/sja1105_devlink.c @@ -95,6 +95,8 @@ static int sja1105_setup_devlink_regions(struct dsa_switch *ds) if (IS_ERR(region)) { while (--i >= 0) dsa_devlink_region_destroy(priv->regions[i]); + + kfree(priv->regions); return PTR_ERR(region); } diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 412666111b0c..b70dcf32a26d 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1038,7 +1038,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) policing[bcast].sharindx = port; /* Only SJA1110 has multicast policers */ - if (mcast <= table->ops->max_entry_count) + if (mcast < table->ops->max_entry_count) policing[mcast].sharindx = port; } diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index e104fb02817d..aa0d2f3aaeaa 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ 
b/drivers/net/ethernet/aeroflex/greth.c @@ -258,6 +258,7 @@ static int greth_init_rings(struct greth_private *greth) if (dma_mapping_error(greth->dev, dma_addr)) { if (netif_msg_ifup(greth)) dev_err(greth->dev, "Could not create initial DMA mapping\n"); + dev_kfree_skb(skb); goto cleanup; } greth->rx_skbuff[i] = skb; diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 55dfdb34e37b..f4ca0c6c0f51 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -71,13 +71,14 @@ config BCM63XX_ENET config BCMGENET tristate "Broadcom GENET internal MAC support" depends on HAS_IOMEM + depends on PTP_1588_CLOCK_OPTIONAL || !ARCH_BCM2835 select MII select PHYLIB select FIXED_PHY select BCM7XXX_PHY select MDIO_BCM_UNIMAC select DIMLIB - select BROADCOM_PHY if (ARCH_BCM2835 && PTP_1588_CLOCK_OPTIONAL) + select BROADCOM_PHY if ARCH_BCM2835 help This driver supports the built-in Ethernet MACs found in the Broadcom BCM7xxx Set Top Box family chipset. diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index dbe310144780..9f473854b0f4 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -3045,7 +3045,7 @@ error: dma_unmap_single(&bp->pdev->dev, dma_addr, bp->rx_buf_use_size, DMA_FROM_DEVICE); - skb = build_skb(data, 0); + skb = slab_build_skb(data); if (!skb) { kfree(data); goto error; diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index a8ce8d0cf9c4..21973046b12b 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -117,24 +117,6 @@ static inline void dmadesc_set(struct bcmgenet_priv *priv, dmadesc_set_length_status(priv, d, val); } -static inline dma_addr_t dmadesc_get_addr(struct bcmgenet_priv *priv, - void __iomem *d) -{ - dma_addr_t addr; - - addr = bcmgenet_readl(d + DMA_DESC_ADDRESS_LO); - - /* Register writes to GISB bus can take couple hundred nanoseconds - * and are done for each packet, save these expensive writes unless - * the platform is explicitly configured for 64-bits/LPAE. 
- */ -#ifdef CONFIG_PHYS_ADDR_T_64BIT - if (priv->hw_params->flags & GENET_HAS_40BITS) - addr |= (u64)bcmgenet_readl(d + DMA_DESC_ADDRESS_HI) << 32; -#endif - return addr; -} - #define GENET_VER_FMT "%1d.%1d EPHY: 0x%04x" #define GENET_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 98f3dc460ca7..f2f95493ec89 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2239,7 +2239,7 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = register_netdev(netdev); if (err) { dev_err(dev, "Failed to register netdevice\n"); - goto err_unregister_interrupts; + goto err_destroy_workqueue; } nic->msg_enable = debug; @@ -2248,6 +2248,8 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; +err_destroy_workqueue: + destroy_workqueue(nic->nicvf_rx_mode_wq); err_unregister_interrupts: nicvf_unregister_interrupts(nic); err_free_netdev: diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c index cacd454ac696..c39b866e2582 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch-flower.c @@ -132,6 +132,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { dev_err(dev, "DMA mapping failed\n"); + kfree(cmd_buff); return -EFAULT; } @@ -142,6 +143,7 @@ int dpaa2_switch_acl_entry_add(struct dpaa2_switch_filter_block *filter_block, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_add_entry() failed %d\n", err); + kfree(cmd_buff); return err; } @@ -172,6 +174,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, acl_entry_cfg->key_iova))) { dev_err(dev, "DMA mapping failed\n"); + kfree(cmd_buff); return -EFAULT; } @@ -182,6 +185,7 @@ dpaa2_switch_acl_entry_remove(struct dpaa2_switch_filter_block *block, DMA_TO_DEVICE); if (err) { dev_err(dev, "dpsw_acl_remove_entry() failed %d\n", err); + kfree(cmd_buff); return err; } diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 9471baa11d39..5528b0af82ae 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1216,7 +1216,8 @@ fec_restart(struct net_device *ndev) writel(0, fep->hwp + FEC_IMASK); /* Init the interrupt coalescing */ - fec_enet_itr_coal_set(ndev); + if (fep->quirks & FEC_QUIRK_HAS_COALESCE) + fec_enet_itr_coal_set(ndev); } static int fec_enet_ipc_handle_init(struct fec_enet_private *fep) diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index 93846bace028..ce2571c16e43 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -283,7 +283,7 @@ static int hisi_femac_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = (pos + 1) % rxq->num; if (rx_pkts_num >= limit) diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index ffcf797dfa90..f867e9531117 100644 --- 
a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -550,7 +550,7 @@ static int hix5hd2_rx(struct net_device *dev, int limit) skb->protocol = eth_type_trans(skb, dev); napi_gro_receive(&priv->napi, skb); dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + dev->stats.rx_bytes += len; next: pos = dma_ring_incr(pos, RX_DESC_NUM); } diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 36bc4fd91ef4..04acd1a992fa 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5931,9 +5931,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, e1000_tx_queue(tx_ring, tx_flags, count); /* Make sure there is space in the ring for the next send. */ e1000_maybe_stop_tx(tx_ring, - (MAX_SKB_FRAGS * + ((MAX_SKB_FRAGS + 1) * DIV_ROUND_UP(PAGE_SIZE, - adapter->tx_fifo_limit) + 2)); + adapter->tx_fifo_limit) + 4)); if (!netdev_xmit_more() || netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 616d27ec3226..887a735fe2a7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4466,11 +4466,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, return -EOPNOTSUPP; /* First 4 bytes of L4 header */ - if (usr_ip4_spec->l4_4_bytes == htonl(0xFFFFFFFF)) - new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; - else if (!usr_ip4_spec->l4_4_bytes) - new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - else + if (usr_ip4_spec->l4_4_bytes) return -EOPNOTSUPP; /* Filtering on Type of Service is not supported. */ @@ -4509,11 +4505,7 @@ static int i40e_check_fdir_input_set(struct i40e_vsi *vsi, else return -EOPNOTSUPP; - if (usr_ip6_spec->l4_4_bytes == htonl(0xFFFFFFFF)) - new_mask |= I40E_L4_SRC_MASK | I40E_L4_DST_MASK; - else if (!usr_ip6_spec->l4_4_bytes) - new_mask &= ~(I40E_L4_SRC_MASK | I40E_L4_DST_MASK); - else + if (usr_ip6_spec->l4_4_bytes) return -EOPNOTSUPP; /* Filtering on Traffic class is not supported. 
*/ diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 6861b3e2ced3..95485b56d6c3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -10656,6 +10656,21 @@ static int i40e_rebuild_channels(struct i40e_vsi *vsi) } /** + * i40e_clean_xps_state - clean xps state for every tx_ring + * @vsi: ptr to the VSI + **/ +static void i40e_clean_xps_state(struct i40e_vsi *vsi) +{ + int i; + + if (vsi->tx_rings) + for (i = 0; i < vsi->num_queue_pairs; i++) + if (vsi->tx_rings[i]) + clear_bit(__I40E_TX_XPS_INIT_DONE, + vsi->tx_rings[i]->state); +} + +/** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure * @@ -10679,8 +10694,10 @@ static void i40e_prep_for_reset(struct i40e_pf *pf) i40e_pf_quiesce_all_vsi(pf); for (v = 0; v < pf->num_alloc_vsi; v++) { - if (pf->vsi[v]) + if (pf->vsi[v]) { + i40e_clean_xps_state(pf->vsi[v]); pf->vsi[v]->seid = 0; + } } i40e_shutdown_adminq(&pf->hw); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 72ddcefc45b1..635f93d60318 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1578,6 +1578,7 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) i40e_cleanup_reset_vf(vf); i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(I40E_VF_STATE_RESETTING, &vf->vf_states); return true; @@ -1701,6 +1702,7 @@ bool i40e_reset_all_vfs(struct i40e_pf *pf, bool flr) } i40e_flush(hw); + usleep_range(20000, 40000); clear_bit(__I40E_VF_DISABLE, pf->state); return true; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 2b23b4714a26..a9a7f8b52140 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -1111,8 +1111,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, if (link_up == old_link && link_speed == old_link_speed) return 0; - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, link_up); + ice_ptp_link_change(pf, pf->hw.pf_id, link_up); if (ice_is_dcb_active(pf)) { if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) @@ -6340,8 +6339,7 @@ static int ice_up_complete(struct ice_vsi *vsi) ice_print_link_msg(vsi, true); netif_tx_start_all_queues(vsi->netdev); netif_carrier_on(vsi->netdev); - if (!ice_is_e810(&pf->hw)) - ice_ptp_link_change(pf, pf->hw.pf_id, true); + ice_ptp_link_change(pf, pf->hw.pf_id, true); } /* Perform an initial read of the statistics registers now to @@ -6773,8 +6771,7 @@ int ice_down(struct ice_vsi *vsi) if (vsi->netdev && vsi->type == ICE_VSI_PF) { vlan_err = ice_vsi_del_vlan_zero(vsi); - if (!ice_is_e810(&vsi->back->hw)) - ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); + ice_ptp_link_change(vsi->back, vsi->back->hw.pf_id, false); netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 13e75279e71c..d63161d73eb1 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -600,6 +600,23 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) } /** + * ice_ptp_is_tx_tracker_up - Check if Tx tracker is ready for new timestamps + * @tx: the PTP Tx timestamp tracker to check + * + * Check that a given PTP Tx timestamp 
tracker is up, i.e. that it is ready + * to accept new timestamp requests. + * + * Assumes the tx->lock spinlock is already held. + */ +static bool +ice_ptp_is_tx_tracker_up(struct ice_ptp_tx *tx) +{ + lockdep_assert_held(&tx->lock); + + return tx->init && !tx->calibrating; +} + +/** * ice_ptp_tx_tstamp - Process Tx timestamps for a port * @tx: the PTP Tx timestamp tracker * @@ -608,11 +625,13 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * * If a given index has a valid timestamp, perform the following steps: * - * 1) copy the timestamp out of the PHY register - * 4) clear the timestamp valid bit in the PHY register - * 5) unlock the index by clearing the associated in_use bit. - * 2) extend the 40b timestamp value to get a 64bit timestamp - * 3) send that timestamp to the stack + * 1) check that the timestamp request is not stale + * 2) check that a timestamp is ready and available in the PHY memory bank + * 3) read and copy the timestamp out of the PHY register + * 4) unlock the index by clearing the associated in_use bit + * 5) check if the timestamp is stale, and discard if so + * 6) extend the 40 bit timestamp value to get a 64 bit timestamp value + * 7) send this 64 bit timestamp to the stack * * Returns true if all timestamps were handled, and false if any slots remain * without a timestamp. @@ -623,24 +642,45 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp) * interrupt. In some cases hardware might not interrupt us again when the * timestamp is captured. * - * Note that we only take the tracking lock when clearing the bit and when - * checking if we need to re-queue this task. The only place where bits can be - * set is the hard xmit routine where an SKB has a request flag set. The only - * places where we clear bits are this work function, or the periodic cleanup - * thread. If the cleanup thread clears a bit we're processing we catch it - * when we lock to clear the bit and then grab the SKB pointer. If a Tx thread - * starts a new timestamp, we might not begin processing it right away but we - * will notice it at the end when we re-queue the task. If a Tx thread starts - * a new timestamp just after this function exits without re-queuing, - * the interrupt when the timestamp finishes should trigger. Avoiding holding - * the lock for the entire function is important in order to ensure that Tx - * threads do not get blocked while waiting for the lock. + * Note that we do not hold the tracking lock while reading the Tx timestamp. + * This is because reading the timestamp requires taking a mutex that might + * sleep. + * + * The only place where we set in_use is when a new timestamp is initiated + * with a slot index. This is only called in the hard xmit routine where an + * SKB has a request flag set. The only places where we clear this bit is this + * function, or during teardown when the Tx timestamp tracker is being + * removed. A timestamp index will never be re-used until the in_use bit for + * that index is cleared. + * + * If a Tx thread starts a new timestamp, we might not begin processing it + * right away but we will notice it at the end when we re-queue the task. + * + * If a Tx thread starts a new timestamp just after this function exits, the + * interrupt for that timestamp should re-trigger this function once + * a timestamp is ready. + * + * In cases where the PTP hardware clock was directly adjusted, some + * timestamps may not be able to safely use the timestamp extension math. 
In + * this case, software will set the stale bit for any outstanding Tx + * timestamps when the clock is adjusted. Then this function will discard + * those captured timestamps instead of sending them to the stack. + * + * If a Tx packet has been waiting for more than 2 seconds, it is not possible + * to correctly extend the timestamp using the cached PHC time. It is + * extremely unlikely that a packet will ever take this long to timestamp. If + * we detect a Tx timestamp request that has waited for this long we assume + * the packet will never be sent by hardware and discard it without reading + * the timestamp register. */ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) { struct ice_ptp_port *ptp_port; - bool ts_handled = true; + bool more_timestamps; struct ice_pf *pf; + struct ice_hw *hw; + u64 tstamp_ready; + int err; u8 idx; if (!tx->init) @@ -648,44 +688,86 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) ptp_port = container_of(tx, struct ice_ptp_port, tx); pf = ptp_port_to_pf(ptp_port); + hw = &pf->hw; + + /* Read the Tx ready status first */ + err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); + if (err) + return false; for_each_set_bit(idx, tx->in_use, tx->len) { struct skb_shared_hwtstamps shhwtstamps = {}; - u8 phy_idx = idx + tx->quad_offset; - u64 raw_tstamp, tstamp; + u8 phy_idx = idx + tx->offset; + u64 raw_tstamp = 0, tstamp; + bool drop_ts = false; struct sk_buff *skb; - int err; + + /* Drop packets which have waited for more than 2 seconds */ + if (time_is_before_jiffies(tx->tstamps[idx].start + 2 * HZ)) { + drop_ts = true; + + /* Count the number of Tx timestamps that timed out */ + pf->ptp.tx_hwtstamp_timeouts++; + } + + /* Only read a timestamp from the PHY if its marked as ready + * by the tstamp_ready register. This avoids unnecessary + * reading of timestamps which are not yet valid. This is + * important as we must read all timestamps which are valid + * and only timestamps which are valid during each interrupt. + * If we do not, the hardware logic for generating a new + * interrupt can get stuck on some devices. + */ + if (!(tstamp_ready & BIT_ULL(phy_idx))) { + if (drop_ts) + goto skip_ts_read; + + continue; + } ice_trace(tx_tstamp_fw_req, tx->tstamps[idx].skb, idx); - err = ice_read_phy_tstamp(&pf->hw, tx->quad, phy_idx, - &raw_tstamp); + err = ice_read_phy_tstamp(hw, tx->block, phy_idx, &raw_tstamp); if (err) continue; ice_trace(tx_tstamp_fw_done, tx->tstamps[idx].skb, idx); - /* Check if the timestamp is invalid or stale */ - if (!(raw_tstamp & ICE_PTP_TS_VALID) || + /* For PHYs which don't implement a proper timestamp ready + * bitmap, verify that the timestamp value is different + * from the last cached timestamp. If it is not, skip this for + * now assuming it hasn't yet been captured by hardware. + */ + if (!drop_ts && tx->verify_cached && raw_tstamp == tx->tstamps[idx].cached_tstamp) continue; - /* The timestamp is valid, so we'll go ahead and clear this - * index and then send the timestamp up to the stack. 
- */ + /* Discard any timestamp value without the valid bit set */ + if (!(raw_tstamp & ICE_PTP_TS_VALID)) + drop_ts = true; + +skip_ts_read: spin_lock(&tx->lock); - tx->tstamps[idx].cached_tstamp = raw_tstamp; + if (tx->verify_cached && raw_tstamp) + tx->tstamps[idx].cached_tstamp = raw_tstamp; clear_bit(idx, tx->in_use); skb = tx->tstamps[idx].skb; tx->tstamps[idx].skb = NULL; + if (test_and_clear_bit(idx, tx->stale)) + drop_ts = true; spin_unlock(&tx->lock); - /* it's (unlikely but) possible we raced with the cleanup - * thread for discarding old timestamp requests. + /* It is unlikely but possible that the SKB will have been + * flushed at this point due to link change or teardown. */ if (!skb) continue; + if (drop_ts) { + dev_kfree_skb_any(skb); + continue; + } + /* Extend the timestamp using cached PHC time */ tstamp = ice_ptp_extend_40b_ts(pf, raw_tstamp); if (tstamp) { @@ -701,11 +783,10 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) * poll for remaining timestamps. */ spin_lock(&tx->lock); - if (!bitmap_empty(tx->in_use, tx->len)) - ts_handled = false; + more_timestamps = tx->init && !bitmap_empty(tx->in_use, tx->len); spin_unlock(&tx->lock); - return ts_handled; + return !more_timestamps; } /** @@ -713,26 +794,33 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx) * @tx: Tx tracking structure to initialize * * Assumes that the length has already been initialized. Do not call directly, - * use the ice_ptp_init_tx_e822 or ice_ptp_init_tx_e810 instead. + * use the ice_ptp_init_tx_* instead. */ static int ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) { - tx->tstamps = kcalloc(tx->len, sizeof(*tx->tstamps), GFP_KERNEL); - if (!tx->tstamps) - return -ENOMEM; + unsigned long *in_use, *stale; + struct ice_tx_tstamp *tstamps; + + tstamps = kcalloc(tx->len, sizeof(*tstamps), GFP_KERNEL); + in_use = bitmap_zalloc(tx->len, GFP_KERNEL); + stale = bitmap_zalloc(tx->len, GFP_KERNEL); + + if (!tstamps || !in_use || !stale) { + kfree(tstamps); + bitmap_free(in_use); + bitmap_free(stale); - tx->in_use = bitmap_zalloc(tx->len, GFP_KERNEL); - if (!tx->in_use) { - kfree(tx->tstamps); - tx->tstamps = NULL; return -ENOMEM; } - spin_lock_init(&tx->lock); - + tx->tstamps = tstamps; + tx->in_use = in_use; + tx->stale = stale; tx->init = 1; + spin_lock_init(&tx->lock); + return 0; } @@ -740,31 +828,71 @@ ice_ptp_alloc_tx_tracker(struct ice_ptp_tx *tx) * ice_ptp_flush_tx_tracker - Flush any remaining timestamps from the tracker * @pf: Board private structure * @tx: the tracker to flush + * + * Called during teardown when a Tx tracker is being removed. */ static void ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { + struct ice_hw *hw = &pf->hw; + u64 tstamp_ready; + int err; u8 idx; - for (idx = 0; idx < tx->len; idx++) { - u8 phy_idx = idx + tx->quad_offset; + err = ice_get_phy_tx_tstamp_ready(hw, tx->block, &tstamp_ready); + if (err) { + dev_dbg(ice_pf_to_dev(pf), "Failed to get the Tx tstamp ready bitmap for block %u, err %d\n", + tx->block, err); + + /* If we fail to read the Tx timestamp ready bitmap just + * skip clearing the PHY timestamps. + */ + tstamp_ready = 0; + } + + for_each_set_bit(idx, tx->in_use, tx->len) { + u8 phy_idx = idx + tx->offset; + struct sk_buff *skb; + + /* In case this timestamp is ready, we need to clear it. 
*/ + if (!hw->reset_ongoing && (tstamp_ready & BIT_ULL(phy_idx))) + ice_clear_phy_tstamp(hw, tx->block, phy_idx); spin_lock(&tx->lock); - if (tx->tstamps[idx].skb) { - dev_kfree_skb_any(tx->tstamps[idx].skb); - tx->tstamps[idx].skb = NULL; - pf->ptp.tx_hwtstamp_flushed++; - } + skb = tx->tstamps[idx].skb; + tx->tstamps[idx].skb = NULL; clear_bit(idx, tx->in_use); + clear_bit(idx, tx->stale); spin_unlock(&tx->lock); - /* Clear any potential residual timestamp in the PHY block */ - if (!pf->hw.reset_ongoing) - ice_clear_phy_tstamp(&pf->hw, tx->quad, phy_idx); + /* Count the number of Tx timestamps flushed */ + pf->ptp.tx_hwtstamp_flushed++; + + /* Free the SKB after we've cleared the bit */ + dev_kfree_skb_any(skb); } } /** + * ice_ptp_mark_tx_tracker_stale - Mark unfinished timestamps as stale + * @tx: the tracker to mark + * + * Mark currently outstanding Tx timestamps as stale. This prevents sending + * their timestamp value to the stack. This is required to prevent extending + * the 40bit hardware timestamp incorrectly. + * + * This should be called when the PTP clock is modified such as after a set + * time request. + */ +static void +ice_ptp_mark_tx_tracker_stale(struct ice_ptp_tx *tx) +{ + spin_lock(&tx->lock); + bitmap_or(tx->stale, tx->stale, tx->in_use, tx->len); + spin_unlock(&tx->lock); +} + +/** * ice_ptp_release_tx_tracker - Release allocated memory for Tx tracker * @pf: Board private structure * @tx: Tx tracking structure to release @@ -774,7 +902,12 @@ ice_ptp_flush_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) static void ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) { + spin_lock(&tx->lock); tx->init = 0; + spin_unlock(&tx->lock); + + /* wait for potentially outstanding interrupt to complete */ + synchronize_irq(pf->msix_entries[pf->oicr_idx].vector); ice_ptp_flush_tx_tracker(pf, tx); @@ -784,6 +917,9 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) bitmap_free(tx->in_use); tx->in_use = NULL; + bitmap_free(tx->stale); + tx->stale = NULL; + tx->len = 0; } @@ -801,9 +937,10 @@ ice_ptp_release_tx_tracker(struct ice_pf *pf, struct ice_ptp_tx *tx) static int ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) { - tx->quad = port / ICE_PORTS_PER_QUAD; - tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT; - tx->len = INDEX_PER_PORT; + tx->block = port / ICE_PORTS_PER_QUAD; + tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E822; + tx->len = INDEX_PER_PORT_E822; + tx->verify_cached = 0; return ice_ptp_alloc_tx_tracker(tx); } @@ -819,59 +956,19 @@ ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port) static int ice_ptp_init_tx_e810(struct ice_pf *pf, struct ice_ptp_tx *tx) { - tx->quad = pf->hw.port_info->lport; - tx->quad_offset = 0; - tx->len = INDEX_PER_QUAD; + tx->block = pf->hw.port_info->lport; + tx->offset = 0; + tx->len = INDEX_PER_PORT_E810; + /* The E810 PHY does not provide a timestamp ready bitmap. Instead, + * verify new timestamps against cached copy of the last read + * timestamp. + */ + tx->verify_cached = 1; return ice_ptp_alloc_tx_tracker(tx); } /** - * ice_ptp_tx_tstamp_cleanup - Cleanup old timestamp requests that got dropped - * @pf: pointer to the PF struct - * @tx: PTP Tx tracker to clean up - * - * Loop through the Tx timestamp requests and see if any of them have been - * waiting for a long time. Discard any SKBs that have been waiting for more - * than 2 seconds. 
This is long enough to be reasonably sure that the - * timestamp will never be captured. This might happen if the packet gets - * discarded before it reaches the PHY timestamping block. - */ -static void ice_ptp_tx_tstamp_cleanup(struct ice_pf *pf, struct ice_ptp_tx *tx) -{ - struct ice_hw *hw = &pf->hw; - u8 idx; - - if (!tx->init) - return; - - for_each_set_bit(idx, tx->in_use, tx->len) { - struct sk_buff *skb; - u64 raw_tstamp; - - /* Check if this SKB has been waiting for too long */ - if (time_is_after_jiffies(tx->tstamps[idx].start + 2 * HZ)) - continue; - - /* Read tstamp to be able to use this register again */ - ice_read_phy_tstamp(hw, tx->quad, idx + tx->quad_offset, - &raw_tstamp); - - spin_lock(&tx->lock); - skb = tx->tstamps[idx].skb; - tx->tstamps[idx].skb = NULL; - clear_bit(idx, tx->in_use); - spin_unlock(&tx->lock); - - /* Count the number of Tx timestamps which have timed out */ - pf->ptp.tx_hwtstamp_timeouts++; - - /* Free the SKB after we've cleared the bit */ - dev_kfree_skb_any(skb); - } -} - -/** * ice_ptp_update_cached_phctime - Update the cached PHC time values * @pf: Board specific private structure * @@ -941,20 +1038,13 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf) * @pf: Board specific private structure * * This function must be called when the cached PHC time is no longer valid, - * such as after a time adjustment. It discards any outstanding Tx timestamps, - * and updates the cached PHC time for both the PF and Rx rings. If updating - * the PHC time cannot be done immediately, a warning message is logged and - * the work item is scheduled. - * - * These steps are required in order to ensure that we do not accidentally - * report a timestamp extended by the wrong PHC cached copy. Note that we - * do not directly update the cached timestamp here because it is possible - * this might produce an error when ICE_CFG_BUSY is set. If this occurred, we - * would have to try again. During that time window, timestamps might be - * requested and returned with an invalid extension. Thus, on failure to - * immediately update the cached PHC time we would need to zero the value - * anyways. For this reason, we just zero the value immediately and queue the - * update work item. + * such as after a time adjustment. It marks any currently outstanding Tx + * timestamps as stale and updates the cached PHC time for both the PF and Rx + * rings. + * + * If updating the PHC time cannot be done immediately, a warning message is + * logged and the work item is scheduled immediately to minimize the window + * with a wrong cached timestamp. */ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) { @@ -978,8 +1068,12 @@ static void ice_ptp_reset_cached_phctime(struct ice_pf *pf) msecs_to_jiffies(10)); } - /* Flush any outstanding Tx timestamps */ - ice_ptp_flush_tx_tracker(pf, &pf->ptp.port.tx); + /* Mark any outstanding timestamps as stale, since they might have + * been captured in hardware before the time update. This could lead + * to us extending them with the wrong cached value resulting in + * incorrect timestamp values. 
+ */ + ice_ptp_mark_tx_tracker_stale(&pf->ptp.port.tx); } /** @@ -1060,19 +1154,6 @@ static u64 ice_base_incval(struct ice_pf *pf) } /** - * ice_ptp_reset_ts_memory_quad - Reset timestamp memory for one quad - * @pf: The PF private data structure - * @quad: The quad (0-4) - */ -static void ice_ptp_reset_ts_memory_quad(struct ice_pf *pf, int quad) -{ - struct ice_hw *hw = &pf->hw; - - ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M); - ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M); -} - -/** * ice_ptp_check_tx_fifo - Check whether Tx FIFO is in an OK state * @port: PTP port for which Tx FIFO is checked */ @@ -1124,7 +1205,7 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) dev_dbg(ice_pf_to_dev(pf), "Port %d Tx FIFO still not empty; resetting quad %d\n", port->port_num, quad); - ice_ptp_reset_ts_memory_quad(pf, quad); + ice_ptp_reset_ts_memory_quad_e822(hw, quad); port->tx_fifo_busy_cnt = FIFO_OK; return 0; } @@ -1133,130 +1214,49 @@ static int ice_ptp_check_tx_fifo(struct ice_ptp_port *port) } /** - * ice_ptp_check_tx_offset_valid - Check if the Tx PHY offset is valid - * @port: the PTP port to check - * - * Checks whether the Tx offset for the PHY associated with this port is - * valid. Returns 0 if the offset is valid, and a non-zero error code if it is - * not. - */ -static int ice_ptp_check_tx_offset_valid(struct ice_ptp_port *port) -{ - struct ice_pf *pf = ptp_port_to_pf(port); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - u32 val; - int err; - - err = ice_ptp_check_tx_fifo(port); - if (err) - return err; - - err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_TX_OV_STATUS, - &val); - if (err) { - dev_err(dev, "Failed to read TX_OV_STATUS for port %d, err %d\n", - port->port_num, err); - return -EAGAIN; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_check_rx_offset_valid - Check if the Rx PHY offset is valid - * @port: the PTP port to check - * - * Checks whether the Rx offset for the PHY associated with this port is - * valid. Returns 0 if the offset is valid, and a non-zero error code if it is - * not. - */ -static int ice_ptp_check_rx_offset_valid(struct ice_ptp_port *port) -{ - struct ice_pf *pf = ptp_port_to_pf(port); - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - int err; - u32 val; - - err = ice_read_phy_reg_e822(hw, port->port_num, P_REG_RX_OV_STATUS, - &val); - if (err) { - dev_err(dev, "Failed to read RX_OV_STATUS for port %d, err %d\n", - port->port_num, err); - return err; - } - - if (!(val & P_REG_RX_OV_STATUS_OV_M)) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_check_offset_valid - Check port offset valid bit - * @port: Port for which offset valid bit is checked - * - * Returns 0 if both Tx and Rx offset are valid, and -EAGAIN if one of the - * offset is not ready. - */ -static int ice_ptp_check_offset_valid(struct ice_ptp_port *port) -{ - int tx_err, rx_err; - - /* always check both Tx and Rx offset validity */ - tx_err = ice_ptp_check_tx_offset_valid(port); - rx_err = ice_ptp_check_rx_offset_valid(port); - - if (tx_err || rx_err) - return -EAGAIN; - - return 0; -} - -/** - * ice_ptp_wait_for_offset_valid - Check for valid Tx and Rx offsets + * ice_ptp_wait_for_offsets - Check for valid Tx and Rx offsets * @work: Pointer to the kthread_work structure for this task * - * Check whether both the Tx and Rx offsets are valid for enabling the vernier - * calibration. 
+ * Check whether hardware has completed measuring the Tx and Rx offset values + * used to configure and enable vernier timestamp calibration. + * + * Once the offset in either direction is measured, configure the associated + * registers with the calibrated offset values and enable timestamping. The Tx + * and Rx directions are configured independently as soon as their associated + * offsets are known. * - * Once we have valid offsets from hardware, update the total Tx and Rx - * offsets, and exit bypass mode. This enables more precise timestamps using - * the extra data measured during the vernier calibration process. + * This function reschedules itself until both Tx and Rx calibration have + * completed. */ -static void ice_ptp_wait_for_offset_valid(struct kthread_work *work) +static void ice_ptp_wait_for_offsets(struct kthread_work *work) { struct ice_ptp_port *port; - int err; - struct device *dev; struct ice_pf *pf; struct ice_hw *hw; + int tx_err; + int rx_err; port = container_of(work, struct ice_ptp_port, ov_work.work); pf = ptp_port_to_pf(port); hw = &pf->hw; - dev = ice_pf_to_dev(pf); - - if (ice_is_reset_in_progress(pf->state)) - return; - if (ice_ptp_check_offset_valid(port)) { - /* Offsets not ready yet, try again later */ + if (ice_is_reset_in_progress(pf->state)) { + /* wait for device driver to complete reset */ kthread_queue_delayed_work(pf->ptp.kworker, &port->ov_work, msecs_to_jiffies(100)); return; } - /* Offsets are valid, so it is safe to exit bypass mode */ - err = ice_phy_exit_bypass_e822(hw, port->port_num); - if (err) { - dev_warn(dev, "Failed to exit bypass mode for PHY port %u, err %d\n", - port->port_num, err); + tx_err = ice_ptp_check_tx_fifo(port); + if (!tx_err) + tx_err = ice_phy_cfg_tx_offset_e822(hw, port->port_num); + rx_err = ice_phy_cfg_rx_offset_e822(hw, port->port_num); + if (tx_err || rx_err) { + /* Tx and/or Rx offset not yet configured, try again later */ + kthread_queue_delayed_work(pf->ptp.kworker, + &port->ov_work, + msecs_to_jiffies(100)); return; } } @@ -1317,16 +1317,20 @@ ice_ptp_port_phy_restart(struct ice_ptp_port *ptp_port) kthread_cancel_delayed_work_sync(&ptp_port->ov_work); /* temporarily disable Tx timestamps while calibrating PHY offset */ + spin_lock(&ptp_port->tx.lock); ptp_port->tx.calibrating = true; + spin_unlock(&ptp_port->tx.lock); ptp_port->tx_fifo_busy_cnt = 0; - /* Start the PHY timer in bypass mode */ - err = ice_start_phy_timer_e822(hw, port, true); + /* Start the PHY timer in Vernier mode */ + err = ice_start_phy_timer_e822(hw, port); if (err) goto out_unlock; /* Enable Tx timestamps right away */ + spin_lock(&ptp_port->tx.lock); ptp_port->tx.calibrating = false; + spin_unlock(&ptp_port->tx.lock); kthread_queue_delayed_work(pf->ptp.kworker, &ptp_port->ov_work, 0); @@ -1341,45 +1345,33 @@ out_unlock: } /** - * ice_ptp_link_change - Set or clear port registers for timestamping + * ice_ptp_link_change - Reconfigure PTP after link status change * @pf: Board private structure * @port: Port for which the PHY start is set * @linkup: Link is up or down */ -int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) { struct ice_ptp_port *ptp_port; - if (!test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) - return 0; + if (!test_bit(ICE_FLAG_PTP, pf->flags)) + return; - if (port >= ICE_NUM_EXTERNAL_PORTS) - return -EINVAL; + if (WARN_ON_ONCE(port >= ICE_NUM_EXTERNAL_PORTS)) + return; ptp_port = &pf->ptp.port; - if (ptp_port->port_num != port) - return -EINVAL; + 
if (WARN_ON_ONCE(ptp_port->port_num != port)) + return; - /* Update cached link err for this port immediately */ + /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; - if (!test_bit(ICE_FLAG_PTP, pf->flags)) - /* PTP is not setup */ - return -EAGAIN; - - return ice_ptp_port_phy_restart(ptp_port); -} - -/** - * ice_ptp_reset_ts_memory - Reset timestamp memory for all quads - * @pf: The PF private data structure - */ -static void ice_ptp_reset_ts_memory(struct ice_pf *pf) -{ - int quad; + /* E810 devices do not need to reconfigure the PHY */ + if (ice_is_e810(&pf->hw)) + return; - quad = pf->hw.port_info->lport / ICE_PORTS_PER_QUAD; - ice_ptp_reset_ts_memory_quad(pf, quad); + ice_ptp_port_phy_restart(ptp_port); } /** @@ -1397,7 +1389,7 @@ static int ice_ptp_tx_ena_intr(struct ice_pf *pf, bool ena, u32 threshold) int quad; u32 val; - ice_ptp_reset_ts_memory(pf); + ice_ptp_reset_ts_memory(hw); for (quad = 0; quad < ICE_MAX_QUAD; quad++) { err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG, @@ -2332,11 +2324,14 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) { u8 idx; - /* Check if this tracker is initialized */ - if (!tx->init || tx->calibrating) + spin_lock(&tx->lock); + + /* Check that this tracker is accepting new timestamp requests */ + if (!ice_ptp_is_tx_tracker_up(tx)) { + spin_unlock(&tx->lock); return -1; + } - spin_lock(&tx->lock); /* Find and set the first available index */ idx = find_first_zero_bit(tx->in_use, tx->len); if (idx < tx->len) { @@ -2345,6 +2340,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) * requests. */ set_bit(idx, tx->in_use); + clear_bit(idx, tx->stale); tx->tstamps[idx].start = jiffies; tx->tstamps[idx].skb = skb_get(skb); skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; @@ -2359,7 +2355,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) if (idx >= tx->len) return -1; else - return idx + tx->quad_offset; + return idx + tx->offset; } /** @@ -2384,8 +2380,6 @@ static void ice_ptp_periodic_work(struct kthread_work *work) err = ice_ptp_update_cached_phctime(pf); - ice_ptp_tx_tstamp_cleanup(pf, &pf->ptp.port.tx); - /* Run twice a second or reschedule if phc update failed */ kthread_queue_delayed_work(ptp->kworker, &ptp->work, msecs_to_jiffies(err ? 10 : 500)); @@ -2462,7 +2456,7 @@ pfr: err = ice_ptp_init_tx_e810(pf, &ptp->port.tx); } else { kthread_init_delayed_work(&ptp->port.ov_work, - ice_ptp_wait_for_offset_valid); + ice_ptp_wait_for_offsets); err = ice_ptp_init_tx_e822(pf, &ptp->port.tx, ptp->port.port_num); } @@ -2625,7 +2619,7 @@ static int ice_ptp_init_port(struct ice_pf *pf, struct ice_ptp_port *ptp_port) return ice_ptp_init_tx_e810(pf, &ptp_port->tx); kthread_init_delayed_work(&ptp_port->ov_work, - ice_ptp_wait_for_offset_valid); + ice_ptp_wait_for_offsets); return ice_ptp_init_tx_e822(pf, &ptp_port->tx, ptp_port->port_num); } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 028349295b71..9cda2f43e0e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -93,9 +93,14 @@ struct ice_perout_channel { * we discard old requests that were not fulfilled within a 2 second time * window. * Timestamp values in the PHY are read only and do not get cleared except at - * hardware reset or when a new timestamp value is captured. 
The cached_tstamp - * field is used to detect the case where a new timestamp has not yet been - * captured, ensuring that we avoid sending stale timestamp data to the stack. + * hardware reset or when a new timestamp value is captured. + * + * Some PHY types do not provide a "ready" bitmap indicating which timestamp + * indexes are valid. In these cases, we use a cached_tstamp to keep track of + * the last timestamp we read for a given index. If the current timestamp + * value is the same as the cached value, we assume a new timestamp hasn't + * been captured. This avoids reporting stale timestamps to the stack. This is + * only done if the verify_cached flag is set in ice_ptp_tx structure. */ struct ice_tx_tstamp { struct sk_buff *skb; @@ -105,30 +110,35 @@ struct ice_tx_tstamp { /** * struct ice_ptp_tx - Tracking structure for all Tx timestamp requests on a port - * @lock: lock to prevent concurrent write to in_use bitmap + * @lock: lock to prevent concurrent access to fields of this struct * @tstamps: array of len to store outstanding requests * @in_use: bitmap of len to indicate which slots are in use - * @quad: which quad the timestamps are captured in - * @quad_offset: offset into timestamp block of the quad to get the real index + * @stale: bitmap of len to indicate slots which have stale timestamps + * @block: which memory block (quad or port) the timestamps are captured in + * @offset: offset into timestamp block to get the real index * @len: length of the tstamps and in_use fields. * @init: if true, the tracker is initialized; * @calibrating: if true, the PHY is calibrating the Tx offset. During this * window, timestamps are temporarily disabled. + * @verify_cached: if true, verify new timestamp differs from last read value */ struct ice_ptp_tx { spinlock_t lock; /* lock protecting in_use bitmap */ struct ice_tx_tstamp *tstamps; unsigned long *in_use; - u8 quad; - u8 quad_offset; + unsigned long *stale; + u8 block; + u8 offset; u8 len; - u8 init; - u8 calibrating; + u8 init : 1; + u8 calibrating : 1; + u8 verify_cached : 1; }; /* Quad and port information for initializing timestamp blocks */ #define INDEX_PER_QUAD 64 -#define INDEX_PER_PORT (INDEX_PER_QUAD / ICE_PORTS_PER_QUAD) +#define INDEX_PER_PORT_E822 16 +#define INDEX_PER_PORT_E810 64 /** * struct ice_ptp_port - data used to initialize an external port for PTP @@ -256,7 +266,7 @@ void ice_ptp_reset(struct ice_pf *pf); void ice_ptp_prepare_for_reset(struct ice_pf *pf); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); -int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); +void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup); #else /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ static inline int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) { @@ -291,7 +301,8 @@ static inline void ice_ptp_reset(struct ice_pf *pf) { } static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } -static inline int ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) -{ return 0; } +static inline void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) +{ +} #endif /* IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ #endif /* _ICE_PTP_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 1f8dd50db524..a38614d21ea8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ 
b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -656,6 +656,32 @@ ice_clear_phy_tstamp_e822(struct ice_hw *hw, u8 quad, u8 idx) } /** + * ice_ptp_reset_ts_memory_quad_e822 - Clear all timestamps from the quad block + * @hw: pointer to the HW struct + * @quad: the quad to read from + * + * Clear all timestamps from the PHY quad block that is shared between the + * internal PHYs on the E822 devices. + */ +void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad) +{ + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, Q_REG_TS_CTRL_M); + ice_write_quad_reg_e822(hw, quad, Q_REG_TS_CTRL, ~(u32)Q_REG_TS_CTRL_M); +} + +/** + * ice_ptp_reset_ts_memory_e822 - Clear all timestamps from all quad blocks + * @hw: pointer to the HW struct + */ +static void ice_ptp_reset_ts_memory_e822(struct ice_hw *hw) +{ + unsigned int quad; + + for (quad = 0; quad < ICE_MAX_QUAD; quad++) + ice_ptp_reset_ts_memory_quad_e822(hw, quad); +} + +/** * ice_read_cgu_reg_e822 - Read a CGU register * @hw: pointer to the HW struct * @addr: Register address to read @@ -1715,21 +1741,48 @@ ice_calc_fixed_tx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * adjust Tx timestamps by. This is calculated by combining some known static * latency along with the Vernier offset computations done by hardware. * - * This function must be called only after the offset registers are valid, - * i.e. after the Vernier calibration wait has passed, to ensure that the PHY - * has measured the offset. + * This function will not return successfully until the Tx offset calculations + * have been completed, which requires waiting until at least one packet has + * been transmitted by the device. It is safe to call this function + * periodically until calibration succeeds, as it will only program the offset + * once. * * To avoid overflow, when calculating the offset based on the known static * latency values, we use measurements in 1/100th of a nanosecond, and divide * the TUs per second up front. This avoids overflow while allowing * calculation of the adjustment using integer arithmetic. + * + * Returns zero on success, -EBUSY if the hardware vernier offset + * calibration has not completed, or another error code on failure. */ -static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) +int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) { enum ice_ptp_link_spd link_spd; enum ice_ptp_fec_mode fec_mode; u64 total_offset, val; int err; + u32 reg; + + /* Nothing to do if we've already programmed the offset */ + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OR, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OR for port %u, err %d\n", + port, err); + return err; + } + + if (reg) + return 0; + + err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(reg & P_REG_TX_OV_STATUS_OV_M)) + return -EBUSY; err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); if (err) @@ -1783,46 +1836,8 @@ static int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port) if (err) return err; - return 0; -} - -/** - * ice_phy_cfg_fixed_tx_offset_e822 - Configure Tx offset for bypass mode - * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * Calculate and program the fixed Tx offset, and indicate that the offset is - * ready. This can be used when operating in bypass mode.
- */ -static int -ice_phy_cfg_fixed_tx_offset_e822(struct ice_hw *hw, u8 port) -{ - enum ice_ptp_link_spd link_spd; - enum ice_ptp_fec_mode fec_mode; - u64 total_offset; - int err; - - err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); - if (err) - return err; - - total_offset = ice_calc_fixed_tx_offset_e822(hw, link_spd); - - /* Program the fixed Tx offset into the P_REG_TOTAL_TX_OFFSET_L - * register, then indicate that the Tx offset is ready. After this, - * timestamps will be enabled. - * - * Note that this skips including the more precise offsets generated - * by the Vernier calibration. - */ - err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_TX_OFFSET_L, - total_offset); - if (err) - return err; - - err = ice_write_phy_reg_e822(hw, port, P_REG_TX_OR, 1); - if (err) - return err; + dev_info(ice_hw_to_dev(hw), "Port=%d Tx vernier offset calibration complete\n", + port); return 0; } @@ -2026,6 +2041,11 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * measurements taken in hardware with some data about known fixed delay as * well as adjusting for multi-lane alignment delay. * + * This function will not return successfully until the Rx offset calculations + * have been completed, which requires waiting until at least one packet has + * been received by the device. It is safe to call this function periodically + * until calibration succeeds, as it will only program the offset once. + * * This function must be called only after the offset registers are valid, * i.e. after the Vernier calibration wait has passed, to ensure that the PHY * has measured the offset. @@ -2034,13 +2054,38 @@ ice_calc_fixed_rx_offset_e822(struct ice_hw *hw, enum ice_ptp_link_spd link_spd) * latency values, we use measurements in 1/100th of a nanosecond, and divide * the TUs per second up front. This avoids overflow while allowing * calculation of the adjustment using integer arithmetic. + * + * Returns zero on success, -EBUSY if the hardware vernier offset + * calibration has not completed, or another error code on failure. */ -static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) +int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) { enum ice_ptp_link_spd link_spd; enum ice_ptp_fec_mode fec_mode; u64 total_offset, pmd, val; int err; + u32 reg; + + /* Nothing to do if we've already programmed the offset */ + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OR, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OR for port %u, err %d\n", + port, err); + return err; + } + + if (reg) + return 0; + + err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &reg); + if (err) { + ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", + port, err); + return err; + } + + if (!(reg & P_REG_RX_OV_STATUS_OV_M)) + return -EBUSY; err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); if (err) @@ -2101,46 +2146,8 @@ static int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port) if (err) return err; - return 0; -} - -/** - * ice_phy_cfg_fixed_rx_offset_e822 - Configure fixed Rx offset for bypass mode - * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * Calculate and program the fixed Rx offset, and indicate that the offset is - * ready. This can be used when operating in bypass mode.
- */ -static int -ice_phy_cfg_fixed_rx_offset_e822(struct ice_hw *hw, u8 port) -{ - enum ice_ptp_link_spd link_spd; - enum ice_ptp_fec_mode fec_mode; - u64 total_offset; - int err; - - err = ice_phy_get_speed_and_fec_e822(hw, port, &link_spd, &fec_mode); - if (err) - return err; - - total_offset = ice_calc_fixed_rx_offset_e822(hw, link_spd); - - /* Program the fixed Rx offset into the P_REG_TOTAL_RX_OFFSET_L - * register, then indicate that the Rx offset is ready. After this, - * timestamps will be enabled. - * - * Note that this skips including the more precise offsets generated - * by Vernier calibration. - */ - err = ice_write_64b_phy_reg_e822(hw, port, P_REG_TOTAL_RX_OFFSET_L, - total_offset); - if (err) - return err; - - err = ice_write_phy_reg_e822(hw, port, P_REG_RX_OR, 1); - if (err) - return err; + dev_info(ice_hw_to_dev(hw), "Port=%d Rx vernier offset calibration complete\n", + port); return 0; } @@ -2323,20 +2330,14 @@ ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset) * ice_start_phy_timer_e822 - Start the PHY clock timer * @hw: pointer to the HW struct * @port: the PHY port to start - * @bypass: if true, start the PHY in bypass mode * * Start the clock of a PHY port. This must be done as part of the flow to * re-calibrate Tx and Rx timestamping offsets whenever the clock time is * initialized or when link speed changes. * - * Bypass mode enables timestamps immediately without waiting for Vernier - * calibration to complete. Hardware will still continue taking Vernier - * measurements on Tx or Rx of packets, but they will not be applied to - * timestamps. Use ice_phy_exit_bypass_e822 to exit bypass mode once hardware - * has completed offset calculation. + * Hardware will take Vernier measurements on Tx or Rx of packets. */ -int -ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass) +int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port) { u32 lo, hi, val; u64 incval; @@ -2414,110 +2415,42 @@ ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass) if (err) return err; - if (bypass) { - val |= P_REG_PS_BYPASS_MODE_M; - /* Enter BYPASS mode, enabling timestamps immediately. */ - err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); - if (err) - return err; - - /* Program the fixed Tx offset */ - err = ice_phy_cfg_fixed_tx_offset_e822(hw, port); - if (err) - return err; - - /* Program the fixed Rx offset */ - err = ice_phy_cfg_fixed_rx_offset_e822(hw, port); - if (err) - return err; - } - ice_debug(hw, ICE_DBG_PTP, "Enabled clock on PHY port %u\n", port); return 0; } /** - * ice_phy_exit_bypass_e822 - Exit bypass mode, after vernier calculations + * ice_get_phy_tx_tstamp_ready_e822 - Read Tx memory status register * @hw: pointer to the HW struct - * @port: the PHY port to configure - * - * After hardware finishes vernier calculations for the Tx and Rx offset, this - * function can be used to exit bypass mode by updating the total Tx and Rx - * offsets, and then disabling bypass. This will enable hardware to include - * the more precise offset calibrations, increasing precision of the generated - * timestamps. + * @quad: the timestamp quad to read from + * @tstamp_ready: contents of the Tx memory status register * - * This cannot be done until hardware has measured the offsets, which requires - * waiting until at least one packet has been sent and received by the device. + * Read the Q_REG_TX_MEMORY_STATUS register indicating which timestamps in + * the PHY are ready. 
A set bit means the corresponding timestamp is valid and + * ready to be captured from the PHY timestamp block. */ -int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port) +static int +ice_get_phy_tx_tstamp_ready_e822(struct ice_hw *hw, u8 quad, u64 *tstamp_ready) { + u32 hi, lo; int err; - u32 val; - - err = ice_read_phy_reg_e822(hw, port, P_REG_TX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Tx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - - err = ice_read_phy_reg_e822(hw, port, P_REG_RX_OV_STATUS, &val); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read RX_OV_STATUS for port %u, err %d\n", - port, err); - return err; - } - - if (!(val & P_REG_TX_OV_STATUS_OV_M)) { - ice_debug(hw, ICE_DBG_PTP, "Rx offset is not yet valid for port %u\n", - port); - return -EBUSY; - } - - err = ice_phy_cfg_tx_offset_e822(hw, port); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Tx offset for port %u, err %d\n", - port, err); - return err; - } - - err = ice_phy_cfg_rx_offset_e822(hw, port); - if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to program total Rx offset for port %u, err %d\n", - port, err); - return err; - } - /* Exit bypass mode now that the offset has been updated */ - err = ice_read_phy_reg_e822(hw, port, P_REG_PS, &val); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_U, &hi); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to read P_REG_PS for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_U for quad %u, err %d\n", + quad, err); return err; } - if (!(val & P_REG_PS_BYPASS_MODE_M)) - ice_debug(hw, ICE_DBG_PTP, "Port %u not in bypass mode\n", - port); - - val &= ~P_REG_PS_BYPASS_MODE_M; - err = ice_write_phy_reg_e822(hw, port, P_REG_PS, val); + err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEMORY_STATUS_L, &lo); if (err) { - ice_debug(hw, ICE_DBG_PTP, "Failed to disable bypass for port %u, err %d\n", - port, err); + ice_debug(hw, ICE_DBG_PTP, "Failed to read TX_MEMORY_STATUS_L for quad %u, err %d\n", + quad, err); return err; } - dev_info(ice_hw_to_dev(hw), "Exiting bypass mode on PHY port %u\n", - port); + *tstamp_ready = (u64)hi << 32 | (u64)lo; return 0; } @@ -3196,6 +3129,22 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) return ice_clear_phy_tstamp_e822(hw, block, idx); } +/** + * ice_get_phy_tx_tstamp_ready_e810 - Read Tx memory status register + * @hw: pointer to the HW struct + * @port: the PHY port to read + * @tstamp_ready: contents of the Tx memory status register + * + * E810 devices do not use a Tx memory status register. Instead simply + * indicate that all timestamps are currently ready. 
+ */ +static int +ice_get_phy_tx_tstamp_ready_e810(struct ice_hw *hw, u8 port, u64 *tstamp_ready) +{ + *tstamp_ready = 0xFFFFFFFFFFFFFFFF; + return 0; +} + /* E810T SMA functions * * The following functions operate specifically on E810T hardware and are used @@ -3379,6 +3328,18 @@ bool ice_is_pca9575_present(struct ice_hw *hw) } /** + * ice_ptp_reset_ts_memory - Reset timestamp memory for all blocks + * @hw: pointer to the HW struct + */ +void ice_ptp_reset_ts_memory(struct ice_hw *hw) +{ + if (ice_is_e810(hw)) + return; + + ice_ptp_reset_ts_memory_e822(hw); +} + +/** * ice_ptp_init_phc - Initialize PTP hardware clock * @hw: pointer to the HW struct * @@ -3399,3 +3360,24 @@ int ice_ptp_init_phc(struct ice_hw *hw) else return ice_ptp_init_phc_e822(hw); } + +/** + * ice_get_phy_tx_tstamp_ready - Read PHY Tx memory status indication + * @hw: pointer to the HW struct + * @block: the timestamp block to check + * @tstamp_ready: storage for the PHY Tx memory status information + * + * Check the PHY for Tx timestamp memory status. This reports a 64 bit value + * which indicates which timestamps in the block may be captured. A set bit + * means the timestamp can be read. An unset bit means the timestamp is not + * ready and software should avoid reading the register. + */ +int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready) +{ + if (ice_is_e810(hw)) + return ice_get_phy_tx_tstamp_ready_e810(hw, block, + tstamp_ready); + else + return ice_get_phy_tx_tstamp_ready_e822(hw, block, + tstamp_ready); +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 2bda64c76abc..3b68cb91bd81 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -133,7 +133,9 @@ int ice_ptp_write_incval_locked(struct ice_hw *hw, u64 incval); int ice_ptp_adj_clock(struct ice_hw *hw, s32 adj); int ice_read_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx, u64 *tstamp); int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx); +void ice_ptp_reset_ts_memory(struct ice_hw *hw); int ice_ptp_init_phc(struct ice_hw *hw); +int ice_get_phy_tx_tstamp_ready(struct ice_hw *hw, u8 block, u64 *tstamp_ready); /* E822 family functions */ int ice_read_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 *val); @@ -141,6 +143,7 @@ int ice_write_phy_reg_e822(struct ice_hw *hw, u8 port, u16 offset, u32 val); int ice_read_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 *val); int ice_write_quad_reg_e822(struct ice_hw *hw, u8 quad, u16 offset, u32 val); int ice_ptp_prep_port_adj_e822(struct ice_hw *hw, u8 port, s64 time); +void ice_ptp_reset_ts_memory_quad_e822(struct ice_hw *hw, u8 quad); /** * ice_e822_time_ref - Get the current TIME_REF from capabilities @@ -184,8 +187,9 @@ static inline u64 ice_e822_pps_delay(enum ice_time_ref_freq time_ref) /* E822 Vernier calibration functions */ int ice_stop_phy_timer_e822(struct ice_hw *hw, u8 port, bool soft_reset); -int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port, bool bypass); -int ice_phy_exit_bypass_e822(struct ice_hw *hw, u8 port); +int ice_start_phy_timer_e822(struct ice_hw *hw, u8 port); +int ice_phy_cfg_tx_offset_e822(struct ice_hw *hw, u8 port); +int ice_phy_cfg_rx_offset_e822(struct ice_hw *hw, u8 port); /* E810 family functions */ int ice_ptp_init_phy_e810(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 36acec89d3d4..7d60da1b7bf4 100644 --- 
a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1413,6 +1413,8 @@ static int igb_intr_test(struct igb_adapter *adapter, u64 *data) *data = 1; return -1; } + wr32(E1000_IVAR_MISC, E1000_IVAR_VALID << 8); + wr32(E1000_EIMS, BIT(0)); } else if (adapter->flags & IGB_FLAG_HAS_MSI) { shared_int = false; if (request_irq(irq, diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index c2cb98d24f5c..f8925cac61e4 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -4270,7 +4270,7 @@ static void mvneta_percpu_elect(struct mvneta_port *pp) /* Use the cpu associated to the rxq when it is online, in all * the other cases, use the cpu 0 which can't be offline. */ - if (cpu_online(pp->rxq_def)) + if (pp->rxq_def < nr_cpu_ids && cpu_online(pp->rxq_def)) elected_cpu = pp->rxq_def; max_cpu = num_present_cpus(); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index c8724bfa86b0..b2b71fe80d61 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -64,6 +64,7 @@ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool en); static const struct pci_device_id cgx_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_OCTEONTX2_CGX) }, { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RPM) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10KB_RPM) }, { 0, } /* end of table */ }; @@ -73,12 +74,13 @@ static bool is_dev_rpm(void *cgxd) { struct cgx *cgx = cgxd; - return (cgx->pdev->device == PCI_DEVID_CN10K_RPM); + return (cgx->pdev->device == PCI_DEVID_CN10K_RPM) || + (cgx->pdev->device == PCI_DEVID_CN10KB_RPM); } bool is_lmac_valid(struct cgx *cgx, int lmac_id) { - if (!cgx || lmac_id < 0 || lmac_id >= MAX_LMAC_PER_CGX) + if (!cgx || lmac_id < 0 || lmac_id >= cgx->max_lmac_per_mac) return false; return test_bit(lmac_id, &cgx->lmac_bmap); } @@ -90,7 +92,7 @@ static int get_sequence_id_of_lmac(struct cgx *cgx, int lmac_id) { int tmp, id = 0; - for_each_set_bit(tmp, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(tmp, &cgx->lmac_bmap, cgx->max_lmac_per_mac) { if (tmp == lmac_id) break; id++; @@ -121,7 +123,7 @@ u64 cgx_read(struct cgx *cgx, u64 lmac, u64 offset) struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx) { - if (!cgx || lmac_id >= MAX_LMAC_PER_CGX) + if (!cgx || lmac_id >= cgx->max_lmac_per_mac) return NULL; return cgx->lmac_idmap[lmac_id]; @@ -485,7 +487,7 @@ int cgx_set_pkind(void *cgxd, u8 lmac_id, int pkind) if (!is_lmac_valid(cgx, lmac_id)) return -ENODEV; - cgx_write(cgx, lmac_id, CGXX_CMRX_RX_ID_MAP, (pkind & 0x3F)); + cgx_write(cgx, lmac_id, cgx->mac_ops->rxid_map_offset, (pkind & 0x3F)); return 0; } @@ -740,6 +742,10 @@ int cgx_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp) if (!cgx || lmac_id >= cgx->lmac_count) return -ENODEV; + + if (cgx->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_NONE) + return 0; + fec_stats_count = cgx_set_fec_stats_count(&cgx->lmac_idmap[lmac_id]->link_info); if (cgx->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) { @@ -1224,7 +1230,7 @@ static inline void link_status_user_format(u64 lstat, linfo->speed = cgx_speed_mbps[FIELD_GET(RESP_LINKSTAT_SPEED, lstat)]; linfo->an = FIELD_GET(RESP_LINKSTAT_AN, lstat); linfo->fec = FIELD_GET(RESP_LINKSTAT_FEC, lstat); - linfo->lmac_type_id = cgx_get_lmac_type(cgx, lmac_id); + linfo->lmac_type_id = 
FIELD_GET(RESP_LINKSTAT_LMAC_TYPE, lstat); lmac_string = cgx_lmactype_string[linfo->lmac_type_id]; strncpy(linfo->lmac_type, lmac_string, LMACTYPE_STR_LEN - 1); } @@ -1395,7 +1401,7 @@ int cgx_get_fwdata_base(u64 *base) if (!cgx) return -ENXIO; - first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX); + first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac); req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FWD_BASE, req); err = cgx_fwi_cmd_generic(req, &resp, cgx, first_lmac); if (!err) @@ -1484,7 +1490,7 @@ static int cgx_fwi_link_change(struct cgx *cgx, int lmac_id, bool enable) static inline int cgx_fwi_read_version(u64 *resp, struct cgx *cgx) { - int first_lmac = find_first_bit(&cgx->lmac_bmap, MAX_LMAC_PER_CGX); + int first_lmac = find_first_bit(&cgx->lmac_bmap, cgx->max_lmac_per_mac); u64 req = 0; req = FIELD_SET(CMDREG_ID, CGX_CMD_GET_FW_VER, req); @@ -1522,7 +1528,7 @@ static void cgx_lmac_linkup_work(struct work_struct *work) int i, err; /* Do Link up for all the enabled lmacs */ - for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(i, &cgx->lmac_bmap, cgx->max_lmac_per_mac) { err = cgx_fwi_link_change(cgx, i, true); if (err) dev_info(dev, "cgx port %d:%d Link up command failed\n", @@ -1542,14 +1548,6 @@ int cgx_lmac_linkup_start(void *cgxd) return 0; } -static void cgx_lmac_get_fifolen(struct cgx *cgx) -{ - u64 cfg; - - cfg = cgx_read(cgx, 0, CGX_CONST); - cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg); -} - static int cgx_configure_interrupt(struct cgx *cgx, struct lmac *lmac, int cnt, bool req_free) { @@ -1604,17 +1602,20 @@ static int cgx_lmac_init(struct cgx *cgx) u64 lmac_list; int i, err; - cgx_lmac_get_fifolen(cgx); - - cgx->lmac_count = cgx->mac_ops->get_nr_lmacs(cgx); /* lmac_list specifies which lmacs are enabled * when bit n is set to 1, LMAC[n] is enabled */ - if (cgx->mac_ops->non_contiguous_serdes_lane) - lmac_list = cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0xFULL; + if (cgx->mac_ops->non_contiguous_serdes_lane) { + if (is_dev_rpm2(cgx)) + lmac_list = + cgx_read(cgx, 0, RPM2_CMRX_RX_LMACS) & 0xFFULL; + else + lmac_list = + cgx_read(cgx, 0, CGXX_CMRX_RX_LMACS) & 0xFULL; + } - if (cgx->lmac_count > MAX_LMAC_PER_CGX) - cgx->lmac_count = MAX_LMAC_PER_CGX; + if (cgx->lmac_count > cgx->max_lmac_per_mac) + cgx->lmac_count = cgx->max_lmac_per_mac; for (i = 0; i < cgx->lmac_count; i++) { lmac = kzalloc(sizeof(struct lmac), GFP_KERNEL); @@ -1635,7 +1636,9 @@ static int cgx_lmac_init(struct cgx *cgx) lmac->cgx = cgx; lmac->mac_to_index_bmap.max = - MAX_DMAC_ENTRIES_PER_CGX / cgx->lmac_count; + cgx->mac_ops->dmac_filter_count / + cgx->lmac_count; + err = rvu_alloc_bitmap(&lmac->mac_to_index_bmap); if (err) goto err_name_free; @@ -1692,7 +1695,7 @@ static int cgx_lmac_exit(struct cgx *cgx) } /* Free all lmac related resources */ - for_each_set_bit(i, &cgx->lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(i, &cgx->lmac_bmap, cgx->max_lmac_per_mac) { lmac = cgx->lmac_idmap[i]; if (!lmac) continue; @@ -1708,6 +1711,12 @@ static int cgx_lmac_exit(struct cgx *cgx) static void cgx_populate_features(struct cgx *cgx) { + u64 cfg; + + cfg = cgx_read(cgx, 0, CGX_CONST); + cgx->mac_ops->fifo_len = FIELD_GET(CGX_CONST_RXFIFO_SIZE, cfg); + cgx->max_lmac_per_mac = FIELD_GET(CGX_CONST_MAX_LMACS, cfg); + if (is_dev_rpm(cgx)) cgx->hw_features = (RVU_LMAC_FEAT_DMACF | RVU_MAC_RPM | RVU_LMAC_FEAT_FC | RVU_LMAC_FEAT_PTP); @@ -1716,6 +1725,15 @@ static void cgx_populate_features(struct cgx *cgx) RVU_LMAC_FEAT_PTP | RVU_LMAC_FEAT_DMACF); } +static u8 
cgx_get_rxid_mapoffset(struct cgx *cgx) +{ + if (cgx->pdev->subsystem_device == PCI_SUBSYS_DEVID_CNF10KB_RPM || + is_dev_rpm2(cgx)) + return 0x80; + else + return 0x60; +} + static struct mac_ops cgx_mac_ops = { .name = "cgx", .csr_offset = 0, @@ -1728,12 +1746,14 @@ static struct mac_ops cgx_mac_ops = { .non_contiguous_serdes_lane = false, .rx_stats_cnt = 9, .tx_stats_cnt = 18, + .dmac_filter_count = 32, .get_nr_lmacs = cgx_get_nr_lmacs, .get_lmac_type = cgx_get_lmac_type, .lmac_fifo_len = cgx_get_lmac_fifo_len, .mac_lmac_intl_lbk = cgx_lmac_internal_loopback, .mac_get_rx_stats = cgx_get_rx_stats, .mac_get_tx_stats = cgx_get_tx_stats, + .get_fec_stats = cgx_get_fec_stats, .mac_enadis_rx_pause_fwding = cgx_lmac_enadis_rx_pause_fwding, .mac_get_pause_frm_status = cgx_lmac_get_pause_frm_status, .mac_enadis_pause_frm = cgx_lmac_enadis_pause_frm, @@ -1759,11 +1779,13 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, cgx); /* Use mac_ops to get MAC specific features */ - if (pdev->device == PCI_DEVID_CN10K_RPM) - cgx->mac_ops = rpm_get_mac_ops(); + if (is_dev_rpm(cgx)) + cgx->mac_ops = rpm_get_mac_ops(cgx); else cgx->mac_ops = &cgx_mac_ops; + cgx->mac_ops->rxid_map_offset = cgx_get_rxid_mapoffset(cgx); + err = pci_enable_device(pdev); if (err) { dev_err(dev, "Failed to enable PCI device\n"); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 0b06788b8d80..fb2d37676d84 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -18,11 +18,7 @@ /* PCI BAR nos */ #define PCI_CFG_REG_BAR_NUM 0 -#define CGX_ID_MASK 0x7 -#define MAX_LMAC_PER_CGX 4 -#define MAX_DMAC_ENTRIES_PER_CGX 32 -#define CGX_FIFO_LEN 65536 /* 64K for both Rx & Tx */ -#define CGX_OFFSET(x) ((x) * MAX_LMAC_PER_CGX) +#define CGX_ID_MASK 0xF /* Registers */ #define CGXX_CMRX_CFG 0x00 @@ -56,7 +52,8 @@ #define CGXX_SCRATCH0_REG 0x1050 #define CGXX_SCRATCH1_REG 0x1058 #define CGX_CONST 0x2000 -#define CGX_CONST_RXFIFO_SIZE GENMASK_ULL(23, 0) +#define CGX_CONST_RXFIFO_SIZE GENMASK_ULL(55, 32) +#define CGX_CONST_MAX_LMACS GENMASK_ULL(31, 24) #define CGXX_SPUX_CONTROL1 0x10000 #define CGXX_SPUX_LNX_FEC_CORR_BLOCKS 0x10700 #define CGXX_SPUX_LNX_FEC_UNCORR_BLOCKS 0x10800 diff --git a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h index 52b6016789fa..39aaf0e4467d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/lmac_common.h @@ -75,6 +75,11 @@ struct mac_ops { /* RPM & CGX differs in number of Receive/transmit stats */ u8 rx_stats_cnt; u8 tx_stats_cnt; + /* Unlike CN10K which shares same CSR offset with CGX + * CNF10KB has different csr offset + */ + u64 rxid_map_offset; + u8 dmac_filter_count; /* Incase of RPM get number of lmacs from RPMX_CMR_RX_LMACS[LMAC_EXIST] * number of setbits in lmac_exist tells number of lmacs */ @@ -121,6 +126,9 @@ struct mac_ops { int (*mac_get_pfc_frm_cfg)(void *cgxd, int lmac_id, u8 *tx_pause, u8 *rx_pause); + /* FEC stats */ + int (*get_fec_stats)(void *cgxd, int lmac_id, + struct cgx_fec_stats_rsp *rsp); }; struct cgx { @@ -128,7 +136,10 @@ struct cgx { struct pci_dev *pdev; u8 cgx_id; u8 lmac_count; - struct lmac *lmac_idmap[MAX_LMAC_PER_CGX]; + /* number of LMACs per MAC could be 4 or 8 */ + u8 max_lmac_per_mac; +#define MAX_LMAC_COUNT 8 + struct lmac *lmac_idmap[MAX_LMAC_COUNT]; struct work_struct 
cgx_cmd_work; struct workqueue_struct *cgx_cmd_workq; struct list_head cgx_list; @@ -150,6 +161,6 @@ struct lmac *lmac_pdata(u8 lmac_id, struct cgx *cgx); int cgx_fwi_cmd_send(u64 req, u64 *resp, struct lmac *lmac); int cgx_fwi_cmd_generic(u64 req, u64 *resp, struct cgx *cgx, int lmac_id); bool is_lmac_valid(struct cgx *cgx, int lmac_id); -struct mac_ops *rpm_get_mac_ops(void); +struct mac_ops *rpm_get_mac_ops(struct cgx *cgx); #endif /* LMAC_COMMON_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c index a70e1153fa04..de0d88dd10d6 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c @@ -8,7 +8,7 @@ #include "cgx.h" #include "lmac_common.h" -static struct mac_ops rpm_mac_ops = { +static struct mac_ops rpm_mac_ops = { .name = "rpm", .csr_offset = 0x4e00, .lmac_offset = 20, @@ -20,12 +20,14 @@ static struct mac_ops rpm_mac_ops = { .non_contiguous_serdes_lane = true, .rx_stats_cnt = 43, .tx_stats_cnt = 34, + .dmac_filter_count = 32, .get_nr_lmacs = rpm_get_nr_lmacs, .get_lmac_type = rpm_get_lmac_type, .lmac_fifo_len = rpm_get_lmac_fifo_len, .mac_lmac_intl_lbk = rpm_lmac_internal_loopback, .mac_get_rx_stats = rpm_get_rx_stats, .mac_get_tx_stats = rpm_get_tx_stats, + .get_fec_stats = rpm_get_fec_stats, .mac_enadis_rx_pause_fwding = rpm_lmac_enadis_rx_pause_fwding, .mac_get_pause_frm_status = rpm_lmac_get_pause_frm_status, .mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm, @@ -37,9 +39,50 @@ static struct mac_ops rpm_mac_ops = { .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, }; -struct mac_ops *rpm_get_mac_ops(void) +static struct mac_ops rpm2_mac_ops = { + .name = "rpm", + .csr_offset = RPM2_CSR_OFFSET, + .lmac_offset = 20, + .int_register = RPM2_CMRX_SW_INT, + .int_set_reg = RPM2_CMRX_SW_INT_ENA_W1S, + .irq_offset = 1, + .int_ena_bit = BIT_ULL(0), + .lmac_fwi = RPM_LMAC_FWI, + .non_contiguous_serdes_lane = true, + .rx_stats_cnt = 43, + .tx_stats_cnt = 34, + .dmac_filter_count = 64, + .get_nr_lmacs = rpm2_get_nr_lmacs, + .get_lmac_type = rpm_get_lmac_type, + .lmac_fifo_len = rpm2_get_lmac_fifo_len, + .mac_lmac_intl_lbk = rpm_lmac_internal_loopback, + .mac_get_rx_stats = rpm_get_rx_stats, + .mac_get_tx_stats = rpm_get_tx_stats, + .get_fec_stats = rpm_get_fec_stats, + .mac_enadis_rx_pause_fwding = rpm_lmac_enadis_rx_pause_fwding, + .mac_get_pause_frm_status = rpm_lmac_get_pause_frm_status, + .mac_enadis_pause_frm = rpm_lmac_enadis_pause_frm, + .mac_pause_frm_config = rpm_lmac_pause_frm_config, + .mac_enadis_ptp_config = rpm_lmac_ptp_config, + .mac_rx_tx_enable = rpm_lmac_rx_tx_enable, + .mac_tx_enable = rpm_lmac_tx_enable, + .pfc_config = rpm_lmac_pfc_config, + .mac_get_pfc_frm_cfg = rpm_lmac_get_pfc_frm_cfg, +}; + +bool is_dev_rpm2(void *rpmd) +{ + rpm_t *rpm = rpmd; + + return (rpm->pdev->device == PCI_DEVID_CN10KB_RPM); +} + +struct mac_ops *rpm_get_mac_ops(rpm_t *rpm) { - return &rpm_mac_ops; + if (is_dev_rpm2(rpm)) + return &rpm2_mac_ops; + else + return &rpm_mac_ops; } static void rpm_write(rpm_t *rpm, u64 lmac, u64 offset, u64 val) @@ -52,6 +95,16 @@ static u64 rpm_read(rpm_t *rpm, u64 lmac, u64 offset) return cgx_read(rpm, lmac, offset); } +/* Read HW major version to determine RPM + * MAC type 100/USX + */ +static bool is_mac_rpmusx(void *rpmd) +{ + rpm_t *rpm = rpmd; + + return rpm_read(rpm, 0, RPMX_CONST1) & 0x700ULL; +} + int rpm_get_nr_lmacs(void *rpmd) { rpm_t *rpm = rpmd; @@ -59,6 +112,13 @@ int rpm_get_nr_lmacs(void *rpmd) return hweight8(rpm_read(rpm, 
0, CGXX_CMRX_RX_LMACS) & 0xFULL); } +int rpm2_get_nr_lmacs(void *rpmd) +{ + rpm_t *rpm = rpmd; + + return hweight8(rpm_read(rpm, 0, RPM2_CMRX_RX_LMACS) & 0xFFULL); +} + int rpm_lmac_tx_enable(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; @@ -222,6 +282,46 @@ static void rpm_cfg_pfc_quanta_thresh(rpm_t *rpm, int lmac_id, } } +static void rpm2_lmac_cfg_bp(rpm_t *rpm, int lmac_id, u8 tx_pause, u8 rx_pause) +{ + u64 cfg; + + cfg = rpm_read(rpm, lmac_id, RPM2_CMR_RX_OVR_BP); + if (tx_pause) { + /* Configure CL0 Pause Quanta & threshold + * for 802.3X frames + */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true); + cfg &= ~RPM2_CMR_RX_OVR_BP_EN; + } else { + /* Disable all Pause Quanta & threshold values */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); + cfg |= RPM2_CMR_RX_OVR_BP_EN; + cfg &= ~RPM2_CMR_RX_OVR_BP_BP; + } + rpm_write(rpm, lmac_id, RPM2_CMR_RX_OVR_BP, cfg); +} + +static void rpm_lmac_cfg_bp(rpm_t *rpm, int lmac_id, u8 tx_pause, u8 rx_pause) +{ + u64 cfg; + + cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP); + if (tx_pause) { + /* Configure CL0 Pause Quanta & threshold for + * 802.3X frames + */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true); + cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id); + } else { + /* Disable all Pause Quanta & threshold values */ + rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); + cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id); + cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id); + } + rpm_write(rpm, 0, RPMX_CMR_RX_OVR_BP, cfg); +} + int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause) { @@ -243,18 +343,11 @@ int rpm_lmac_enadis_pause_frm(void *rpmd, int lmac_id, u8 tx_pause, cfg |= tx_pause ? 0x0 : RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - cfg = rpm_read(rpm, 0, RPMX_CMR_RX_OVR_BP); - if (tx_pause) { - /* Configure CL0 Pause Quanta & threshold for 802.3X frames */ - rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 1, true); - cfg &= ~RPMX_CMR_RX_OVR_BP_EN(lmac_id); - } else { - /* Disable all Pause Quanta & threshold values */ - rpm_cfg_pfc_quanta_thresh(rpm, lmac_id, 0xffff, false); - cfg |= RPMX_CMR_RX_OVR_BP_EN(lmac_id); - cfg &= ~RPMX_CMR_RX_OVR_BP_BP(lmac_id); - } - rpm_write(rpm, 0, RPMX_CMR_RX_OVR_BP, cfg); + if (is_dev_rpm2(rpm)) + rpm2_lmac_cfg_bp(rpm, lmac_id, tx_pause, rx_pause); + else + rpm_lmac_cfg_bp(rpm, lmac_id, tx_pause, rx_pause); + return 0; } @@ -278,13 +371,16 @@ void rpm_lmac_pause_frm_config(void *rpmd, int lmac_id, bool enable) cfg |= RPMX_MTI_MAC100X_COMMAND_CONFIG_TX_P_DISABLE; rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); + /* Enable channel mask for all LMACS */ + if (is_dev_rpm2(rpm)) + rpm_write(rpm, lmac_id, RPM2_CMR_CHAN_MSK_OR, 0xffff); + else + rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL); + /* Disable all PFC classes */ cfg = rpm_read(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL); cfg = FIELD_SET(RPM_PFC_CLASS_MASK, 0, cfg); rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, cfg); - - /* Enable channel mask for all LMACS */ - rpm_write(rpm, 0, RPMX_CMR_CHAN_MSK_OR, ~0ULL); } int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) @@ -292,7 +388,7 @@ int rpm_get_rx_stats(void *rpmd, int lmac_id, int idx, u64 *rx_stat) rpm_t *rpm = rpmd; u64 val_lo, val_hi; - if (!rpm || lmac_id >= rpm->lmac_count) + if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; mutex_lock(&rpm->lock); @@ -320,7 +416,7 @@ int rpm_get_tx_stats(void *rpmd, int lmac_id, int idx, u64 *tx_stat) rpm_t *rpm = rpmd; u64 val_lo, val_hi; - if (!rpm || 
lmac_id >= rpm->lmac_count) + if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; mutex_lock(&rpm->lock); @@ -380,13 +476,71 @@ u32 rpm_get_lmac_fifo_len(void *rpmd, int lmac_id) return 0; } +static int rpmusx_lmac_internal_loopback(rpm_t *rpm, int lmac_id, bool enable) +{ + u64 cfg; + + cfg = rpm_read(rpm, lmac_id, RPM2_USX_PCSX_CONTROL1); + + if (enable) + cfg |= RPM2_USX_PCS_LBK; + else + cfg &= ~RPM2_USX_PCS_LBK; + rpm_write(rpm, lmac_id, RPM2_USX_PCSX_CONTROL1, cfg); + + return 0; +} + +u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id) +{ + u64 hi_perf_lmac, lmac_info; + rpm_t *rpm = rpmd; + u8 num_lmacs; + u32 fifo_len; + + lmac_info = rpm_read(rpm, 0, RPM2_CMRX_RX_LMACS); + /* LMACs are divided into two groups and each group + * gets half of the FIFO + * Group0 lmac_id range {0..3} + * Group1 lmac_id range {4..7} + */ + fifo_len = rpm->mac_ops->fifo_len / 2; + + if (lmac_id < 4) { + num_lmacs = hweight8(lmac_info & 0xF); + hi_perf_lmac = (lmac_info >> 8) & 0x3ULL; + } else { + num_lmacs = hweight8(lmac_info & 0xF0); + hi_perf_lmac = (lmac_info >> 10) & 0x3ULL; + hi_perf_lmac += 4; + } + + switch (num_lmacs) { + case 1: + return fifo_len; + case 2: + return fifo_len / 2; + case 3: + /* LMAC marked as hi_perf gets half of the FIFO + * and rest 1/4th + */ + if (lmac_id == hi_perf_lmac) + return fifo_len / 2; + return fifo_len / 4; + case 4: + default: + return fifo_len / 4; + } + return 0; +} + int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) { rpm_t *rpm = rpmd; u8 lmac_type; u64 cfg; - if (!rpm || lmac_id >= rpm->lmac_count) + if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id); @@ -395,6 +549,9 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable) return 0; } + if (is_dev_rpm2(rpm) && is_mac_rpmusx(rpm)) + return rpmusx_lmac_internal_loopback(rpm, lmac_id, enable); + cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1); if (enable) @@ -439,8 +596,8 @@ void rpm_lmac_ptp_config(void *rpmd, int lmac_id, bool enable) int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 pfc_en) { + u64 cfg, class_en, pfc_class_mask_cfg; rpm_t *rpm = rpmd; - u64 cfg, class_en; if (!is_lmac_valid(rpm, lmac_id)) return -ENODEV; @@ -476,7 +633,10 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 p rpm_write(rpm, lmac_id, RPMX_MTI_MAC100X_COMMAND_CONFIG, cfg); - rpm_write(rpm, lmac_id, RPMX_CMRX_PRT_CBFC_CTL, class_en); + pfc_class_mask_cfg = is_dev_rpm2(rpm) ? 
RPM2_CMRX_PRT_CBFC_CTL : + RPMX_CMRX_PRT_CBFC_CTL; + + rpm_write(rpm, lmac_id, pfc_class_mask_cfg, class_en); return 0; } @@ -497,3 +657,59 @@ int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, u8 *rx_paus return 0; } + +int rpm_get_fec_stats(void *rpmd, int lmac_id, struct cgx_fec_stats_rsp *rsp) +{ + u64 val_lo, val_hi; + rpm_t *rpm = rpmd; + u64 cfg; + + if (!is_lmac_valid(rpm, lmac_id)) + return -ENODEV; + + if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_NONE) + return 0; + + if (rpm->lmac_idmap[lmac_id]->link_info.fec == OTX2_FEC_BASER) { + val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_CCW_LO); + val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); + rsp->fec_corr_blks = (val_hi << 16 | val_lo); + + val_lo = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_VL0_NCCW_LO); + val_hi = rpm_read(rpm, lmac_id, RPMX_MTI_FCFECX_CW_HI); + rsp->fec_uncorr_blks = (val_hi << 16 | val_lo); + + /* 50G uses 2 Physical serdes lines */ + if (rpm->lmac_idmap[lmac_id]->link_info.lmac_type_id == + LMAC_MODE_50G_R) { + val_lo = rpm_read(rpm, lmac_id, + RPMX_MTI_FCFECX_VL1_CCW_LO); + val_hi = rpm_read(rpm, lmac_id, + RPMX_MTI_FCFECX_CW_HI); + rsp->fec_corr_blks += (val_hi << 16 | val_lo); + + val_lo = rpm_read(rpm, lmac_id, + RPMX_MTI_FCFECX_VL1_NCCW_LO); + val_hi = rpm_read(rpm, lmac_id, + RPMX_MTI_FCFECX_CW_HI); + rsp->fec_uncorr_blks += (val_hi << 16 | val_lo); + } + } else { + /* enable RS-FEC capture */ + cfg = rpm_read(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL); + cfg |= RPMX_RSFEC_RX_CAPTURE | BIT(lmac_id); + rpm_write(rpm, 0, RPMX_MTI_STAT_STATN_CONTROL, cfg); + + val_lo = rpm_read(rpm, 0, + RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2); + val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + rsp->fec_corr_blks = (val_hi << 32 | val_lo); + + val_lo = rpm_read(rpm, 0, + RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3); + val_hi = rpm_read(rpm, 0, RPMX_MTI_STAT_DATA_HI_CDC); + rsp->fec_uncorr_blks = (val_hi << 32 | val_lo); + } + + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h index 77f2ef9e1425..22147b4c2137 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.h @@ -12,17 +12,19 @@ /* PCI device IDs */ #define PCI_DEVID_CN10K_RPM 0xA060 +#define PCI_SUBSYS_DEVID_CNF10KB_RPM 0xBC00 +#define PCI_DEVID_CN10KB_RPM 0xA09F /* Registers */ #define RPMX_CMRX_CFG 0x00 #define RPMX_RX_TS_PREPEND BIT_ULL(22) #define RPMX_TX_PTP_1S_SUPPORT BIT_ULL(17) +#define RPMX_CMRX_RX_ID_MAP 0x80 #define RPMX_CMRX_SW_INT 0x180 #define RPMX_CMRX_SW_INT_W1S 0x188 #define RPMX_CMRX_SW_INT_ENA_W1S 0x198 #define RPMX_CMRX_LINK_CFG 0x1070 #define RPMX_MTI_PCS100X_CONTROL1 0x20000 -#define RPMX_MTI_LPCSX_CONTROL1 0x30000 #define RPMX_MTI_PCS_LBK BIT_ULL(14) #define RPMX_MTI_LPCSX_CONTROL(id) (0x30000 | ((id) * 0x100)) @@ -76,11 +78,40 @@ #define RPMX_MTI_MAC100X_XIF_MODE 0x8100 #define RPMX_ONESTEP_ENABLE BIT_ULL(5) #define RPMX_TS_BINARY_MODE BIT_ULL(11) +#define RPMX_CONST1 0x2008 + +/* FEC stats */ +#define RPMX_MTI_STAT_STATN_CONTROL 0x10018 +#define RPMX_MTI_STAT_DATA_HI_CDC 0x10038 +#define RPMX_RSFEC_RX_CAPTURE BIT_ULL(27) +#define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_2 0x40050 +#define RPMX_MTI_RSFEC_STAT_COUNTER_CAPTURE_3 0x40058 +#define RPMX_MTI_FCFECX_VL0_CCW_LO 0x38618 +#define RPMX_MTI_FCFECX_VL0_NCCW_LO 0x38620 +#define RPMX_MTI_FCFECX_VL1_CCW_LO 0x38628 +#define RPMX_MTI_FCFECX_VL1_NCCW_LO 0x38630 +#define RPMX_MTI_FCFECX_CW_HI 0x38638 + +/* CN10KB CSR Declaration */ +#define 
RPM2_CMRX_SW_INT 0x1b0 +#define RPM2_CMRX_SW_INT_ENA_W1S 0x1b8 +#define RPM2_CMR_CHAN_MSK_OR 0x3120 +#define RPM2_CMR_RX_OVR_BP_EN BIT_ULL(2) +#define RPM2_CMR_RX_OVR_BP_BP BIT_ULL(1) +#define RPM2_CMR_RX_OVR_BP 0x3130 +#define RPM2_CSR_OFFSET 0x3e00 +#define RPM2_CMRX_PRT_CBFC_CTL 0x6510 +#define RPM2_CMRX_RX_LMACS 0x100 +#define RPM2_CMRX_RX_LOGL_XON 0x3100 +#define RPM2_CMRX_RX_STAT2 0x3010 +#define RPM2_USX_PCSX_CONTROL1 0x80000 +#define RPM2_USX_PCS_LBK BIT_ULL(14) /* Function Declarations */ int rpm_get_nr_lmacs(void *rpmd); u8 rpm_get_lmac_type(void *rpmd, int lmac_id); u32 rpm_get_lmac_fifo_len(void *rpmd, int lmac_id); +u32 rpm2_get_lmac_fifo_len(void *rpmd, int lmac_id); int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable); void rpm_lmac_enadis_rx_pause_fwding(void *rpmd, int lmac_id, bool enable); int rpm_lmac_get_pause_frm_status(void *cgxd, int lmac_id, u8 *tx_pause, @@ -97,4 +128,7 @@ int rpm_lmac_pfc_config(void *rpmd, int lmac_id, u8 tx_pause, u8 rx_pause, u16 pfc_en); int rpm_lmac_get_pfc_frm_cfg(void *rpmd, int lmac_id, u8 *tx_pause, u8 *rx_pause); +int rpm2_get_nr_lmacs(void *rpmd); +bool is_dev_rpm2(void *rpmd); +int rpm_get_fec_stats(void *cgxd, int lmac_id, struct cgx_fec_stats_rsp *rsp); #endif /* RPM_H */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index f718cbd32a94..7f0a64731c67 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -410,9 +410,15 @@ struct rvu_fwdata { u32 ptp_ext_tstamp; #define FWDATA_RESERVED_MEM 1022 u64 reserved[FWDATA_RESERVED_MEM]; -#define CGX_MAX 5 +#define CGX_MAX 9 #define CGX_LMACS_MAX 4 - struct cgx_lmac_fwdata_s cgx_fw_data[CGX_MAX][CGX_LMACS_MAX]; +#define CGX_LMACS_USX 8 + union { + struct cgx_lmac_fwdata_s + cgx_fw_data[CGX_MAX][CGX_LMACS_MAX]; + struct cgx_lmac_fwdata_s + cgx_fw_data_usx[CGX_MAX][CGX_LMACS_USX]; + }; /* Do not add new fields below this line */ }; @@ -478,7 +484,7 @@ struct rvu { u8 cgx_mapped_pfs; u8 cgx_cnt_max; /* CGX port count max */ u8 *pf2cgxlmac_map; /* pf to cgx_lmac map */ - u16 *cgxlmac2pf_map; /* bitmap of mapped pfs for + u64 *cgxlmac2pf_map; /* bitmap of mapped pfs for * every cgx lmac port */ unsigned long pf_notify_bmap; /* Flags for PF notification */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index addc69f4b65c..438b212fb54a 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -55,8 +55,9 @@ bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature) return (cgx_features_get(cgxd) & feature); } +#define CGX_OFFSET(x) ((x) * rvu->hw->lmac_per_cgx) /* Returns bitmap of mapped PFs */ -static u16 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id) +static u64 cgxlmac_to_pfmap(struct rvu *rvu, u8 cgx_id, u8 lmac_id) { return rvu->cgxlmac2pf_map[CGX_OFFSET(cgx_id) + lmac_id]; } @@ -71,7 +72,8 @@ int cgxlmac_to_pf(struct rvu *rvu, int cgx_id, int lmac_id) if (!pfmap) return -ENODEV; else - return find_first_bit(&pfmap, 16); + return find_first_bit(&pfmap, + rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx); } static u8 cgxlmac_id_to_bmap(u8 cgx_id, u8 lmac_id) @@ -129,14 +131,14 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) if (!cgx_cnt_max) return 0; - if (cgx_cnt_max > 0xF || MAX_LMAC_PER_CGX > 0xF) + if (cgx_cnt_max > 0xF || rvu->hw->lmac_per_cgx > 0xF) return -EINVAL; /* Alloc map table * An 
additional entry is required since PF id starts from 1 and * hence entry at offset 0 is invalid. */ - size = (cgx_cnt_max * MAX_LMAC_PER_CGX + 1) * sizeof(u8); + size = (cgx_cnt_max * rvu->hw->lmac_per_cgx + 1) * sizeof(u8); rvu->pf2cgxlmac_map = devm_kmalloc(rvu->dev, size, GFP_KERNEL); if (!rvu->pf2cgxlmac_map) return -ENOMEM; @@ -145,9 +147,10 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) memset(rvu->pf2cgxlmac_map, 0xFF, size); /* Reverse map table */ - rvu->cgxlmac2pf_map = devm_kzalloc(rvu->dev, - cgx_cnt_max * MAX_LMAC_PER_CGX * sizeof(u16), - GFP_KERNEL); + rvu->cgxlmac2pf_map = + devm_kzalloc(rvu->dev, + cgx_cnt_max * rvu->hw->lmac_per_cgx * sizeof(u64), + GFP_KERNEL); if (!rvu->cgxlmac2pf_map) return -ENOMEM; @@ -156,7 +159,7 @@ static int rvu_map_cgx_lmac_pf(struct rvu *rvu) if (!rvu_cgx_pdata(cgx, rvu)) continue; lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); - for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { lmac = cgx_get_lmacid(rvu_cgx_pdata(cgx, rvu), iter); rvu->pf2cgxlmac_map[pf] = cgxlmac_id_to_bmap(cgx, lmac); @@ -235,7 +238,8 @@ static void cgx_notify_pfs(struct cgx_link_event *event, struct rvu *rvu) pfmap = cgxlmac_to_pfmap(rvu, event->cgx_id, event->lmac_id); do { - pfid = find_first_bit(&pfmap, 16); + pfid = find_first_bit(&pfmap, + rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx); clear_bit(pfid, &pfmap); /* check if notification is enabled */ @@ -310,7 +314,7 @@ static int cgx_lmac_event_handler_init(struct rvu *rvu) if (!cgxd) continue; lmac_bmap = cgx_get_lmac_bmap(cgxd); - for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx) { err = cgx_lmac_evh_register(&cb, cgxd, lmac); if (err) dev_err(rvu->dev, @@ -396,7 +400,7 @@ int rvu_cgx_exit(struct rvu *rvu) if (!cgxd) continue; lmac_bmap = cgx_get_lmac_bmap(cgxd); - for_each_set_bit(lmac, &lmac_bmap, MAX_LMAC_PER_CGX) + for_each_set_bit(lmac, &lmac_bmap, rvu->hw->lmac_per_cgx) cgx_lmac_evh_unregister(cgxd, lmac); } @@ -468,6 +472,7 @@ void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc) { int pf = rvu_get_pf(pcifunc); int i = 0, lmac_count = 0; + struct mac_ops *mac_ops; u8 max_dmac_filters; u8 cgx_id, lmac_id; void *cgx_dev; @@ -483,7 +488,12 @@ void rvu_cgx_disable_dmac_entries(struct rvu *rvu, u16 pcifunc) rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); cgx_dev = cgx_get_pdata(cgx_id); lmac_count = cgx_get_lmac_cnt(cgx_dev); - max_dmac_filters = MAX_DMAC_ENTRIES_PER_CGX / lmac_count; + + mac_ops = get_mac_ops(cgx_dev); + if (!mac_ops) + return; + + max_dmac_filters = mac_ops->dmac_filter_count / lmac_count; for (i = 0; i < max_dmac_filters; i++) cgx_lmac_addr_del(cgx_id, lmac_id, i); @@ -569,6 +579,7 @@ int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu, struct cgx_fec_stats_rsp *rsp) { int pf = rvu_get_pf(req->hdr.pcifunc); + struct mac_ops *mac_ops; u8 cgx_idx, lmac; void *cgxd; @@ -577,7 +588,8 @@ int rvu_mbox_handler_cgx_fec_stats(struct rvu *rvu, rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_idx, &lmac); cgxd = rvu_cgx_pdata(cgx_idx, rvu); - return cgx_get_fec_stats(cgxd, lmac, rsp); + mac_ops = get_mac_ops(cgxd); + return mac_ops->get_fec_stats(cgxd, lmac, rsp); } int rvu_mbox_handler_cgx_mac_addr_set(struct rvu *rvu, @@ -1110,8 +1122,15 @@ int rvu_mbox_handler_cgx_get_aux_link_info(struct rvu *rvu, struct msg_req *req, rvu_get_cgx_lmac_id(rvu->pf2cgxlmac_map[pf], &cgx_id, &lmac_id); - memcpy(&rsp->fwdata, 
&rvu->fwdata->cgx_fw_data[cgx_id][lmac_id], - sizeof(struct cgx_lmac_fwdata_s)); + if (rvu->hw->lmac_per_cgx == CGX_LMACS_USX) + memcpy(&rsp->fwdata, + &rvu->fwdata->cgx_fw_data_usx[cgx_id][lmac_id], + sizeof(struct cgx_lmac_fwdata_s)); + else + memcpy(&rsp->fwdata, + &rvu->fwdata->cgx_fw_data[cgx_id][lmac_id], + sizeof(struct cgx_lmac_fwdata_s)); + return 0; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index 0eb3085c4c21..fa280ebd3052 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -2613,7 +2613,7 @@ static void rvu_dbg_cgx_init(struct rvu *rvu) rvu->rvu_dbg.cgx = debugfs_create_dir(dname, rvu->rvu_dbg.cgx_root); - for_each_set_bit(lmac_id, &lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(lmac_id, &lmac_bmap, rvu->hw->lmac_per_cgx) { /* lmac debugfs dir */ sprintf(dname, "lmac%d", lmac_id); rvu->rvu_dbg.lmac = diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index a62c1b322012..6b8747ebc08c 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -3197,8 +3197,12 @@ static void rvu_get_lbk_link_max_frs(struct rvu *rvu, u16 *max_mtu) static void rvu_get_lmac_link_max_frs(struct rvu *rvu, u16 *max_mtu) { - /* RPM supports FIFO len 128 KB */ - if (rvu_cgx_get_fifolen(rvu) == 0x20000) + int fifo_size = rvu_cgx_get_fifolen(rvu); + + /* RPM supports FIFO len 128 KB and RPM2 supports double the + * FIFO len to accommodate 8 LMACS + */ + if (fifo_size == 0x20000 || fifo_size == 0x40000) *max_mtu = CN10K_LMAC_LINK_MAX_FRS; else *max_mtu = NIC_HW_MAX_FRS; @@ -4109,7 +4113,7 @@ static void nix_link_config(struct rvu *rvu, int blkaddr, /* Get LMAC id's from bitmap */ lmac_bmap = cgx_get_lmac_bmap(rvu_cgx_pdata(cgx, rvu)); - for_each_set_bit(iter, &lmac_bmap, MAX_LMAC_PER_CGX) { + for_each_set_bit(iter, &lmac_bmap, rvu->hw->lmac_per_cgx) { lmac_fifo_len = rvu_cgx_get_lmac_fifolen(rvu, cgx, iter); if (!lmac_fifo_len) { dev_err(rvu->dev, diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c index 00aef8f5ac29..f69102d20c90 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c @@ -1956,7 +1956,9 @@ int rvu_npc_exact_init(struct rvu *rvu) /* Install SDP drop rule */ drop_mcam_idx = &table->num_drop_rules; - max_lmac_cnt = rvu->cgx_cnt_max * MAX_LMAC_PER_CGX + PF_CGXMAP_BASE; + max_lmac_cnt = rvu->cgx_cnt_max * rvu->hw->lmac_per_cgx + + PF_CGXMAP_BASE; + for (i = PF_CGXMAP_BASE; i < max_lmac_cnt; i++) { if (rvu->pf2cgxlmac_map[i] == 0xFF) continue; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 0eb74e8c553d..0f8d1a69139f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -1268,6 +1268,39 @@ end: return err; } +static void otx2_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *fec_stats) +{ + struct otx2_nic *pfvf = netdev_priv(netdev); + struct cgx_fw_data *rsp; + + otx2_update_lmac_fec_stats(pfvf); + + /* Report MAC FEC stats */ + fec_stats->corrected_blocks.total = pfvf->hw.cgx_fec_corr_blks; + fec_stats->uncorrectable_blocks.total = 
pfvf->hw.cgx_fec_uncorr_blks; + + rsp = otx2_get_fwdata(pfvf); + if (!IS_ERR(rsp) && rsp->fwdata.phy.misc.has_fec_stats && + !otx2_get_phy_fec_stats(pfvf)) { + /* Fetch fwdata again because it's been recently populated with + * latest PHY FEC stats. + */ + rsp = otx2_get_fwdata(pfvf); + if (!IS_ERR(rsp)) { + struct fec_stats_s *p = &rsp->fwdata.phy.fec_stats; + + if (pfvf->linfo.fec == OTX2_FEC_BASER) { + fec_stats->corrected_blocks.total = p->brfec_corr_blks; + fec_stats->uncorrectable_blocks.total = p->brfec_uncorr_blks; + } else { + fec_stats->corrected_blocks.total = p->rsfec_corr_cws; + fec_stats->uncorrectable_blocks.total = p->rsfec_uncorr_cws; + } + } + } +} + static const struct ethtool_ops otx2_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES | @@ -1298,6 +1331,7 @@ static const struct ethtool_ops otx2_ethtool_ops = { .get_pauseparam = otx2_get_pauseparam, .set_pauseparam = otx2_set_pauseparam, .get_ts_info = otx2_get_ts_info, + .get_fec_stats = otx2_get_fec_stats, .get_fecparam = otx2_get_fecparam, .set_fecparam = otx2_set_fecparam, .get_link_ksettings = otx2_get_link_ksettings, diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index e421714524c2..044cc211424e 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -1159,7 +1159,12 @@ int otx2_init_tc(struct otx2_nic *nic) return err; tc->flow_ht_params = tc_flow_ht_params; - return rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + err = rhashtable_init(&tc->flow_table, &tc->flow_ht_params); + if (err) { + kfree(tc->tc_entries_bitmap); + tc->tc_entries_bitmap = NULL; + } + return err; } EXPORT_SYMBOL(otx2_init_tc); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8b93dab79141..e3de9a53b2d9 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -4593,6 +4593,7 @@ static const struct mtk_soc_data mt7986_data = { .hw_features = MTK_HW_FEATURES, .required_clks = MT7986_CLKS_BITMAP, .required_pctl = false, + .offload_version = 2, .hash_offset = 4, .foe_entry_size = sizeof(struct mtk_foe_entry), .txrx = { diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index d041615b2bac..a6271449617f 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -174,9 +174,10 @@ mtk_wed_wo_reset(struct mtk_wed_device *dev) mtk_wdma_tx_reset(dev); mtk_wed_reset(dev, MTK_WED_RESET_WED); - mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, - MTK_WED_WO_CMD_CHANGE_STATE, &state, - sizeof(state), false); + if (mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, + MTK_WED_WO_CMD_CHANGE_STATE, &state, + sizeof(state), false)) + return; if (readx_poll_timeout(mtk_wed_wo_read_status, dev, val, val == MTK_WED_WOIF_DISABLE_DONE, @@ -576,12 +577,10 @@ mtk_wed_deinit(struct mtk_wed_device *dev) } static void -mtk_wed_detach(struct mtk_wed_device *dev) +__mtk_wed_detach(struct mtk_wed_device *dev) { struct mtk_wed_hw *hw = dev->hw; - mutex_lock(&hw_lock); - mtk_wed_deinit(dev); mtk_wdma_rx_reset(dev); @@ -590,9 +589,11 @@ mtk_wed_detach(struct mtk_wed_device *dev) mtk_wed_free_tx_rings(dev); if (mtk_wed_get_rx_capa(dev)) { - mtk_wed_wo_reset(dev); + if (hw->wed_wo) + mtk_wed_wo_reset(dev); mtk_wed_free_rx_rings(dev); - mtk_wed_wo_deinit(hw); + if (hw->wed_wo) + 
mtk_wed_wo_deinit(hw); } if (dev->wlan.bus_type == MTK_WED_BUS_PCIE) { @@ -612,6 +613,13 @@ mtk_wed_detach(struct mtk_wed_device *dev) module_put(THIS_MODULE); hw->wed_dev = NULL; +} + +static void +mtk_wed_detach(struct mtk_wed_device *dev) +{ + mutex_lock(&hw_lock); + __mtk_wed_detach(dev); mutex_unlock(&hw_lock); } @@ -1210,7 +1218,8 @@ mtk_wed_wdma_rx_ring_setup(struct mtk_wed_device *dev, int idx, int size, } static int -mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size) +mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size, + bool reset) { u32 desc_size = sizeof(struct mtk_wdma_desc) * dev->hw->version; struct mtk_wed_ring *wdma; @@ -1219,8 +1228,8 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size) return -EINVAL; wdma = &dev->tx_wdma[idx]; - if (mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, desc_size, - true)) + if (!reset && mtk_wed_ring_alloc(dev, wdma, MTK_WED_WDMA_RING_SIZE, + desc_size, true)) return -ENOMEM; wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_BASE, @@ -1230,6 +1239,9 @@ mtk_wed_wdma_tx_ring_setup(struct mtk_wed_device *dev, int idx, int size) wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_CPU_IDX, 0); wdma_w32(dev, MTK_WDMA_RING_TX(idx) + MTK_WED_RING_OFS_DMA_IDX, 0); + if (reset) + mtk_wed_ring_reset(wdma, MTK_WED_WDMA_RING_SIZE, true); + if (!idx) { wed_w32(dev, MTK_WED_WDMA_RING_TX + MTK_WED_RING_OFS_BASE, wdma->desc_phys); @@ -1490,8 +1502,10 @@ mtk_wed_attach(struct mtk_wed_device *dev) ret = mtk_wed_wo_init(hw); } out: - if (ret) - mtk_wed_detach(dev); + if (ret) { + dev_err(dev->hw->dev, "failed to attach wed device\n"); + __mtk_wed_detach(dev); + } unlock: mutex_unlock(&hw_lock); @@ -1569,18 +1583,20 @@ mtk_wed_txfree_ring_setup(struct mtk_wed_device *dev, void __iomem *regs) } static int -mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs) +mtk_wed_rx_ring_setup(struct mtk_wed_device *dev, int idx, void __iomem *regs, + bool reset) { struct mtk_wed_ring *ring = &dev->rx_ring[idx]; if (WARN_ON(idx >= ARRAY_SIZE(dev->rx_ring))) return -EINVAL; - if (mtk_wed_ring_alloc(dev, ring, MTK_WED_RX_RING_SIZE, - sizeof(*ring->desc), false)) + if (!reset && mtk_wed_ring_alloc(dev, ring, MTK_WED_RX_RING_SIZE, + sizeof(*ring->desc), false)) return -ENOMEM; - if (mtk_wed_wdma_tx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE)) + if (mtk_wed_wdma_tx_ring_setup(dev, idx, MTK_WED_WDMA_RING_SIZE, + reset)) return -ENOMEM; ring->reg_base = MTK_WED_RING_RX_DATA(idx); diff --git a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c index f9539e6233c9..6bad0d262f28 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_mcu.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_mcu.c @@ -207,6 +207,9 @@ int mtk_wed_mcu_msg_update(struct mtk_wed_device *dev, int id, void *data, if (dev->hw->version == 1) return 0; + if (WARN_ON(!wo)) + return -ENODEV; + return mtk_wed_mcu_send_msg(wo, MTK_WED_MODULE_ID_WO, id, data, len, true); } diff --git a/drivers/net/ethernet/mediatek/mtk_wed_wo.c b/drivers/net/ethernet/mediatek/mtk_wed_wo.c index a219da85f4db..a0a39643caf7 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed_wo.c +++ b/drivers/net/ethernet/mediatek/mtk_wed_wo.c @@ -408,8 +408,10 @@ mtk_wed_wo_hardware_init(struct mtk_wed_wo *wo) return -ENODEV; wo->mmio.regs = syscon_regmap_lookup_by_phandle(np, NULL); - if (IS_ERR_OR_NULL(wo->mmio.regs)) - return PTR_ERR(wo->mmio.regs); + if (IS_ERR(wo->mmio.regs)) { + ret = PTR_ERR(wo->mmio.regs); + goto 
error_put; + } wo->mmio.irq = irq_of_parse_and_map(np, 0); wo->mmio.irq_mask = MTK_WED_WO_ALL_INT_MASK; @@ -457,7 +459,8 @@ mtk_wed_wo_hardware_init(struct mtk_wed_wo *wo) error: devm_free_irq(wo->hw->dev, wo->mmio.irq, wo); - +error_put: + of_node_put(np); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 43a4102e9c09..c5758637b7be 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -65,7 +65,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->size = size; ring->size_mask = size - 1; ring->sp_stride = stride; - ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS; + ring->full_size = ring->size - HEADROOM - MLX4_MAX_DESC_TXBBS; tmp = size * sizeof(struct mlx4_en_tx_info); ring->tx_info = kvmalloc_node(tmp, GFP_KERNEL, node); @@ -77,9 +77,11 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, en_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", ring->tx_info, tmp); - ring->bounce_buf = kmalloc_node(MAX_DESC_SIZE, GFP_KERNEL, node); + ring->bounce_buf = kmalloc_node(MLX4_TX_BOUNCE_BUFFER_SIZE, + GFP_KERNEL, node); if (!ring->bounce_buf) { - ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); + ring->bounce_buf = kmalloc(MLX4_TX_BOUNCE_BUFFER_SIZE, + GFP_KERNEL); if (!ring->bounce_buf) { err = -ENOMEM; goto err_info; @@ -909,11 +911,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) /* Align descriptor to TXBB size */ desc_size = ALIGN(real_size, TXBB_SIZE); nr_txbb = desc_size >> LOG_TXBB_SIZE; - if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { - if (netif_msg_tx_err(priv)) - en_warn(priv, "Oversized header or SG list\n"); - goto tx_drop_count; - } bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { @@ -941,6 +938,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(index + nr_txbb <= ring->size)) tx_desc = ring->buf + (index << LOG_TXBB_SIZE); else { + if (unlikely(nr_txbb > MLX4_MAX_DESC_TXBBS)) { + if (netif_msg_tx_err(priv)) + en_warn(priv, "Oversized header or SG list\n"); + goto tx_drop_count; + } tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; bf_ok = false; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index e132ff4c82f2..3d4226ddba5e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -89,9 +89,19 @@ #define MLX4_EN_FILTER_HASH_SHIFT 4 #define MLX4_EN_FILTER_EXPIRY_QUOTA 60 -/* Typical TSO descriptor with 16 gather entries is 352 bytes... */ -#define MAX_DESC_SIZE 512 -#define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE) +#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) +#define DS_SIZE sizeof(struct mlx4_wqe_data_seg) + +/* Maximal size of the bounce buffer: + * 256 bytes for LSO headers. + * CTRL_SIZE for control desc. + * DS_SIZE if skb->head contains some payload. + * MAX_SKB_FRAGS frags. 
+ */ +#define MLX4_TX_BOUNCE_BUFFER_SIZE \ + ALIGN(256 + CTRL_SIZE + DS_SIZE + MAX_SKB_FRAGS * DS_SIZE, TXBB_SIZE) + +#define MLX4_MAX_DESC_TXBBS (MLX4_TX_BOUNCE_BUFFER_SIZE / TXBB_SIZE) /* * OS related constants and tunables @@ -217,9 +227,7 @@ struct mlx4_en_tx_info { #define MLX4_EN_BIT_DESC_OWN 0x80000000 -#define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg) #define MLX4_EN_MEMTYPE_PAD 0x100 -#define DS_SIZE sizeof(struct mlx4_wqe_data_seg) struct mlx4_en_tx_desc { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a22c32aabf11..cd4a1ab0ea78 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -111,6 +111,7 @@ mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o steering/dr_ste_v2.o \ steering/dr_cmd.o steering/dr_fw.o \ steering/dr_action.o steering/fs_dr.o \ + steering/dr_definer.o \ steering/dr_dbg.o lib/smfs.o # # SF device diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 751bc4a9edcf..ddb197970c22 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -314,6 +314,10 @@ static const struct devlink_ops mlx5_devlink_ops = { .rate_node_new = mlx5_esw_devlink_rate_node_new, .rate_node_del = mlx5_esw_devlink_rate_node_del, .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set, + .port_fn_roce_get = mlx5_devlink_port_fn_roce_get, + .port_fn_roce_set = mlx5_devlink_port_fn_roce_set, + .port_fn_migratable_get = mlx5_devlink_port_fn_migratable_get, + .port_fn_migratable_set = mlx5_devlink_port_fn_migratable_set, #endif #ifdef CONFIG_MLX5_SF_MANAGER .port_new = mlx5_devlink_sf_port_new, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index c5bb79a4fa57..2732128e7a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -228,6 +228,17 @@ const char *parse_fs_hdrs(struct trace_seq *p, return ret; } +static const char +*fs_dest_range_field_to_str(enum mlx5_flow_dest_range_field field) +{ + switch (field) { + case MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN: + return "packet len"; + default: + return "unknown dest range field"; + } +} + const char *parse_fs_dst(struct trace_seq *p, const struct mlx5_flow_destination *dst, u32 counter_id) @@ -259,6 +270,11 @@ const char *parse_fs_dst(struct trace_seq *p, case MLX5_FLOW_DESTINATION_TYPE_PORT: trace_seq_printf(p, "port\n"); break; + case MLX5_FLOW_DESTINATION_TYPE_RANGE: + trace_seq_printf(p, "field=%s min=%d max=%d\n", + fs_dest_range_field_to_str(dst->range.field), + dst->range.min, dst->range.max); + break; case MLX5_FLOW_DESTINATION_TYPE_NONE: trace_seq_printf(p, "none\n"); break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c index 21aab96357b5..a278f52d52b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/accept.c @@ -28,4 +28,5 @@ tc_act_parse_accept(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_tc_act mlx5e_tc_act_accept = { .can_offload = tc_act_can_offload_accept, .parse_action = tc_act_parse_accept, + .is_terminating_action = true, }; diff --git 
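For a sense of scale of the new mlx4 bounce-buffer bound introduced above, the following worked example plugs in typical values; the segment sizes, MAX_SKB_FRAGS and TXBB_SIZE used here are assumptions for illustration, not guaranteed for every build:

/* Worked example of MLX4_TX_BOUNCE_BUFFER_SIZE with assumed typical sizes:
 *   CTRL_SIZE = 16, DS_SIZE = 16, MAX_SKB_FRAGS = 17, TXBB_SIZE = 64
 *
 *   256 + 16 + 16 + 17 * 16 = 560 bytes
 *   ALIGN(560, 64)          = 576 bytes
 *   MLX4_MAX_DESC_TXBBS     = 576 / 64 = 9 TXBBs
 *
 * versus the old fixed MAX_DESC_SIZE of 512 bytes (8 TXBBs), which the new
 * formula can exceed once MAX_SKB_FRAGS grows.
 */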
a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index 3337241cfd84..eba0c8698926 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -11,7 +11,7 @@ static struct mlx5e_tc_act *tc_acts_fdb[NUM_FLOW_ACTIONS] = { [FLOW_ACTION_DROP] = &mlx5e_tc_act_drop, [FLOW_ACTION_TRAP] = &mlx5e_tc_act_trap, [FLOW_ACTION_GOTO] = &mlx5e_tc_act_goto, - [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_mirred, + [FLOW_ACTION_REDIRECT] = &mlx5e_tc_act_redirect, [FLOW_ACTION_MIRRED] = &mlx5e_tc_act_mirred, [FLOW_ACTION_REDIRECT_INGRESS] = &mlx5e_tc_act_redirect_ingress, [FLOW_ACTION_VLAN_PUSH] = &mlx5e_tc_act_vlan, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index e1570ff056ae..8346557eeaf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -32,6 +32,11 @@ struct mlx5e_tc_act_parse_state { struct mlx5_tc_ct_priv *ct_priv; }; +struct mlx5e_tc_act_branch_ctrl { + enum flow_action_id act_id; + u32 extval; +}; + struct mlx5e_tc_act { bool (*can_offload)(struct mlx5e_tc_act_parse_state *parse_state, const struct flow_action_entry *act, @@ -60,6 +65,12 @@ struct mlx5e_tc_act { int (*stats_action)(struct mlx5e_priv *priv, struct flow_offload_action *fl_act); + + bool (*get_branch_ctrl)(const struct flow_action_entry *act, + struct mlx5e_tc_act_branch_ctrl *cond_true, + struct mlx5e_tc_act_branch_ctrl *cond_false); + + bool is_terminating_action; }; struct mlx5e_tc_flow_action { @@ -81,6 +92,7 @@ extern struct mlx5e_tc_act mlx5e_tc_act_vlan_mangle; extern struct mlx5e_tc_act mlx5e_tc_act_mpls_push; extern struct mlx5e_tc_act mlx5e_tc_act_mpls_pop; extern struct mlx5e_tc_act mlx5e_tc_act_mirred; +extern struct mlx5e_tc_act mlx5e_tc_act_redirect; extern struct mlx5e_tc_act mlx5e_tc_act_mirred_nic; extern struct mlx5e_tc_act mlx5e_tc_act_ct; extern struct mlx5e_tc_act mlx5e_tc_act_sample; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c index dd025a95c439..7d16aeabb119 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/drop.c @@ -27,4 +27,5 @@ tc_act_parse_drop(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_tc_act mlx5e_tc_act_drop = { .can_offload = tc_act_can_offload_drop, .parse_action = tc_act_parse_drop, + .is_terminating_action = true, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c index 25174f68613e..0923e6db2d0a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/goto.c @@ -121,4 +121,5 @@ struct mlx5e_tc_act mlx5e_tc_act_goto = { .can_offload = tc_act_can_offload_goto, .parse_action = tc_act_parse_goto, .post_parse = tc_act_post_parse_goto, + .is_terminating_action = true, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c index 4ac7de3f6afa..78c427b38048 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred.c @@ -334,4 +334,11 @@ tc_act_parse_mirred(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_tc_act 
mlx5e_tc_act_mirred = { .can_offload = tc_act_can_offload_mirred, .parse_action = tc_act_parse_mirred, + .is_terminating_action = false, +}; + +struct mlx5e_tc_act mlx5e_tc_act_redirect = { + .can_offload = tc_act_can_offload_mirred, + .parse_action = tc_act_parse_mirred, + .is_terminating_action = true, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c index 90b4c1b34776..7f409692b18f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/mirred_nic.c @@ -48,4 +48,5 @@ tc_act_parse_mirred_nic(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_tc_act mlx5e_tc_act_mirred_nic = { .can_offload = tc_act_can_offload_mirred_nic, .parse_action = tc_act_parse_mirred_nic, + .is_terminating_action = true, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c index c8e5ca65bb6e..512d43148922 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c @@ -3,6 +3,45 @@ #include "act.h" #include "en/tc_priv.h" +#include "fs_core.h" + +static bool police_act_validate_control(enum flow_action_id act_id, + struct netlink_ext_ack *extack) +{ + if (act_id != FLOW_ACTION_PIPE && + act_id != FLOW_ACTION_ACCEPT && + act_id != FLOW_ACTION_JUMP && + act_id != FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform-exceed action is not pipe, ok, jump or drop"); + return false; + } + + return true; +} + +static int police_act_validate(const struct flow_action_entry *act, + struct netlink_ext_ack *extack) +{ + if (!police_act_validate_control(act->police.exceed.act_id, extack) || + !police_act_validate_control(act->police.notexceed.act_id, extack)) + return -EOPNOTSUPP; + + if (act->police.peakrate_bytes_ps || + act->police.avrate || act->police.overhead) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when peakrate/avrate/overhead is configured"); + return -EOPNOTSUPP; + } + + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, + "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } + + return 0; +} static bool tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state, @@ -10,14 +49,10 @@ tc_act_can_offload_police(struct mlx5e_tc_act_parse_state *parse_state, int act_index, struct mlx5_flow_attr *attr) { - if (act->police.notexceed.act_id != FLOW_ACTION_PIPE && - act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) { - NL_SET_ERR_MSG_MOD(parse_state->extack, - "Offload not supported when conform action is not pipe or ok"); - return false; - } - if (mlx5e_policer_validate(parse_state->flow_action, act, - parse_state->extack)) + int err; + + err = police_act_validate(act, parse_state->extack); + if (err) return false; return !!mlx5e_get_flow_meters(parse_state->flow->priv->mdev); @@ -37,6 +72,8 @@ fill_meter_params_from_act(const struct flow_action_entry *act, params->mode = MLX5_RATE_LIMIT_PPS; params->rate = act->police.rate_pkt_ps; params->burst = act->police.burst_pkt; + } else if (act->police.mtu) { + params->mtu = act->police.mtu; } else { return -EOPNOTSUPP; } @@ -50,14 +87,25 @@ tc_act_parse_police(struct mlx5e_tc_act_parse_state *parse_state, struct mlx5e_priv *priv, struct mlx5_flow_attr *attr) { + enum mlx5_flow_namespace_type ns = mlx5e_get_flow_namespace(parse_state->flow); + struct 
mlx5e_flow_meter_params *params = &attr->meter_attr.params; int err; - err = fill_meter_params_from_act(act, &attr->meter_attr.params); + err = fill_meter_params_from_act(act, params); if (err) return err; - attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO; - attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER; + if (params->mtu) { + if (!(mlx5_fs_get_capabilities(priv->mdev, ns) & + MLX5_FLOW_STEERING_CAP_MATCH_RANGES)) + return -EOPNOTSUPP; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->flags |= MLX5_ATTR_FLAG_MTU; + } else { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO; + attr->exe_aso_type = MLX5_EXE_ASO_FLOW_METER; + } return 0; } @@ -79,7 +127,7 @@ tc_act_police_offload(struct mlx5e_priv *priv, struct mlx5e_flow_meter_handle *meter; int err = 0; - err = mlx5e_policer_validate(&fl_act->action, act, fl_act->extack); + err = police_act_validate(act, fl_act->extack); if (err) return err; @@ -147,6 +195,19 @@ tc_act_police_stats(struct mlx5e_priv *priv, return 0; } +static bool +tc_act_police_get_branch_ctrl(const struct flow_action_entry *act, + struct mlx5e_tc_act_branch_ctrl *cond_true, + struct mlx5e_tc_act_branch_ctrl *cond_false) +{ + cond_true->act_id = act->police.notexceed.act_id; + cond_true->extval = act->police.notexceed.extval; + + cond_false->act_id = act->police.exceed.act_id; + cond_false->extval = act->police.exceed.extval; + return true; +} + struct mlx5e_tc_act mlx5e_tc_act_police = { .can_offload = tc_act_can_offload_police, .parse_action = tc_act_parse_police, @@ -154,4 +215,5 @@ struct mlx5e_tc_act mlx5e_tc_act_police = { .offload_action = tc_act_police_offload, .destroy_action = tc_act_police_destroy, .stats_action = tc_act_police_stats, + .get_branch_ctrl = tc_act_police_get_branch_ctrl, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c index 25cd449e8aad..78af8a3175bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -240,7 +240,7 @@ mlx5e_flow_meter_destroy_aso_obj(struct mlx5_core_dev *mdev, u32 obj_id) } static struct mlx5e_flow_meter_handle * -__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters) +__mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters, bool alloc_aso) { struct mlx5_core_dev *mdev = flow_meters->mdev; struct mlx5e_flow_meter_aso_obj *meters_obj; @@ -256,16 +256,19 @@ __mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters) counter = mlx5_fc_create(mdev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); - goto err_red_counter; + goto err_drop_counter; } - meter->red_counter = counter; + meter->drop_counter = counter; counter = mlx5_fc_create(mdev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); - goto err_green_counter; + goto err_act_counter; } - meter->green_counter = counter; + meter->act_counter = counter; + + if (!alloc_aso) + goto no_aso; meters_obj = list_first_entry_or_null(&flow_meters->partial_list, struct mlx5e_flow_meter_aso_obj, @@ -299,11 +302,12 @@ __mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters) } bitmap_set(meters_obj->meters_map, pos, 1); - meter->flow_meters = flow_meters; meter->meters_obj = meters_obj; meter->obj_id = meters_obj->base_id + pos / 2; meter->idx = pos % 2; +no_aso: + meter->flow_meters = flow_meters; mlx5_core_dbg(mdev, "flow meter allocated, obj_id=0x%x, index=%d\n", meter->obj_id, meter->idx); @@ -312,10 +316,10 @@ __mlx5e_flow_meter_alloc(struct mlx5e_flow_meters *flow_meters) 
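To make the new mtu path concrete, here is a hypothetical flow_action_entry as tc might deliver it for a policer configured with an mtu limit; the value and the exact software-side layout are assumptions for illustration only:

#include <net/flow_offload.h>

/* Hypothetical input for the mtu-based policer path (illustration only). */
struct flow_action_entry act = {
	.id = FLOW_ACTION_POLICE,
	.police = {
		.mtu       = 1500,                        /* bytes, assumed value */
		.notexceed = { .act_id = FLOW_ACTION_PIPE },
		.exceed    = { .act_id = FLOW_ACTION_DROP },
	},
};

/* fill_meter_params_from_act() copies act.police.mtu into params->mtu, and
 * tc_act_parse_police() then requires MLX5_FLOW_STEERING_CAP_MATCH_RANGES and
 * programs a forward destination (MLX5_ATTR_FLAG_MTU) instead of an ASO meter.
 */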
err_mem: mlx5e_flow_meter_destroy_aso_obj(mdev, id); err_create: - mlx5_fc_destroy(mdev, meter->green_counter); -err_green_counter: - mlx5_fc_destroy(mdev, meter->red_counter); -err_red_counter: + mlx5_fc_destroy(mdev, meter->act_counter); +err_act_counter: + mlx5_fc_destroy(mdev, meter->drop_counter); +err_drop_counter: kfree(meter); return ERR_PTR(err); } @@ -328,8 +332,11 @@ __mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter) struct mlx5e_flow_meter_aso_obj *meters_obj; int n, pos; - mlx5_fc_destroy(mdev, meter->green_counter); - mlx5_fc_destroy(mdev, meter->red_counter); + mlx5_fc_destroy(mdev, meter->act_counter); + mlx5_fc_destroy(mdev, meter->drop_counter); + + if (meter->params.mtu) + goto out_no_aso; meters_obj = meter->meters_obj; pos = (meter->obj_id - meters_obj->base_id) * 2 + meter->idx; @@ -344,6 +351,7 @@ __mlx5e_flow_meter_free(struct mlx5e_flow_meter_handle *meter) list_add(&meters_obj->entry, &flow_meters->partial_list); } +out_no_aso: mlx5_core_dbg(mdev, "flow meter freed, obj_id=0x%x, index=%d\n", meter->obj_id, meter->idx); kfree(meter); @@ -408,12 +416,13 @@ mlx5e_tc_meter_alloc(struct mlx5e_flow_meters *flow_meters, { struct mlx5e_flow_meter_handle *meter; - meter = __mlx5e_flow_meter_alloc(flow_meters); + meter = __mlx5e_flow_meter_alloc(flow_meters, !params->mtu); if (IS_ERR(meter)) return meter; hash_add(flow_meters->hashtbl, &meter->hlist, params->index); meter->params.index = params->index; + meter->params.mtu = params->mtu; meter->refcnt++; return meter; @@ -574,8 +583,8 @@ mlx5e_tc_meter_get_stats(struct mlx5e_flow_meter_handle *meter, u64 bytes1, packets1, lastuse1; u64 bytes2, packets2, lastuse2; - mlx5_fc_query_cached(meter->green_counter, &bytes1, &packets1, &lastuse1); - mlx5_fc_query_cached(meter->red_counter, &bytes2, &packets2, &lastuse2); + mlx5_fc_query_cached(meter->act_counter, &bytes1, &packets1, &lastuse1); + mlx5_fc_query_cached(meter->drop_counter, &bytes2, &packets2, &lastuse2); *bytes = bytes1 + bytes2; *packets = packets1 + packets2; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h index 6de6e8a16327..9b795cd106bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.h @@ -20,6 +20,7 @@ struct mlx5e_flow_meter_params { u32 index; u64 rate; u64 burst; + u32 mtu; }; struct mlx5e_flow_meter_handle { @@ -32,8 +33,8 @@ struct mlx5e_flow_meter_handle { struct hlist_node hlist; struct mlx5e_flow_meter_params params; - struct mlx5_fc *green_counter; - struct mlx5_fc *red_counter; + struct mlx5_fc *act_counter; + struct mlx5_fc *drop_counter; }; struct mlx5e_meter_attr { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c index 8b77e822810e..8d7d761482d2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.c @@ -8,23 +8,56 @@ #define MLX5_PACKET_COLOR_BITS MLX5_REG_MAPPING_MBITS(PACKET_COLOR_TO_REG) #define MLX5_PACKET_COLOR_MASK MLX5_REG_MAPPING_MASK(PACKET_COLOR_TO_REG) -struct mlx5e_post_meter_priv { +struct mlx5e_post_meter_rate_table { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *green_rule; + struct mlx5_flow_attr *green_attr; + struct mlx5_flow_handle *red_rule; + struct mlx5_flow_attr *red_attr; +}; + +struct mlx5e_post_meter_mtu_table { struct mlx5_flow_table *ft; struct mlx5_flow_group 
*fg; - struct mlx5_flow_handle *fwd_green_rule; - struct mlx5_flow_handle *drop_red_rule; + struct mlx5_flow_handle *rule; + struct mlx5_flow_attr *attr; +}; + +struct mlx5e_post_meter_mtu_tables { + struct mlx5e_post_meter_mtu_table green_table; + struct mlx5e_post_meter_mtu_table red_table; +}; + +struct mlx5e_post_meter_priv { + enum mlx5e_post_meter_type type; + union { + struct mlx5e_post_meter_rate_table rate_steering_table; + struct mlx5e_post_meter_mtu_tables mtu_tables; + }; }; struct mlx5_flow_table * mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter) { - return post_meter->ft; + return post_meter->rate_steering_table.ft; } -static int +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return post_meter->mtu_tables.green_table.ft; +} + +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return post_meter->mtu_tables.red_table.ft; +} + +static struct mlx5_flow_table * mlx5e_post_meter_table_create(struct mlx5e_priv *priv, - enum mlx5_flow_namespace_type ns_type, - struct mlx5e_post_meter_priv *post_meter) + enum mlx5_flow_namespace_type ns_type) { struct mlx5_flow_table_attr ft_attr = {}; struct mlx5_flow_namespace *root_ns; @@ -32,7 +65,7 @@ mlx5e_post_meter_table_create(struct mlx5e_priv *priv, root_ns = mlx5_get_flow_namespace(priv->mdev, ns_type); if (!root_ns) { mlx5_core_warn(priv->mdev, "Failed to get namespace for flow meter\n"); - return -EOPNOTSUPP; + return ERR_PTR(-EOPNOTSUPP); } ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED; @@ -40,19 +73,14 @@ mlx5e_post_meter_table_create(struct mlx5e_priv *priv, ft_attr.max_fte = 2; ft_attr.level = 1; - post_meter->ft = mlx5_create_flow_table(root_ns, &ft_attr); - if (IS_ERR(post_meter->ft)) { - mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n"); - return PTR_ERR(post_meter->ft); - } - - return 0; + return mlx5_create_flow_table(root_ns, &ft_attr); } static int -mlx5e_post_meter_fg_create(struct mlx5e_priv *priv, - struct mlx5e_post_meter_priv *post_meter) +mlx5e_post_meter_rate_fg_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter) { + struct mlx5e_post_meter_rate_table *table = &post_meter->rate_steering_table; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); void *misc2, *match_criteria; u32 *flow_group_in; @@ -71,25 +99,58 @@ mlx5e_post_meter_fg_create(struct mlx5e_priv *priv, MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); - post_meter->fg = mlx5_create_flow_group(post_meter->ft, flow_group_in); - if (IS_ERR(post_meter->fg)) { + table->fg = mlx5_create_flow_group(table->ft, flow_group_in); + if (IS_ERR(table->fg)) { mlx5_core_warn(priv->mdev, "Failed to create post_meter flow group\n"); - err = PTR_ERR(post_meter->fg); + err = PTR_ERR(table->fg); } kvfree(flow_group_in); return err; } +static struct mlx5_flow_handle * +mlx5e_post_meter_add_rule(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_flow_handle *ret; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_DROP) + attr->counter = drop_counter; + else + attr->counter = act_counter; + + attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; + attr->outer_match_level = 
MLX5_MATCH_NONE; + attr->chain = 0; + attr->prio = 0; + + ret = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); + + /* We did not create the counter, so we can't delete it. + * Avoid freeing the counter when the attr is deleted in free_branching_attr + */ + attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; + + return ret; +} + static int -mlx5e_post_meter_rules_create(struct mlx5e_priv *priv, - struct mlx5e_post_meter_priv *post_meter, - struct mlx5e_post_act *post_act, - struct mlx5_fc *green_counter, - struct mlx5_fc *red_counter) -{ - struct mlx5_flow_destination dest[2] = {}; - struct mlx5_flow_act flow_act = {}; +mlx5e_post_meter_rate_rules_create(struct mlx5e_priv *priv, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5e_post_act *post_act, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5_flow_attr *green_attr, + struct mlx5_flow_attr *red_attr) +{ + struct mlx5e_post_meter_rate_table *table = &post_meter->rate_steering_table; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err; @@ -100,72 +161,242 @@ mlx5e_post_meter_rules_create(struct mlx5e_priv *priv, mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, MLX5_FLOW_METER_COLOR_RED, MLX5_PACKET_COLOR_MASK); - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[0].counter_id = mlx5_fc_id(red_counter); - - rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 1); + red_attr->ft = post_meter->rate_steering_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, spec, red_attr, + act_counter, drop_counter); if (IS_ERR(rule)) { - mlx5_core_warn(priv->mdev, "Failed to create post_meter flow drop rule\n"); + mlx5_core_warn(priv->mdev, "Failed to create post_meter exceed rule\n"); err = PTR_ERR(rule); goto err_red; } - post_meter->drop_red_rule = rule; + table->red_rule = rule; + table->red_attr = red_attr; mlx5e_tc_match_to_reg_match(spec, PACKET_COLOR_TO_REG, MLX5_FLOW_METER_COLOR_GREEN, MLX5_PACKET_COLOR_MASK); - flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_COUNT; - dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - dest[0].ft = mlx5e_tc_post_act_get_ft(post_act); - dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; - dest[1].counter_id = mlx5_fc_id(green_counter); - - rule = mlx5_add_flow_rules(post_meter->ft, spec, &flow_act, dest, 2); + green_attr->ft = post_meter->rate_steering_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, spec, green_attr, + act_counter, drop_counter); if (IS_ERR(rule)) { - mlx5_core_warn(priv->mdev, "Failed to create post_meter flow fwd rule\n"); + mlx5_core_warn(priv->mdev, "Failed to create post_meter notexceed rule\n"); err = PTR_ERR(rule); goto err_green; } - post_meter->fwd_green_rule = rule; + table->green_rule = rule; + table->green_attr = green_attr; kvfree(spec); return 0; err_green: - mlx5_del_flow_rules(post_meter->drop_red_rule); + mlx5_del_flow_rules(table->red_rule); err_red: kvfree(spec); return err; } static void -mlx5e_post_meter_rules_destroy(struct mlx5e_post_meter_priv *post_meter) +mlx5e_post_meter_rate_rules_destroy(struct mlx5_eswitch *esw, + struct mlx5e_post_meter_priv *post_meter) { - mlx5_del_flow_rules(post_meter->drop_red_rule); - mlx5_del_flow_rules(post_meter->fwd_green_rule); + struct mlx5e_post_meter_rate_table *rate_table = &post_meter->rate_steering_table; + + mlx5_eswitch_del_offloaded_rule(esw, 
rate_table->red_rule, rate_table->red_attr); + mlx5_eswitch_del_offloaded_rule(esw, rate_table->green_rule, rate_table->green_attr); } static void -mlx5e_post_meter_fg_destroy(struct mlx5e_post_meter_priv *post_meter) +mlx5e_post_meter_rate_fg_destroy(struct mlx5e_post_meter_priv *post_meter) { - mlx5_destroy_flow_group(post_meter->fg); + mlx5_destroy_flow_group(post_meter->rate_steering_table.fg); } static void -mlx5e_post_meter_table_destroy(struct mlx5e_post_meter_priv *post_meter) +mlx5e_post_meter_rate_table_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5_destroy_flow_table(post_meter->rate_steering_table.ft); +} + +static void +mlx5e_post_meter_mtu_rules_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + + mlx5_del_flow_rules(mtu_tables->green_table.rule); + mlx5_del_flow_rules(mtu_tables->red_table.rule); +} + +static void +mlx5e_post_meter_mtu_fg_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + + mlx5_destroy_flow_group(mtu_tables->green_table.fg); + mlx5_destroy_flow_group(mtu_tables->red_table.fg); +} + +static void +mlx5e_post_meter_mtu_table_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + + mlx5_destroy_flow_table(mtu_tables->green_table.ft); + mlx5_destroy_flow_table(mtu_tables->red_table.ft); +} + +static int +mlx5e_post_meter_rate_create(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5_flow_attr *green_attr, + struct mlx5_flow_attr *red_attr) +{ + struct mlx5_flow_table *ft; + int err; + + post_meter->type = MLX5E_POST_METER_RATE; + + ft = mlx5e_post_meter_table_create(priv, ns_type); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_warn(priv->mdev, "Failed to create post_meter table\n"); + goto err_ft; + } + + post_meter->rate_steering_table.ft = ft; + + err = mlx5e_post_meter_rate_fg_create(priv, post_meter); + if (err) + goto err_fg; + + err = mlx5e_post_meter_rate_rules_create(priv, post_meter, post_act, + act_counter, drop_counter, + green_attr, red_attr); + if (err) + goto err_rules; + + return 0; + +err_rules: + mlx5e_post_meter_rate_fg_destroy(post_meter); +err_fg: + mlx5e_post_meter_rate_table_destroy(post_meter); +err_ft: + return err; +} + +static int +mlx5e_post_meter_create_mtu_table(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_meter_mtu_table *table) { - mlx5_destroy_flow_table(post_meter->ft); + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *flow_group_in; + int err; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + if (!flow_group_in) + return -ENOMEM; + + table->ft = mlx5e_post_meter_table_create(priv, ns_type); + if (IS_ERR(table->ft)) { + err = PTR_ERR(table->ft); + goto err_ft; + } + + /* create miss group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1); + fg = mlx5_create_flow_group(table->ft, flow_group_in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + goto err_miss_grp; + } + table->fg = fg; + + kvfree(flow_group_in); + return 0; + +err_miss_grp: + mlx5_destroy_flow_table(table->ft); +err_ft: + kvfree(flow_group_in); + 
return err; +} + +static int +mlx5e_post_meter_mtu_create(struct mlx5e_priv *priv, + enum mlx5_flow_namespace_type ns_type, + struct mlx5e_post_act *post_act, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5e_post_meter_priv *post_meter, + struct mlx5_flow_attr *green_attr, + struct mlx5_flow_attr *red_attr) +{ + struct mlx5e_post_meter_mtu_tables *mtu_tables = &post_meter->mtu_tables; + static struct mlx5_flow_spec zero_spec = {}; + struct mlx5_flow_handle *rule; + int err; + + post_meter->type = MLX5E_POST_METER_MTU; + + err = mlx5e_post_meter_create_mtu_table(priv, ns_type, &mtu_tables->green_table); + if (err) + goto err_green_ft; + + green_attr->ft = mtu_tables->green_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, &zero_spec, green_attr, + act_counter, drop_counter); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter conform rule\n"); + err = PTR_ERR(rule); + goto err_green_rule; + } + mtu_tables->green_table.rule = rule; + mtu_tables->green_table.attr = green_attr; + + err = mlx5e_post_meter_create_mtu_table(priv, ns_type, &mtu_tables->red_table); + if (err) + goto err_red_ft; + + red_attr->ft = mtu_tables->red_table.ft; + rule = mlx5e_post_meter_add_rule(priv, post_meter, &zero_spec, red_attr, + act_counter, drop_counter); + if (IS_ERR(rule)) { + mlx5_core_warn(priv->mdev, "Failed to create post_meter exceed rule\n"); + err = PTR_ERR(rule); + goto err_red_rule; + } + mtu_tables->red_table.rule = rule; + mtu_tables->red_table.attr = red_attr; + + return 0; + +err_red_rule: + mlx5_destroy_flow_table(mtu_tables->red_table.ft); +err_red_ft: + mlx5_del_flow_rules(mtu_tables->green_table.rule); +err_green_rule: + mlx5_destroy_flow_table(mtu_tables->green_table.ft); +err_green_ft: + return err; } struct mlx5e_post_meter_priv * mlx5e_post_meter_init(struct mlx5e_priv *priv, enum mlx5_flow_namespace_type ns_type, struct mlx5e_post_act *post_act, - struct mlx5_fc *green_counter, - struct mlx5_fc *red_counter) + enum mlx5e_post_meter_type type, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5_flow_attr *branch_true, + struct mlx5_flow_attr *branch_false) { struct mlx5e_post_meter_priv *post_meter; int err; @@ -174,36 +405,55 @@ mlx5e_post_meter_init(struct mlx5e_priv *priv, if (!post_meter) return ERR_PTR(-ENOMEM); - err = mlx5e_post_meter_table_create(priv, ns_type, post_meter); - if (err) - goto err_ft; - - err = mlx5e_post_meter_fg_create(priv, post_meter); - if (err) - goto err_fg; + switch (type) { + case MLX5E_POST_METER_MTU: + err = mlx5e_post_meter_mtu_create(priv, ns_type, post_act, + act_counter, drop_counter, post_meter, + branch_true, branch_false); + break; + case MLX5E_POST_METER_RATE: + err = mlx5e_post_meter_rate_create(priv, ns_type, post_act, + act_counter, drop_counter, post_meter, + branch_true, branch_false); + break; + default: + err = -EOPNOTSUPP; + } - err = mlx5e_post_meter_rules_create(priv, post_meter, post_act, green_counter, - red_counter); if (err) - goto err_rules; + goto err; return post_meter; -err_rules: - mlx5e_post_meter_fg_destroy(post_meter); -err_fg: - mlx5e_post_meter_table_destroy(post_meter); -err_ft: +err: kfree(post_meter); return ERR_PTR(err); } +static void +mlx5e_post_meter_rate_destroy(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter) +{ + mlx5e_post_meter_rate_rules_destroy(esw, post_meter); + mlx5e_post_meter_rate_fg_destroy(post_meter); + mlx5e_post_meter_rate_table_destroy(post_meter); +} + +static void 
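A rough sketch of how the two MTU tables created above are presumably consumed: the policer's rule forwards to a packet-length range destination, with the "true"/"false" tables taken from the getters added earlier. Any field name not visible in this patch (the hit/miss table members in particular) is an assumption, not the verified API:

/* Illustration only - members outside this patch are assumed. */
u32 mtu = 1500;	/* would come from attr->meter_attr.params.mtu */
struct mlx5_flow_destination dest = {};

dest.type        = MLX5_FLOW_DESTINATION_TYPE_RANGE;
dest.range.field = MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN;
dest.range.min   = 0;
dest.range.max   = mtu;
/* assumed: packets inside the range go to
 *   mlx5e_post_meter_get_mtu_true_ft(post_meter)
 * and oversized packets to
 *   mlx5e_post_meter_get_mtu_false_ft(post_meter)
 */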
+mlx5e_post_meter_mtu_destroy(struct mlx5e_post_meter_priv *post_meter) +{ + mlx5e_post_meter_mtu_rules_destroy(post_meter); + mlx5e_post_meter_mtu_fg_destroy(post_meter); + mlx5e_post_meter_mtu_table_destroy(post_meter); +} + void -mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter) +mlx5e_post_meter_cleanup(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter) { - mlx5e_post_meter_rules_destroy(post_meter); - mlx5e_post_meter_fg_destroy(post_meter); - mlx5e_post_meter_table_destroy(post_meter); + if (post_meter->type == MLX5E_POST_METER_RATE) + mlx5e_post_meter_rate_destroy(esw, post_meter); + else + mlx5e_post_meter_mtu_destroy(post_meter); + kfree(post_meter); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h index 34d0e4b9fc7a..e013b77186b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_meter.h @@ -14,16 +14,49 @@ struct mlx5e_post_meter_priv; +enum mlx5e_post_meter_type { + MLX5E_POST_METER_RATE = 0, + MLX5E_POST_METER_MTU +}; + +#if IS_ENABLED(CONFIG_MLX5_CLS_ACT) + struct mlx5_flow_table * mlx5e_post_meter_get_ft(struct mlx5e_post_meter_priv *post_meter); +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter); + +struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter); + struct mlx5e_post_meter_priv * mlx5e_post_meter_init(struct mlx5e_priv *priv, enum mlx5_flow_namespace_type ns_type, struct mlx5e_post_act *post_act, - struct mlx5_fc *green_counter, - struct mlx5_fc *red_counter); + enum mlx5e_post_meter_type type, + struct mlx5_fc *act_counter, + struct mlx5_fc *drop_counter, + struct mlx5_flow_attr *branch_true, + struct mlx5_flow_attr *branch_false); + void -mlx5e_post_meter_cleanup(struct mlx5e_post_meter_priv *post_meter); +mlx5e_post_meter_cleanup(struct mlx5_eswitch *esw, struct mlx5e_post_meter_priv *post_meter); + +#else /* CONFIG_MLX5_CLS_ACT */ + +static inline struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_true_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return NULL; +} + +static inline struct mlx5_flow_table * +mlx5e_post_meter_get_mtu_false_ft(struct mlx5e_post_meter_priv *post_meter) +{ + return NULL; +} + +#endif #endif /* __MLX5_EN_POST_METER_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h index 2e42d7c5451e..2b7fd1c0e643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h @@ -211,8 +211,4 @@ struct mlx5e_flow_meters *mlx5e_get_flow_meters(struct mlx5_core_dev *dev); void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec); void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec); -int mlx5e_policer_validate(const struct flow_action *action, - const struct flow_action_entry *act, - struct netlink_ext_ack *extack); - #endif /* __MLX5_EN_TC_PRIV_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 623886462c10..75b9e1528fd2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -85,18 +85,25 @@ static const struct counter_desc sw_rep_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, }; -struct vport_stats { - u64 
vport_rx_packets; - u64 vport_tx_packets; - u64 vport_rx_bytes; - u64 vport_tx_bytes; -}; - static const struct counter_desc vport_rep_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_packets) }, - { MLX5E_DECLARE_STAT(struct vport_stats, vport_rx_bytes) }, - { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_packets) }, - { MLX5E_DECLARE_STAT(struct vport_stats, vport_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, vport_tx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + rx_vport_rdma_unicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, rx_vport_rdma_unicast_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + tx_vport_rdma_unicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, tx_vport_rdma_unicast_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + rx_vport_rdma_multicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + rx_vport_rdma_multicast_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + tx_vport_rdma_multicast_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_rep_stats, + tx_vport_rdma_multicast_bytes) }, }; #define NUM_VPORT_REP_SW_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) @@ -161,33 +168,80 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport_rep) int i; for (i = 0; i < NUM_VPORT_REP_HW_COUNTERS; i++) - data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.vf_vport, + data[idx++] = MLX5E_READ_CTR64_CPU(&priv->stats.rep_stats, vport_rep_stats_desc, i); return idx; } static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport_rep) { + struct mlx5e_rep_stats *rep_stats = &priv->stats.rep_stats; + int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - struct rtnl_link_stats64 *vport_stats; - struct ifla_vf_stats vf_stats; + u32 *out; int err; - err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats); + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return; + + err = mlx5_core_query_vport_counter(esw->dev, 1, rep->vport - 1, 0, out); if (err) { netdev_warn(priv->netdev, "vport %d error %d reading stats\n", rep->vport, err); return; } - vport_stats = &priv->stats.vf_vport; + #define MLX5_GET_CTR(p, x) \ + MLX5_GET64(query_vport_counter_out, p, x) /* flip tx/rx as we are reporting the counters for the switch vport */ - vport_stats->rx_packets = vf_stats.tx_packets; - vport_stats->rx_bytes = vf_stats.tx_bytes; - vport_stats->tx_packets = vf_stats.rx_packets; - vport_stats->tx_bytes = vf_stats.rx_bytes; + rep_stats->vport_rx_packets = + MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + + MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + rep_stats->vport_tx_packets = + MLX5_GET_CTR(out, received_ib_unicast.packets) + + MLX5_GET_CTR(out, received_eth_unicast.packets) + + MLX5_GET_CTR(out, received_ib_multicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + rep_stats->vport_rx_bytes = + MLX5_GET_CTR(out, transmitted_ib_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + + MLX5_GET_CTR(out, 
transmitted_ib_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + rep_stats->vport_tx_bytes = + MLX5_GET_CTR(out, received_ib_unicast.octets) + + MLX5_GET_CTR(out, received_eth_unicast.octets) + + MLX5_GET_CTR(out, received_ib_multicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + rep_stats->rx_vport_rdma_unicast_packets = + MLX5_GET_CTR(out, transmitted_ib_unicast.packets); + rep_stats->tx_vport_rdma_unicast_packets = + MLX5_GET_CTR(out, received_ib_unicast.packets); + rep_stats->rx_vport_rdma_unicast_bytes = + MLX5_GET_CTR(out, transmitted_ib_unicast.octets); + rep_stats->tx_vport_rdma_unicast_bytes = + MLX5_GET_CTR(out, received_ib_unicast.octets); + rep_stats->rx_vport_rdma_multicast_packets = + MLX5_GET_CTR(out, transmitted_ib_multicast.packets); + rep_stats->tx_vport_rdma_multicast_packets = + MLX5_GET_CTR(out, received_ib_multicast.packets); + rep_stats->rx_vport_rdma_multicast_bytes = + MLX5_GET_CTR(out, transmitted_ib_multicast.octets); + rep_stats->tx_vport_rdma_multicast_bytes = + MLX5_GET_CTR(out, received_ib_multicast.octets); + + kvfree(out); } static void mlx5e_rep_get_strings(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 712cac10ba49..375752d6546d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -463,6 +463,21 @@ struct mlx5e_ptp_cq_stats { u64 resync_event; }; +struct mlx5e_rep_stats { + u64 vport_rx_packets; + u64 vport_tx_packets; + u64 vport_rx_bytes; + u64 vport_tx_bytes; + u64 rx_vport_rdma_unicast_packets; + u64 tx_vport_rdma_unicast_packets; + u64 rx_vport_rdma_unicast_bytes; + u64 tx_vport_rdma_unicast_bytes; + u64 rx_vport_rdma_multicast_packets; + u64 tx_vport_rdma_multicast_packets; + u64 rx_vport_rdma_multicast_bytes; + u64 tx_vport_rdma_multicast_bytes; +}; + struct mlx5e_stats { struct mlx5e_sw_stats sw; struct mlx5e_qcounter_stats qcnt; @@ -471,6 +486,7 @@ struct mlx5e_stats { struct mlx5e_pport_stats pport; struct rtnl_link_stats64 vf_vport; struct mlx5e_pcie_stats pcie; + struct mlx5e_rep_stats rep_stats; }; extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 10d1609ece58..9af2aa2922f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -132,6 +132,15 @@ struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = { [PACKET_COLOR_TO_REG] = packet_color_to_reg, }; +struct mlx5e_tc_jump_state { + u32 jump_count; + bool jump_target; + struct mlx5_flow_attr *jumping_attr; + + enum flow_action_id last_id; + u32 last_index; +}; + struct mlx5e_tc_table *mlx5e_tc_table_alloc(void) { struct mlx5e_tc_table *tc; @@ -160,6 +169,7 @@ static struct lock_class_key tc_ht_lock_key; static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow); static void free_flow_post_acts(struct mlx5e_tc_flow *flow); +static void mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr); void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, @@ -392,8 +402,9 @@ mlx5_tc_rule_delete(struct mlx5e_priv *priv, static bool is_flow_meter_action(struct mlx5_flow_attr *attr) { - return ((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && - (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)); + return 
(((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) && + (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) || + attr->flags & MLX5_ATTR_FLAG_MTU); } static int @@ -404,6 +415,7 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, struct mlx5e_post_meter_priv *post_meter; enum mlx5_flow_namespace_type ns_type; struct mlx5e_flow_meter_handle *meter; + enum mlx5e_post_meter_type type; meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params); if (IS_ERR(meter)) { @@ -412,8 +424,11 @@ mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv, } ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters); - post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, meter->green_counter, - meter->red_counter); + type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE; + post_meter = mlx5e_post_meter_init(priv, ns_type, post_act, + type, + meter->act_counter, meter->drop_counter, + attr->branch_true, attr->branch_false); if (IS_ERR(post_meter)) { mlx5_core_err(priv->mdev, "Failed to init post meter\n"); goto err_meter_init; @@ -432,9 +447,9 @@ err_meter_init: } static void -mlx5e_tc_del_flow_meter(struct mlx5_flow_attr *attr) +mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr) { - mlx5e_post_meter_cleanup(attr->meter_attr.post_meter); + mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter); mlx5e_tc_meter_put(attr->meter_attr.meter); } @@ -495,7 +510,7 @@ mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv, mlx5_eswitch_del_offloaded_rule(esw, rule, attr); if (attr->meter_attr.meter) - mlx5e_tc_del_flow_meter(attr); + mlx5e_tc_del_flow_meter(esw, attr); } int @@ -606,6 +621,12 @@ int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow) MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; } +static struct mlx5_core_dev * +get_flow_counter_dev(struct mlx5e_tc_flow *flow) +{ + return mlx5e_is_eswitch_flow(flow) ? 
flow->attr->esw_attr->counter_dev : flow->priv->mdev; +} + static struct mod_hdr_tbl * get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -1719,6 +1740,90 @@ clean_encap_dests(struct mlx5e_priv *priv, } static int +verify_attr_actions(u32 actions, struct netlink_ext_ack *extack) +{ + if (!(actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action"); + return -EOPNOTSUPP; + } + + if (!(~actions & + (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { + NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); + return -EOPNOTSUPP; + } + + if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && + actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); + return -EOPNOTSUPP; + } + + return 0; +} + +static int +post_process_attr(struct mlx5e_tc_flow *flow, + struct mlx5_flow_attr *attr, + bool is_post_act_attr, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; + bool vf_tun; + int err = 0; + + err = verify_attr_actions(attr->action, extack); + if (err) + goto err_out; + + err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); + if (err) + goto err_out; + + if (mlx5e_is_eswitch_flow(flow)) { + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) + goto err_out; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + if (vf_tun || is_post_act_attr) { + err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr); + if (err) + goto err_out; + } else { + err = mlx5e_attach_mod_hdr(flow->priv, flow, attr->parse_attr); + if (err) + goto err_out; + } + } + + if (attr->branch_true && + attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr->branch_true); + if (err) + goto err_out; + } + + if (attr->branch_false && + attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr->branch_false); + if (err) + goto err_out; + } + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { + err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr); + if (err) + goto err_out; + } + +err_out: + return err; +} + +static int mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@ -1728,7 +1833,6 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr = flow->attr; struct mlx5_esw_flow_attr *esw_attr; u32 max_prio, max_chain; - bool vf_tun; int err = 0; parse_attr = attr->parse_attr; @@ -1818,32 +1922,10 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, esw_attr->int_port = int_port; } - err = set_encap_dests(priv, flow, attr, extack, &vf_tun); - if (err) - goto err_out; - - err = mlx5_eswitch_add_vlan_action(esw, attr); + err = post_process_attr(flow, attr, false, extack); if (err) goto err_out; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - if (vf_tun) { - err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr); - if (err) - goto err_out; - } else { - err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); - if (err) - goto err_out; - } - } - - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - err = alloc_flow_attr_counter(esw_attr->counter_dev, attr); - if (err) - goto err_out; - } - /* we get here if one of the following takes place: * (1) there's no error * (2) there's an encap action and we don't have valid neigh @@ 
-1879,6 +1961,16 @@ static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) return !!geneve_tlv_opt_0_data; } +static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) +{ + if (!attr) + return; + + mlx5_free_flow_attr(flow, attr); + kvfree(attr->parse_attr); + kfree(attr); +} + static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { @@ -1934,6 +2026,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, mlx5e_detach_decap(priv, flow); free_flow_post_acts(flow); + free_branch_attr(flow, attr->branch_true); + free_branch_attr(flow, attr->branch_false); if (flow->attr->lag.count) mlx5_lag_del_mpesw_rule(esw->dev); @@ -3507,36 +3601,6 @@ actions_match_supported(struct mlx5e_priv *priv, ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR; ct_flow = flow_flag_test(flow, CT) && !ct_clear; - if (!(actions & - (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { - NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action"); - return false; - } - - if (!(~actions & - (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { - NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); - return false; - } - - if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && - actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { - NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); - return false; - } - - if (!(~actions & - (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) { - NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action"); - return false; - } - - if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && - actions & MLX5_FLOW_CONTEXT_ACTION_DROP) { - NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported"); - return false; - } - if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR && !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions, ct_flow, ct_clear, extack)) @@ -3636,15 +3700,12 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr, attr2->esw_attr->split_count = 0; } + attr2->branch_true = NULL; + attr2->branch_false = NULL; + attr2->jumping_attr = NULL; return attr2; } -static struct mlx5_core_dev * -get_flow_counter_dev(struct mlx5e_tc_flow *flow) -{ - return mlx5e_is_eswitch_flow(flow) ? 
flow->attr->esw_attr->counter_dev : flow->priv->mdev; -} - struct mlx5_flow_attr * mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow) { @@ -3680,28 +3741,15 @@ mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow) static void free_flow_post_acts(struct mlx5e_tc_flow *flow) { - struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); - struct mlx5e_post_act *post_act = get_post_action(flow->priv); struct mlx5_flow_attr *attr, *tmp; - bool vf_tun; list_for_each_entry_safe(attr, tmp, &flow->attrs, list) { if (list_is_last(&attr->list, &flow->attrs)) break; - if (attr->post_act_handle) - mlx5e_tc_post_act_del(post_act, attr->post_act_handle); - - clean_encap_dests(flow->priv, flow, attr, &vf_tun); - - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) - mlx5_fc_destroy(counter_dev, attr->counter); - - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); - if (attr->modify_hdr) - mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr); - } + mlx5_free_flow_attr(flow, attr); + free_branch_attr(flow, attr->branch_true); + free_branch_attr(flow, attr->branch_false); list_del(&attr->list); kvfree(attr->parse_attr); @@ -3754,7 +3802,6 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) struct mlx5e_post_act *post_act = get_post_action(flow->priv); struct mlx5_flow_attr *attr, *next_attr = NULL; struct mlx5e_post_act_handle *handle; - bool vf_tun; int err; /* This is going in reverse order as needed. @@ -3764,7 +3811,9 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) if (!next_attr) { /* Set counter action on last post act rule. */ attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - } else { + } + + if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) { err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr); if (err) goto out_free; @@ -3776,26 +3825,14 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) if (list_is_last(&attr->list, &flow->attrs)) break; - err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun); + err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); if (err) goto out_free; - err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack); + err = post_process_attr(flow, attr, true, extack); if (err) goto out_free; - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { - err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr); - if (err) - goto out_free; - } - - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { - err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr); - if (err) - goto out_free; - } - handle = mlx5e_tc_post_act_add(post_act, attr); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -3803,6 +3840,13 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) } attr->post_act_handle = handle; + + if (attr->jumping_attr) { + err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr); + if (err) + goto out_free; + } + next_attr = attr; } @@ -3822,12 +3866,145 @@ out_free: } static int +alloc_branch_attr(struct mlx5e_tc_flow *flow, + struct mlx5e_tc_act_branch_ctrl *cond, + struct mlx5_flow_attr **cond_attr, + u32 *jump_count, + struct netlink_ext_ack *extack) +{ + struct mlx5_flow_attr *attr; + int err = 0; + + *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, + mlx5e_get_flow_namespace(flow)); + if (!(*cond_attr)) + return -ENOMEM; + + attr = *cond_attr; + + switch (cond->act_id) { + case 
FLOW_ACTION_DROP: + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + break; + case FLOW_ACTION_ACCEPT: + case FLOW_ACTION_PIPE: + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + break; + case FLOW_ACTION_JUMP: + if (*jump_count) { + NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps"); + err = -EOPNOTSUPP; + goto out_err; + } + *jump_count = cond->extval; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv)); + break; + default: + err = -EOPNOTSUPP; + goto out_err; + } + + return err; +out_err: + kfree(*cond_attr); + *cond_attr = NULL; + return err; +} + +static void +dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, + struct mlx5_flow_attr *attr, struct mlx5e_priv *priv, + struct mlx5e_tc_jump_state *jump_state) +{ + if (!jump_state->jump_count) + return; + + /* Single tc action can instantiate multiple offload actions (e.g. pedit) + * Jump only over a tc action + */ + if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index) + return; + + jump_state->last_id = act->id; + jump_state->last_index = act->hw_index; + + /* nothing to do for intermediate actions */ + if (--jump_state->jump_count > 1) + return; + + if (jump_state->jump_count == 1) { /* last action in the jump action list */ + + /* create a new attribute after this action */ + jump_state->jump_target = true; + + if (tc_act->is_terminating_action) { /* the branch ends here */ + attr->flags |= MLX5_ATTR_FLAG_TERMINATING; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; + } else { /* the branch continues executing the rest of the actions */ + struct mlx5e_post_act *post_act; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + post_act = get_post_action(priv); + attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act); + } + } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */ + /* This is the post action for the jumping attribute (either red or green) + * Use the stored jumping_attr to set the post act id on the jumping attribute + */ + attr->jumping_attr = jump_state->jumping_attr; + } +} + +static int +parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act, + struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr, + struct mlx5e_tc_jump_state *jump_state, + struct netlink_ext_ack *extack) +{ + struct mlx5e_tc_act_branch_ctrl cond_true, cond_false; + u32 jump_count = jump_state->jump_count; + int err; + + if (!tc_act->get_branch_ctrl) + return 0; + + tc_act->get_branch_ctrl(act, &cond_true, &cond_false); + + err = alloc_branch_attr(flow, &cond_true, + &attr->branch_true, &jump_count, extack); + if (err) + goto out_err; + + if (jump_count) + jump_state->jumping_attr = attr->branch_true; + + err = alloc_branch_attr(flow, &cond_false, + &attr->branch_false, &jump_count, extack); + if (err) + goto err_branch_false; + + if (jump_count && !jump_state->jumping_attr) + jump_state->jumping_attr = attr->branch_false; + + jump_state->jump_count = jump_count; + return 0; + +err_branch_false: + free_branch_attr(flow, attr->branch_true); +out_err: + return err; +} + +static int parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, struct flow_action *flow_action) { struct netlink_ext_ack *extack = parse_state->extack; struct mlx5e_tc_flow_action flow_action_reorder; struct mlx5e_tc_flow *flow = parse_state->flow; + struct mlx5e_tc_jump_state jump_state = {}; struct 
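As a concrete reading of the branching plumbing above: for a policer whose conform action is "jump 2" and whose exceed action is "drop", tc_act_police_get_branch_ctrl() hands parse_branch_ctrl() roughly the following (values are an illustration):

/* Illustrative get_branch_ctrl() output for conform-exceed = jump 2 / drop. */
struct mlx5e_tc_act_branch_ctrl cond_true = {
	.act_id = FLOW_ACTION_JUMP,	/* conform: jump per tc "jump" semantics */
	.extval = 2,
};
struct mlx5e_tc_act_branch_ctrl cond_false = {
	.act_id = FLOW_ACTION_DROP,	/* exceed: drop the packet */
	.extval = 0,
};

/* alloc_branch_attr() turns cond_true into branch_true and seeds jump_count
 * from extval; dec_jump_count() then counts it down across the following tc
 * actions.  At the last jumped-over action the branch either terminates (for
 * drop/redirect-style actions) or forwards to the post-action table, and the
 * first action after the jump records jumping_attr so its post-action id can
 * be wired back to the branch.
 */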
mlx5_flow_attr *attr = flow->attr; enum mlx5_flow_namespace_type ns_type; struct mlx5e_priv *priv = flow->priv; @@ -3847,6 +4024,7 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, list_add(&attr->list, &flow->attrs); flow_action_for_each(i, _act, &flow_action_reorder) { + jump_state.jump_target = false; act = *_act; tc_act = mlx5e_tc_act_get(act->id, ns_type); if (!tc_act) { @@ -3864,12 +4042,19 @@ parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state, if (err) goto out_free; + dec_jump_count(act, tc_act, attr, priv, &jump_state); + + err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack); + if (err) + goto out_free; + parse_state->actions |= attr->action; /* Split attr for multi table act if not the last act. */ - if (tc_act->is_multi_table_act && + if (jump_state.jump_target || + (tc_act->is_multi_table_act && tc_act->is_multi_table_act(priv, act, attr) && - i < flow_action_reorder.num_entries - 1) { + i < flow_action_reorder.num_entries - 1)) { err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type); if (err) goto out_free; @@ -3951,6 +4136,10 @@ parse_tc_nic_actions(struct mlx5e_priv *priv, if (err) return err; + err = verify_attr_actions(attr->action, extack); + if (err) + return err; + if (!actions_match_supported(priv, flow_action, parse_state->actions, parse_attr, flow, extack)) return -EOPNOTSUPP; @@ -4188,6 +4377,30 @@ mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type) return attr; } +static void +mlx5_free_flow_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr) +{ + struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow); + bool vf_tun; + + if (!attr) + return; + + if (attr->post_act_handle) + mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle); + + clean_encap_dests(flow->priv, flow, attr, &vf_tun); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) + mlx5_fc_destroy(counter_dev, attr->counter); + + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { + mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts); + if (attr->modify_hdr) + mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr); + } +} + static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, struct flow_cls_offload *f, unsigned long flow_flags, @@ -4730,10 +4943,17 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, return err; } -int mlx5e_policer_validate(const struct flow_action *action, - const struct flow_action_entry *act, - struct netlink_ext_ack *extack) +static int +tc_matchall_police_validate(const struct flow_action *action, + const struct flow_action_entry *act, + struct netlink_ext_ack *extack) { + if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) { + NL_SET_ERR_MSG_MOD(extack, + "Offload not supported when conform action is not continue"); + return -EOPNOTSUPP; + } + if (act->police.exceed.act_id != FLOW_ACTION_DROP) { NL_SET_ERR_MSG_MOD(extack, "Offload not supported when exceed action is not drop"); @@ -4784,13 +5004,7 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: - if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) { - NL_SET_ERR_MSG_MOD(extack, - "Offload not supported when conform action is not continue"); - return -EOPNOTSUPP; - } - - err = mlx5e_policer_validate(flow_action, act, extack); + err = tc_matchall_police_validate(flow_action, act, extack); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 0db41fa4a9a6..50af70ef22f3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@ -95,6 +95,9 @@ struct mlx5_flow_attr { */ bool count; } lag; + struct mlx5_flow_attr *branch_true; + struct mlx5_flow_attr *branch_false; + struct mlx5_flow_attr *jumping_attr; /* keep this union last */ union { DECLARE_FLEX_ARRAY(struct mlx5_esw_flow_attr, esw_attr); @@ -110,6 +113,8 @@ enum { MLX5_ATTR_FLAG_SAMPLE = BIT(4), MLX5_ATTR_FLAG_ACCEPT = BIT(5), MLX5_ATTR_FLAG_CT = BIT(6), + MLX5_ATTR_FLAG_TERMINATING = BIT(7), + MLX5_ATTR_FLAG_MTU = BIT(8), }; /* Returns true if any of the flags that require skipping further TC/NF processing are set. */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c index 2db13c71e88c..3d0bbcca1cb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c @@ -12,10 +12,11 @@ enum vnic_diag_counter { MLX5_VNIC_DIAG_CQ_OVERRUN, MLX5_VNIC_DIAG_INVALID_COMMAND, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND, + MLX5_VNIC_DIAG_RX_STEERING_DISCARD, }; static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter, - u32 *val) + u64 *val) { u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; @@ -57,6 +58,10 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND: *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command); break; + case MLX5_VNIC_DIAG_RX_STEERING_DISCARD: + *val = MLX5_GET64(vnic_diagnostic_statistics, vnic_diag_out, + nic_receive_steering_discard); + break; } return 0; @@ -65,14 +70,14 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport, enum vnic_diag_counter type) { - u32 val = 0; + u64 val = 0; int ret; ret = mlx5_esw_query_vnic_diag(vport, type, &val); if (ret) return ret; - seq_printf(file, "%d\n", val); + seq_printf(file, "%llu\n", val); return 0; } @@ -112,6 +117,11 @@ static int quota_exceeded_command_show(struct seq_file *file, void *priv) return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND); } +static int rx_steering_discard_show(struct seq_file *file, void *priv) +{ + return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_RX_STEERING_DISCARD); +} + DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle); DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow); DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun); @@ -119,6 +129,7 @@ DEFINE_SHOW_ATTRIBUTE(async_eq_overrun); DEFINE_SHOW_ATTRIBUTE(cq_overrun); DEFINE_SHOW_ATTRIBUTE(invalid_command); DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command); +DEFINE_SHOW_ATTRIBUTE(rx_steering_discard); void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num) { @@ -179,4 +190,9 @@ void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count)) debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport, &quota_exceeded_command_fops); + + if (MLX5_CAP_GEN(esw->dev, nic_receive_steering_discard)) + debugfs_create_file("rx_steering_discard", 0444, vnic_diag, vport, + &rx_steering_discard_fops); + } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 374e3fbdc2cf..527e4bffda8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -772,6 +772,41 @@ static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw, esw_vport_destroy_offloads_acl_tables(esw, vport); } +static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + void *query_ctx; + void *hca_caps; + int err; + + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + return 0; + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, + MLX5_CAP_GENERAL); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + + memset(query_ctx, 0, query_out_sz); + err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, + MLX5_CAP_GENERAL_2); + if (err) + goto out_free; + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + vport->info.mig_enabled = MLX5_GET(cmd_hca_cap_2, hca_caps, migratable); +out_free: + kfree(query_ctx); + return err; +} + static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { u16 vport_num = vport->vport; @@ -785,6 +820,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) if (mlx5_esw_is_manager_vport(esw, vport_num)) return 0; + err = mlx5_esw_vport_caps_get(esw, vport); + if (err) + goto err_caps; + mlx5_modify_vport_admin_state(esw->dev, MLX5_VPORT_STATE_OP_MOD_ESW_VPORT, vport_num, 1, @@ -804,6 +843,10 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) vport->info.qos, flags); return 0; + +err_caps: + esw_vport_cleanup_acl(esw, vport); + return err; } /* Don't cleanup vport->info, it's needed to restore vport configuration */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 42d9df417e20..5a85a5d32be7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -153,6 +153,8 @@ struct mlx5_vport_info { u8 qos; u8 spoofchk: 1; u8 trusted: 1; + u8 roce_enabled: 1; + u8 mig_enabled: 1; }; /* Vport context events */ @@ -508,7 +510,14 @@ int mlx5_devlink_port_function_hw_addr_get(struct devlink_port *port, int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); - +int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack); +int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack); void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type); int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 9b6fbb19c22a..e455b215c708 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -50,6 +50,7 @@ #include "en/mapping.h" #include "devlink.h" #include "lag/lag.h" +#include "en/tc/post_meter.h" #define mlx5_esw_for_each_rep(esw, i, rep) \ xa_for_each(&((esw)->offloads.vport_reps), i, rep) @@ -202,6 +203,21 @@ esw_cleanup_decap_indir(struct mlx5_eswitch *esw, } static int +esw_setup_mtu_dest(struct mlx5_flow_destination *dest, + struct mlx5e_meter_attr *meter, + int i) +{ + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_RANGE; + dest[i].range.field = MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN; + dest[i].range.min = 0; + dest[i].range.max = meter->params.mtu; + dest[i].range.hit_ft = mlx5e_post_meter_get_mtu_true_ft(meter->post_meter); + dest[i].range.miss_ft = mlx5e_post_meter_get_mtu_false_ft(meter->post_meter); + + return 0; +} + +static int esw_setup_sampler_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, u32 sampler_id, @@ -491,6 +507,9 @@ esw_setup_dests(struct mlx5_flow_destination *dest, } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { esw_setup_accept_dest(dest, flow_act, chains, *i); (*i)++; + } else if (attr->flags & MLX5_ATTR_FLAG_MTU) { + err = esw_setup_mtu_dest(dest, &attr->meter_attr, *i); + (*i)++; } else if (esw_is_indir_table(esw, attr)) { err = esw_setup_indir_table(dest, flow_act, esw, attr, spec, true, i); } else if (esw_is_chain_src_port_rewrite(esw, esw_attr)) { @@ -640,6 +659,11 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, goto err_esw_get; } + if (!i) { + kfree(dest); + dest = NULL; + } + if (mlx5_eswitch_termtbl_required(esw, attr, &flow_act, spec)) rule = mlx5_eswitch_add_termtbl_rule(esw, fdb, spec, esw_attr, &flow_act, dest, i); @@ -3889,7 +3913,7 @@ static int mlx5_esw_query_vport_vhca_id(struct mlx5_eswitch *esw, u16 vport_num, if (!query_ctx) return -ENOMEM; - err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx); + err = mlx5_vport_get_other_func_general_cap(esw->dev, vport_num, query_ctx); if (err) goto out_free; @@ -4022,3 +4046,212 @@ int mlx5_devlink_port_function_hw_addr_set(struct devlink_port *port, return mlx5_eswitch_set_vport_mac(esw, vport_num, hw_addr); } + +static struct mlx5_vport * +mlx5_devlink_port_fn_get_vport(struct devlink_port *port, struct mlx5_eswitch *esw) +{ + u16 vport_num; + + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) + return ERR_PTR(-EOPNOTSUPP); + + vport_num = mlx5_esw_devlink_port_index_to_vport_num(port->index); + if (!is_port_function_supported(esw, vport_num)) + return ERR_PTR(-EOPNOTSUPP); + + return mlx5_eswitch_get_vport(esw, vport_num); +} + +int mlx5_devlink_port_fn_migratable_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + int err = -EOPNOTSUPP; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (!MLX5_CAP_GEN(esw->dev, migration)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration"); + return err; + } + + vport = mlx5_devlink_port_fn_get_vport(port, esw); + if (IS_ERR(vport)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port"); + return PTR_ERR(vport); + } + + mutex_lock(&esw->state_lock); + if (vport->enabled) { + *is_enabled = vport->info.mig_enabled; + err = 0; + } + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + struct mlx5_eswitch *esw; + struct 
mlx5_vport *vport; + void *query_ctx; + void *hca_caps; + int err = -EOPNOTSUPP; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + if (!MLX5_CAP_GEN(esw->dev, migration)) { + NL_SET_ERR_MSG_MOD(extack, "Device doesn't support migration"); + return err; + } + + vport = mlx5_devlink_port_fn_get_vport(port, esw); + if (IS_ERR(vport)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port"); + return PTR_ERR(vport); + } + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); + goto out; + } + + if (vport->info.mig_enabled == enable) { + err = 0; + goto out; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) { + err = -ENOMEM; + goto out; + } + + err = mlx5_vport_get_other_func_cap(esw->dev, vport->vport, query_ctx, + MLX5_CAP_GENERAL_2); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out_free; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1); + + err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA migratable cap"); + goto out_free; + } + + vport->info.mig_enabled = enable; + +out_free: + kfree(query_ctx); +out: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_roce_get(struct devlink_port *port, bool *is_enabled, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + int err = -EOPNOTSUPP; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport = mlx5_devlink_port_fn_get_vport(port, esw); + if (IS_ERR(vport)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port"); + return PTR_ERR(vport); + } + + mutex_lock(&esw->state_lock); + if (vport->enabled) { + *is_enabled = vport->info.roce_enabled; + err = 0; + } + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_devlink_port_fn_roce_set(struct devlink_port *port, bool enable, + struct netlink_ext_ack *extack) +{ + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + int err = -EOPNOTSUPP; + void *query_ctx; + void *hca_caps; + u16 vport_num; + + esw = mlx5_devlink_eswitch_get(port->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + vport = mlx5_devlink_port_fn_get_vport(port, esw); + if (IS_ERR(vport)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid port"); + return PTR_ERR(vport); + } + vport_num = vport->vport; + + mutex_lock(&esw->state_lock); + if (!vport->enabled) { + NL_SET_ERR_MSG_MOD(extack, "Eswitch vport is disabled"); + goto out; + } + + if (vport->info.roce_enabled == enable) { + err = 0; + goto out; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) { + err = -ENOMEM; + goto out; + } + + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, + MLX5_CAP_GENERAL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out_free; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + memcpy(hca_caps, MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability), + MLX5_UN_SZ_BYTES(hca_cap_union)); + MLX5_SET(cmd_hca_cap, hca_caps, roce, enable); + + err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, + 
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA roce cap"); + goto out_free; + } + + vport->info.roce_enabled = enable; + +out_free: + kfree(query_ctx); +out: + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 9995307d374b..5a85d8c1e797 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -448,7 +448,8 @@ static bool is_fwd_dest_type(enum mlx5_flow_destination_type type) type == MLX5_FLOW_DESTINATION_TYPE_UPLINK || type == MLX5_FLOW_DESTINATION_TYPE_VPORT || type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER || - type == MLX5_FLOW_DESTINATION_TYPE_TIR; + type == MLX5_FLOW_DESTINATION_TYPE_TIR || + type == MLX5_FLOW_DESTINATION_TYPE_RANGE; } static bool check_valid_spec(const struct mlx5_flow_spec *spec) @@ -1578,7 +1579,13 @@ static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM && d1->ft_num == d2->ft_num) || (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER && - d1->sampler_id == d2->sampler_id)) + d1->sampler_id == d2->sampler_id) || + (d1->type == MLX5_FLOW_DESTINATION_TYPE_RANGE && + d1->range.field == d2->range.field && + d1->range.hit_ft == d2->range.hit_ft && + d1->range.miss_ft == d2->range.miss_ft && + d1->range.min == d2->range.min && + d1->range.max == d2->range.max)) return true; } @@ -1962,6 +1969,9 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, if (flow_act->fg && ft->autogroup.active) return ERR_PTR(-EINVAL); + if (dest && dest_num <= 0) + return ERR_PTR(-EINVAL); + for (i = 0; i < dest_num; i++) { if (!dest_is_valid(&dest[i], flow_act, ft)) return ERR_PTR(-EINVAL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 3af50fd04d28..f137a0611b77 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -123,6 +123,7 @@ enum mlx5_flow_steering_mode { enum mlx5_flow_steering_capabilty { MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX = 1UL << 0, MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX = 1UL << 1, + MLX5_FLOW_STEERING_CAP_MATCH_RANGES = 1UL << 2, }; struct mlx5_flow_steering { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 0259a149a64c..d9fcb9ed726f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -118,13 +118,41 @@ struct mlx5_fib_event_work { }; }; +static struct net_device* +mlx5_lag_get_next_fib_dev(struct mlx5_lag *ldev, + struct fib_info *fi, + struct net_device *current_dev) +{ + struct net_device *fib_dev; + int i, ldev_idx, nhs; + + nhs = fib_info_num_path(fi); + i = 0; + if (current_dev) { + for (; i < nhs; i++) { + fib_dev = fib_info_nh(fi, i)->fib_nh_dev; + if (fib_dev == current_dev) { + i++; + break; + } + } + } + for (; i < nhs; i++) { + fib_dev = fib_info_nh(fi, i)->fib_nh_dev; + ldev_idx = mlx5_lag_dev_get_netdev_idx(ldev, fib_dev); + if (ldev_idx >= 0) + return ldev->pf[ldev_idx].netdev; + } + + return NULL; +} + static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, struct fib_entry_notifier_info *fen_info) { + struct net_device *nh_dev0, *nh_dev1; struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; - struct fib_nh *fib_nh0, *fib_nh1; - unsigned 
int nhs; /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { @@ -140,16 +168,25 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, fi->fib_priority >= mp->fib.priority) return; + nh_dev0 = mlx5_lag_get_next_fib_dev(ldev, fi, NULL); + nh_dev1 = mlx5_lag_get_next_fib_dev(ldev, fi, nh_dev0); + /* Handle add/replace event */ - nhs = fib_info_num_path(fi); - if (nhs == 1) { - if (__mlx5_lag_is_active(ldev)) { - struct fib_nh *nh = fib_info_nh(fi, 0); - struct net_device *nh_dev = nh->fib_nh_dev; - int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); + if (!nh_dev0) { + if (mp->fib.dst == fen_info->dst && mp->fib.dst_len == fen_info->dst_len) + mp->fib.mfi = NULL; + return; + } - if (i < 0) - return; + if (nh_dev0 == nh_dev1) { + mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, + "Multipath offload doesn't support routes with multiple nexthops of the same device"); + return; + } + + if (!nh_dev1) { + if (__mlx5_lag_is_active(ldev)) { + int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev0); i++; mlx5_lag_set_port_affinity(ldev, i); @@ -159,21 +196,6 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, return; } - if (nhs != 2) - return; - - /* Verify next hops are ports of the same hca */ - fib_nh0 = fib_info_nh(fi, 0); - fib_nh1 = fib_info_nh(fi, 1); - if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev && - fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) && - !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev && - fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) { - mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev, - "Multipath offload require two ports of the same HCA\n"); - return; - } - /* First time we see multipath route */ if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) { struct lag_tracker tracker; @@ -268,7 +290,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, struct mlx5_fib_event_work *fib_work; struct fib_entry_notifier_info *fen_info; struct fib_nh_notifier_info *fnh_info; - struct net_device *fib_dev; struct fib_info *fi; if (info->family != AF_INET) @@ -285,11 +306,7 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fi = fen_info->fi; if (fi->nh) return NOTIFY_DONE; - fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; - if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev && - fib_dev != ldev->pf[MLX5_LAG_P2].netdev) { - return NOTIFY_DONE; - } + fib_work = mlx5_lag_init_fib_work(ldev, event); if (!fib_work) return NOTIFY_DONE; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index a806e3de7b7c..029305a8b80a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -324,7 +324,10 @@ void mlx5_unload_one_devl_locked(struct mlx5_core_dev *dev); int mlx5_load_one(struct mlx5_core_dev *dev, bool recovery); int mlx5_load_one_devl_locked(struct mlx5_core_dev *dev, bool recovery); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out); +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 function_id, + u16 opmod); +#define mlx5_vport_get_other_func_general_cap(dev, fid, out) \ + mlx5_vport_get_other_func_cap(dev, fid, out, MLX5_CAP_GENERAL) void mlx5_events_work_enqueue(struct mlx5_core_dev *dev, struct work_struct *work); static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c 
b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 662f1d55e30e..6bde18bcd42f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -4,6 +4,7 @@ #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/mlx5/driver.h> +#include <linux/mlx5/vport.h> #include "mlx5_core.h" #include "mlx5_irq.h" #include "pci_irq.h" @@ -101,7 +102,7 @@ int mlx5_set_msix_vec_count(struct mlx5_core_dev *dev, int function_id, goto out; } - ret = mlx5_vport_get_other_func_cap(dev, function_id, query_cap); + ret = mlx5_vport_get_other_func_general_cap(dev, function_id, query_cap); if (ret) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c index b1dfad274a39..ee104cf04392 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c @@ -44,6 +44,7 @@ static const char * const action_type_to_str[] = { [DR_ACTION_TYP_INSERT_HDR] = "DR_ACTION_TYP_INSERT_HDR", [DR_ACTION_TYP_REMOVE_HDR] = "DR_ACTION_TYP_REMOVE_HDR", [DR_ACTION_TYP_ASO_FLOW_METER] = "DR_ACTION_TYP_ASO_FLOW_METER", + [DR_ACTION_TYP_RANGE] = "DR_ACTION_TYP_RANGE", [DR_ACTION_TYP_MAX] = "DR_ACTION_UNKNOWN", }; @@ -61,6 +62,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, @@ -79,6 +81,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_DECAP, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, @@ -94,6 +97,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, @@ -103,6 +107,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, @@ -116,6 +121,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, @@ -129,6 +135,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_PUSH_VLAN] 
= { [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, @@ -141,6 +148,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_TAG] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, @@ -159,6 +167,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, }, [DR_ACTION_STATE_TERM] = { @@ -169,6 +178,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_NO_ACTION] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, @@ -183,6 +193,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_DECAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, @@ -190,6 +201,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_ENCAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_ASO_FLOW_METER] = DR_ACTION_STATE_ASO, @@ -197,6 +209,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_MODIFY_HDR] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, @@ -207,6 +220,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] }, [DR_ACTION_STATE_POP_VLAN] = { [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, @@ -220,6 +234,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_PUSH_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, @@ -231,6 +246,7 @@ 
next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_NON_TERM] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, @@ -250,6 +266,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, }, [DR_ACTION_STATE_TERM] = { [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_TERM, @@ -259,6 +276,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_NO_ACTION] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, @@ -276,6 +294,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_DECAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, @@ -291,6 +310,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_QP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, @@ -299,6 +319,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_MODIFY_HDR] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, @@ -311,6 +332,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_POP_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, @@ -324,6 +346,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_PUSH_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, @@ -337,6 +360,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_NON_TERM] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = 
DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_TNL_L2_TO_L2] = DR_ACTION_STATE_DECAP, @@ -354,6 +378,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_ASO] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, }, @@ -365,6 +390,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_NO_ACTION] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, @@ -380,6 +406,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_DECAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_DECAP, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, @@ -388,6 +415,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_ENCAP] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ENCAP, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, @@ -396,6 +424,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_MODIFY_HDR] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_MODIFY_HDR, [DR_ACTION_TYP_L2_TO_TNL_L2] = DR_ACTION_STATE_ENCAP, @@ -407,6 +436,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] }, [DR_ACTION_STATE_POP_VLAN] = { [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_POP_VLAN, [DR_ACTION_TYP_POP_VLAN] = DR_ACTION_STATE_POP_VLAN, @@ -421,6 +451,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_PUSH_VLAN] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_PUSH_VLAN, @@ -433,6 +464,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_STATE_NON_TERM] = { [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_SAMPLER] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_NON_TERM, [DR_ACTION_TYP_MODIFY_HDR] = DR_ACTION_STATE_MODIFY_HDR, @@ -452,6 +484,7 @@ next_action_state[DR_ACTION_DOMAIN_MAX][DR_ACTION_STATE_MAX][DR_ACTION_TYP_MAX] [DR_ACTION_TYP_PUSH_VLAN] = DR_ACTION_STATE_PUSH_VLAN, [DR_ACTION_TYP_DROP] = DR_ACTION_STATE_TERM, 
[DR_ACTION_TYP_FT] = DR_ACTION_STATE_TERM, + [DR_ACTION_TYP_RANGE] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_VPORT] = DR_ACTION_STATE_TERM, [DR_ACTION_TYP_CTR] = DR_ACTION_STATE_ASO, }, @@ -634,6 +667,83 @@ static void dr_action_print_sequence(struct mlx5dr_domain *dmn, actions[i]->action_type); } +static int dr_action_get_dest_fw_tbl_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_action_dest_tbl *dest_tbl, + bool is_rx_rule, + u64 *final_icm_addr) +{ + struct mlx5dr_cmd_query_flow_table_details output; + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + int ret; + + if (!dest_tbl->fw_tbl.rx_icm_addr) { + ret = mlx5dr_cmd_query_flow_table(dmn->mdev, + dest_tbl->fw_tbl.type, + dest_tbl->fw_tbl.id, + &output); + if (ret) { + mlx5dr_err(dmn, + "Failed mlx5_cmd_query_flow_table ret: %d\n", + ret); + return ret; + } + + dest_tbl->fw_tbl.tx_icm_addr = output.sw_owner_icm_root_1; + dest_tbl->fw_tbl.rx_icm_addr = output.sw_owner_icm_root_0; + } + + *final_icm_addr = is_rx_rule ? dest_tbl->fw_tbl.rx_icm_addr : + dest_tbl->fw_tbl.tx_icm_addr; + return 0; +} + +static int dr_action_get_dest_sw_tbl_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_action_dest_tbl *dest_tbl, + bool is_rx_rule, + u64 *final_icm_addr) +{ + struct mlx5dr_domain *dmn = matcher->tbl->dmn; + struct mlx5dr_icm_chunk *chunk; + + if (dest_tbl->tbl->dmn != dmn) { + mlx5dr_err(dmn, + "Destination table belongs to a different domain\n"); + return -EINVAL; + } + + if (dest_tbl->tbl->level <= matcher->tbl->level) { + mlx5_core_dbg_once(dmn->mdev, + "Connecting table to a lower/same level destination table\n"); + mlx5dr_dbg(dmn, + "Connecting table at level %d to a destination table at level %d\n", + matcher->tbl->level, + dest_tbl->tbl->level); + } + + chunk = is_rx_rule ? dest_tbl->tbl->rx.s_anchor->chunk : + dest_tbl->tbl->tx.s_anchor->chunk; + + *final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); + return 0; +} + +static int dr_action_get_dest_tbl_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_action_dest_tbl *dest_tbl, + bool is_rx_rule, + u64 *final_icm_addr) +{ + if (dest_tbl->is_fw_tbl) + return dr_action_get_dest_fw_tbl_addr(matcher, + dest_tbl, + is_rx_rule, + final_icm_addr); + + return dr_action_get_dest_sw_tbl_addr(matcher, + dest_tbl, + is_rx_rule, + final_icm_addr); +} + #define WITH_VLAN_NUM_HW_ACTIONS 6 int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, @@ -661,8 +771,6 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, action_domain = dr_action_get_action_domain(dmn->type, nic_dmn->type); for (i = 0; i < num_actions; i++) { - struct mlx5dr_action_dest_tbl *dest_tbl; - struct mlx5dr_icm_chunk *chunk; struct mlx5dr_action *action; int max_actions_type = 1; u32 action_type; @@ -676,50 +784,27 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher, break; case DR_ACTION_TYP_FT: dest_action = action; - dest_tbl = action->dest_tbl; - if (!dest_tbl->is_fw_tbl) { - if (dest_tbl->tbl->dmn != dmn) { - mlx5dr_err(dmn, - "Destination table belongs to a different domain\n"); - return -EINVAL; - } - if (dest_tbl->tbl->level <= matcher->tbl->level) { - mlx5_core_dbg_once(dmn->mdev, - "Connecting table to a lower/same level destination table\n"); - mlx5dr_dbg(dmn, - "Connecting table at level %d to a destination table at level %d\n", - matcher->tbl->level, - dest_tbl->tbl->level); - } - chunk = rx_rule ? 
dest_tbl->tbl->rx.s_anchor->chunk : - dest_tbl->tbl->tx.s_anchor->chunk; - attr.final_icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(chunk); - } else { - struct mlx5dr_cmd_query_flow_table_details output; - int ret; - - /* get the relevant addresses */ - if (!action->dest_tbl->fw_tbl.rx_icm_addr) { - ret = mlx5dr_cmd_query_flow_table(dmn->mdev, - dest_tbl->fw_tbl.type, - dest_tbl->fw_tbl.id, - &output); - if (!ret) { - dest_tbl->fw_tbl.tx_icm_addr = - output.sw_owner_icm_root_1; - dest_tbl->fw_tbl.rx_icm_addr = - output.sw_owner_icm_root_0; - } else { - mlx5dr_err(dmn, - "Failed mlx5_cmd_query_flow_table ret: %d\n", - ret); - return ret; - } - } - attr.final_icm_addr = rx_rule ? - dest_tbl->fw_tbl.rx_icm_addr : - dest_tbl->fw_tbl.tx_icm_addr; - } + ret = dr_action_get_dest_tbl_addr(matcher, action->dest_tbl, + rx_rule, &attr.final_icm_addr); + if (ret) + return ret; + break; + case DR_ACTION_TYP_RANGE: + ret = dr_action_get_dest_tbl_addr(matcher, + action->range->hit_tbl_action->dest_tbl, + rx_rule, &attr.final_icm_addr); + if (ret) + return ret; + + ret = dr_action_get_dest_tbl_addr(matcher, + action->range->miss_tbl_action->dest_tbl, + rx_rule, &attr.range.miss_icm_addr); + if (ret) + return ret; + + attr.range.definer_id = action->range->definer_id; + attr.range.min = action->range->min; + attr.range.max = action->range->max; break; case DR_ACTION_TYP_QP: mlx5dr_info(dmn, "Domain doesn't support QP\n"); @@ -866,6 +951,7 @@ static unsigned int action_size[DR_ACTION_TYP_MAX] = { [DR_ACTION_TYP_REMOVE_HDR] = sizeof(struct mlx5dr_action_reformat), [DR_ACTION_TYP_SAMPLER] = sizeof(struct mlx5dr_action_sampler), [DR_ACTION_TYP_ASO_FLOW_METER] = sizeof(struct mlx5dr_action_aso_flow_meter), + [DR_ACTION_TYP_RANGE] = sizeof(struct mlx5dr_action_range), }; static struct mlx5dr_action * @@ -933,6 +1019,123 @@ dec_ref: return NULL; } +static void dr_action_range_definer_fill(u16 *format_id, + u8 *dw_selectors, + u8 *byte_selectors, + u8 *match_mask) +{ + int i; + + *format_id = MLX5_IFC_DEFINER_FORMAT_ID_SELECT; + + dw_selectors[0] = MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN / 4; + + for (i = 1; i < MLX5_IFC_DEFINER_DW_SELECTORS_NUM; i++) + dw_selectors[i] = MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED; + + for (i = 0; i < MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM; i++) + byte_selectors[i] = MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED; + + MLX5_SET(match_definer_match_mask, match_mask, + match_dw_0, 0xffffUL << 16); +} + +static int dr_action_create_range_definer(struct mlx5dr_action *action) +{ + u8 match_mask[MLX5_FLD_SZ_BYTES(match_definer, match_mask)] = {}; + u8 byte_selectors[MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM] = {}; + u8 dw_selectors[MLX5_IFC_DEFINER_DW_SELECTORS_NUM] = {}; + struct mlx5dr_domain *dmn = action->range->dmn; + u32 definer_id; + u16 format_id; + int ret; + + dr_action_range_definer_fill(&format_id, + dw_selectors, + byte_selectors, + match_mask); + + ret = mlx5dr_definer_get(dmn, format_id, + dw_selectors, byte_selectors, + match_mask, &definer_id); + if (ret) + return ret; + + action->range->definer_id = definer_id; + return 0; +} + +static void dr_action_destroy_range_definer(struct mlx5dr_action *action) +{ + mlx5dr_definer_put(action->range->dmn, action->range->definer_id); +} + +struct mlx5dr_action * +mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn, + u32 field, + struct mlx5_flow_table *hit_ft, + struct mlx5_flow_table *miss_ft, + u32 min, + u32 max) +{ + struct mlx5dr_action *action; + int ret; + + if (!mlx5dr_supp_match_ranges(dmn->mdev)) { + mlx5dr_dbg(dmn, 
"SELECT definer support is needed for match range\n"); + return NULL; + } + + if (field != MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN || + min > 0xffff || max > 0xffff) { + mlx5dr_err(dmn, "Invalid match range parameters\n"); + return NULL; + } + + action = dr_action_create_generic(DR_ACTION_TYP_RANGE); + if (!action) + return NULL; + + action->range->hit_tbl_action = + mlx5dr_is_fw_table(hit_ft) ? + mlx5dr_action_create_dest_flow_fw_table(dmn, hit_ft) : + mlx5dr_action_create_dest_table(hit_ft->fs_dr_table.dr_table); + + if (!action->range->hit_tbl_action) + goto free_action; + + action->range->miss_tbl_action = + mlx5dr_is_fw_table(miss_ft) ? + mlx5dr_action_create_dest_flow_fw_table(dmn, miss_ft) : + mlx5dr_action_create_dest_table(miss_ft->fs_dr_table.dr_table); + + if (!action->range->miss_tbl_action) + goto free_hit_tbl_action; + + action->range->min = min; + action->range->max = max; + action->range->dmn = dmn; + + ret = dr_action_create_range_definer(action); + if (ret) + goto free_miss_tbl_action; + + /* No need to increase refcount on domain for this action, + * the hit/miss table actions will do it internally. + */ + + return action; + +free_miss_tbl_action: + mlx5dr_action_destroy(action->range->miss_tbl_action); +free_hit_tbl_action: + mlx5dr_action_destroy(action->range->hit_tbl_action); +free_action: + kfree(action); + + return NULL; +} + struct mlx5dr_action * mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn, struct mlx5dr_action_dest *dests, @@ -1980,6 +2183,11 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action) case DR_ACTION_TYP_ASO_FLOW_METER: refcount_dec(&action->aso->dmn->refcount); break; + case DR_ACTION_TYP_RANGE: + dr_action_destroy_range_definer(action); + mlx5dr_action_destroy(action->range->miss_tbl_action); + mlx5dr_action_destroy(action->range->hit_tbl_action); + break; default: break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c index b4739eafc180..07b6a6dcb92f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c @@ -564,6 +564,83 @@ void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, mlx5_cmd_exec_in(mdev, dealloc_packet_reformat_context, in); } +static void dr_cmd_set_definer_format(void *ptr, u16 format_id, + u8 *dw_selectors, + u8 *byte_selectors) +{ + if (format_id != MLX5_IFC_DEFINER_FORMAT_ID_SELECT) + return; + + MLX5_SET(match_definer, ptr, format_select_dw0, dw_selectors[0]); + MLX5_SET(match_definer, ptr, format_select_dw1, dw_selectors[1]); + MLX5_SET(match_definer, ptr, format_select_dw2, dw_selectors[2]); + MLX5_SET(match_definer, ptr, format_select_dw3, dw_selectors[3]); + MLX5_SET(match_definer, ptr, format_select_dw4, dw_selectors[4]); + MLX5_SET(match_definer, ptr, format_select_dw5, dw_selectors[5]); + MLX5_SET(match_definer, ptr, format_select_dw6, dw_selectors[6]); + MLX5_SET(match_definer, ptr, format_select_dw7, dw_selectors[7]); + MLX5_SET(match_definer, ptr, format_select_dw8, dw_selectors[8]); + + MLX5_SET(match_definer, ptr, format_select_byte0, byte_selectors[0]); + MLX5_SET(match_definer, ptr, format_select_byte1, byte_selectors[1]); + MLX5_SET(match_definer, ptr, format_select_byte2, byte_selectors[2]); + MLX5_SET(match_definer, ptr, format_select_byte3, byte_selectors[3]); + MLX5_SET(match_definer, ptr, format_select_byte4, byte_selectors[4]); + MLX5_SET(match_definer, ptr, format_select_byte5, byte_selectors[5]); + 
MLX5_SET(match_definer, ptr, format_select_byte6, byte_selectors[6]); + MLX5_SET(match_definer, ptr, format_select_byte7, byte_selectors[7]); +} + +int mlx5dr_cmd_create_definer(struct mlx5_core_dev *mdev, + u16 format_id, + u8 *dw_selectors, + u8 *byte_selectors, + u8 *match_mask, + u32 *definer_id) +{ + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {}; + u32 in[MLX5_ST_SZ_DW(create_match_definer_in)] = {}; + void *ptr; + int err; + + ptr = MLX5_ADDR_OF(create_match_definer_in, in, + general_obj_in_cmd_hdr); + MLX5_SET(general_obj_in_cmd_hdr, ptr, opcode, + MLX5_CMD_OP_CREATE_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, ptr, obj_type, + MLX5_OBJ_TYPE_MATCH_DEFINER); + + ptr = MLX5_ADDR_OF(create_match_definer_in, in, obj_context); + MLX5_SET(match_definer, ptr, format_id, format_id); + + dr_cmd_set_definer_format(ptr, format_id, + dw_selectors, byte_selectors); + + ptr = MLX5_ADDR_OF(match_definer, ptr, match_mask); + memcpy(ptr, match_mask, MLX5_FLD_SZ_BYTES(match_definer, match_mask)); + + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + *definer_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); + + return 0; +} + +void +mlx5dr_cmd_destroy_definer(struct mlx5_core_dev *mdev, u32 definer_id) +{ + u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; + u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; + + MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_MATCH_DEFINER); + MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, definer_id); + + mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num, u16 index, struct mlx5dr_cmd_gid_attr *attr) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index 7adcf0eec13b..db81d881d38e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -49,7 +49,8 @@ enum dr_dump_rec_type { DR_DUMP_REC_TYPE_ACTION_POP_VLAN = 3413, DR_DUMP_REC_TYPE_ACTION_SAMPLER = 3415, DR_DUMP_REC_TYPE_ACTION_INSERT_HDR = 3420, - DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421 + DR_DUMP_REC_TYPE_ACTION_REMOVE_HDR = 3421, + DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE = 3425, }; void mlx5dr_dbg_tbl_add(struct mlx5dr_table *tbl) @@ -107,6 +108,8 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, { struct mlx5dr_action *action = action_mem->action; const u64 action_id = DR_DBG_PTR_TO_ID(action); + u64 hit_tbl_ptr, miss_tbl_ptr; + u32 hit_tbl_id, miss_tbl_id; switch (action->action_type) { case DR_ACTION_TYP_DROP: @@ -198,6 +201,30 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, action->sampler->rx_icm_addr, action->sampler->tx_icm_addr); break; + case DR_ACTION_TYP_RANGE: + if (action->range->hit_tbl_action->dest_tbl->is_fw_tbl) { + hit_tbl_id = action->range->hit_tbl_action->dest_tbl->fw_tbl.id; + hit_tbl_ptr = 0; + } else { + hit_tbl_id = action->range->hit_tbl_action->dest_tbl->tbl->table_id; + hit_tbl_ptr = + DR_DBG_PTR_TO_ID(action->range->hit_tbl_action->dest_tbl->tbl); + } + + if (action->range->miss_tbl_action->dest_tbl->is_fw_tbl) { + miss_tbl_id = action->range->miss_tbl_action->dest_tbl->fw_tbl.id; + miss_tbl_ptr = 0; + } else { + miss_tbl_id = action->range->miss_tbl_action->dest_tbl->tbl->table_id; + miss_tbl_ptr = + 
DR_DBG_PTR_TO_ID(action->range->miss_tbl_action->dest_tbl->tbl); + } + + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx,0x%x,0x%llx,0x%x\n", + DR_DUMP_REC_TYPE_ACTION_MATCH_RANGE, action_id, rule_id, + hit_tbl_id, hit_tbl_ptr, miss_tbl_id, miss_tbl_ptr, + action->range->definer_id); + break; default: return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c new file mode 100644 index 000000000000..d5ea97751945 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_definer.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + +#include "dr_types.h" +#include "dr_ste.h" + +struct dr_definer_object { + u32 id; + u16 format_id; + u8 dw_selectors[MLX5_IFC_DEFINER_DW_SELECTORS_NUM]; + u8 byte_selectors[MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM]; + u8 match_mask[DR_STE_SIZE_MATCH_TAG]; + refcount_t refcount; +}; + +static bool dr_definer_compare(struct dr_definer_object *definer, + u16 format_id, u8 *dw_selectors, + u8 *byte_selectors, u8 *match_mask) +{ + int i; + + if (definer->format_id != format_id) + return false; + + for (i = 0; i < MLX5_IFC_DEFINER_DW_SELECTORS_NUM; i++) + if (definer->dw_selectors[i] != dw_selectors[i]) + return false; + + for (i = 0; i < MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM; i++) + if (definer->byte_selectors[i] != byte_selectors[i]) + return false; + + if (memcmp(definer->match_mask, match_mask, DR_STE_SIZE_MATCH_TAG)) + return false; + + return true; +} + +static struct dr_definer_object * +dr_definer_find_obj(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask) +{ + struct dr_definer_object *definer_obj; + unsigned long id; + + xa_for_each(&dmn->definers_xa, id, definer_obj) { + if (dr_definer_compare(definer_obj, format_id, + dw_selectors, byte_selectors, + match_mask)) + return definer_obj; + } + + return NULL; +} + +static struct dr_definer_object * +dr_definer_create_obj(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, u8 *match_mask) +{ + struct dr_definer_object *definer_obj; + int ret = 0; + + definer_obj = kzalloc(sizeof(*definer_obj), GFP_KERNEL); + if (!definer_obj) + return NULL; + + ret = mlx5dr_cmd_create_definer(dmn->mdev, + format_id, + dw_selectors, + byte_selectors, + match_mask, + &definer_obj->id); + if (ret) + goto err_free_definer_obj; + + /* Definer ID can have 32 bits, but STE format + * supports only definers with 8 bit IDs. 
+ */ + if (definer_obj->id > 0xff) { + mlx5dr_err(dmn, "Unsupported definer ID (%d)\n", definer_obj->id); + goto err_destroy_definer; + } + + definer_obj->format_id = format_id; + memcpy(definer_obj->dw_selectors, dw_selectors, sizeof(definer_obj->dw_selectors)); + memcpy(definer_obj->byte_selectors, byte_selectors, sizeof(definer_obj->byte_selectors)); + memcpy(definer_obj->match_mask, match_mask, sizeof(definer_obj->match_mask)); + + refcount_set(&definer_obj->refcount, 1); + + ret = xa_insert(&dmn->definers_xa, definer_obj->id, definer_obj, GFP_KERNEL); + if (ret) { + mlx5dr_dbg(dmn, "Couldn't insert new definer into xarray (%d)\n", ret); + goto err_destroy_definer; + } + + return definer_obj; + +err_destroy_definer: + mlx5dr_cmd_destroy_definer(dmn->mdev, definer_obj->id); +err_free_definer_obj: + kfree(definer_obj); + + return NULL; +} + +static void dr_definer_destroy_obj(struct mlx5dr_domain *dmn, + struct dr_definer_object *definer_obj) +{ + mlx5dr_cmd_destroy_definer(dmn->mdev, definer_obj->id); + xa_erase(&dmn->definers_xa, definer_obj->id); + kfree(definer_obj); +} + +int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, + u8 *match_mask, u32 *definer_id) +{ + struct dr_definer_object *definer_obj; + int ret = 0; + + definer_obj = dr_definer_find_obj(dmn, format_id, dw_selectors, + byte_selectors, match_mask); + if (!definer_obj) { + definer_obj = dr_definer_create_obj(dmn, format_id, + dw_selectors, byte_selectors, + match_mask); + if (!definer_obj) + return -ENOMEM; + } else { + refcount_inc(&definer_obj->refcount); + } + + *definer_id = definer_obj->id; + + return ret; +} + +void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id) +{ + struct dr_definer_object *definer_obj; + + definer_obj = xa_load(&dmn->definers_xa, definer_id); + if (!definer_obj) { + mlx5dr_err(dmn, "Definer ID %d not found\n", definer_id); + return; + } + + if (refcount_dec_and_test(&definer_obj->refcount)) + dr_definer_destroy_obj(dmn, definer_obj); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 9a9836218c8e..5b8bb2ca31e6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -425,10 +425,11 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) refcount_set(&dmn->refcount, 1); mutex_init(&dmn->info.rx.mutex); mutex_init(&dmn->info.tx.mutex); + xa_init(&dmn->definers_xa); if (dr_domain_caps_init(mdev, dmn)) { mlx5dr_err(dmn, "Failed init domain, no caps\n"); - goto free_domain; + goto def_xa_destroy; } dmn->info.max_log_action_icm_sz = DR_CHUNK_SIZE_4K; @@ -453,7 +454,8 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type) uninit_caps: dr_domain_caps_uninit(dmn); -free_domain: +def_xa_destroy: + xa_destroy(&dmn->definers_xa); kfree(dmn); return NULL; } @@ -493,6 +495,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) dr_domain_uninit_csum_recalc_fts(dmn); dr_domain_uninit_resources(dmn); dr_domain_caps_uninit(dmn); + xa_destroy(&dmn->definers_xa); mutex_destroy(&dmn->info.tx.mutex); mutex_destroy(&dmn->info.rx.mutex); kfree(dmn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 7879991048ce..74cbe53ee9db 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -35,16 +35,28 @@ static int dr_rule_append_to_miss_list(struct mlx5dr_domain *dmn, return 0; } +static void dr_rule_set_last_ste_miss_addr(struct mlx5dr_matcher *matcher, + struct mlx5dr_matcher_rx_tx *nic_matcher, + u8 *hw_ste) +{ + struct mlx5dr_ste_ctx *ste_ctx = matcher->tbl->dmn->ste_ctx; + u64 icm_addr; + + if (mlx5dr_ste_is_miss_addr_set(ste_ctx, hw_ste)) + return; + + icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); + mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr); +} + static struct mlx5dr_ste * dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, struct mlx5dr_matcher_rx_tx *nic_matcher, u8 *hw_ste) { struct mlx5dr_domain *dmn = matcher->tbl->dmn; - struct mlx5dr_ste_ctx *ste_ctx = dmn->ste_ctx; struct mlx5dr_ste_htbl *new_htbl; struct mlx5dr_ste *ste; - u64 icm_addr; /* Create new table for miss entry */ new_htbl = mlx5dr_ste_htbl_alloc(dmn->ste_icm_pool, @@ -58,8 +70,7 @@ dr_rule_create_collision_htbl(struct mlx5dr_matcher *matcher, /* One and only entry, never grows */ ste = new_htbl->chunk->ste_arr; - icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); - mlx5dr_ste_set_miss_addr(ste_ctx, hw_ste, icm_addr); + dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste); mlx5dr_htbl_get(new_htbl); return ste; @@ -241,7 +252,6 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, bool use_update_list = false; u8 hw_ste[DR_STE_SIZE] = {}; struct mlx5dr_ste *new_ste; - u64 icm_addr; int new_idx; u8 sb_idx; @@ -250,9 +260,8 @@ dr_rule_rehash_copy_ste(struct mlx5dr_matcher *matcher, mlx5dr_ste_set_bit_mask(hw_ste, nic_matcher->ste_builder[sb_idx].bit_mask); /* Copy STE control and tag */ - icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); memcpy(hw_ste, mlx5dr_ste_get_hw_ste(cur_ste), DR_STE_SIZE_REDUCED); - mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); + dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste); new_idx = mlx5dr_ste_calc_hash_index(hw_ste, new_htbl); new_ste = &new_htbl->chunk->ste_arr[new_idx]; @@ -773,7 +782,6 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, { struct mlx5dr_domain *dmn = matcher->tbl->dmn; struct mlx5dr_ste_send_info *ste_info; - u64 icm_addr; /* Take ref on table, only on first time this ste is used */ mlx5dr_htbl_get(cur_htbl); @@ -781,8 +789,7 @@ static int dr_rule_handle_empty_entry(struct mlx5dr_matcher *matcher, /* new entry -> new branch */ list_add_tail(&ste->miss_list_node, miss_list); - icm_addr = mlx5dr_icm_pool_get_chunk_icm_addr(nic_matcher->e_anchor->chunk); - mlx5dr_ste_set_miss_addr(dmn->ste_ctx, hw_ste, icm_addr); + dr_rule_set_last_ste_miss_addr(matcher, nic_matcher, hw_ste); ste->ste_chain_location = ste_location; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c index 9e19a8dc9022..1e15f605df6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c @@ -90,6 +90,16 @@ static void dr_ste_set_always_miss(struct dr_hw_ste_format *hw_ste) hw_ste->mask[0] = 0; } +bool mlx5dr_ste_is_miss_addr_set(struct mlx5dr_ste_ctx *ste_ctx, + u8 *hw_ste_p) +{ + if (!ste_ctx->is_miss_addr_set) + return false; + + /* check if miss address is already set for this type of STE */ + return ste_ctx->is_miss_addr_set(hw_ste_p); +} + void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, u8 
*hw_ste_p, u64 miss_addr) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h index 17513baff9b0..7075142bcfb6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.h @@ -151,6 +151,7 @@ struct mlx5dr_ste_ctx { bool is_rx, u16 gvmi); void (*set_next_lu_type)(u8 *hw_ste_p, u16 lu_type); u16 (*get_next_lu_type)(u8 *hw_ste_p); + bool (*is_miss_addr_set)(u8 *hw_ste_p); void (*set_miss_addr)(u8 *hw_ste_p, u64 miss_addr); u64 (*get_miss_addr)(u8 *hw_ste_p); void (*set_hit_addr)(u8 *hw_ste_p, u64 icm_addr, u32 ht_size); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index ee677a5c76be..084145f18084 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@ -13,6 +13,7 @@ enum dr_ste_v1_entry_format { DR_STE_V1_TYPE_BWC_BYTE = 0x0, DR_STE_V1_TYPE_BWC_DW = 0x1, DR_STE_V1_TYPE_MATCH = 0x2, + DR_STE_V1_TYPE_MATCH_RANGES = 0x7, }; /* Lookup type is built from 2B: [ Definer mode 1B ][ Definer index 1B ] */ @@ -267,6 +268,16 @@ static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) MLX5_SET(ste_match_bwc_v1, hw_ste_p, entry_format, entry_type); } +bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p) +{ + u8 entry_type = MLX5_GET(ste_match_bwc_v1, hw_ste_p, entry_format); + + /* unlike MATCH STE, for MATCH_RANGES STE both hit and miss addresses + * are part of the action, so they are both set as part of STE init + */ + return entry_type == DR_STE_V1_TYPE_MATCH_RANGES; +} + void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr) { u64 index = miss_addr >> 6; @@ -520,6 +531,27 @@ static void dr_ste_v1_set_aso_flow_meter(u8 *d_action, init_color); } +static void dr_ste_v1_set_match_range_pkt_len(u8 *hw_ste_p, u32 definer_id, + u32 min, u32 max) +{ + MLX5_SET(ste_match_ranges_v1, hw_ste_p, match_definer_ctx_idx, definer_id); + + /* When the STE is sent, its mask and tags will be swapped in + * dr_ste_v1_prepare_for_postsend(). This, however, is a match range STE, + * which doesn't have a mask and shouldn't have mask/tag swapped. + * We're using the common utility functions to send this STE, so we need + * to allow for this swapping - place the values in the corresponding + * locations to allow flipping them when writing to ICM. + * + * min/max_value_2 corresponds to match_dw_0 in its definer. + * To allow mask/tag swapping, we write the min/max_2 to min/max_0. + * + * Pkt len is 2 bytes that are stored in the higher section of the DW.
+ */ + MLX5_SET(ste_match_ranges_v1, hw_ste_p, min_value_0, min << 16); + MLX5_SET(ste_match_ranges_v1, hw_ste_p, max_value_0, max << 16); +} + static void dr_ste_v1_arr_init_next_match(u8 **last_ste, u32 *added_stes, u16 gvmi) @@ -535,6 +567,14 @@ static void dr_ste_v1_arr_init_next_match(u8 **last_ste, memset(action, 0, MLX5_FLD_SZ_BYTES(ste_mask_and_match_v1, action)); } +static void dr_ste_v1_arr_init_next_match_range(u8 **last_ste, + u32 *added_stes, + u16 gvmi) +{ + dr_ste_v1_arr_init_next_match(last_ste, added_stes, gvmi); + dr_ste_v1_set_entry_type(*last_ste, DR_STE_V1_TYPE_MATCH_RANGES); +} + void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, u8 *action_type_set, u32 actions_caps, @@ -670,6 +710,20 @@ void dr_ste_v1_set_actions_tx(struct mlx5dr_domain *dmn, action += DR_STE_ACTION_DOUBLE_SZ; } + if (action_type_set[DR_ACTION_TYP_RANGE]) { + /* match ranges requires a new STE of its own type */ + dr_ste_v1_arr_init_next_match_range(&last_ste, added_stes, attr->gvmi); + dr_ste_v1_set_miss_addr(last_ste, attr->range.miss_icm_addr); + + /* we do not support setting any action on the match ranges STE */ + action_sz = 0; + + dr_ste_v1_set_match_range_pkt_len(last_ste, + attr->range.definer_id, + attr->range.min, + attr->range.max); + } + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } @@ -858,6 +912,20 @@ void dr_ste_v1_set_actions_rx(struct mlx5dr_domain *dmn, action += DR_STE_ACTION_DOUBLE_SZ; } + if (action_type_set[DR_ACTION_TYP_RANGE]) { + /* match ranges requires a new STE of its own type */ + dr_ste_v1_arr_init_next_match_range(&last_ste, added_stes, attr->gvmi); + dr_ste_v1_set_miss_addr(last_ste, attr->range.miss_icm_addr); + + /* we do not support setting any action on the match ranges STE */ + action_sz = 0; + + dr_ste_v1_set_match_range_pkt_len(last_ste, + attr->range.definer_id, + attr->range.min, + attr->range.max); + } + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } @@ -2144,6 +2212,7 @@ static struct mlx5dr_ste_ctx ste_ctx_v1 = { .ste_init = &dr_ste_v1_init, .set_next_lu_type = &dr_ste_v1_set_next_lu_type, .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set, .set_miss_addr = &dr_ste_v1_set_miss_addr, .get_miss_addr = &dr_ste_v1_get_miss_addr, .set_hit_addr = &dr_ste_v1_set_hit_addr, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h index 8a1d49790c6e..b5c0f0f8392f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.h @@ -7,6 +7,7 @@ #include "dr_types.h" #include "dr_ste.h" +bool dr_ste_v1_is_miss_addr_set(u8 *hw_ste_p); void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr); u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p); void dr_ste_v1_set_byte_mask(u8 *hw_ste_p, u16 byte_mask); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c index c60fddd125d2..cf1a3c9a1cf4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v2.c @@ -202,6 +202,7 @@ static struct mlx5dr_ste_ctx ste_ctx_v2 = { .ste_init = &dr_ste_v1_init, .set_next_lu_type = &dr_ste_v1_set_next_lu_type, .get_next_lu_type = &dr_ste_v1_get_next_lu_type, + .is_miss_addr_set = &dr_ste_v1_is_miss_addr_set, 
.set_miss_addr = &dr_ste_v1_set_miss_addr, .get_miss_addr = &dr_ste_v1_get_miss_addr, .set_hit_addr = &dr_ste_v1_set_hit_addr, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h index 41a37b9ac98b..2b769dcbd453 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h @@ -81,6 +81,7 @@ mlx5dr_icm_next_higher_chunk(enum mlx5dr_icm_chunk_size chunk) enum { DR_STE_SIZE = 64, DR_STE_SIZE_CTRL = 32, + DR_STE_SIZE_MATCH_TAG = 32, DR_STE_SIZE_TAG = 16, DR_STE_SIZE_MASK = 16, DR_STE_SIZE_REDUCED = DR_STE_SIZE - DR_STE_SIZE_MASK, @@ -128,6 +129,7 @@ enum mlx5dr_action_type { DR_ACTION_TYP_REMOVE_HDR, DR_ACTION_TYP_SAMPLER, DR_ACTION_TYP_ASO_FLOW_METER, + DR_ACTION_TYP_RANGE, DR_ACTION_TYP_MAX, }; @@ -237,6 +239,7 @@ static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl) /* STE utils */ u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl); +bool mlx5dr_ste_is_miss_addr_set(struct mlx5dr_ste_ctx *ste_ctx, u8 *hw_ste_p); void mlx5dr_ste_set_miss_addr(struct mlx5dr_ste_ctx *ste_ctx, u8 *hw_ste, u64 miss_addr); void mlx5dr_ste_set_hit_addr(struct mlx5dr_ste_ctx *ste_ctx, @@ -281,6 +284,13 @@ struct mlx5dr_ste_actions_attr { u8 dest_reg_id; u8 init_color; } aso_flow_meter; + + struct { + u64 miss_icm_addr; + u32 definer_id; + u32 min; + u32 max; + } range; }; void mlx5dr_ste_set_actions_rx(struct mlx5dr_ste_ctx *ste_ctx, @@ -924,6 +934,7 @@ struct mlx5dr_domain { struct mlx5dr_ste_ctx *ste_ctx; struct list_head dbg_tbl_list; struct mlx5dr_dbg_dump_info dump_info; + struct xarray definers_xa; }; struct mlx5dr_table_rx_tx { @@ -1026,6 +1037,15 @@ struct mlx5dr_action_dest_tbl { }; }; +struct mlx5dr_action_range { + struct mlx5dr_domain *dmn; + struct mlx5dr_action *hit_tbl_action; + struct mlx5dr_action *miss_tbl_action; + u32 definer_id; + u32 min; + u32 max; +}; + struct mlx5dr_action_ctr { u32 ctr_id; u32 offset; @@ -1072,6 +1092,7 @@ struct mlx5dr_action { struct mlx5dr_action_push_vlan *push_vlan; struct mlx5dr_action_flow_tag *flow_tag; struct mlx5dr_action_aso_flow_meter *aso; + struct mlx5dr_action_range *range; }; }; @@ -1295,6 +1316,14 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev, u32 *reformat_id); void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev, u32 reformat_id); +int mlx5dr_cmd_create_definer(struct mlx5_core_dev *mdev, + u16 format_id, + u8 *dw_selectors, + u8 *byte_selectors, + u8 *match_mask, + u32 *definer_id); +void mlx5dr_cmd_destroy_definer(struct mlx5_core_dev *mdev, + u32 definer_id); struct mlx5dr_cmd_gid_attr { u8 gid[16]; @@ -1483,4 +1512,18 @@ int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn, u32 flow_source); void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id, u32 group_id); + +static inline bool mlx5dr_is_fw_table(struct mlx5_flow_table *ft) +{ + return !ft->fs_dr_table.dr_table; +} + +static inline bool mlx5dr_supp_match_ranges(struct mlx5_core_dev *dev) +{ + return (MLX5_CAP_GEN(dev, steering_format_version) >= + MLX5_STEERING_FORMAT_CONNECTX_6DX) && + (MLX5_CAP_GEN_64(dev, match_definer_format_supported) & + (1ULL << MLX5_IFC_DEFINER_FORMAT_ID_SELECT)); +} + #endif /* _DR_TYPES_H_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c index 13b6d4721e17..984653756779 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c @@ -7,10 +7,11 @@ #include "fs_cmd.h" #include "mlx5dr.h" #include "fs_dr.h" +#include "dr_types.h" -static bool mlx5_dr_is_fw_table(u32 flags) +static bool dr_is_fw_term_table(struct mlx5_flow_table *ft) { - if (flags & MLX5_FLOW_TABLE_TERMINATION) + if (ft->flags & MLX5_FLOW_TABLE_TERMINATION) return true; return false; @@ -69,7 +70,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns, u32 flags; int err; - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_table(ns, ft, ft_attr, next_ft); @@ -109,7 +110,7 @@ static int mlx5_cmd_dr_destroy_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5dr_action *action = ft->fs_dr_table.miss_action; int err; - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_table(ns, ft); err = mlx5dr_table_destroy(ft->fs_dr_table.dr_table); @@ -134,7 +135,7 @@ static int mlx5_cmd_dr_modify_flow_table(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_table *next_ft) { - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->modify_flow_table(ns, ft, next_ft); return set_miss_action(ns, ft, next_ft); @@ -153,7 +154,7 @@ static int mlx5_cmd_dr_create_flow_group(struct mlx5_flow_root_namespace *ns, match_criteria_enable); struct mlx5dr_match_parameters mask; - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->create_flow_group(ns, ft, in, fg); @@ -178,7 +179,7 @@ static int mlx5_cmd_dr_destroy_flow_group(struct mlx5_flow_root_namespace *ns, struct mlx5_flow_table *ft, struct mlx5_flow_group *fg) { - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->destroy_flow_group(ns, ft, fg); return mlx5dr_matcher_destroy(fg->fs_dr_matcher.dr_matcher); @@ -209,11 +210,22 @@ static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain, { struct mlx5_flow_table *dest_ft = dst->dest_attr.ft; - if (mlx5_dr_is_fw_table(dest_ft->flags)) + if (mlx5dr_is_fw_table(dest_ft)) return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft); return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table); } +static struct mlx5dr_action *create_range_action(struct mlx5dr_domain *domain, + struct mlx5_flow_rule *dst) +{ + return mlx5dr_action_create_dest_match_range(domain, + dst->dest_attr.range.field, + dst->dest_attr.range.hit_ft, + dst->dest_attr.range.miss_ft, + dst->dest_attr.range.min, + dst->dest_attr.range.max); +} + static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domain, struct mlx5_fs_vlan *vlan) { @@ -260,7 +272,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, int err = 0; int i; - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->create_fte(ns, ft, group, fte); actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions), @@ -467,6 +479,15 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns, fs_dr_actions[fs_dr_num_actions++] = tmp_action; term_actions[num_term_actions++].dest = tmp_action; break; + case MLX5_FLOW_DESTINATION_TYPE_RANGE: + tmp_action = create_range_action(domain, dst); + if (!tmp_action) { + err = -ENOMEM; + goto free_actions; + } + fs_dr_actions[fs_dr_num_actions++] = tmp_action; + term_actions[num_term_actions++].dest = 
tmp_action; + break; default: err = -EOPNOTSUPP; goto free_actions; @@ -702,7 +723,7 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns, int err; int i; - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->delete_fte(ns, ft, fte); err = mlx5dr_rule_destroy(rule->dr_rule); @@ -727,7 +748,7 @@ static int mlx5_cmd_dr_update_fte(struct mlx5_flow_root_namespace *ns, struct fs_fte fte_tmp = {}; int ret; - if (mlx5_dr_is_fw_table(ft->flags)) + if (dr_is_fw_term_table(ft)) return mlx5_fs_cmd_get_fw_cmds()->update_fte(ns, ft, group, modify_mask, fte); /* Backup current dr rule details */ @@ -780,11 +801,19 @@ static int mlx5_cmd_dr_destroy_ns(struct mlx5_flow_root_namespace *ns) static u32 mlx5_cmd_dr_get_capabilities(struct mlx5_flow_root_namespace *ns, enum fs_flow_table_type ft_type) { + u32 steering_caps = 0; + if (ft_type != FS_FT_FDB || MLX5_CAP_GEN(ns->dev, steering_format_version) == MLX5_STEERING_FORMAT_CONNECTX_5) return 0; - return MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX | MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; + steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_PUSH_ON_RX; + steering_caps |= MLX5_FLOW_STEERING_CAP_VLAN_POP_ON_TX; + + if (mlx5dr_supp_match_ranges(ns->dev)) + steering_caps |= MLX5_FLOW_STEERING_CAP_MATCH_RANGES; + + return steering_caps; } bool mlx5_fs_dr_is_supported(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h index 34c2bd17a8b4..790a17d6207f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5_ifc_dr_ste_v1.h @@ -165,6 +165,41 @@ struct mlx5_ifc_ste_mask_and_match_v1_bits { u8 action[0x60]; }; +struct mlx5_ifc_ste_match_ranges_v1_bits { + u8 entry_format[0x8]; + u8 counter_id[0x18]; + + u8 miss_address_63_48[0x10]; + u8 match_definer_ctx_idx[0x8]; + u8 miss_address_39_32[0x8]; + + u8 miss_address_31_6[0x1a]; + u8 reserved_at_5a[0x1]; + u8 match_polarity[0x1]; + u8 reparse[0x1]; + u8 reserved_at_5d[0x3]; + + u8 next_table_base_63_48[0x10]; + u8 hash_definer_ctx_idx[0x8]; + u8 next_table_base_39_32_size[0x8]; + + u8 next_table_base_31_5_size[0x1b]; + u8 hash_type[0x2]; + u8 hash_after_actions[0x1]; + u8 reserved_at_9e[0x2]; + + u8 action[0x60]; + + u8 max_value_0[0x20]; + u8 min_value_0[0x20]; + u8 max_value_1[0x20]; + u8 min_value_1[0x20]; + u8 max_value_2[0x20]; + u8 min_value_2[0x20]; + u8 max_value_3[0x20]; + u8 min_value_3[0x20]; +}; + struct mlx5_ifc_ste_eth_l2_src_v1_bits { u8 reserved_at_0[0x1]; u8 sx_sniffer[0x1]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 84ed77763b21..9afd268a2573 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@ -140,8 +140,21 @@ mlx5dr_action_create_aso(struct mlx5dr_domain *dmn, u8 init_color, u8 meter_id); +struct mlx5dr_action * +mlx5dr_action_create_dest_match_range(struct mlx5dr_domain *dmn, + u32 field, + struct mlx5_flow_table *hit_ft, + struct mlx5_flow_table *miss_ft, + u32 min, + u32 max); + int mlx5dr_action_destroy(struct mlx5dr_action *action); +int mlx5dr_definer_get(struct mlx5dr_domain *dmn, u16 format_id, + u8 *dw_selectors, u8 *byte_selectors, + u8 *match_mask, u32 *definer_id); +void mlx5dr_definer_put(struct mlx5dr_domain *dmn, u32 definer_id); + static 
inline bool mlx5dr_is_supported(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index d5c317325030..ba7e3df22413 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1160,14 +1160,40 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); -int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out) +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, + u16 opmod) { - u16 opmod = (MLX5_CAP_GENERAL << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); MLX5_SET(query_hca_cap_in, in, function_id, function_id); MLX5_SET(query_hca_cap_in, in, other_function, true); return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); } +EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); + +int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, + u16 function_id, u16 opmod) +{ + int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + void *set_hca_cap; + void *set_ctx; + int ret; + + set_ctx = kzalloc(set_sz, GFP_KERNEL); + if (!set_ctx) + return -ENOMEM; + + MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP); + MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); + set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); + memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id); + MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); + ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); + + kfree(set_ctx); + return ret; +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index a2ee695a3f17..3340b4a694c3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -363,93 +363,7 @@ static const struct mlxsw_sp_ipip_ops mlxsw_sp_ipip_gre4_ops = { }; static struct mlxsw_sp_ipip_parms -mlxsw_sp1_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) -{ - struct mlxsw_sp_ipip_parms parms = {0}; - - WARN_ON_ONCE(1); - return parms; -} - -static int -mlxsw_sp1_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry, - bool force, char *ratr_pl) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static int -mlxsw_sp1_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - u32 tunnel_index) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static bool mlxsw_sp1_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) -{ - return false; -} - -static struct mlxsw_sp_rif_ipip_lb_config -mlxsw_sp1_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) -{ - struct mlxsw_sp_rif_ipip_lb_config config = {0}; - - WARN_ON_ONCE(1); - return config; -} - -static int -mlxsw_sp1_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - struct netlink_ext_ack *extack) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static int -mlxsw_sp1_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, - struct 
mlxsw_sp_ipip_entry *ipip_entry) -{ - WARN_ON_ONCE(1); - return -EINVAL; -} - -static void -mlxsw_sp1_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_ipip_entry *ipip_entry) -{ - WARN_ON_ONCE(1); -} - -static const struct mlxsw_sp_ipip_ops mlxsw_sp1_ipip_gre6_ops = { - .dev_type = ARPHRD_IP6GRE, - .ul_proto = MLXSW_SP_L3_PROTO_IPV6, - .inc_parsing_depth = true, - .parms_init = mlxsw_sp1_ipip_netdev_parms_init_gre6, - .nexthop_update = mlxsw_sp1_ipip_nexthop_update_gre6, - .decap_config = mlxsw_sp1_ipip_decap_config_gre6, - .can_offload = mlxsw_sp1_ipip_can_offload_gre6, - .ol_loopback_config = mlxsw_sp1_ipip_ol_loopback_config_gre6, - .ol_netdev_change = mlxsw_sp1_ipip_ol_netdev_change_gre6, - .rem_ip_addr_set = mlxsw_sp1_ipip_rem_addr_set_gre6, - .rem_ip_addr_unset = mlxsw_sp1_ipip_rem_addr_unset_gre6, -}; - -const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[] = { - [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, - [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp1_ipip_gre6_ops, -}; - -static struct mlxsw_sp_ipip_parms -mlxsw_sp2_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) +mlxsw_sp_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) { struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev); @@ -464,9 +378,9 @@ mlxsw_sp2_ipip_netdev_parms_init_gre6(const struct net_device *ol_dev) } static int -mlxsw_sp2_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry, - bool force, char *ratr_pl) +mlxsw_sp_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force, char *ratr_pl) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); enum mlxsw_reg_ratr_op op; @@ -482,9 +396,9 @@ mlxsw_sp2_ipip_nexthop_update_gre6(struct mlxsw_sp *mlxsw_sp, u32 adj_index, } static int -mlxsw_sp2_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - u32 tunnel_index) +mlxsw_sp_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry, + u32 tunnel_index) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); u16 ul_rif_id = mlxsw_sp_ipip_lb_ul_rif_id(ipip_entry->ol_lb); @@ -519,8 +433,8 @@ mlxsw_sp2_ipip_decap_config_gre6(struct mlxsw_sp *mlxsw_sp, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rtdp), rtdp_pl); } -static bool mlxsw_sp2_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) +static bool mlxsw_sp_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) { struct __ip6_tnl_parm tparm = mlxsw_sp_ipip_netdev_parms6(ol_dev); bool inherit_tos = tparm.flags & IP6_TNL_F_USE_ORIG_TCLASS; @@ -534,8 +448,8 @@ static bool mlxsw_sp2_ipip_can_offload_gre6(const struct mlxsw_sp *mlxsw_sp, } static struct mlxsw_sp_rif_ipip_lb_config -mlxsw_sp2_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, - const struct net_device *ol_dev) +mlxsw_sp_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, + const struct net_device *ol_dev) { struct __ip6_tnl_parm parms = mlxsw_sp_ipip_netdev_parms6(ol_dev); enum mlxsw_reg_ritr_loopback_ipip_type lb_ipipt; @@ -553,20 +467,20 @@ mlxsw_sp2_ipip_ol_loopback_config_gre6(struct mlxsw_sp *mlxsw_sp, } static int -mlxsw_sp2_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry, - struct netlink_ext_ack *extack) +mlxsw_sp_ipip_ol_netdev_change_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry 
*ipip_entry, + struct netlink_ext_ack *extack) { struct mlxsw_sp_ipip_parms new_parms; - new_parms = mlxsw_sp2_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev); + new_parms = mlxsw_sp_ipip_netdev_parms_init_gre6(ipip_entry->ol_dev); return mlxsw_sp_ipip_ol_netdev_change_gre(mlxsw_sp, ipip_entry, &new_parms, extack); } static int -mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_ipip_entry *ipip_entry) { return mlxsw_sp_ipv6_addr_kvdl_index_get(mlxsw_sp, &ipip_entry->parms.daddr.addr6, @@ -574,24 +488,44 @@ mlxsw_sp2_ipip_rem_addr_set_gre6(struct mlxsw_sp *mlxsw_sp, } static void -mlxsw_sp2_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, - const struct mlxsw_sp_ipip_entry *ipip_entry) +mlxsw_sp_ipip_rem_addr_unset_gre6(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_ipip_entry *ipip_entry) { mlxsw_sp_ipv6_addr_put(mlxsw_sp, &ipip_entry->parms.daddr.addr6); } +static const struct mlxsw_sp_ipip_ops mlxsw_sp1_ipip_gre6_ops = { + .dev_type = ARPHRD_IP6GRE, + .ul_proto = MLXSW_SP_L3_PROTO_IPV6, + .inc_parsing_depth = true, + .double_rif_entry = true, + .parms_init = mlxsw_sp_ipip_netdev_parms_init_gre6, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre6, + .decap_config = mlxsw_sp_ipip_decap_config_gre6, + .can_offload = mlxsw_sp_ipip_can_offload_gre6, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre6, + .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre6, + .rem_ip_addr_set = mlxsw_sp_ipip_rem_addr_set_gre6, + .rem_ip_addr_unset = mlxsw_sp_ipip_rem_addr_unset_gre6, +}; + +const struct mlxsw_sp_ipip_ops *mlxsw_sp1_ipip_ops_arr[] = { + [MLXSW_SP_IPIP_TYPE_GRE4] = &mlxsw_sp_ipip_gre4_ops, + [MLXSW_SP_IPIP_TYPE_GRE6] = &mlxsw_sp1_ipip_gre6_ops, +}; + static const struct mlxsw_sp_ipip_ops mlxsw_sp2_ipip_gre6_ops = { .dev_type = ARPHRD_IP6GRE, .ul_proto = MLXSW_SP_L3_PROTO_IPV6, .inc_parsing_depth = true, - .parms_init = mlxsw_sp2_ipip_netdev_parms_init_gre6, - .nexthop_update = mlxsw_sp2_ipip_nexthop_update_gre6, - .decap_config = mlxsw_sp2_ipip_decap_config_gre6, - .can_offload = mlxsw_sp2_ipip_can_offload_gre6, - .ol_loopback_config = mlxsw_sp2_ipip_ol_loopback_config_gre6, - .ol_netdev_change = mlxsw_sp2_ipip_ol_netdev_change_gre6, - .rem_ip_addr_set = mlxsw_sp2_ipip_rem_addr_set_gre6, - .rem_ip_addr_unset = mlxsw_sp2_ipip_rem_addr_unset_gre6, + .parms_init = mlxsw_sp_ipip_netdev_parms_init_gre6, + .nexthop_update = mlxsw_sp_ipip_nexthop_update_gre6, + .decap_config = mlxsw_sp_ipip_decap_config_gre6, + .can_offload = mlxsw_sp_ipip_can_offload_gre6, + .ol_loopback_config = mlxsw_sp_ipip_ol_loopback_config_gre6, + .ol_netdev_change = mlxsw_sp_ipip_ol_netdev_change_gre6, + .rem_ip_addr_set = mlxsw_sp_ipip_rem_addr_set_gre6, + .rem_ip_addr_unset = mlxsw_sp_ipip_rem_addr_unset_gre6, }; const struct mlxsw_sp_ipip_ops *mlxsw_sp2_ipip_ops_arr[] = { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 8cc259dcc8d0..a35f009da561 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -49,6 +49,7 @@ struct mlxsw_sp_ipip_ops { int dev_type; enum mlxsw_sp_l3proto ul_proto; /* Underlay. 
*/ bool inc_parsing_depth; + bool double_rif_entry; struct mlxsw_sp_ipip_parms (*parms_init)(const struct net_device *ol_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 48f1fa62a4fd..c22c3ac4e2a1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -18,6 +18,7 @@ #include <linux/jhash.h> #include <linux/net_namespace.h> #include <linux/mutex.h> +#include <linux/genalloc.h> #include <net/netevent.h> #include <net/neighbour.h> #include <net/arp.h> @@ -59,6 +60,7 @@ struct mlxsw_sp_rif { int mtu; u16 rif_index; u8 mac_profile_id; + u8 rif_entries; u16 vr_id; const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp *mlxsw_sp; @@ -77,6 +79,7 @@ struct mlxsw_sp_rif_params { }; u16 vid; bool lag; + bool double_entry; }; struct mlxsw_sp_rif_subport { @@ -1068,6 +1071,7 @@ mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp, lb_params = (struct mlxsw_sp_rif_params_ipip_lb) { .common.dev = ol_dev, .common.lag = false, + .common.double_entry = ipip_ops->double_rif_entry, .lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev), }; @@ -7826,18 +7830,26 @@ mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type); } -static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index) +static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index, + u8 rif_entries) { - int i; + *p_rif_index = gen_pool_alloc(mlxsw_sp->router->rifs_table, + rif_entries); + if (*p_rif_index == 0) + return -ENOBUFS; + *p_rif_index -= MLXSW_SP_ROUTER_GENALLOC_OFFSET; - for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { - if (!mlxsw_sp->router->rifs[i]) { - *p_rif_index = i; - return 0; - } - } + /* RIF indexes must be aligned to the allocation size. */ + WARN_ON_ONCE(*p_rif_index % rif_entries); - return -ENOBUFS; + return 0; +} + +static void mlxsw_sp_rif_index_free(struct mlxsw_sp *mlxsw_sp, u16 rif_index, + u8 rif_entries) +{ + gen_pool_free(mlxsw_sp->router->rifs_table, + MLXSW_SP_ROUTER_GENALLOC_OFFSET + rif_index, rif_entries); } static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index, @@ -8081,6 +8093,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, const struct mlxsw_sp_rif_params *params, struct netlink_ext_ack *extack) { + u8 rif_entries = params->double_entry ? 
2 : 1; u32 tb_id = l3mdev_fib_table(params->dev); const struct mlxsw_sp_rif_ops *ops; struct mlxsw_sp_fid *fid = NULL; @@ -8098,7 +8111,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, return ERR_CAST(vr); vr->rif_count++; - err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); goto err_rif_index_alloc; @@ -8113,6 +8126,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp->router->rifs[rif_index] = rif; rif->mlxsw_sp = mlxsw_sp; rif->ops = ops; + rif->rif_entries = rif_entries; if (ops->fid_get) { fid = ops->fid_get(rif, extack); @@ -8146,7 +8160,7 @@ mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_rif_counters_alloc(rif); } - atomic_inc(&mlxsw_sp->router->rifs_count); + atomic_add(rif_entries, &mlxsw_sp->router->rifs_count); return rif; err_stats_enable: @@ -8162,6 +8176,7 @@ err_fid_get: dev_put(rif->dev); kfree(rif); err_rif_alloc: + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); err_rif_index_alloc: vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); @@ -8173,10 +8188,12 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) const struct mlxsw_sp_rif_ops *ops = rif->ops; struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp; struct mlxsw_sp_fid *fid = rif->fid; + u8 rif_entries = rif->rif_entries; + u16 rif_index = rif->rif_index; struct mlxsw_sp_vr *vr; int i; - atomic_dec(&mlxsw_sp->router->rifs_count); + atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif); vr = &mlxsw_sp->router->vrs[rif->vr_id]; @@ -8198,6 +8215,7 @@ static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif) mlxsw_sp->router->rifs[rif->rif_index] = NULL; dev_put(rif->dev); kfree(rif); + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); vr->rif_count--; mlxsw_sp_vr_put(mlxsw_sp, vr); } @@ -9771,42 +9789,51 @@ mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr, struct netlink_ext_ack *extack) { struct mlxsw_sp_rif *ul_rif; + u8 rif_entries = 1; u16 rif_index; int err; - err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index); + err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index, rif_entries); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces"); return ERR_PTR(err); } ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL); - if (!ul_rif) - return ERR_PTR(-ENOMEM); + if (!ul_rif) { + err = -ENOMEM; + goto err_rif_alloc; + } mlxsw_sp->router->rifs[rif_index] = ul_rif; ul_rif->mlxsw_sp = mlxsw_sp; + ul_rif->rif_entries = rif_entries; err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true); if (err) goto ul_rif_op_err; - atomic_inc(&mlxsw_sp->router->rifs_count); + atomic_add(rif_entries, &mlxsw_sp->router->rifs_count); return ul_rif; ul_rif_op_err: mlxsw_sp->router->rifs[rif_index] = NULL; kfree(ul_rif); +err_rif_alloc: + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); return ERR_PTR(err); } static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif) { struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp; + u8 rif_entries = ul_rif->rif_entries; + u16 rif_index = ul_rif->rif_index; - atomic_dec(&mlxsw_sp->router->rifs_count); + atomic_sub(rif_entries, &mlxsw_sp->router->rifs_count); mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false); mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL; kfree(ul_rif); + mlxsw_sp_rif_index_free(mlxsw_sp, rif_index, rif_entries); } static struct mlxsw_sp_rif * @@ -9940,11 +9967,43 @@ static 
const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = { [MLXSW_SP_RIF_TYPE_IPIP_LB] = &mlxsw_sp2_rif_ipip_lb_ops, }; +static int mlxsw_sp_rifs_table_init(struct mlxsw_sp *mlxsw_sp) +{ + struct gen_pool *rifs_table; + int err; + + rifs_table = gen_pool_create(0, -1); + if (!rifs_table) + return -ENOMEM; + + gen_pool_set_algo(rifs_table, gen_pool_first_fit_order_align, + NULL); + + err = gen_pool_add(rifs_table, MLXSW_SP_ROUTER_GENALLOC_OFFSET, + MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS), -1); + if (err) + goto err_gen_pool_add; + + mlxsw_sp->router->rifs_table = rifs_table; + + return 0; + +err_gen_pool_add: + gen_pool_destroy(rifs_table); + return err; +} + +static void mlxsw_sp_rifs_table_fini(struct mlxsw_sp *mlxsw_sp) +{ + gen_pool_destroy(mlxsw_sp->router->rifs_table); +} + static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) { u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); struct mlxsw_core *core = mlxsw_sp->core; + int err; if (!MLXSW_CORE_RES_VALID(core, MAX_RIF_MAC_PROFILES)) return -EIO; @@ -9957,6 +10016,10 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) if (!mlxsw_sp->router->rifs) return -ENOMEM; + err = mlxsw_sp_rifs_table_init(mlxsw_sp); + if (err) + goto err_rifs_table_init; + idr_init(&mlxsw_sp->router->rif_mac_profiles_idr); atomic_set(&mlxsw_sp->router->rif_mac_profiles_count, 0); atomic_set(&mlxsw_sp->router->rifs_count, 0); @@ -9970,6 +10033,10 @@ static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp) mlxsw_sp); return 0; + +err_rifs_table_init: + kfree(mlxsw_sp->router->rifs); + return err; } static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) @@ -9986,6 +10053,7 @@ static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp) MLXSW_SP_RESOURCE_RIF_MAC_PROFILES); WARN_ON(!idr_is_empty(&mlxsw_sp->router->rif_mac_profiles_idr)); idr_destroy(&mlxsw_sp->router->rif_mac_profiles_idr); + mlxsw_sp_rifs_table_fini(mlxsw_sp); kfree(mlxsw_sp->router->rifs); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index c5dfb972b433..37d6e4c80e6a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -15,8 +15,12 @@ struct mlxsw_sp_router_nve_decap { u8 valid:1; }; +/* gen_pool_alloc() returns 0 when allocation fails, so use an offset */ +#define MLXSW_SP_ROUTER_GENALLOC_OFFSET 0x100 + struct mlxsw_sp_router { struct mlxsw_sp *mlxsw_sp; + struct gen_pool *rifs_table; struct mlxsw_sp_rif **rifs; struct idr rif_mac_profiles_idr; atomic_t rif_mac_profiles_count; diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 81a8ccca7e5e..5693784eec5b 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -359,7 +359,7 @@ static int regmap_encx24j600_phy_reg_read(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); @@ -397,7 +397,7 @@ static int regmap_encx24j600_phy_reg_write(void *context, unsigned int reg, goto err_out; usleep_range(26, 100); - while ((ret = regmap_read(ctx->regmap, MISTAT, &mistat) != 0) && + while (((ret = regmap_read(ctx->regmap, MISTAT, &mistat)) == 0) && (mistat & BUSY)) cpu_relax(); diff --git 
a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index f6092983d028..cadde20505ba 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -443,11 +443,22 @@ static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { struct lan966x_port *port = netdev_priv(dev); + int err; + + if (cmd == SIOCSHWTSTAMP) { + err = lan966x_ptp_setup_traps(port, ifr); + if (err) + return err; + } if (!phy_has_hwtstamp(dev->phydev) && port->lan966x->ptp) { switch (cmd) { case SIOCSHWTSTAMP: - return lan966x_ptp_hwtstamp_set(port, ifr); + err = lan966x_ptp_hwtstamp_set(port, ifr); + if (err) + lan966x_ptp_del_traps(port); + + return err; case SIOCGHWTSTAMP: return lan966x_ptp_hwtstamp_get(port, ifr); } @@ -456,7 +467,11 @@ static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr, if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(dev->phydev, ifr, cmd); + err = phy_mii_ioctl(dev->phydev, ifr, cmd); + if (err && cmd == SIOCSHWTSTAMP) + lan966x_ptp_del_traps(port); + + return err; } static const struct net_device_ops lan966x_port_netdev_ops = { diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h index f2e45da7ffd4..3491f1961835 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.h +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.h @@ -88,6 +88,10 @@ #define SE_IDX_QUEUE 0 /* 0-79 : Queue scheduler elements */ #define SE_IDX_PORT 80 /* 80-89 : Port schedular elements */ +#define LAN966X_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */ +#define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */ +#define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */ + /* MAC table entry types. * ENTRYTYPE_NORMAL is subject to aging. * ENTRYTYPE_LOCKED is not subject to aging. @@ -116,6 +120,14 @@ enum lan966x_fdma_action { FDMA_REDIRECT, }; +/* Controls how PORT_MASK is applied */ +enum LAN966X_PORT_MASK_MODE { + LAN966X_PMM_NO_ACTION, + LAN966X_PMM_REPLACE, + LAN966X_PMM_FORWARDING, + LAN966X_PMM_REDIRECT, +}; + struct lan966x_port; struct lan966x_db { @@ -473,6 +485,8 @@ irqreturn_t lan966x_ptp_irq_handler(int irq, void *args); irqreturn_t lan966x_ptp_ext_irq_handler(int irq, void *args); u32 lan966x_ptp_get_period_ps(void); int lan966x_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts); +int lan966x_ptp_setup_traps(struct lan966x_port *port, struct ifreq *ifr); +int lan966x_ptp_del_traps(struct lan966x_port *port); int lan966x_fdma_xmit(struct sk_buff *skb, __be32 *ifh, struct net_device *dev); int lan966x_fdma_xmit_xdpf(struct lan966x_port *port, diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c index e5a2bbe064f8..300fe4005919 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_ptp.c @@ -3,6 +3,8 @@ #include <linux/ptp_classify.h> #include "lan966x_main.h" +#include "vcap_api.h" +#include "vcap_api_client.h" #define LAN966X_MAX_PTP_ID 512 @@ -18,6 +20,17 @@ #define TOD_ACC_PIN 0x7 +/* This represents the base rule ID for the PTP rules that are added in the + * VCAP to trap frames to CPU. This number needs to be bigger than the maximum + * number of entries that can exist in the VCAP. 
+ */ +#define LAN966X_VCAP_PTP_RULE_ID 1000000 +#define LAN966X_VCAP_L2_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 0) +#define LAN966X_VCAP_IPV4_EV_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 1) +#define LAN966X_VCAP_IPV4_GEN_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 2) +#define LAN966X_VCAP_IPV6_EV_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 3) +#define LAN966X_VCAP_IPV6_GEN_PTP_TRAP (LAN966X_VCAP_PTP_RULE_ID + 4) + enum { PTP_PIN_ACTION_IDLE = 0, PTP_PIN_ACTION_LOAD, @@ -35,19 +48,228 @@ static u64 lan966x_ptp_get_nominal_value(void) return 0x304d4873ecade305; } +static int lan966x_ptp_add_trap(struct lan966x_port *port, + int (*add_ptp_key)(struct vcap_rule *vrule, + struct lan966x_port*), + u32 rule_id, + u16 proto) +{ + struct lan966x *lan966x = port->lan966x; + struct vcap_rule *vrule; + int err; + + vrule = vcap_get_rule(lan966x->vcap_ctrl, rule_id); + if (vrule) { + u32 value, mask; + + /* Just modify the ingress port mask and exit */ + vcap_rule_get_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK, + &value, &mask); + mask &= ~BIT(port->chip_port); + vcap_rule_mod_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK, + value, mask); + + err = vcap_mod_rule(vrule); + goto free_rule; + } + + vrule = vcap_alloc_rule(lan966x->vcap_ctrl, port->dev, + LAN966X_VCAP_CID_IS2_L0, + VCAP_USER_PTP, 0, rule_id); + if (!vrule) + return -ENOMEM; + if (IS_ERR(vrule)) + return PTR_ERR(vrule); + + err = add_ptp_key(vrule, port); + if (err) + goto free_rule; + + err = vcap_set_rule_set_actionset(vrule, VCAP_AFS_BASE_TYPE); + err |= vcap_rule_add_action_bit(vrule, VCAP_AF_CPU_COPY_ENA, VCAP_BIT_1); + err |= vcap_rule_add_action_u32(vrule, VCAP_AF_MASK_MODE, LAN966X_PMM_REPLACE); + err |= vcap_val_rule(vrule, proto); + if (err) + goto free_rule; + + err = vcap_add_rule(vrule); + +free_rule: + /* Free the local copy of the rule */ + vcap_free_rule(vrule); + return err; +} + +static int lan966x_ptp_del_trap(struct lan966x_port *port, + u32 rule_id) +{ + struct lan966x *lan966x = port->lan966x; + struct vcap_rule *vrule; + u32 value, mask; + int err; + + vrule = vcap_get_rule(lan966x->vcap_ctrl, rule_id); + if (!vrule) + return -EEXIST; + + vcap_rule_get_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK, &value, &mask); + mask |= BIT(port->chip_port); + + /* No other port requires this trap, so it is safe to remove it */ + if (mask == GENMASK(lan966x->num_phys_ports, 0)) { + err = vcap_del_rule(lan966x->vcap_ctrl, port->dev, rule_id); + goto free_rule; + } + + vcap_rule_mod_key_u32(vrule, VCAP_KF_IF_IGR_PORT_MASK, value, mask); + err = vcap_mod_rule(vrule); + +free_rule: + vcap_free_rule(vrule); + return err; +} + +static int lan966x_ptp_add_l2_key(struct vcap_rule *vrule, + struct lan966x_port *port) +{ + return vcap_rule_add_key_u32(vrule, VCAP_KF_ETYPE, ETH_P_1588, ~0); +} + +static int lan966x_ptp_add_ip_event_key(struct vcap_rule *vrule, + struct lan966x_port *port) +{ + return vcap_rule_add_key_u32(vrule, VCAP_KF_L4_DPORT, PTP_EV_PORT, ~0) || + vcap_rule_add_key_bit(vrule, VCAP_KF_TCP_IS, VCAP_BIT_0); +} + +static int lan966x_ptp_add_ip_general_key(struct vcap_rule *vrule, + struct lan966x_port *port) +{ + return vcap_rule_add_key_u32(vrule, VCAP_KF_L4_DPORT, PTP_GEN_PORT, ~0) || + vcap_rule_add_key_bit(vrule, VCAP_KF_TCP_IS, VCAP_BIT_0); +} + +static int lan966x_ptp_add_l2_rule(struct lan966x_port *port) +{ + return lan966x_ptp_add_trap(port, lan966x_ptp_add_l2_key, + LAN966X_VCAP_L2_PTP_TRAP, ETH_P_ALL); +} + +static int lan966x_ptp_add_ipv4_rules(struct lan966x_port *port) +{ + int err; + + err = lan966x_ptp_add_trap(port, 
lan966x_ptp_add_ip_event_key, + LAN966X_VCAP_IPV4_EV_PTP_TRAP, ETH_P_IP); + if (err) + return err; + + err = lan966x_ptp_add_trap(port, lan966x_ptp_add_ip_general_key, + LAN966X_VCAP_IPV4_GEN_PTP_TRAP, ETH_P_IP); + if (err) + lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV4_EV_PTP_TRAP); + + return err; +} + +static int lan966x_ptp_add_ipv6_rules(struct lan966x_port *port) +{ + int err; + + err = lan966x_ptp_add_trap(port, lan966x_ptp_add_ip_event_key, + LAN966X_VCAP_IPV6_EV_PTP_TRAP, ETH_P_IPV6); + if (err) + return err; + + err = lan966x_ptp_add_trap(port, lan966x_ptp_add_ip_general_key, + LAN966X_VCAP_IPV6_GEN_PTP_TRAP, ETH_P_IPV6); + if (err) + lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV6_EV_PTP_TRAP); + + return err; +} + +static int lan966x_ptp_del_l2_rule(struct lan966x_port *port) +{ + return lan966x_ptp_del_trap(port, LAN966X_VCAP_L2_PTP_TRAP); +} + +static int lan966x_ptp_del_ipv4_rules(struct lan966x_port *port) +{ + int err; + + err = lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV4_EV_PTP_TRAP); + err |= lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV4_GEN_PTP_TRAP); + + return err; +} + +static int lan966x_ptp_del_ipv6_rules(struct lan966x_port *port) +{ + int err; + + err = lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV6_EV_PTP_TRAP); + err |= lan966x_ptp_del_trap(port, LAN966X_VCAP_IPV6_GEN_PTP_TRAP); + + return err; +} + +static int lan966x_ptp_add_traps(struct lan966x_port *port) +{ + int err; + + err = lan966x_ptp_add_l2_rule(port); + if (err) + goto err_l2; + + err = lan966x_ptp_add_ipv4_rules(port); + if (err) + goto err_ipv4; + + err = lan966x_ptp_add_ipv6_rules(port); + if (err) + goto err_ipv6; + + return err; + +err_ipv6: + lan966x_ptp_del_ipv4_rules(port); +err_ipv4: + lan966x_ptp_del_l2_rule(port); +err_l2: + return err; +} + +int lan966x_ptp_del_traps(struct lan966x_port *port) +{ + int err; + + err = lan966x_ptp_del_l2_rule(port); + err |= lan966x_ptp_del_ipv4_rules(port); + err |= lan966x_ptp_del_ipv6_rules(port); + + return err; +} + +int lan966x_ptp_setup_traps(struct lan966x_port *port, struct ifreq *ifr) +{ + struct hwtstamp_config cfg; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.rx_filter == HWTSTAMP_FILTER_NONE) + return lan966x_ptp_del_traps(port); + else + return lan966x_ptp_add_traps(port); +} + int lan966x_ptp_hwtstamp_set(struct lan966x_port *port, struct ifreq *ifr) { struct lan966x *lan966x = port->lan966x; struct hwtstamp_config cfg; struct lan966x_phc *phc; - /* For now don't allow to run ptp on ports that are part of a bridge, - * because in case of transparent clock the HW will still forward the - * frames, so there would be duplicate frames - */ - if (lan966x->bridge_mask & BIT(port->chip_port)) - return -EINVAL; - if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c index 04a2afd683cc..ba3fa917d6b7 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_tc_flower.c @@ -4,14 +4,6 @@ #include "vcap_api.h" #include "vcap_api_client.h" -/* Controls how PORT_MASK is applied */ -enum LAN966X_PORT_MASK_MODE { - LAN966X_PMM_NO_ACTION, - LAN966X_PMM_REPLACE, - LAN966X_PMM_FORWARDING, - LAN966X_PMM_REDIRECT, -}; - struct lan966x_tc_flower_parse_usage { struct flow_cls_offload *f; struct flow_rule *frule; diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c 
b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c index 44f40d914947..d8dc9fbb81e1 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_vcap_impl.c @@ -5,10 +5,6 @@ #include "vcap_api.h" #include "vcap_api_client.h" -#define LAN966X_VCAP_CID_IS2_L0 VCAP_CID_INGRESS_STAGE2_L0 /* IS2 lookup 0 */ -#define LAN966X_VCAP_CID_IS2_L1 VCAP_CID_INGRESS_STAGE2_L1 /* IS2 lookup 1 */ -#define LAN966X_VCAP_CID_IS2_MAX (VCAP_CID_INGRESS_STAGE2_L2 - 1) /* IS2 Max */ - #define STREAMSIZE (64 * 4) #define LAN966X_IS2_LOOKUPS 2 @@ -219,9 +215,12 @@ static void lan966x_vcap_add_default_fields(struct net_device *dev, struct vcap_rule *rule) { struct lan966x_port *port = netdev_priv(dev); + u32 value, mask; - vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0, - ~BIT(port->chip_port)); + if (vcap_rule_get_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, + &value, &mask)) + vcap_rule_add_key_u32(rule, VCAP_KF_IF_IGR_PORT_MASK, 0, + ~BIT(port->chip_port)); if (lan966x_vcap_is_first_chain(rule)) vcap_rule_add_key_bit(rule, VCAP_KF_LOOKUP_FIRST_IS, diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c index 66360c8c5a38..141897dfe388 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_fdma.c @@ -317,7 +317,7 @@ int sparx5_fdma_xmit(struct sparx5 *sparx5, u32 *ifh, struct sk_buff *skb) next_dcb_hw = sparx5_fdma_next_dcb(tx, tx->curr_entry); db_hw = &next_dcb_hw->db[0]; if (!(db_hw->status & FDMA_DCB_STATUS_DONE)) - tx->dropped++; + return -EINVAL; db = list_first_entry(&tx->db_list, struct sparx5_db, list); list_move_tail(&db->list, &tx->db_list); next_dcb_hw->nextptr = FDMA_DCB_INVALID_DATA; diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c index f8382d3124e0..d25f4f09faa0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_main.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_main.c @@ -897,6 +897,8 @@ static int mchp_sparx5_probe(struct platform_device *pdev) cleanup_ports: sparx5_cleanup_ports(sparx5); + if (sparx5->mact_queue) + destroy_workqueue(sparx5->mact_queue); cleanup_config: kfree(configs); cleanup_pnode: @@ -923,6 +925,7 @@ static int mchp_sparx5_remove(struct platform_device *pdev) sparx5_vcap_destroy(sparx5); /* Unregister netdevs */ sparx5_unregister_notifier_blocks(sparx5); + destroy_workqueue(sparx5->mact_queue); return 0; } diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c index 83c16ca5b30f..6db6ac6a3bbc 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_packet.c @@ -234,9 +234,8 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) sparx5_set_port_ifh(ifh, port->portno); if (sparx5->ptp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { - ret = sparx5_ptp_txtstamp_request(port, skb); - if (ret) - return ret; + if (sparx5_ptp_txtstamp_request(port, skb) < 0) + return NETDEV_TX_BUSY; sparx5_set_port_ifh_rew_op(ifh, SPARX5_SKB_CB(skb)->rew_op); sparx5_set_port_ifh_pdu_type(ifh, SPARX5_SKB_CB(skb)->pdu_type); @@ -250,23 +249,31 @@ netdev_tx_t sparx5_port_xmit_impl(struct sk_buff *skb, struct net_device *dev) else ret = sparx5_inject(sparx5, ifh, skb, dev); - if (ret == NETDEV_TX_OK) { - stats->tx_bytes += skb->len; - stats->tx_packets++; + if (ret == 
-EBUSY) + goto busy; + if (ret < 0) + goto drop; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - return ret; + stats->tx_bytes += skb->len; + stats->tx_packets++; + sparx5->tx.packets++; - dev_kfree_skb_any(skb); - } else { - stats->tx_dropped++; + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + return NETDEV_TX_OK; - if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) - sparx5_ptp_txtstamp_release(port, skb); - } - return ret; + dev_consume_skb_any(skb); + return NETDEV_TX_OK; +drop: + stats->tx_dropped++; + sparx5->tx.dropped++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; +busy: + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + SPARX5_SKB_CB(skb)->rew_op == IFH_REW_OP_TWO_STEP_PTP) + sparx5_ptp_txtstamp_release(port, skb); + return NETDEV_TX_BUSY; } static enum hrtimer_restart sparx5_injection_timeout(struct hrtimer *tmr) diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api.c b/drivers/net/ethernet/microchip/vcap/vcap_api.c index f2435d7ab515..664aae3e2acd 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api.c @@ -169,6 +169,227 @@ static void vcap_encode_typegroups(u32 *stream, int sw_width, } } +static bool vcap_bitarray_zero(int width, u8 *value) +{ + int bytes = DIV_ROUND_UP(width, BITS_PER_BYTE); + u8 total = 0, bmask = 0xff; + int rwidth = width; + int idx; + + for (idx = 0; idx < bytes; ++idx, rwidth -= BITS_PER_BYTE) { + if (rwidth && rwidth < BITS_PER_BYTE) + bmask = (1 << rwidth) - 1; + total += value[idx] & bmask; + } + return total == 0; +} + +static bool vcap_get_bit(u32 *stream, struct vcap_stream_iter *itr) +{ + u32 mask = BIT(itr->reg_bitpos); + u32 *p = &stream[itr->reg_idx]; + + return !!(*p & mask); +} + +static void vcap_decode_field(u32 *stream, struct vcap_stream_iter *itr, + int width, u8 *value) +{ + int idx; + + /* Loop over the field value bits and get the field bits and + * set them in the output value byte array + */ + for (idx = 0; idx < width; idx++) { + u8 bidx = idx & 0x7; + + /* Decode one field value bit */ + if (vcap_get_bit(stream, itr)) + *value |= 1 << bidx; + vcap_iter_next(itr); + if (bidx == 7) + value++; + } +} + +/* Verify that the type id in the stream matches the type id of the keyset */ +static bool vcap_verify_keystream_keyset(struct vcap_control *vctrl, + enum vcap_type vt, + u32 *keystream, + u32 *mskstream, + enum vcap_keyfield_set keyset) +{ + const struct vcap_info *vcap = &vctrl->vcaps[vt]; + const struct vcap_field *typefld; + const struct vcap_typegroup *tgt; + const struct vcap_field *fields; + struct vcap_stream_iter iter; + const struct vcap_set *info; + u32 value = 0; + u32 mask = 0; + + if (vcap_keyfield_count(vctrl, vt, keyset) == 0) + return false; + + info = vcap_keyfieldset(vctrl, vt, keyset); + /* Check that the keyset is valid */ + if (!info) + return false; + + /* a type_id of value -1 means that there is no type field */ + if (info->type_id == (u8)-1) + return true; + + /* Get a valid typegroup for the specific keyset */ + tgt = vcap_keyfield_typegroup(vctrl, vt, keyset); + if (!tgt) + return false; + + fields = vcap_keyfields(vctrl, vt, keyset); + if (!fields) + return false; + + typefld = &fields[VCAP_KF_TYPE]; + vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset); + vcap_decode_field(mskstream, &iter, typefld->width, (u8 *)&mask); + /* no type info if there 
are no mask bits */ + if (vcap_bitarray_zero(typefld->width, (u8 *)&mask)) + return false; + + /* Get the value of the type field in the stream and compare to the + * one define in the vcap keyset + */ + vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset); + vcap_decode_field(keystream, &iter, typefld->width, (u8 *)&value); + + return (value & mask) == (info->type_id & mask); +} + +/* Verify that the typegroup bits have the correct values */ +static int vcap_verify_typegroups(u32 *stream, int sw_width, + const struct vcap_typegroup *tgt, bool mask, + int sw_max) +{ + struct vcap_stream_iter iter; + int sw_cnt, idx; + + vcap_iter_set(&iter, sw_width, tgt, 0); + sw_cnt = 0; + while (iter.tg->width) { + u32 value = 0; + u32 tg_value = iter.tg->value; + + if (mask) + tg_value = (1 << iter.tg->width) - 1; + /* Set position to current typegroup bit */ + iter.offset = iter.tg->offset; + vcap_iter_update(&iter); + for (idx = 0; idx < iter.tg->width; idx++) { + /* Decode one typegroup bit */ + if (vcap_get_bit(stream, &iter)) + value |= 1 << idx; + iter.offset++; + vcap_iter_update(&iter); + } + if (value != tg_value) + return -EINVAL; + iter.tg++; /* next typegroup */ + sw_cnt++; + /* Stop checking more typegroups */ + if (sw_max && sw_cnt >= sw_max) + break; + } + return 0; +} + +/* Find the subword width of the key typegroup that matches the stream data */ +static int vcap_find_keystream_typegroup_sw(struct vcap_control *vctrl, + enum vcap_type vt, u32 *stream, + bool mask, int sw_max) +{ + const struct vcap_typegroup **tgt; + int sw_idx, res; + + tgt = vctrl->vcaps[vt].keyfield_set_typegroups; + /* Try the longest subword match first */ + for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) { + if (!tgt[sw_idx]) + continue; + + res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].sw_width, + tgt[sw_idx], mask, sw_max); + if (res == 0) + return sw_idx; + } + return -EINVAL; +} + +/* Verify that the typegroup information, subword count, keyset and type id + * are in sync and correct, return the list of matchin keysets + */ +int +vcap_find_keystream_keysets(struct vcap_control *vctrl, + enum vcap_type vt, + u32 *keystream, + u32 *mskstream, + bool mask, int sw_max, + struct vcap_keyset_list *kslist) +{ + const struct vcap_set *keyfield_set; + int sw_count, idx; + + sw_count = vcap_find_keystream_typegroup_sw(vctrl, vt, keystream, mask, + sw_max); + if (sw_count < 0) + return sw_count; + + keyfield_set = vctrl->vcaps[vt].keyfield_set; + for (idx = 0; idx < vctrl->vcaps[vt].keyfield_set_size; ++idx) { + if (keyfield_set[idx].sw_per_item != sw_count) + continue; + + if (vcap_verify_keystream_keyset(vctrl, vt, keystream, + mskstream, idx)) + vcap_keyset_list_add(kslist, idx); + } + if (kslist->cnt > 0) + return 0; + return -EINVAL; +} +EXPORT_SYMBOL_GPL(vcap_find_keystream_keysets); + +/* Read key data from a VCAP address and discover if there are any rule keysets + * here + */ +int vcap_addr_keysets(struct vcap_control *vctrl, + struct net_device *ndev, + struct vcap_admin *admin, + int addr, + struct vcap_keyset_list *kslist) +{ + enum vcap_type vt = admin->vtype; + int keyset_sw_regs, idx; + u32 key = 0, mask = 0; + + /* Read the cache at the specified address */ + keyset_sw_regs = DIV_ROUND_UP(vctrl->vcaps[vt].sw_width, 32); + vctrl->ops->update(ndev, admin, VCAP_CMD_READ, VCAP_SEL_ALL, addr); + vctrl->ops->cache_read(ndev, admin, VCAP_SEL_ENTRY, 0, + keyset_sw_regs); + /* Skip uninitialized key/mask entries */ + for (idx = 0; idx < keyset_sw_regs; ++idx) { + key |= 
~admin->cache.keystream[idx]; + mask |= admin->cache.maskstream[idx]; + } + if (key == 0 && mask == 0) + return -EINVAL; + /* Decode and locate the keysets */ + return vcap_find_keystream_keysets(vctrl, vt, admin->cache.keystream, + admin->cache.maskstream, false, 0, + kslist); +} +EXPORT_SYMBOL_GPL(vcap_addr_keysets); + /* Return the list of keyfields for the keyset */ const struct vcap_field *vcap_keyfields(struct vcap_control *vctrl, enum vcap_type vt, @@ -618,6 +839,517 @@ struct vcap_rule_internal *vcap_dup_rule(struct vcap_rule_internal *ri) return duprule; } +static void vcap_apply_width(u8 *dst, int width, int bytes) +{ + u8 bmask; + int idx; + + for (idx = 0; idx < bytes; idx++) { + if (width > 0) + if (width < 8) + bmask = (1 << width) - 1; + else + bmask = ~0; + else + bmask = 0; + dst[idx] &= bmask; + width -= 8; + } +} + +static void vcap_copy_from_w32be(u8 *dst, u8 *src, int size, int width) +{ + int idx, ridx, wstart, nidx; + int tail_bytes = (((size + 4) >> 2) << 2) - size; + + for (idx = 0, ridx = size - 1; idx < size; ++idx, --ridx) { + wstart = (idx >> 2) << 2; + nidx = wstart + 3 - (idx & 0x3); + if (nidx >= size) + nidx -= tail_bytes; + dst[nidx] = src[ridx]; + } + + vcap_apply_width(dst, width, size); +} + +static void vcap_copy_action_bit_field(struct vcap_u1_action *field, u8 *value) +{ + field->value = (*value) & 0x1; +} + +static void vcap_copy_limited_actionfield(u8 *dstvalue, u8 *srcvalue, + int width, int bytes) +{ + memcpy(dstvalue, srcvalue, bytes); + vcap_apply_width(dstvalue, width, bytes); +} + +static void vcap_copy_to_client_actionfield(struct vcap_rule_internal *ri, + struct vcap_client_actionfield *field, + u8 *value, u16 width) +{ + int field_size = actionfield_size_table[field->ctrl.type]; + + if (ri->admin->w32be) { + switch (field->ctrl.type) { + case VCAP_FIELD_BIT: + vcap_copy_action_bit_field(&field->data.u1, value); + break; + case VCAP_FIELD_U32: + vcap_copy_limited_actionfield((u8 *)&field->data.u32.value, + value, + width, field_size); + break; + case VCAP_FIELD_U48: + vcap_copy_from_w32be(field->data.u48.value, value, + field_size, width); + break; + case VCAP_FIELD_U56: + vcap_copy_from_w32be(field->data.u56.value, value, + field_size, width); + break; + case VCAP_FIELD_U64: + vcap_copy_from_w32be(field->data.u64.value, value, + field_size, width); + break; + case VCAP_FIELD_U72: + vcap_copy_from_w32be(field->data.u72.value, value, + field_size, width); + break; + case VCAP_FIELD_U112: + vcap_copy_from_w32be(field->data.u112.value, value, + field_size, width); + break; + case VCAP_FIELD_U128: + vcap_copy_from_w32be(field->data.u128.value, value, + field_size, width); + break; + }; + } else { + switch (field->ctrl.type) { + case VCAP_FIELD_BIT: + vcap_copy_action_bit_field(&field->data.u1, value); + break; + case VCAP_FIELD_U32: + vcap_copy_limited_actionfield((u8 *)&field->data.u32.value, + value, + width, field_size); + break; + case VCAP_FIELD_U48: + vcap_copy_limited_actionfield(field->data.u48.value, + value, + width, field_size); + break; + case VCAP_FIELD_U56: + vcap_copy_limited_actionfield(field->data.u56.value, + value, + width, field_size); + break; + case VCAP_FIELD_U64: + vcap_copy_limited_actionfield(field->data.u64.value, + value, + width, field_size); + break; + case VCAP_FIELD_U72: + vcap_copy_limited_actionfield(field->data.u72.value, + value, + width, field_size); + break; + case VCAP_FIELD_U112: + vcap_copy_limited_actionfield(field->data.u112.value, + value, + width, field_size); + break; + case VCAP_FIELD_U128: + 
vcap_copy_limited_actionfield(field->data.u128.value, + value, + width, field_size); + break; + }; + } +} + +static void vcap_copy_key_bit_field(struct vcap_u1_key *field, + u8 *value, u8 *mask) +{ + field->value = (*value) & 0x1; + field->mask = (*mask) & 0x1; +} + +static void vcap_copy_limited_keyfield(u8 *dstvalue, u8 *dstmask, + u8 *srcvalue, u8 *srcmask, + int width, int bytes) +{ + memcpy(dstvalue, srcvalue, bytes); + vcap_apply_width(dstvalue, width, bytes); + memcpy(dstmask, srcmask, bytes); + vcap_apply_width(dstmask, width, bytes); +} + +static void vcap_copy_to_client_keyfield(struct vcap_rule_internal *ri, + struct vcap_client_keyfield *field, + u8 *value, u8 *mask, u16 width) +{ + int field_size = keyfield_size_table[field->ctrl.type] / 2; + + if (ri->admin->w32be) { + switch (field->ctrl.type) { + case VCAP_FIELD_BIT: + vcap_copy_key_bit_field(&field->data.u1, value, mask); + break; + case VCAP_FIELD_U32: + vcap_copy_limited_keyfield((u8 *)&field->data.u32.value, + (u8 *)&field->data.u32.mask, + value, mask, + width, field_size); + break; + case VCAP_FIELD_U48: + vcap_copy_from_w32be(field->data.u48.value, value, + field_size, width); + vcap_copy_from_w32be(field->data.u48.mask, mask, + field_size, width); + break; + case VCAP_FIELD_U56: + vcap_copy_from_w32be(field->data.u56.value, value, + field_size, width); + vcap_copy_from_w32be(field->data.u56.mask, mask, + field_size, width); + break; + case VCAP_FIELD_U64: + vcap_copy_from_w32be(field->data.u64.value, value, + field_size, width); + vcap_copy_from_w32be(field->data.u64.mask, mask, + field_size, width); + break; + case VCAP_FIELD_U72: + vcap_copy_from_w32be(field->data.u72.value, value, + field_size, width); + vcap_copy_from_w32be(field->data.u72.mask, mask, + field_size, width); + break; + case VCAP_FIELD_U112: + vcap_copy_from_w32be(field->data.u112.value, value, + field_size, width); + vcap_copy_from_w32be(field->data.u112.mask, mask, + field_size, width); + break; + case VCAP_FIELD_U128: + vcap_copy_from_w32be(field->data.u128.value, value, + field_size, width); + vcap_copy_from_w32be(field->data.u128.mask, mask, + field_size, width); + break; + }; + } else { + switch (field->ctrl.type) { + case VCAP_FIELD_BIT: + vcap_copy_key_bit_field(&field->data.u1, value, mask); + break; + case VCAP_FIELD_U32: + vcap_copy_limited_keyfield((u8 *)&field->data.u32.value, + (u8 *)&field->data.u32.mask, + value, mask, + width, field_size); + break; + case VCAP_FIELD_U48: + vcap_copy_limited_keyfield(field->data.u48.value, + field->data.u48.mask, + value, mask, + width, field_size); + break; + case VCAP_FIELD_U56: + vcap_copy_limited_keyfield(field->data.u56.value, + field->data.u56.mask, + value, mask, + width, field_size); + break; + case VCAP_FIELD_U64: + vcap_copy_limited_keyfield(field->data.u64.value, + field->data.u64.mask, + value, mask, + width, field_size); + break; + case VCAP_FIELD_U72: + vcap_copy_limited_keyfield(field->data.u72.value, + field->data.u72.mask, + value, mask, + width, field_size); + break; + case VCAP_FIELD_U112: + vcap_copy_limited_keyfield(field->data.u112.value, + field->data.u112.mask, + value, mask, + width, field_size); + break; + case VCAP_FIELD_U128: + vcap_copy_limited_keyfield(field->data.u128.value, + field->data.u128.mask, + value, mask, + width, field_size); + break; + }; + } +} + +static void vcap_rule_alloc_keyfield(struct vcap_rule_internal *ri, + const struct vcap_field *keyfield, + enum vcap_key_field key, + u8 *value, u8 *mask) +{ + struct vcap_client_keyfield *field; + + field = 
kzalloc(sizeof(*field), GFP_KERNEL); + if (!field) + return; + INIT_LIST_HEAD(&field->ctrl.list); + field->ctrl.key = key; + field->ctrl.type = keyfield->type; + vcap_copy_to_client_keyfield(ri, field, value, mask, keyfield->width); + list_add_tail(&field->ctrl.list, &ri->data.keyfields); +} + +/* Read key data from a VCAP address and discover if there is a rule keyset + * here + */ +static bool +vcap_verify_actionstream_actionset(struct vcap_control *vctrl, + enum vcap_type vt, + u32 *actionstream, + enum vcap_actionfield_set actionset) +{ + const struct vcap_typegroup *tgt; + const struct vcap_field *fields; + const struct vcap_set *info; + + if (vcap_actionfield_count(vctrl, vt, actionset) == 0) + return false; + + info = vcap_actionfieldset(vctrl, vt, actionset); + /* Check that the actionset is valid */ + if (!info) + return false; + + /* a type_id of value -1 means that there is no type field */ + if (info->type_id == (u8)-1) + return true; + + /* Get a valid typegroup for the specific actionset */ + tgt = vcap_actionfield_typegroup(vctrl, vt, actionset); + if (!tgt) + return false; + + fields = vcap_actionfields(vctrl, vt, actionset); + if (!fields) + return false; + + /* Later this will be expanded with a check of the type id */ + return true; +} + +/* Find the subword width of the action typegroup that matches the stream data + */ +static int vcap_find_actionstream_typegroup_sw(struct vcap_control *vctrl, + enum vcap_type vt, u32 *stream, + int sw_max) +{ + const struct vcap_typegroup **tgt; + int sw_idx, res; + + tgt = vctrl->vcaps[vt].actionfield_set_typegroups; + /* Try the longest subword match first */ + for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) { + if (!tgt[sw_idx]) + continue; + res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].act_width, + tgt[sw_idx], false, sw_max); + if (res == 0) + return sw_idx; + } + return -EINVAL; +} + +/* Verify that the typegroup information, subword count, actionset and type id + * are in sync and correct, return the actionset + */ +static enum vcap_actionfield_set +vcap_find_actionstream_actionset(struct vcap_control *vctrl, + enum vcap_type vt, + u32 *stream, + int sw_max) +{ + const struct vcap_set *actionfield_set; + int sw_count, idx; + bool res; + + sw_count = vcap_find_actionstream_typegroup_sw(vctrl, vt, stream, + sw_max); + if (sw_count < 0) + return sw_count; + + actionfield_set = vctrl->vcaps[vt].actionfield_set; + for (idx = 0; idx < vctrl->vcaps[vt].actionfield_set_size; ++idx) { + if (actionfield_set[idx].sw_per_item != sw_count) + continue; + + res = vcap_verify_actionstream_actionset(vctrl, vt, + stream, idx); + if (res) + return idx; + } + return -EINVAL; +} + +/* Store action value in an element in a list for the client */ +static void vcap_rule_alloc_actionfield(struct vcap_rule_internal *ri, + const struct vcap_field *actionfield, + enum vcap_action_field action, + u8 *value) +{ + struct vcap_client_actionfield *field; + + field = kzalloc(sizeof(*field), GFP_KERNEL); + if (!field) + return; + INIT_LIST_HEAD(&field->ctrl.list); + field->ctrl.action = action; + field->ctrl.type = actionfield->type; + vcap_copy_to_client_actionfield(ri, field, value, actionfield->width); + list_add_tail(&field->ctrl.list, &ri->data.actionfields); +} + +static int vcap_decode_actionset(struct vcap_rule_internal *ri) +{ + struct vcap_control *vctrl = ri->vctrl; + struct vcap_admin *admin = ri->admin; + const struct vcap_field *actionfield; + enum vcap_actionfield_set actionset; + enum vcap_type vt = admin->vtype; + const 
struct vcap_typegroup *tgt; + struct vcap_stream_iter iter; + int idx, res, actfield_count; + u32 *actstream; + u8 value[16]; + + actstream = admin->cache.actionstream; + res = vcap_find_actionstream_actionset(vctrl, vt, actstream, 0); + if (res < 0) { + pr_err("%s:%d: could not find valid actionset: %d\n", + __func__, __LINE__, res); + return -EINVAL; + } + actionset = res; + actfield_count = vcap_actionfield_count(vctrl, vt, actionset); + actionfield = vcap_actionfields(vctrl, vt, actionset); + tgt = vcap_actionfield_typegroup(vctrl, vt, actionset); + /* Start decoding the stream */ + for (idx = 0; idx < actfield_count; ++idx) { + if (actionfield[idx].width <= 0) + continue; + /* Get the action */ + memset(value, 0, DIV_ROUND_UP(actionfield[idx].width, 8)); + vcap_iter_init(&iter, vctrl->vcaps[vt].act_width, tgt, + actionfield[idx].offset); + vcap_decode_field(actstream, &iter, actionfield[idx].width, + value); + /* Skip if no bits are set */ + if (vcap_bitarray_zero(actionfield[idx].width, value)) + continue; + vcap_rule_alloc_actionfield(ri, &actionfield[idx], idx, value); + /* Later the action id will also be checked */ + } + return vcap_set_rule_set_actionset((struct vcap_rule *)ri, actionset); +} + +static int vcap_decode_keyset(struct vcap_rule_internal *ri) +{ + struct vcap_control *vctrl = ri->vctrl; + struct vcap_stream_iter kiter, miter; + struct vcap_admin *admin = ri->admin; + enum vcap_keyfield_set keysets[10]; + const struct vcap_field *keyfield; + enum vcap_type vt = admin->vtype; + const struct vcap_typegroup *tgt; + struct vcap_keyset_list matches; + enum vcap_keyfield_set keyset; + int idx, res, keyfield_count; + u32 *maskstream; + u32 *keystream; + u8 value[16]; + u8 mask[16]; + + keystream = admin->cache.keystream; + maskstream = admin->cache.maskstream; + matches.keysets = keysets; + matches.cnt = 0; + matches.max = ARRAY_SIZE(keysets); + res = vcap_find_keystream_keysets(vctrl, vt, keystream, maskstream, + false, 0, &matches); + if (res < 0) { + pr_err("%s:%d: could not find valid keysets: %d\n", + __func__, __LINE__, res); + return -EINVAL; + } + keyset = matches.keysets[0]; + keyfield_count = vcap_keyfield_count(vctrl, vt, keyset); + keyfield = vcap_keyfields(vctrl, vt, keyset); + tgt = vcap_keyfield_typegroup(vctrl, vt, keyset); + /* Start decoding the streams */ + for (idx = 0; idx < keyfield_count; ++idx) { + if (keyfield[idx].width <= 0) + continue; + /* First get the mask */ + memset(mask, 0, DIV_ROUND_UP(keyfield[idx].width, 8)); + vcap_iter_init(&miter, vctrl->vcaps[vt].sw_width, tgt, + keyfield[idx].offset); + vcap_decode_field(maskstream, &miter, keyfield[idx].width, + mask); + /* Skip if no mask bits are set */ + if (vcap_bitarray_zero(keyfield[idx].width, mask)) + continue; + /* Get the key */ + memset(value, 0, DIV_ROUND_UP(keyfield[idx].width, 8)); + vcap_iter_init(&kiter, vctrl->vcaps[vt].sw_width, tgt, + keyfield[idx].offset); + vcap_decode_field(keystream, &kiter, keyfield[idx].width, + value); + vcap_rule_alloc_keyfield(ri, &keyfield[idx], idx, value, mask); + } + return vcap_set_rule_set_keyset((struct vcap_rule *)ri, keyset); +} + +/* Read VCAP content into the VCAP cache */ +static int vcap_read_rule(struct vcap_rule_internal *ri) +{ + struct vcap_admin *admin = ri->admin; + int sw_idx, ent_idx = 0, act_idx = 0; + u32 addr = ri->addr; + + if (!ri->size || !ri->keyset_sw_regs || !ri->actionset_sw_regs) { + pr_err("%s:%d: rule is empty\n", __func__, __LINE__); + return -EINVAL; + } + vcap_erase_cache(ri); + /* Use the values in the streams to 
read the VCAP cache */ + for (sw_idx = 0; sw_idx < ri->size; sw_idx++, addr++) { + ri->vctrl->ops->update(ri->ndev, admin, VCAP_CMD_READ, + VCAP_SEL_ALL, addr); + ri->vctrl->ops->cache_read(ri->ndev, admin, + VCAP_SEL_ENTRY, ent_idx, + ri->keyset_sw_regs); + ri->vctrl->ops->cache_read(ri->ndev, admin, + VCAP_SEL_ACTION, act_idx, + ri->actionset_sw_regs); + if (sw_idx == 0) + ri->vctrl->ops->cache_read(ri->ndev, admin, + VCAP_SEL_COUNTER, + ri->counter_id, 0); + ent_idx += ri->keyset_sw_regs; + act_idx += ri->actionset_sw_regs; + } + return 0; +} + /* Write VCAP cache content to the VCAP HW instance */ static int vcap_write_rule(struct vcap_rule_internal *ri) { @@ -1183,6 +1915,82 @@ void vcap_free_rule(struct vcap_rule *rule) } EXPORT_SYMBOL_GPL(vcap_free_rule); +struct vcap_rule *vcap_get_rule(struct vcap_control *vctrl, u32 id) +{ + struct vcap_rule_internal *elem; + struct vcap_rule_internal *ri; + int err; + + ri = NULL; + + err = vcap_api_check(vctrl); + if (err) + return ERR_PTR(err); + elem = vcap_lookup_rule(vctrl, id); + if (!elem) + return NULL; + mutex_lock(&elem->admin->lock); + ri = vcap_dup_rule(elem); + if (IS_ERR(ri)) + goto unlock; + err = vcap_read_rule(ri); + if (err) { + ri = ERR_PTR(err); + goto unlock; + } + err = vcap_decode_keyset(ri); + if (err) { + ri = ERR_PTR(err); + goto unlock; + } + err = vcap_decode_actionset(ri); + if (err) { + ri = ERR_PTR(err); + goto unlock; + } + +unlock: + mutex_unlock(&elem->admin->lock); + return (struct vcap_rule *)ri; +} +EXPORT_SYMBOL_GPL(vcap_get_rule); + +/* Update existing rule */ +int vcap_mod_rule(struct vcap_rule *rule) +{ + struct vcap_rule_internal *ri = to_intrule(rule); + struct vcap_counter ctr; + int err; + + err = vcap_api_check(ri->vctrl); + if (err) + return err; + + if (!vcap_lookup_rule(ri->vctrl, ri->data.id)) + return -ENOENT; + + mutex_lock(&ri->admin->lock); + /* Encode the bitstreams to the VCAP cache */ + vcap_erase_cache(ri); + err = vcap_encode_rule(ri); + if (err) + goto out; + + err = vcap_write_rule(ri); + if (err) + goto out; + + memset(&ctr, 0, sizeof(ctr)); + err = vcap_write_counter(ri, &ctr); + if (err) + goto out; + +out: + mutex_unlock(&ri->admin->lock); + return err; +} +EXPORT_SYMBOL_GPL(vcap_mod_rule); + /* Return the alignment offset for a new rule address */ static int vcap_valid_rule_move(struct vcap_rule_internal *el, int offset) { @@ -1389,7 +2197,7 @@ static void vcap_copy_from_client_keyfield(struct vcap_rule *rule, vcap_copy_to_w32be(field->data.u128.value, data->u128.value, size); vcap_copy_to_w32be(field->data.u128.mask, data->u128.mask, size); break; - }; + } } /* Check if the keyfield is already in the rule */ @@ -1530,6 +2338,22 @@ int vcap_rule_add_key_u128(struct vcap_rule *rule, enum vcap_key_field key, } EXPORT_SYMBOL_GPL(vcap_rule_add_key_u128); +int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key, + u32 *value, u32 *mask) +{ + struct vcap_client_keyfield *ckf; + + ckf = vcap_find_keyfield(rule, key); + if (!ckf) + return -ENOENT; + + *value = ckf->data.u32.value; + *mask = ckf->data.u32.mask; + + return 0; +} +EXPORT_SYMBOL_GPL(vcap_rule_get_key_u32); + /* Find a client action field in a rule */ static struct vcap_client_actionfield * vcap_find_actionfield(struct vcap_rule *rule, enum vcap_action_field act) @@ -1579,7 +2403,7 @@ static void vcap_copy_from_client_actionfield(struct vcap_rule *rule, case VCAP_FIELD_U128: vcap_copy_to_w32be(field->data.u128.value, data->u128.value, size); break; - }; + } } /* Check if the actionfield is already in the rule 
*/ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h index 93a0fcb12a81..0319866f9c94 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_client.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_client.h @@ -170,6 +170,10 @@ int vcap_add_rule(struct vcap_rule *rule); int vcap_del_rule(struct vcap_control *vctrl, struct net_device *ndev, u32 id); /* Make a full copy of an existing rule with a new rule id */ struct vcap_rule *vcap_copy_rule(struct vcap_rule *rule); +/* Get rule from a VCAP instance */ +struct vcap_rule *vcap_get_rule(struct vcap_control *vctrl, u32 id); +/* Update existing rule */ +int vcap_mod_rule(struct vcap_rule *rule); /* Update the keyset for the rule */ int vcap_set_rule_set_keyset(struct vcap_rule *rule, @@ -254,4 +258,8 @@ int vcap_rule_mod_action_u32(struct vcap_rule *rule, enum vcap_action_field action, u32 value); +/* Get a 32 bit key field value and mask from the rule */ +int vcap_rule_get_key_u32(struct vcap_rule *rule, enum vcap_key_field key, + u32 *value, u32 *mask); + #endif /* __VCAP_API_CLIENT__ */ diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c index 3b8d165dc832..895bfff550d2 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c @@ -18,355 +18,15 @@ struct vcap_port_debugfs_info { struct net_device *ndev; }; -static bool vcap_bitarray_zero(int width, u8 *value) -{ - int bytes = DIV_ROUND_UP(width, BITS_PER_BYTE); - u8 total = 0, bmask = 0xff; - int rwidth = width; - int idx; - - for (idx = 0; idx < bytes; ++idx, rwidth -= BITS_PER_BYTE) { - if (rwidth && rwidth < BITS_PER_BYTE) - bmask = (1 << rwidth) - 1; - total += value[idx] & bmask; - } - return total == 0; -} - -static bool vcap_get_bit(u32 *stream, struct vcap_stream_iter *itr) -{ - u32 mask = BIT(itr->reg_bitpos); - u32 *p = &stream[itr->reg_idx]; - - return !!(*p & mask); -} - -static void vcap_decode_field(u32 *stream, struct vcap_stream_iter *itr, - int width, u8 *value) -{ - int idx; - - /* Loop over the field value bits and get the field bits and - * set them in the output value byte array - */ - for (idx = 0; idx < width; idx++) { - u8 bidx = idx & 0x7; - - /* Decode one field value bit */ - if (vcap_get_bit(stream, itr)) - *value |= 1 << bidx; - vcap_iter_next(itr); - if (bidx == 7) - value++; - } -} - -/* Verify that the typegroup bits have the correct values */ -static int vcap_verify_typegroups(u32 *stream, int sw_width, - const struct vcap_typegroup *tgt, bool mask, - int sw_max) -{ - struct vcap_stream_iter iter; - int sw_cnt, idx; - - vcap_iter_set(&iter, sw_width, tgt, 0); - sw_cnt = 0; - while (iter.tg->width) { - u32 value = 0; - u32 tg_value = iter.tg->value; - - if (mask) - tg_value = (1 << iter.tg->width) - 1; - /* Set position to current typegroup bit */ - iter.offset = iter.tg->offset; - vcap_iter_update(&iter); - for (idx = 0; idx < iter.tg->width; idx++) { - /* Decode one typegroup bit */ - if (vcap_get_bit(stream, &iter)) - value |= 1 << idx; - iter.offset++; - vcap_iter_update(&iter); - } - if (value != tg_value) - return -EINVAL; - iter.tg++; /* next typegroup */ - sw_cnt++; - /* Stop checking more typegroups */ - if (sw_max && sw_cnt >= sw_max) - break; - } - return 0; -} - -/* Find the subword width of the key typegroup that matches the stream data */ -static int vcap_find_keystream_typegroup_sw(struct vcap_control *vctrl, - enum vcap_type 
vt, u32 *stream, - bool mask, int sw_max) -{ - const struct vcap_typegroup **tgt; - int sw_idx, res; - - tgt = vctrl->vcaps[vt].keyfield_set_typegroups; - /* Try the longest subword match first */ - for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) { - if (!tgt[sw_idx]) - continue; - - res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].sw_width, - tgt[sw_idx], mask, sw_max); - if (res == 0) - return sw_idx; - } - return -EINVAL; -} - -/* Find the subword width of the action typegroup that matches the stream data - */ -static int vcap_find_actionstream_typegroup_sw(struct vcap_control *vctrl, - enum vcap_type vt, u32 *stream, - int sw_max) -{ - const struct vcap_typegroup **tgt; - int sw_idx, res; - - tgt = vctrl->vcaps[vt].actionfield_set_typegroups; - /* Try the longest subword match first */ - for (sw_idx = vctrl->vcaps[vt].sw_count; sw_idx >= 0; sw_idx--) { - if (!tgt[sw_idx]) - continue; - res = vcap_verify_typegroups(stream, vctrl->vcaps[vt].act_width, - tgt[sw_idx], false, sw_max); - if (res == 0) - return sw_idx; - } - return -EINVAL; -} - -/* Verify that the type id in the stream matches the type id of the keyset */ -static bool vcap_verify_keystream_keyset(struct vcap_control *vctrl, - enum vcap_type vt, - u32 *keystream, - u32 *mskstream, - enum vcap_keyfield_set keyset) -{ - const struct vcap_info *vcap = &vctrl->vcaps[vt]; - const struct vcap_field *typefld; - const struct vcap_typegroup *tgt; - const struct vcap_field *fields; - struct vcap_stream_iter iter; - const struct vcap_set *info; - u32 value = 0; - u32 mask = 0; - - if (vcap_keyfield_count(vctrl, vt, keyset) == 0) - return false; - - info = vcap_keyfieldset(vctrl, vt, keyset); - /* Check that the keyset is valid */ - if (!info) - return false; - - /* a type_id of value -1 means that there is no type field */ - if (info->type_id == (u8)-1) - return true; - - /* Get a valid typegroup for the specific keyset */ - tgt = vcap_keyfield_typegroup(vctrl, vt, keyset); - if (!tgt) - return false; - - fields = vcap_keyfields(vctrl, vt, keyset); - if (!fields) - return false; - - typefld = &fields[VCAP_KF_TYPE]; - vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset); - vcap_decode_field(mskstream, &iter, typefld->width, (u8 *)&mask); - /* no type info if there are no mask bits */ - if (vcap_bitarray_zero(typefld->width, (u8 *)&mask)) - return false; - - /* Get the value of the type field in the stream and compare to the - * one define in the vcap keyset - */ - vcap_iter_init(&iter, vcap->sw_width, tgt, typefld->offset); - vcap_decode_field(keystream, &iter, typefld->width, (u8 *)&value); - - return (value & mask) == (info->type_id & mask); -} - -/* Verify that the typegroup information, subword count, keyset and type id - * are in sync and correct, return the list of matching keysets - */ -static int -vcap_find_keystream_keysets(struct vcap_control *vctrl, - enum vcap_type vt, - u32 *keystream, - u32 *mskstream, - bool mask, int sw_max, - struct vcap_keyset_list *kslist) -{ - const struct vcap_set *keyfield_set; - int sw_count, idx; - - sw_count = vcap_find_keystream_typegroup_sw(vctrl, vt, keystream, mask, - sw_max); - if (sw_count < 0) - return sw_count; - - keyfield_set = vctrl->vcaps[vt].keyfield_set; - for (idx = 0; idx < vctrl->vcaps[vt].keyfield_set_size; ++idx) { - if (keyfield_set[idx].sw_per_item != sw_count) - continue; - - if (vcap_verify_keystream_keyset(vctrl, vt, keystream, - mskstream, idx)) - vcap_keyset_list_add(kslist, idx); - } - if (kslist->cnt > 0) - return 0; - return -EINVAL; -} - 
-/* Read key data from a VCAP address and discover if there is a rule keyset - * here - */ -static bool -vcap_verify_actionstream_actionset(struct vcap_control *vctrl, - enum vcap_type vt, - u32 *actionstream, - enum vcap_actionfield_set actionset) -{ - const struct vcap_typegroup *tgt; - const struct vcap_field *fields; - const struct vcap_set *info; - - if (vcap_actionfield_count(vctrl, vt, actionset) == 0) - return false; - - info = vcap_actionfieldset(vctrl, vt, actionset); - /* Check that the actionset is valid */ - if (!info) - return false; - - /* a type_id of value -1 means that there is no type field */ - if (info->type_id == (u8)-1) - return true; - - /* Get a valid typegroup for the specific actionset */ - tgt = vcap_actionfield_typegroup(vctrl, vt, actionset); - if (!tgt) - return false; - - fields = vcap_actionfields(vctrl, vt, actionset); - if (!fields) - return false; - - /* Later this will be expanded with a check of the type id */ - return true; -} - -/* Verify that the typegroup information, subword count, actionset and type id - * are in sync and correct, return the actionset - */ -static enum vcap_actionfield_set -vcap_find_actionstream_actionset(struct vcap_control *vctrl, - enum vcap_type vt, - u32 *stream, - int sw_max) -{ - const struct vcap_set *actionfield_set; - int sw_count, idx; - bool res; - - sw_count = vcap_find_actionstream_typegroup_sw(vctrl, vt, stream, - sw_max); - if (sw_count < 0) - return sw_count; - - actionfield_set = vctrl->vcaps[vt].actionfield_set; - for (idx = 0; idx < vctrl->vcaps[vt].actionfield_set_size; ++idx) { - if (actionfield_set[idx].sw_per_item != sw_count) - continue; - - res = vcap_verify_actionstream_actionset(vctrl, vt, - stream, idx); - if (res) - return idx; - } - return -EINVAL; -} - -/* Read key data from a VCAP address and discover if there are any rule keysets - * here - */ -static int vcap_addr_keysets(struct vcap_control *vctrl, - struct net_device *ndev, - struct vcap_admin *admin, - int addr, - struct vcap_keyset_list *kslist) -{ - enum vcap_type vt = admin->vtype; - int keyset_sw_regs, idx; - u32 key = 0, mask = 0; - - /* Read the cache at the specified address */ - keyset_sw_regs = DIV_ROUND_UP(vctrl->vcaps[vt].sw_width, 32); - vctrl->ops->update(ndev, admin, VCAP_CMD_READ, VCAP_SEL_ALL, addr); - vctrl->ops->cache_read(ndev, admin, VCAP_SEL_ENTRY, 0, - keyset_sw_regs); - /* Skip uninitialized key/mask entries */ - for (idx = 0; idx < keyset_sw_regs; ++idx) { - key |= ~admin->cache.keystream[idx]; - mask |= admin->cache.maskstream[idx]; - } - if (key == 0 && mask == 0) - return -EINVAL; - /* Decode and locate the keysets */ - return vcap_find_keystream_keysets(vctrl, vt, admin->cache.keystream, - admin->cache.maskstream, false, 0, - kslist); -} - -static int vcap_read_rule(struct vcap_rule_internal *ri) -{ - struct vcap_admin *admin = ri->admin; - int sw_idx, ent_idx = 0, act_idx = 0; - u32 addr = ri->addr; - - if (!ri->size || !ri->keyset_sw_regs || !ri->actionset_sw_regs) { - pr_err("%s:%d: rule is empty\n", __func__, __LINE__); - return -EINVAL; - } - vcap_erase_cache(ri); - /* Use the values in the streams to read the VCAP cache */ - for (sw_idx = 0; sw_idx < ri->size; sw_idx++, addr++) { - ri->vctrl->ops->update(ri->ndev, admin, VCAP_CMD_READ, - VCAP_SEL_ALL, addr); - ri->vctrl->ops->cache_read(ri->ndev, admin, - VCAP_SEL_ENTRY, ent_idx, - ri->keyset_sw_regs); - ri->vctrl->ops->cache_read(ri->ndev, admin, - VCAP_SEL_ACTION, act_idx, - ri->actionset_sw_regs); - if (sw_idx == 0) - ri->vctrl->ops->cache_read(ri->ndev, 
admin, - VCAP_SEL_COUNTER, - ri->counter_id, 0); - ent_idx += ri->keyset_sw_regs; - act_idx += ri->actionset_sw_regs; - } - return 0; -} - /* Dump the keyfields value and mask values */ static void vcap_debugfs_show_rule_keyfield(struct vcap_control *vctrl, struct vcap_output_print *out, enum vcap_key_field key, const struct vcap_field *keyfield, - u8 *value, u8 *mask) + struct vcap_client_keyfield_data *data) { bool hex = false; + u8 *value, *mask; int idx, bytes; out->prf(out->dst, " %s: W%d: ", vcap_keyfield_name(vctrl, key), @@ -374,40 +34,62 @@ static void vcap_debugfs_show_rule_keyfield(struct vcap_control *vctrl, switch (keyfield[key].type) { case VCAP_FIELD_BIT: - out->prf(out->dst, "%d/%d", value[0], mask[0]); + out->prf(out->dst, "%d/%d", data->u1.value, data->u1.mask); break; case VCAP_FIELD_U32: + value = (u8 *)(&data->u32.value); + mask = (u8 *)(&data->u32.mask); + if (key == VCAP_KF_L3_IP4_SIP || key == VCAP_KF_L3_IP4_DIP) { - out->prf(out->dst, "%pI4h/%pI4h", value, mask); + out->prf(out->dst, "%pI4h/%pI4h", &data->u32.value, + &data->u32.mask); } else if (key == VCAP_KF_ETYPE || key == VCAP_KF_IF_IGR_PORT_MASK) { hex = true; } else { u32 fmsk = (1 << keyfield[key].width) - 1; - u32 val = *(u32 *)value; - u32 msk = *(u32 *)mask; - out->prf(out->dst, "%u/%u", val & fmsk, msk & fmsk); + out->prf(out->dst, "%u/%u", data->u32.value & fmsk, + data->u32.mask & fmsk); } break; case VCAP_FIELD_U48: + value = data->u48.value; + mask = data->u48.mask; if (key == VCAP_KF_L2_SMAC || key == VCAP_KF_L2_DMAC) - out->prf(out->dst, "%pMR/%pMR", value, mask); + out->prf(out->dst, "%pMR/%pMR", data->u48.value, + data->u48.mask); else hex = true; break; case VCAP_FIELD_U56: + value = data->u56.value; + mask = data->u56.mask; + hex = true; + break; case VCAP_FIELD_U64: + value = data->u64.value; + mask = data->u64.mask; + hex = true; + break; case VCAP_FIELD_U72: + value = data->u72.value; + mask = data->u72.mask; + hex = true; + break; case VCAP_FIELD_U112: + value = data->u112.value; + mask = data->u112.mask; hex = true; break; case VCAP_FIELD_U128: if (key == VCAP_KF_L3_IP6_SIP || key == VCAP_KF_L3_IP6_DIP) { u8 nvalue[16], nmask[16]; - vcap_netbytes_copy(nvalue, value, sizeof(nvalue)); - vcap_netbytes_copy(nmask, mask, sizeof(nmask)); + vcap_netbytes_copy(nvalue, data->u128.value, + sizeof(nvalue)); + vcap_netbytes_copy(nmask, data->u128.mask, + sizeof(nmask)); out->prf(out->dst, "%pI6/%pI6", nvalue, nmask); } else { hex = true; @@ -472,19 +154,15 @@ static int vcap_debugfs_show_rule_keyset(struct vcap_rule_internal *ri, struct vcap_output_print *out) { struct vcap_control *vctrl = ri->vctrl; - struct vcap_stream_iter kiter, miter; struct vcap_admin *admin = ri->admin; enum vcap_keyfield_set keysets[10]; const struct vcap_field *keyfield; enum vcap_type vt = admin->vtype; - const struct vcap_typegroup *tgt; + struct vcap_client_keyfield *ckf; struct vcap_keyset_list matches; - enum vcap_keyfield_set keyset; - int idx, res, keyfield_count; u32 *maskstream; u32 *keystream; - u8 value[16]; - u8 mask[16]; + int res; keystream = admin->cache.keystream; maskstream = admin->cache.maskstream; @@ -498,39 +176,20 @@ static int vcap_debugfs_show_rule_keyset(struct vcap_rule_internal *ri, __func__, __LINE__, res); return -EINVAL; } - keyset = matches.keysets[0]; out->prf(out->dst, " keysets:"); - for (idx = 0; idx < matches.cnt; ++idx) + for (int idx = 0; idx < matches.cnt; ++idx) out->prf(out->dst, " %s", vcap_keyset_name(vctrl, matches.keysets[idx])); out->prf(out->dst, "\n"); out->prf(out->dst, " 
keyset_sw: %d\n", ri->keyset_sw); out->prf(out->dst, " keyset_sw_regs: %d\n", ri->keyset_sw_regs); - keyfield_count = vcap_keyfield_count(vctrl, vt, keyset); - keyfield = vcap_keyfields(vctrl, vt, keyset); - tgt = vcap_keyfield_typegroup(vctrl, vt, keyset); - /* Start decoding the streams */ - for (idx = 0; idx < keyfield_count; ++idx) { - if (keyfield[idx].width <= 0) - continue; - /* First get the mask */ - memset(mask, 0, DIV_ROUND_UP(keyfield[idx].width, 8)); - vcap_iter_init(&miter, vctrl->vcaps[vt].sw_width, tgt, - keyfield[idx].offset); - vcap_decode_field(maskstream, &miter, keyfield[idx].width, - mask); - /* Skip if no mask bits are set */ - if (vcap_bitarray_zero(keyfield[idx].width, mask)) - continue; - /* Get the key */ - memset(value, 0, DIV_ROUND_UP(keyfield[idx].width, 8)); - vcap_iter_init(&kiter, vctrl->vcaps[vt].sw_width, tgt, - keyfield[idx].offset); - vcap_decode_field(keystream, &kiter, keyfield[idx].width, - value); - vcap_debugfs_show_rule_keyfield(vctrl, out, idx, keyfield, - value, mask); + + list_for_each_entry(ckf, &ri->data.keyfields, ctrl.list) { + keyfield = vcap_keyfields(vctrl, admin->vtype, ri->data.keyset); + vcap_debugfs_show_rule_keyfield(vctrl, out, ckf->ctrl.key, + keyfield, &ckf->data); } + return 0; } @@ -540,48 +199,21 @@ static int vcap_debugfs_show_rule_actionset(struct vcap_rule_internal *ri, struct vcap_control *vctrl = ri->vctrl; struct vcap_admin *admin = ri->admin; const struct vcap_field *actionfield; - enum vcap_actionfield_set actionset; - enum vcap_type vt = admin->vtype; - const struct vcap_typegroup *tgt; - struct vcap_stream_iter iter; - int idx, res, actfield_count; - u32 *actstream; - u8 value[16]; - bool no_bits; - - actstream = admin->cache.actionstream; - res = vcap_find_actionstream_actionset(vctrl, vt, actstream, 0); - if (res < 0) { - pr_err("%s:%d: could not find valid actionset: %d\n", - __func__, __LINE__, res); - return -EINVAL; - } - actionset = res; + struct vcap_client_actionfield *caf; + out->prf(out->dst, " actionset: %s\n", vcap_actionset_name(vctrl, ri->data.actionset)); out->prf(out->dst, " actionset_sw: %d\n", ri->actionset_sw); out->prf(out->dst, " actionset_sw_regs: %d\n", ri->actionset_sw_regs); - actfield_count = vcap_actionfield_count(vctrl, vt, actionset); - actionfield = vcap_actionfields(vctrl, vt, actionset); - tgt = vcap_actionfield_typegroup(vctrl, vt, actionset); - /* Start decoding the stream */ - for (idx = 0; idx < actfield_count; ++idx) { - if (actionfield[idx].width <= 0) - continue; - /* Get the action */ - memset(value, 0, DIV_ROUND_UP(actionfield[idx].width, 8)); - vcap_iter_init(&iter, vctrl->vcaps[vt].act_width, tgt, - actionfield[idx].offset); - vcap_decode_field(actstream, &iter, actionfield[idx].width, - value); - /* Skip if no bits are set */ - no_bits = vcap_bitarray_zero(actionfield[idx].width, value); - if (no_bits) - continue; - /* Later the action id will also be checked */ - vcap_debugfs_show_rule_actionfield(vctrl, out, idx, actionfield, - value); + + list_for_each_entry(caf, &ri->data.actionfields, ctrl.list) { + actionfield = vcap_actionfields(vctrl, admin->vtype, + ri->data.actionset); + vcap_debugfs_show_rule_actionfield(vctrl, out, caf->ctrl.action, + actionfield, + &caf->data.u1.value); } + return 0; } @@ -632,32 +264,22 @@ static int vcap_show_admin(struct vcap_control *vctrl, struct vcap_admin *admin, struct vcap_output_print *out) { - struct vcap_rule_internal *elem, *ri; + struct vcap_rule_internal *elem; + struct vcap_rule *vrule; int ret = 0; vcap_show_admin_info(vctrl, 
admin, out); - mutex_lock(&admin->lock); list_for_each_entry(elem, &admin->rules, list) { - ri = vcap_dup_rule(elem); - if (IS_ERR(ri)) { - ret = PTR_ERR(ri); - goto err_unlock; + vrule = vcap_get_rule(vctrl, elem->data.id); + if (IS_ERR_OR_NULL(vrule)) { + ret = PTR_ERR(vrule); + break; } - /* Read data from VCAP */ - ret = vcap_read_rule(ri); - if (ret) - goto err_free_rule; + out->prf(out->dst, "\n"); - vcap_show_admin_rule(vctrl, admin, out, ri); - vcap_free_rule((struct vcap_rule *)ri); + vcap_show_admin_rule(vctrl, admin, out, to_intrule(vrule)); + vcap_free_rule(vrule); } - mutex_unlock(&admin->lock); - return 0; - -err_free_rule: - vcap_free_rule((struct vcap_rule *)ri); -err_unlock: - mutex_unlock(&admin->lock); return ret; } diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_private.h b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h index 9ac1b1d55f22..4fd21da97679 100644 --- a/drivers/net/ethernet/microchip/vcap/vcap_api_private.h +++ b/drivers/net/ethernet/microchip/vcap/vcap_api_private.h @@ -96,4 +96,18 @@ const char *vcap_actionset_name(struct vcap_control *vctrl, const char *vcap_actionfield_name(struct vcap_control *vctrl, enum vcap_action_field action); +/* Read key data from a VCAP address and discover if there are any rule keysets + * here + */ +int vcap_addr_keysets(struct vcap_control *vctrl, struct net_device *ndev, + struct vcap_admin *admin, int addr, + struct vcap_keyset_list *kslist); + +/* Verify that the typegroup information, subword count, keyset and type id + * are in sync and correct, return the list of matchin keysets + */ +int vcap_find_keystream_keysets(struct vcap_control *vctrl, enum vcap_type vt, + u32 *keystream, u32 *mskstream, bool mask, + int sw_max, struct vcap_keyset_list *kslist); + #endif /* __VCAP_API_PRIVATE__ */ diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index ad1277ac7f0d..2f6a048dee90 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1363,10 +1363,11 @@ static void mana_poll_rx_cq(struct mana_cq *cq) xdp_do_flush(); } -static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) +static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; u8 arm_bit; + int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1375,26 +1376,31 @@ static void mana_cq_handler(void *context, struct gdma_queue *gdma_queue) else mana_poll_tx_cq(cq); - if (cq->work_done < cq->budget && - napi_complete_done(&cq->napi, cq->work_done)) { + w = cq->work_done; + + if (w < cq->budget && + napi_complete_done(&cq->napi, w)) { arm_bit = SET_ARM_BIT; } else { arm_bit = 0; } mana_gd_ring_cq(gdma_queue, arm_bit); + + return w; } static int mana_poll(struct napi_struct *napi, int budget) { struct mana_cq *cq = container_of(napi, struct mana_cq, napi); + int w; cq->work_done = 0; cq->budget = budget; - mana_cq_handler(cq, cq->gdma_cq); + w = mana_cq_handler(cq, cq->gdma_cq); - return min(cq->work_done, budget); + return min(w, budget); } static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue) diff --git a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c index 4247bca09807..aa8aba4ff7aa 100644 --- a/drivers/net/ethernet/netronome/nfp/ccm_mbox.c +++ b/drivers/net/ethernet/netronome/nfp/ccm_mbox.c @@ -503,7 +503,7 @@ nfp_ccm_mbox_msg_prepare(struct nfp_net *nn, struct sk_buff *skb, max_len = max(max_reply_size, 
round_up(skb->len, 4)); if (max_len > mbox_max) { nn_dp_warn(&nn->dp, - "message too big for tha mailbox: %u/%u vs %u\n", + "message too big for the mailbox: %u/%u vs %u\n", skb->len, max_reply_size, mbox_max); return -EMSGSIZE; } diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index 2b427d8ccb2f..ccacb6ab6c39 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -282,7 +282,7 @@ netdev_tx_t nfp_nfdk_tx(struct sk_buff *skb, struct net_device *netdev) dma_len = skb_headlen(skb); if (skb_is_gso(skb)) type = NFDK_DESC_TX_TYPE_TSO; - else if (!nr_frags && dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + else if (!nr_frags && dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -927,7 +927,7 @@ nfp_nfdk_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring, dma_len = pkt_len; dma_addr = rxbuf->dma_addr + dma_off; - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; @@ -1325,7 +1325,7 @@ nfp_nfdk_ctrl_tx_one(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, txbuf = &tx_ring->ktxbufs[wr_idx]; dma_len = skb_headlen(skb); - if (dma_len < NFDK_TX_MAX_DATA_PER_HEAD) + if (dma_len <= NFDK_TX_MAX_DATA_PER_HEAD) type = NFDK_DESC_TX_TYPE_SIMPLE; else type = NFDK_DESC_TX_TYPE_GATHER; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 6c83e47d8b3d..da33f09facb9 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -88,6 +88,9 @@ #define NFP_NET_FL_BATCH 16 /* Add freelist in this Batch size */ #define NFP_NET_XDP_MAX_COMPLETE 2048 /* XDP bufs to reclaim in NAPI poll */ +/* MC definitions */ +#define NFP_NET_CFG_MAC_MC_MAX 1024 /* The maximum number of MC address per port*/ + /* Offload definitions */ #define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16)) @@ -476,6 +479,7 @@ struct nfp_stat_pair { * @rx_dma_off: Offset at which DMA packets (for XDP headroom) * @rx_offset: Offset in the RX buffers where packet data starts * @ctrl: Local copy of the control register/word. + * @ctrl_w1: Local copy of the control register/word1. 
* @fl_bufsz: Currently configured size of the freelist buffers * @xdp_prog: Installed XDP program * @tx_rings: Array of pre-allocated TX ring structures @@ -508,6 +512,7 @@ struct nfp_net_dp { u32 rx_dma_off; u32 ctrl; + u32 ctrl_w1; u32 fl_bufsz; struct bpf_prog *xdp_prog; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 682a9198fb54..2314cf55e821 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1007,6 +1007,7 @@ static int nfp_net_set_config_and_enable(struct nfp_net *nn) new_ctrl |= NFP_NET_CFG_CTRL_RINGCFG; nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, nn->dp.ctrl_w1); err = nfp_net_reconfig(nn, update); if (err) { nfp_net_clear_config_and_disable(nn); @@ -1333,18 +1334,59 @@ err_unlock: return err; } +static int nfp_net_mc_cfg(struct net_device *netdev, const unsigned char *addr, const u32 cmd) +{ + struct nfp_net *nn = netdev_priv(netdev); + int ret; + + ret = nfp_net_mbox_lock(nn, NFP_NET_CFG_MULTICAST_SZ); + if (ret) + return ret; + + nn_writel(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_HI, + get_unaligned_be32(addr)); + nn_writew(nn, nn->tlv_caps.mbox_off + NFP_NET_CFG_MULTICAST_MAC_LO, + get_unaligned_be16(addr + 4)); + + return nfp_net_mbox_reconfig_and_unlock(nn, cmd); +} + +static int nfp_net_mc_sync(struct net_device *netdev, const unsigned char *addr) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (netdev_mc_count(netdev) > NFP_NET_CFG_MAC_MC_MAX) { + nn_err(nn, "Requested number of MC addresses (%d) exceeds maximum (%d).\n", + netdev_mc_count(netdev), NFP_NET_CFG_MAC_MC_MAX); + return -EINVAL; + } + + return nfp_net_mc_cfg(netdev, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD); +} + +static int nfp_net_mc_unsync(struct net_device *netdev, const unsigned char *addr) +{ + return nfp_net_mc_cfg(netdev, addr, NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL); +} + static void nfp_net_set_rx_mode(struct net_device *netdev) { struct nfp_net *nn = netdev_priv(netdev); - u32 new_ctrl; + u32 new_ctrl, new_ctrl_w1; new_ctrl = nn->dp.ctrl; + new_ctrl_w1 = nn->dp.ctrl_w1; if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI) new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC; else new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC; + if (netdev->flags & IFF_ALLMULTI) + new_ctrl_w1 &= ~NFP_NET_CFG_CTRL_MCAST_FILTER; + else + new_ctrl_w1 |= nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER; + if (netdev->flags & IFF_PROMISC) { if (nn->cap & NFP_NET_CFG_CTRL_PROMISC) new_ctrl |= NFP_NET_CFG_CTRL_PROMISC; @@ -1354,13 +1396,21 @@ static void nfp_net_set_rx_mode(struct net_device *netdev) new_ctrl &= ~NFP_NET_CFG_CTRL_PROMISC; } - if (new_ctrl == nn->dp.ctrl) + if ((nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER) && + __dev_mc_sync(netdev, nfp_net_mc_sync, nfp_net_mc_unsync)) + netdev_err(netdev, "Sync mc address failed\n"); + + if (new_ctrl == nn->dp.ctrl && new_ctrl_w1 == nn->dp.ctrl_w1) return; - nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + if (new_ctrl != nn->dp.ctrl) + nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl); + if (new_ctrl_w1 != nn->dp.ctrl_w1) + nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1); nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN); nn->dp.ctrl = new_ctrl; + nn->dp.ctrl_w1 = new_ctrl_w1; } static void nfp_net_rss_init_itbl(struct nfp_net *nn) @@ -2092,7 +2142,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.extend, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: 
%#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -2120,6 +2170,7 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_CSUM_COMPLETE ? "RXCSUM_COMPLETE " : "", nn->cap & NFP_NET_CFG_CTRL_LIVE_ADDR ? "LIVE_ADDR " : "", + nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER ? "MULTICAST_FILTER " : "", nfp_app_extra_cap(nn->app, nn)); } @@ -2548,6 +2599,9 @@ int nfp_net_init(struct nfp_net *nn) if (nn->cap & NFP_NET_CFG_CTRL_TXRWB) nn->dp.ctrl |= NFP_NET_CFG_CTRL_TXRWB; + if (nn->cap_w1 & NFP_NET_CFG_CTRL_MCAST_FILTER) + nn->dp.ctrl_w1 |= NFP_NET_CFG_CTRL_MCAST_FILTER; + /* Stash the re-configuration queue away. First odd queue in TX Bar */ nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; @@ -2555,6 +2609,7 @@ int nfp_net_init(struct nfp_net *nn) nn_writel(nn, NFP_NET_CFG_CTRL, 0); nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, 0); nn_writeq(nn, NFP_NET_CFG_RXRS_ENABLE, 0); + nn_writel(nn, NFP_NET_CFG_CTRL_WORD1, 0); err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_RING | NFP_NET_CFG_UPDATE_GEN); if (err) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index cc11b3dc1252..51124309ae1f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -267,6 +267,7 @@ #define NFP_NET_CFG_CTRL_WORD1 0x0098 #define NFP_NET_CFG_CTRL_PKT_TYPE (0x1 << 0) /* Pkttype offload */ #define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /* IPsec offload */ +#define NFP_NET_CFG_CTRL_MCAST_FILTER (0x1 << 2) /* Multicast Filter */ #define NFP_NET_CFG_CAP_WORD1 0x00a4 @@ -413,6 +414,9 @@ #define NFP_NET_CFG_MBOX_CMD_PCI_DSCP_PRIOMAP_SET 5 #define NFP_NET_CFG_MBOX_CMD_TLV_CMSG 6 +#define NFP_NET_CFG_MBOX_CMD_MULTICAST_ADD 8 +#define NFP_NET_CFG_MBOX_CMD_MULTICAST_DEL 9 + /* VLAN filtering using general use mailbox * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter @@ -424,6 +428,17 @@ #define NFP_NET_CFG_VLAN_FILTER_PROTO (NFP_NET_CFG_VLAN_FILTER + 2) #define NFP_NET_CFG_VLAN_FILTER_SZ 0x0004 +/* Multicast filtering using general use mailbox + * %NFP_NET_CFG_MULTICAST: Base address of Multicast filter mailbox + * %NFP_NET_CFG_MULTICAST_MAC_HI: High 32-bits of Multicast MAC address + * %NFP_NET_CFG_MULTICAST_MAC_LO: Low 16-bits of Multicast MAC address + * %NFP_NET_CFG_MULTICAST_SZ: Size of the Multicast filter mailbox in bytes + */ +#define NFP_NET_CFG_MULTICAST NFP_NET_CFG_MBOX_SIMPLE_VAL +#define NFP_NET_CFG_MULTICAST_MAC_HI NFP_NET_CFG_MULTICAST +#define NFP_NET_CFG_MULTICAST_MAC_LO (NFP_NET_CFG_MULTICAST + 6) +#define NFP_NET_CFG_MULTICAST_SZ 0x0006 + /* TLV capabilities * %NFP_NET_CFG_TLV_TYPE: Offset of type within the TLV * %NFP_NET_CFG_TLV_TYPE_REQUIRED: Driver must be able to parse the TLV diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index ed274f033626..e5116a86cfbc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -200,7 +200,7 @@ static void qed_ll2b_complete_rx_packet(void *cxt, dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, cdev->ll2->rx_size, DMA_FROM_DEVICE); - skb = build_skb(buffer->data, 0); + skb = slab_build_skb(buffer->data); if (!skb) { DP_INFO(cdev, "Failed to build SKB\n"); kfree(buffer->data); diff 
--git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index ec157885da13..a9dcc98b6af1 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5283,6 +5283,8 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->hw_features |= NETIF_F_RXALL; dev->hw_features |= NETIF_F_RXFCS; + netdev_sw_irq_coalesce_default_on(dev); + /* configure chip for default features */ rtl8169_set_features(dev, dev->features); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 6bc923326268..33f723a9f471 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -841,7 +841,7 @@ static bool ravb_rx_gbeth(struct net_device *ndev, int *quota, int q) napi_gro_receive(&priv->napi[q], priv->rx_1st_skb); stats->rx_packets++; - stats->rx_bytes += priv->rx_1st_skb->len; + stats->rx_bytes += pkt_len; break; } } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index c2224e41a694..cc30524c2fe4 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -1164,7 +1164,7 @@ static ssize_t mcdi_logging_show(struct device *dev, struct efx_nic *efx = dev_get_drvdata(dev); struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); + return sysfs_emit(buf, "%d\n", mcdi->logging_enabled); } static ssize_t mcdi_logging_store(struct device *dev, diff --git a/drivers/net/ethernet/sfc/siena/efx_common.c b/drivers/net/ethernet/sfc/siena/efx_common.c index 1fd396b00bfb..e4b294b8e9ac 100644 --- a/drivers/net/ethernet/sfc/siena/efx_common.c +++ b/drivers/net/ethernet/sfc/siena/efx_common.c @@ -1178,7 +1178,7 @@ static ssize_t mcdi_logging_show(struct device *dev, struct efx_nic *efx = dev_get_drvdata(dev); struct efx_mcdi_iface *mcdi = efx_mcdi(efx); - return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled); + return sysfs_emit(buf, "%d\n", mcdi->logging_enabled); } static ssize_t mcdi_logging_store(struct device *dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 31ff35174034..f77511fe4e87 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -235,6 +235,15 @@ config DWMAC_INTEL_PLAT the stmmac device driver. This driver is used for the Intel Keem Bay SoC. +config DWMAC_TEGRA + tristate "NVIDIA Tegra MGBE support" + depends on ARCH_TEGRA || COMPILE_TEST + help + This selects the Multi-GigaBit Ethernet (MGBE) Controller that is + found on the NVIDIA Tegra SoC devices. This driver provides the glue + layer on top of the stmmac driver required for these NVIDIA Tegra SoC + devices. 
+ config DWMAC_VISCONTI tristate "Toshiba Visconti DWMAC support" default ARCH_VISCONTI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index d4e12e9ace4f..057e4bab5c08 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_DWMAC_DWC_QOS_ETH) += dwmac-dwc-qos-eth.o obj-$(CONFIG_DWMAC_INTEL_PLAT) += dwmac-intel-plat.o obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o obj-$(CONFIG_DWMAC_IMX8) += dwmac-imx.o +obj-$(CONFIG_DWMAC_TEGRA) += dwmac-tegra.o obj-$(CONFIG_DWMAC_VISCONTI) += dwmac-visconti.o stmmac-platform-objs:= stmmac_platform.o dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c new file mode 100644 index 000000000000..bdf990cf2f31 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/platform_device.h> +#include <linux/of_device.h> +#include <linux/module.h> +#include <linux/stmmac.h> +#include <linux/clk.h> + +#include "stmmac_platform.h" + +static const char *const mgbe_clks[] = { + "rx-pcs", "tx", "tx-pcs", "mac-divider", "mac", "mgbe", "ptp-ref", "mac" +}; + +struct tegra_mgbe { + struct device *dev; + + struct clk_bulk_data *clks; + + struct reset_control *rst_mac; + struct reset_control *rst_pcs; + + void __iomem *hv; + void __iomem *regs; + void __iomem *xpcs; + + struct mii_bus *mii; +}; + +#define XPCS_WRAP_UPHY_RX_CONTROL 0x801c +#define XPCS_WRAP_UPHY_RX_CONTROL_RX_SW_OVRD BIT(31) +#define XPCS_WRAP_UPHY_RX_CONTROL_RX_PCS_PHY_RDY BIT(10) +#define XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET BIT(9) +#define XPCS_WRAP_UPHY_RX_CONTROL_RX_CAL_EN BIT(8) +#define XPCS_WRAP_UPHY_RX_CONTROL_RX_SLEEP (BIT(7) | BIT(6)) +#define XPCS_WRAP_UPHY_RX_CONTROL_AUX_RX_IDDQ BIT(5) +#define XPCS_WRAP_UPHY_RX_CONTROL_RX_IDDQ BIT(4) +#define XPCS_WRAP_UPHY_RX_CONTROL_RX_DATA_EN BIT(0) +#define XPCS_WRAP_UPHY_HW_INIT_CTRL 0x8020 +#define XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN BIT(0) +#define XPCS_WRAP_UPHY_HW_INIT_CTRL_RX_EN BIT(2) +#define XPCS_WRAP_UPHY_STATUS 0x8044 +#define XPCS_WRAP_UPHY_STATUS_TX_P_UP BIT(0) +#define XPCS_WRAP_IRQ_STATUS 0x8050 +#define XPCS_WRAP_IRQ_STATUS_PCS_LINK_STS BIT(6) + +#define XPCS_REG_ADDR_SHIFT 10 +#define XPCS_REG_ADDR_MASK 0x1fff +#define XPCS_ADDR 0x3fc + +#define MGBE_WRAP_COMMON_INTR_ENABLE 0x8704 +#define MAC_SBD_INTR BIT(2) +#define MGBE_WRAP_AXI_ASID0_CTRL 0x8400 +#define MGBE_SID 0x6 + +static int __maybe_unused tegra_mgbe_suspend(struct device *dev) +{ + struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(dev); + int err; + + err = stmmac_suspend(dev); + if (err) + return err; + + clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); + + return reset_control_assert(mgbe->rst_mac); +} + +static int __maybe_unused tegra_mgbe_resume(struct device *dev) +{ + struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(dev); + u32 value; + int err; + + err = clk_bulk_prepare_enable(ARRAY_SIZE(mgbe_clks), mgbe->clks); + if (err < 0) + return err; + + err = reset_control_deassert(mgbe->rst_mac); + if (err < 0) + return err; + + /* Enable common interrupt at wrapper level */ + writel(MAC_SBD_INTR, mgbe->regs + MGBE_WRAP_COMMON_INTR_ENABLE); + + /* Program SID */ + writel(MGBE_SID, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_STATUS); + if ((value & XPCS_WRAP_UPHY_STATUS_TX_P_UP) == 0) { + 
value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL); + value |= XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL); + } + + err = readl_poll_timeout(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL, value, + (value & XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN) == 0, + 500, 500 * 2000); + if (err < 0) { + dev_err(mgbe->dev, "timeout waiting for TX lane to become enabled\n"); + clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); + return err; + } + + err = stmmac_resume(dev); + if (err < 0) + clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); + + return err; +} + +static int mgbe_uphy_lane_bringup_serdes_up(struct net_device *ndev, void *mgbe_data) +{ + struct tegra_mgbe *mgbe = (struct tegra_mgbe *)mgbe_data; + u32 value; + int err; + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_SW_OVRD; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_IDDQ; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value &= ~XPCS_WRAP_UPHY_RX_CONTROL_AUX_RX_IDDQ; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_SLEEP; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_CAL_EN; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + err = readl_poll_timeout(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL, value, + (value & XPCS_WRAP_UPHY_RX_CONTROL_RX_CAL_EN) == 0, + 1000, 1000 * 2000); + if (err < 0) { + dev_err(mgbe->dev, "timeout waiting for RX calibration to become enabled\n"); + return err; + } + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_DATA_EN; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_CDR_RESET; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_PCS_PHY_RDY; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + err = readl_poll_timeout(mgbe->xpcs + XPCS_WRAP_IRQ_STATUS, value, + value & XPCS_WRAP_IRQ_STATUS_PCS_LINK_STS, + 500, 500 * 2000); + if (err < 0) { + dev_err(mgbe->dev, "timeout waiting for link to become ready\n"); + return err; + } + + /* clear status */ + writel(value, mgbe->xpcs + XPCS_WRAP_IRQ_STATUS); + + return 0; +} + +static void mgbe_uphy_lane_bringup_serdes_down(struct net_device *ndev, void *mgbe_data) +{ + struct tegra_mgbe *mgbe = (struct tegra_mgbe *)mgbe_data; + u32 value; + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_SW_OVRD; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value &= ~XPCS_WRAP_UPHY_RX_CONTROL_RX_DATA_EN; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_SLEEP; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + 
value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_AUX_RX_IDDQ; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); + value |= XPCS_WRAP_UPHY_RX_CONTROL_RX_IDDQ; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_RX_CONTROL); +} + +static int tegra_mgbe_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat; + struct stmmac_resources res; + struct tegra_mgbe *mgbe; + int irq, err, i; + u32 value; + + mgbe = devm_kzalloc(&pdev->dev, sizeof(*mgbe), GFP_KERNEL); + if (!mgbe) + return -ENOMEM; + + mgbe->dev = &pdev->dev; + + memset(&res, 0, sizeof(res)); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + mgbe->hv = devm_platform_ioremap_resource_byname(pdev, "hypervisor"); + if (IS_ERR(mgbe->hv)) + return PTR_ERR(mgbe->hv); + + mgbe->regs = devm_platform_ioremap_resource_byname(pdev, "mac"); + if (IS_ERR(mgbe->regs)) + return PTR_ERR(mgbe->regs); + + mgbe->xpcs = devm_platform_ioremap_resource_byname(pdev, "xpcs"); + if (IS_ERR(mgbe->xpcs)) + return PTR_ERR(mgbe->xpcs); + + res.addr = mgbe->regs; + res.irq = irq; + + mgbe->clks = devm_kzalloc(&pdev->dev, sizeof(*mgbe->clks), GFP_KERNEL); + if (!mgbe->clks) + return -ENOMEM; + + for (i = 0; i < ARRAY_SIZE(mgbe_clks); i++) + mgbe->clks[i].id = mgbe_clks[i]; + + err = devm_clk_bulk_get(mgbe->dev, ARRAY_SIZE(mgbe_clks), mgbe->clks); + if (err < 0) + return err; + + err = clk_bulk_prepare_enable(ARRAY_SIZE(mgbe_clks), mgbe->clks); + if (err < 0) + return err; + + /* Perform MAC reset */ + mgbe->rst_mac = devm_reset_control_get(&pdev->dev, "mac"); + if (IS_ERR(mgbe->rst_mac)) { + err = PTR_ERR(mgbe->rst_mac); + goto disable_clks; + } + + err = reset_control_assert(mgbe->rst_mac); + if (err < 0) + goto disable_clks; + + usleep_range(2000, 4000); + + err = reset_control_deassert(mgbe->rst_mac); + if (err < 0) + goto disable_clks; + + /* Perform PCS reset */ + mgbe->rst_pcs = devm_reset_control_get(&pdev->dev, "pcs"); + if (IS_ERR(mgbe->rst_pcs)) { + err = PTR_ERR(mgbe->rst_pcs); + goto disable_clks; + } + + err = reset_control_assert(mgbe->rst_pcs); + if (err < 0) + goto disable_clks; + + usleep_range(2000, 4000); + + err = reset_control_deassert(mgbe->rst_pcs); + if (err < 0) + goto disable_clks; + + plat = stmmac_probe_config_dt(pdev, res.mac); + if (IS_ERR(plat)) { + err = PTR_ERR(plat); + goto disable_clks; + } + + plat->has_xgmac = 1; + plat->tso_en = 1; + plat->pmt = 1; + plat->bsp_priv = mgbe; + + if (!plat->mdio_node) + plat->mdio_node = of_get_child_by_name(pdev->dev.of_node, "mdio"); + + if (!plat->mdio_bus_data) { + plat->mdio_bus_data = devm_kzalloc(&pdev->dev, sizeof(*plat->mdio_bus_data), + GFP_KERNEL); + if (!plat->mdio_bus_data) { + err = -ENOMEM; + goto remove; + } + } + + plat->mdio_bus_data->needs_reset = true; + + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_STATUS); + if ((value & XPCS_WRAP_UPHY_STATUS_TX_P_UP) == 0) { + value = readl(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL); + value |= XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN; + writel(value, mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL); + } + + err = readl_poll_timeout(mgbe->xpcs + XPCS_WRAP_UPHY_HW_INIT_CTRL, value, + (value & XPCS_WRAP_UPHY_HW_INIT_CTRL_TX_EN) == 0, + 500, 500 * 2000); + if (err < 0) { + dev_err(mgbe->dev, "timeout waiting for TX lane to become enabled\n"); + goto remove; + } + + plat->serdes_powerup = mgbe_uphy_lane_bringup_serdes_up; + plat->serdes_powerdown = mgbe_uphy_lane_bringup_serdes_down; + + /* Tx FIFO Size - 128KB */ + 
plat->tx_fifo_size = 131072; + /* Rx FIFO Size - 192KB */ + plat->rx_fifo_size = 196608; + + /* Enable common interrupt at wrapper level */ + writel(MAC_SBD_INTR, mgbe->regs + MGBE_WRAP_COMMON_INTR_ENABLE); + + /* Program SID */ + writel(MGBE_SID, mgbe->hv + MGBE_WRAP_AXI_ASID0_CTRL); + + plat->serdes_up_after_phy_linkup = 1; + + err = stmmac_dvr_probe(&pdev->dev, plat, &res); + if (err < 0) + goto remove; + + return 0; + +remove: + stmmac_remove_config_dt(pdev, plat); +disable_clks: + clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); + + return err; +} + +static int tegra_mgbe_remove(struct platform_device *pdev) +{ + struct tegra_mgbe *mgbe = get_stmmac_bsp_priv(&pdev->dev); + + clk_bulk_disable_unprepare(ARRAY_SIZE(mgbe_clks), mgbe->clks); + + stmmac_pltfr_remove(pdev); + + return 0; +} + +static const struct of_device_id tegra_mgbe_match[] = { + { .compatible = "nvidia,tegra234-mgbe", }, + { } +}; +MODULE_DEVICE_TABLE(of, tegra_mgbe_match); + +static SIMPLE_DEV_PM_OPS(tegra_mgbe_pm_ops, tegra_mgbe_suspend, tegra_mgbe_resume); + +static struct platform_driver tegra_mgbe_driver = { + .probe = tegra_mgbe_probe, + .remove = tegra_mgbe_remove, + .driver = { + .name = "tegra-mgbe", + .pm = &tegra_mgbe_pm_ops, + .of_match_table = tegra_mgbe_match, + }, +}; +module_platform_driver(tegra_mgbe_driver); + +MODULE_AUTHOR("Thierry Reding <[email protected]>"); +MODULE_DESCRIPTION("NVIDIA Tegra MGBE driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 18c7ca29da2c..f36590d0c830 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -988,6 +988,9 @@ static void stmmac_mac_link_up(struct phylink_config *config, struct stmmac_priv *priv = netdev_priv(to_net_dev(config->dev)); u32 old_ctrl, ctrl; + if (priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup) + priv->plat->serdes_powerup(priv->dev, priv->plat->bsp_priv); + old_ctrl = readl(priv->ioaddr + MAC_CTRL_REG); ctrl = old_ctrl & ~priv->hw->link.speed_mask; @@ -3809,7 +3812,7 @@ static int __stmmac_open(struct net_device *dev, stmmac_reset_queues_param(priv); - if (priv->plat->serdes_powerup) { + if (!priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup) { ret = priv->plat->serdes_powerup(dev, priv->plat->bsp_priv); if (ret < 0) { netdev_err(priv->dev, "%s: Serdes powerup failed\n", @@ -7518,7 +7521,7 @@ int stmmac_resume(struct device *dev) stmmac_mdio_reset(priv->mii); } - if (priv->plat->serdes_powerup) { + if (!priv->plat->serdes_up_after_phy_linkup && priv->plat->serdes_powerup) { ret = priv->plat->serdes_powerup(ndev, priv->plat->bsp_priv); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 50f6b4a14be4..eb6d9cd8e93f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -108,10 +108,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev) axi->axi_lpi_en = of_property_read_bool(np, "snps,lpi_en"); axi->axi_xit_frm = of_property_read_bool(np, "snps,xit_frm"); - axi->axi_kbbe = of_property_read_bool(np, "snps,axi_kbbe"); - axi->axi_fb = of_property_read_bool(np, "snps,axi_fb"); - axi->axi_mb = of_property_read_bool(np, "snps,axi_mb"); - axi->axi_rb = of_property_read_bool(np, "snps,axi_rb"); + axi->axi_kbbe = of_property_read_bool(np, "snps,kbbe"); + axi->axi_fb = 
of_property_read_bool(np, "snps,fb"); + axi->axi_mb = of_property_read_bool(np, "snps,mb"); + axi->axi_rb = of_property_read_bool(np, "snps,rb"); if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt)) axi->axi_wr_osr_lmt = 1; diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 51c37e99d086..de112ab3195c 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -133,11 +133,6 @@ NETIF_MSG_IFUP | NETIF_MSG_PROBE | NETIF_MSG_IFDOWN | \ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) -static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common); -static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common); -static void am65_cpsw_nuss_free_tx_chns(struct am65_cpsw_common *common); -static void am65_cpsw_nuss_free_rx_chns(struct am65_cpsw_common *common); - static void am65_cpsw_port_set_sl_mac(struct am65_cpsw_port *slave, const u8 *dev_addr) { @@ -379,20 +374,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) if (common->usage_count) return 0; - /* init tx/rx channels */ - ret = am65_cpsw_nuss_init_tx_chns(common); - if (ret) { - dev_err(common->dev, "init_tx_chns failed\n"); - return ret; - } - - ret = am65_cpsw_nuss_init_rx_chns(common); - if (ret) { - dev_err(common->dev, "init_rx_chns failed\n"); - am65_cpsw_nuss_free_tx_chns(common); - return ret; - } - /* Control register */ writel(AM65_CPSW_CTL_P0_ENABLE | AM65_CPSW_CTL_P0_TX_CRC_REMOVE | AM65_CPSW_CTL_VLAN_AWARE | AM65_CPSW_CTL_P0_RX_PAD, @@ -421,7 +402,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) /* disable priority elevation */ writel(0, common->cpsw_base + AM65_CPSW_REG_PTYPE); - cpsw_ale_control_set(common->ale, 0, ALE_CLEAR, 1); cpsw_ale_start(common->ale); /* limit to one RX flow only */ @@ -453,8 +433,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) GFP_KERNEL); if (!skb) { dev_err(common->dev, "cannot allocate skb\n"); - ret = -ENOMEM; - goto err; + return -ENOMEM; } ret = am65_cpsw_nuss_rx_push(common, skb); @@ -463,7 +442,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) "cannot submit skb to channel rx, error %d\n", ret); kfree_skb(skb); - goto err; + return ret; } kmemleak_not_leak(skb); } @@ -472,7 +451,7 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) for (i = 0; i < common->tx_ch_num; i++) { ret = k3_udma_glue_enable_tx_chn(common->tx_chns[i].tx_chn); if (ret) - goto err; + return ret; napi_enable(&common->tx_chns[i].napi_tx); } @@ -484,12 +463,6 @@ static int am65_cpsw_nuss_common_open(struct am65_cpsw_common *common) dev_dbg(common->dev, "cpsw_nuss started\n"); return 0; - -err: - am65_cpsw_nuss_free_tx_chns(common); - am65_cpsw_nuss_free_rx_chns(common); - - return ret; } static void am65_cpsw_nuss_tx_cleanup(void *data, dma_addr_t desc_dma); @@ -543,9 +516,6 @@ static int am65_cpsw_nuss_common_stop(struct am65_cpsw_common *common) writel(0, common->cpsw_base + AM65_CPSW_REG_CTL); writel(0, common->cpsw_base + AM65_CPSW_REG_STAT_PORT_EN); - am65_cpsw_nuss_free_tx_chns(common); - am65_cpsw_nuss_free_rx_chns(common); - dev_dbg(common->dev, "cpsw_nuss stopped\n"); return 0; } @@ -587,7 +557,6 @@ static int am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) struct am65_cpsw_port *port = am65_ndev_to_port(ndev); int ret, i; u32 reg; - int tmo; ret = pm_runtime_resume_and_get(common->dev); if (ret < 0) @@ -595,19 +564,17 @@ static int 
am65_cpsw_nuss_ndo_slave_open(struct net_device *ndev) /* Idle MAC port */ cpsw_sl_ctl_set(port->slave.mac_sl, CPSW_SL_CTL_CMD_IDLE); - - tmo = cpsw_sl_wait_for_idle(port->slave.mac_sl, 100); - dev_info(common->dev, "down msc_sl %08x tmo %d\n", - cpsw_sl_reg_read(port->slave.mac_sl, CPSW_SL_MACSTATUS), tmo); - + cpsw_sl_wait_for_idle(port->slave.mac_sl, 100); cpsw_sl_ctl_reset(port->slave.mac_sl); /* soft reset MAC */ cpsw_sl_reg_write(port->slave.mac_sl, CPSW_SL_SOFT_RESET, 1); mdelay(1); reg = cpsw_sl_reg_read(port->slave.mac_sl, CPSW_SL_SOFT_RESET); - if (reg) - dev_info(common->dev, "mac reset not yet done\n"); + if (reg) { + dev_err(common->dev, "soft RESET didn't complete\n"); + return -ETIMEDOUT; + } /* Notify the stack of the actual queue counts. */ ret = netif_set_real_num_tx_queues(ndev, common->tx_ch_num); @@ -1495,7 +1462,7 @@ static void am65_cpsw_nuss_mac_link_up(struct phylink_config *config, struct phy if (speed == SPEED_1000) mac_control |= CPSW_SL_CTL_GIG; - if (speed == SPEED_10 && interface == PHY_INTERFACE_MODE_RGMII) + if (speed == SPEED_10 && phy_interface_mode_is_rgmii(interface)) /* Can be used with in band mode only */ mac_control |= CPSW_SL_CTL_EXT_EN; if (speed == SPEED_100 && interface == PHY_INTERFACE_MODE_RMII) @@ -1539,9 +1506,9 @@ static void am65_cpsw_nuss_slave_disable_unused(struct am65_cpsw_port *port) cpsw_sl_ctl_reset(port->slave.mac_sl); } -static void am65_cpsw_nuss_free_tx_chns(struct am65_cpsw_common *common) +static void am65_cpsw_nuss_free_tx_chns(void *data) { - struct device *dev = common->dev; + struct am65_cpsw_common *common = data; int i; for (i = 0; i < common->tx_ch_num; i++) { @@ -1553,11 +1520,7 @@ static void am65_cpsw_nuss_free_tx_chns(struct am65_cpsw_common *common) if (!IS_ERR_OR_NULL(tx_chn->tx_chn)) k3_udma_glue_release_tx_chn(tx_chn->tx_chn); - /* Don't clear tx_chn memory as we need to preserve - * data between suspend/resume - */ - if (!(tx_chn->irq < 0)) - devm_free_irq(dev, tx_chn->irq, tx_chn); + memset(tx_chn, 0, sizeof(*tx_chn)); } } @@ -1566,10 +1529,12 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common) struct device *dev = common->dev; int i; + devm_remove_action(dev, am65_cpsw_nuss_free_tx_chns, common); + for (i = 0; i < common->tx_ch_num; i++) { struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i]; - if (!(tx_chn->irq < 0)) + if (tx_chn->irq) devm_free_irq(dev, tx_chn->irq, tx_chn); netif_napi_del(&tx_chn->napi_tx); @@ -1584,6 +1549,32 @@ void am65_cpsw_nuss_remove_tx_chns(struct am65_cpsw_common *common) } } +static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common) +{ + struct device *dev = common->dev; + int i, ret = 0; + + for (i = 0; i < common->tx_ch_num; i++) { + struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i]; + + netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx, + am65_cpsw_nuss_tx_poll); + + ret = devm_request_irq(dev, tx_chn->irq, + am65_cpsw_nuss_tx_irq, + IRQF_TRIGGER_HIGH, + tx_chn->tx_chn_name, tx_chn); + if (ret) { + dev_err(dev, "failure requesting tx%u irq %u, %d\n", + tx_chn->id, tx_chn->irq, ret); + goto err; + } + } + +err: + return ret; +} + static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) { u32 max_desc_num = ALIGN(AM65_CPSW_MAX_TX_DESC, MAX_SKB_FRAGS); @@ -1639,7 +1630,7 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) } tx_chn->irq = k3_udma_glue_tx_get_irq(tx_chn->tx_chn); - if (tx_chn->irq < 0) { + if (tx_chn->irq <= 0) { dev_err(dev, "Failed to get tx dma irq %d\n", tx_chn->irq); goto err; @@ 
-1648,41 +1639,59 @@ static int am65_cpsw_nuss_init_tx_chns(struct am65_cpsw_common *common) snprintf(tx_chn->tx_chn_name, sizeof(tx_chn->tx_chn_name), "%s-tx%d", dev_name(dev), tx_chn->id); - - ret = devm_request_irq(dev, tx_chn->irq, - am65_cpsw_nuss_tx_irq, - IRQF_TRIGGER_HIGH, - tx_chn->tx_chn_name, tx_chn); - if (ret) { - dev_err(dev, "failure requesting tx%u irq %u, %d\n", - tx_chn->id, tx_chn->irq, ret); - tx_chn->irq = -EINVAL; - goto err; - } } - return 0; + ret = am65_cpsw_nuss_ndev_add_tx_napi(common); + if (ret) { + dev_err(dev, "Failed to add tx NAPI %d\n", ret); + goto err; + } err: - am65_cpsw_nuss_free_tx_chns(common); + i = devm_add_action(dev, am65_cpsw_nuss_free_tx_chns, common); + if (i) { + dev_err(dev, "Failed to add free_tx_chns action %d\n", i); + return i; + } return ret; } -static void am65_cpsw_nuss_free_rx_chns(struct am65_cpsw_common *common) +static void am65_cpsw_nuss_free_rx_chns(void *data) { + struct am65_cpsw_common *common = data; struct am65_cpsw_rx_chn *rx_chn; rx_chn = &common->rx_chns; + if (!IS_ERR_OR_NULL(rx_chn->desc_pool)) + k3_cppi_desc_pool_destroy(rx_chn->desc_pool); + + if (!IS_ERR_OR_NULL(rx_chn->rx_chn)) + k3_udma_glue_release_rx_chn(rx_chn->rx_chn); +} + +static void am65_cpsw_nuss_remove_rx_chns(void *data) +{ + struct am65_cpsw_common *common = data; + struct am65_cpsw_rx_chn *rx_chn; + struct device *dev = common->dev; + + rx_chn = &common->rx_chns; + devm_remove_action(dev, am65_cpsw_nuss_free_rx_chns, common); + if (!(rx_chn->irq < 0)) - devm_free_irq(common->dev, rx_chn->irq, common); + devm_free_irq(dev, rx_chn->irq, common); + + netif_napi_del(&common->napi_rx); if (!IS_ERR_OR_NULL(rx_chn->desc_pool)) k3_cppi_desc_pool_destroy(rx_chn->desc_pool); if (!IS_ERR_OR_NULL(rx_chn->rx_chn)) k3_udma_glue_release_rx_chn(rx_chn->rx_chn); + + common->rx_flow_id_base = -1; } static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) @@ -1700,7 +1709,7 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) rx_cfg.swdata_size = AM65_CPSW_NAV_SW_DATA_SIZE; rx_cfg.flow_id_num = AM65_CPSW_MAX_RX_FLOWS; - rx_cfg.flow_id_base = -1; + rx_cfg.flow_id_base = common->rx_flow_id_base; /* init all flows */ rx_chn->dev = dev; @@ -1772,20 +1781,24 @@ static int am65_cpsw_nuss_init_rx_chns(struct am65_cpsw_common *common) } } + netif_napi_add(common->dma_ndev, &common->napi_rx, + am65_cpsw_nuss_rx_poll); + ret = devm_request_irq(dev, rx_chn->irq, am65_cpsw_nuss_rx_irq, IRQF_TRIGGER_HIGH, dev_name(dev), common); if (ret) { dev_err(dev, "failure requesting rx irq %u, %d\n", rx_chn->irq, ret); - rx_chn->irq = -EINVAL; goto err; } - return 0; - err: - am65_cpsw_nuss_free_rx_chns(common); + i = devm_add_action(dev, am65_cpsw_nuss_free_rx_chns, common); + if (i) { + dev_err(dev, "Failed to add free_rx_chns action %d\n", i); + return i; + } return ret; } @@ -2105,26 +2118,9 @@ static int am65_cpsw_nuss_init_ndevs(struct am65_cpsw_common *common) return ret; } - netif_napi_add(common->dma_ndev, &common->napi_rx, - am65_cpsw_nuss_rx_poll); - return ret; } -static int am65_cpsw_nuss_ndev_add_tx_napi(struct am65_cpsw_common *common) -{ - int i; - - for (i = 0; i < common->tx_ch_num; i++) { - struct am65_cpsw_tx_chn *tx_chn = &common->tx_chns[i]; - - netif_napi_add_tx(common->dma_ndev, &tx_chn->napi_tx, - am65_cpsw_nuss_tx_poll); - } - - return 0; -} - static void am65_cpsw_nuss_cleanup_ndev(struct am65_cpsw_common *common) { struct am65_cpsw_port *port; @@ -2587,7 +2583,11 @@ static int am65_cpsw_nuss_register_ndevs(struct 
am65_cpsw_common *common) struct am65_cpsw_port *port; int ret = 0, i; - ret = am65_cpsw_nuss_ndev_add_tx_napi(common); + /* init tx channels */ + ret = am65_cpsw_nuss_init_tx_chns(common); + if (ret) + return ret; + ret = am65_cpsw_nuss_init_rx_chns(common); if (ret) return ret; @@ -2634,10 +2634,8 @@ int am65_cpsw_nuss_update_tx_chns(struct am65_cpsw_common *common, int num_tx) common->tx_ch_num = num_tx; ret = am65_cpsw_nuss_init_tx_chns(common); - if (ret) - return ret; - return am65_cpsw_nuss_ndev_add_tx_napi(common); + return ret; } struct am65_cpsw_soc_pdata { @@ -2745,6 +2743,7 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev) if (common->port_num < 1 || common->port_num > AM65_CPSW_MAX_PORTS) return -ENOENT; + common->rx_flow_id_base = -1; init_completion(&common->tdown_complete); common->tx_ch_num = 1; common->pf_p0_rx_ptype_rrobin = false; @@ -2878,10 +2877,10 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev) static int am65_cpsw_nuss_suspend(struct device *dev) { struct am65_cpsw_common *common = dev_get_drvdata(dev); + struct am65_cpsw_host *host_p = am65_common_get_host(common); struct am65_cpsw_port *port; struct net_device *ndev; int i, ret; - struct am65_cpsw_host *host_p = am65_common_get_host(common); cpsw_ale_dump(common->ale, common->ale_context); host_p->vid_context = readl(host_p->port_base + AM65_CPSW_PORT_VLAN_REG_OFFSET); @@ -2907,6 +2906,9 @@ static int am65_cpsw_nuss_suspend(struct device *dev) am65_cpts_suspend(common->cpts); + am65_cpsw_nuss_remove_rx_chns(common); + am65_cpsw_nuss_remove_tx_chns(common); + return 0; } @@ -2918,6 +2920,17 @@ static int am65_cpsw_nuss_resume(struct device *dev) int i, ret; struct am65_cpsw_host *host_p = am65_common_get_host(common); + ret = am65_cpsw_nuss_init_tx_chns(common); + if (ret) + return ret; + ret = am65_cpsw_nuss_init_rx_chns(common); + if (ret) + return ret; + + /* If RX IRQ was disabled before suspend, keep it disabled */ + if (common->rx_irq_disabled) + disable_irq(common->rx_chns.irq); + am65_cpts_resume(common->cpts); for (i = 0; i < common->port_num; i++) { diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 450b16ad40a4..e1a569b99e4a 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -885,7 +885,7 @@ static int ca8210_spi_transfer( dev_dbg(&spi->dev, "%s called\n", __func__); - cas_ctl = kmalloc(sizeof(*cas_ctl), GFP_ATOMIC); + cas_ctl = kzalloc(sizeof(*cas_ctl), GFP_ATOMIC); if (!cas_ctl) return -ENOMEM; diff --git a/drivers/net/ieee802154/cc2520.c b/drivers/net/ieee802154/cc2520.c index c69b87d3837d..edc769daad07 100644 --- a/drivers/net/ieee802154/cc2520.c +++ b/drivers/net/ieee802154/cc2520.c @@ -970,7 +970,7 @@ static int cc2520_hw_init(struct cc2520_private *priv) if (timeout-- <= 0) { dev_err(&priv->spi->dev, "oscillator start failed!\n"); - return ret; + return -ETIMEDOUT; } udelay(1); } while (!(status & CC2520_STATUS_XOSC32M_STABLE)); diff --git a/drivers/net/ipa/ipa_sysfs.c b/drivers/net/ipa/ipa_sysfs.c index 5cbc15a971f9..14bd2f903045 100644 --- a/drivers/net/ipa/ipa_sysfs.c +++ b/drivers/net/ipa/ipa_sysfs.c @@ -46,7 +46,7 @@ version_show(struct device *dev, struct device_attribute *attr, char *buf) { struct ipa *ipa = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_version_string(ipa)); + return sysfs_emit(buf, "%s\n", ipa_version_string(ipa)); } static DEVICE_ATTR_RO(version); @@ -70,7 +70,7 @@ static ssize_t rx_offload_show(struct device *dev, { struct ipa *ipa = 
dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa)); + return sysfs_emit(buf, "%s\n", ipa_offload_string(ipa)); } static DEVICE_ATTR_RO(rx_offload); @@ -80,7 +80,7 @@ static ssize_t tx_offload_show(struct device *dev, { struct ipa *ipa = dev_get_drvdata(dev); - return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa)); + return sysfs_emit(buf, "%s\n", ipa_offload_string(ipa)); } static DEVICE_ATTR_RO(tx_offload); diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index d73b9d535b7a..937f5b1f04ff 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3698,6 +3698,7 @@ static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = { [IFLA_MACSEC_SCB] = { .type = NLA_U8 }, [IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 }, [IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 }, + [IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 }, }; static void macsec_free_netdev(struct net_device *dev) diff --git a/drivers/net/mdio/fwnode_mdio.c b/drivers/net/mdio/fwnode_mdio.c index eb344f6d4a7b..b782c35c4ac1 100644 --- a/drivers/net/mdio/fwnode_mdio.c +++ b/drivers/net/mdio/fwnode_mdio.c @@ -98,6 +98,7 @@ int fwnode_mdiobus_phy_device_register(struct mii_bus *mdio, */ rc = phy_device_register(phy); if (rc) { + device_set_node(&phy->mdio.dev, NULL); fwnode_handle_put(child); return rc; } @@ -153,7 +154,8 @@ int fwnode_mdiobus_register_phy(struct mii_bus *bus, /* All data is now stored in the phy struct, so register it */ rc = phy_device_register(phy); if (rc) { - fwnode_handle_put(phy->mdio.dev.fwnode); + phy->mdio.dev.fwnode = NULL; + fwnode_handle_put(child); goto clean_phy; } } else if (is_of_node(child)) { diff --git a/drivers/net/mdio/of_mdio.c b/drivers/net/mdio/of_mdio.c index 796e9c7857d0..510822d6d0d9 100644 --- a/drivers/net/mdio/of_mdio.c +++ b/drivers/net/mdio/of_mdio.c @@ -68,8 +68,9 @@ static int of_mdiobus_register_device(struct mii_bus *mdio, /* All data is now stored in the mdiodev struct; register it. */ rc = mdio_device_register(mdiodev); if (rc) { + device_set_node(&mdiodev->dev, NULL); + fwnode_handle_put(fwnode); mdio_device_free(mdiodev); - of_node_put(child); return rc; } diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index af00cf44cd97..1327290decab 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -47,7 +47,6 @@ config LED_TRIGGER_PHY config FIXED_PHY tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs" - depends on PHYLIB select SWPHY help Adds the platform "fixed" MDIO Bus to cover the boards that use @@ -112,7 +111,6 @@ config BROADCOM_PHY config BCM54140_PHY tristate "Broadcom BCM54140 PHY" - depends on PHYLIB depends on HWMON || HWMON=n select BCM_NET_PHYLIB help @@ -137,7 +135,6 @@ config BCM7XXX_PHY config BCM84881_PHY tristate "Broadcom BCM84881 PHY" - depends on PHYLIB help Support the Broadcom BCM84881 PHY. 
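
Several of the hunks above (sfc, siena and ipa) convert sysfs show() callbacks from scnprintf() to sysfs_emit(). As a rough sketch of that pattern — the driver, attribute and field names below are invented for illustration and are not taken from any of these patches — a converted read-only attribute looks roughly like this::

	/* Hypothetical example of the scnprintf() -> sysfs_emit() conversion. */
	#include <linux/device.h>
	#include <linux/sysfs.h>

	struct example_priv {
		bool logging_enabled;	/* illustrative driver state */
	};

	static ssize_t logging_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		struct example_priv *priv = dev_get_drvdata(dev);

		/*
		 * sysfs_emit() already knows that "buf" is a PAGE_SIZE sysfs
		 * buffer, so the explicit PAGE_SIZE bound passed to
		 * scnprintf() is no longer needed and overflows are caught
		 * centrally.
		 */
		return sysfs_emit(buf, "%d\n", priv->logging_enabled);
	}
	static DEVICE_ATTR_RO(logging);
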
diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index 250742ffdfd9..044828d081d2 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/string.h> #include <linux/unistd.h> +#include <linux/property.h> void mdio_device_free(struct mdio_device *mdiodev) { @@ -30,6 +31,7 @@ EXPORT_SYMBOL(mdio_device_free); static void mdio_device_release(struct device *dev) { + fwnode_handle_put(dev->fwnode); kfree(to_mdio_device(dev)); } diff --git a/drivers/net/phy/mxl-gpy.c b/drivers/net/phy/mxl-gpy.c index 27c0f161623e..147d7a5a9b35 100644 --- a/drivers/net/phy/mxl-gpy.c +++ b/drivers/net/phy/mxl-gpy.c @@ -9,6 +9,7 @@ #include <linux/module.h> #include <linux/bitfield.h> #include <linux/hwmon.h> +#include <linux/mutex.h> #include <linux/phy.h> #include <linux/polynomial.h> #include <linux/netdevice.h> @@ -78,8 +79,16 @@ VSPEC1_SGMII_CTRL_ANRS) /* Temperature sensor */ -#define VPSPEC1_TEMP_STA 0x0E -#define VPSPEC1_TEMP_STA_DATA GENMASK(9, 0) +#define VSPEC1_TEMP_STA 0x0E +#define VSPEC1_TEMP_STA_DATA GENMASK(9, 0) + +/* Mailbox */ +#define VSPEC1_MBOX_DATA 0x5 +#define VSPEC1_MBOX_ADDRLO 0x6 +#define VSPEC1_MBOX_CMD 0x7 +#define VSPEC1_MBOX_CMD_ADDRHI GENMASK(7, 0) +#define VSPEC1_MBOX_CMD_RD (0 << 8) +#define VSPEC1_MBOX_CMD_READY BIT(15) /* WoL */ #define VPSPEC2_WOL_CTL 0x0E06 @@ -88,7 +97,13 @@ #define VPSPEC2_WOL_AD45 0x0E0A #define WOL_EN BIT(0) +/* Internal registers, access via mbox */ +#define REG_GPIO0_OUT 0xd3ce00 + struct gpy_priv { + /* serialize mailbox acesses */ + struct mutex mbox_lock; + u8 fw_major; u8 fw_minor; }; @@ -140,14 +155,14 @@ static int gpy_hwmon_read(struct device *dev, struct phy_device *phydev = dev_get_drvdata(dev); int ret; - ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VPSPEC1_TEMP_STA); + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_TEMP_STA); if (ret < 0) return ret; if (!ret) return -ENODATA; *value = polynomial_calc(&poly_N_to_temp, - FIELD_GET(VPSPEC1_TEMP_STA_DATA, ret)); + FIELD_GET(VSPEC1_TEMP_STA_DATA, ret)); return 0; } @@ -198,6 +213,45 @@ static int gpy_hwmon_register(struct phy_device *phydev) } #endif +static int gpy_mbox_read(struct phy_device *phydev, u32 addr) +{ + struct gpy_priv *priv = phydev->priv; + int val, ret; + u16 cmd; + + mutex_lock(&priv->mbox_lock); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_ADDRLO, + addr); + if (ret) + goto out; + + cmd = VSPEC1_MBOX_CMD_RD; + cmd |= FIELD_PREP(VSPEC1_MBOX_CMD_ADDRHI, addr >> 16); + + ret = phy_write_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_CMD, cmd); + if (ret) + goto out; + + /* The mbox read is used in the interrupt workaround. It was observed + * that a read might take up to 2.5ms. This is also the time for which + * the interrupt line is stuck low. To be on the safe side, poll the + * ready bit for 10ms. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VSPEC1_MBOX_CMD, val, + (val & VSPEC1_MBOX_CMD_READY), + 500, 10000, false); + if (ret) + goto out; + + ret = phy_read_mmd(phydev, MDIO_MMD_VEND1, VSPEC1_MBOX_DATA); + +out: + mutex_unlock(&priv->mbox_lock); + return ret; +} + static int gpy_config_init(struct phy_device *phydev) { int ret; @@ -212,6 +266,13 @@ static int gpy_config_init(struct phy_device *phydev) return ret < 0 ? 
ret : 0; } +static bool gpy_has_broken_mdint(struct phy_device *phydev) +{ + /* At least these PHYs are known to have broken interrupt handling */ + return phydev->drv->phy_id == PHY_ID_GPY215B || + phydev->drv->phy_id == PHY_ID_GPY215C; +} + static int gpy_probe(struct phy_device *phydev) { struct device *dev = &phydev->mdio.dev; @@ -229,6 +290,7 @@ static int gpy_probe(struct phy_device *phydev) if (!priv) return -ENOMEM; phydev->priv = priv; + mutex_init(&priv->mbox_lock); fw_version = phy_read(phydev, PHY_FWV); if (fw_version < 0) @@ -574,6 +636,29 @@ static irqreturn_t gpy_handle_interrupt(struct phy_device *phydev) if (!(reg & PHY_IMASK_MASK)) return IRQ_NONE; + /* The PHY might leave the interrupt line asserted even after PHY_ISTAT + * is read. To avoid interrupt storms, delay the interrupt handling as + * long as the PHY drives the interrupt line. An internal bus read will + * stall as long as the interrupt line is asserted, thus just read a + * random register here. + * Because we cannot access the internal bus at all while the interrupt + * is driven by the PHY, there is no way to make the interrupt line + * unstuck (e.g. by changing the pinmux to GPIO input) during that time + * frame. Therefore, polling is the best we can do and won't do any more + * harm. + * It was observed that this bug happens on link state and link speed + * changes on a GPY215B and GYP215C independent of the firmware version + * (which doesn't mean that this list is exhaustive). + */ + if (gpy_has_broken_mdint(phydev) && + (reg & (PHY_IMASK_LSTC | PHY_IMASK_LSPC))) { + reg = gpy_mbox_read(phydev, REG_GPIO0_OUT); + if (reg < 0) { + phy_error(phydev); + return IRQ_NONE; + } + } + phy_trigger_machine(phydev); return IRQ_HANDLED; diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 39fd1811375c..83b99d95b278 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2642,10 +2642,46 @@ static void sfp_cleanup(void *data) kfree(sfp); } +static int sfp_i2c_get(struct sfp *sfp) +{ + struct acpi_handle *acpi_handle; + struct fwnode_handle *h; + struct i2c_adapter *i2c; + struct device_node *np; + int err; + + h = fwnode_find_reference(dev_fwnode(sfp->dev), "i2c-bus", 0); + if (IS_ERR(h)) { + dev_err(sfp->dev, "missing 'i2c-bus' property\n"); + return -ENODEV; + } + + if (is_acpi_device_node(h)) { + acpi_handle = ACPI_HANDLE_FWNODE(h); + i2c = i2c_acpi_find_adapter_by_handle(acpi_handle); + } else if ((np = to_of_node(h)) != NULL) { + i2c = of_find_i2c_adapter_by_node(np); + } else { + err = -EINVAL; + goto put; + } + + if (!i2c) { + err = -EPROBE_DEFER; + goto put; + } + + err = sfp_i2c_configure(sfp, i2c); + if (err) + i2c_put_adapter(i2c); +put: + fwnode_handle_put(h); + return err; +} + static int sfp_probe(struct platform_device *pdev) { const struct sff_data *sff; - struct i2c_adapter *i2c; char *sfp_irq_name; struct sfp *sfp; int err, i; @@ -2663,51 +2699,20 @@ static int sfp_probe(struct platform_device *pdev) sff = sfp->type = &sfp_data; if (pdev->dev.of_node) { - struct device_node *node = pdev->dev.of_node; const struct of_device_id *id; - struct device_node *np; - id = of_match_node(sfp_of_match, node); + id = of_match_node(sfp_of_match, pdev->dev.of_node); if (WARN_ON(!id)) return -EINVAL; sff = sfp->type = id->data; - - np = of_parse_phandle(node, "i2c-bus", 0); - if (!np) { - dev_err(sfp->dev, "missing 'i2c-bus' property\n"); - return -ENODEV; - } - - i2c = of_find_i2c_adapter_by_node(np); - of_node_put(np); - } else if (has_acpi_companion(&pdev->dev)) { - struct acpi_device 
*adev = ACPI_COMPANION(&pdev->dev); - struct fwnode_handle *fw = acpi_fwnode_handle(adev); - struct fwnode_reference_args args; - struct acpi_handle *acpi_handle; - int ret; - - ret = acpi_node_get_property_reference(fw, "i2c-bus", 0, &args); - if (ret || !is_acpi_device_node(args.fwnode)) { - dev_err(&pdev->dev, "missing 'i2c-bus' property\n"); - return -ENODEV; - } - - acpi_handle = ACPI_HANDLE_FWNODE(args.fwnode); - i2c = i2c_acpi_find_adapter_by_handle(acpi_handle); - } else { + } else if (!has_acpi_companion(&pdev->dev)) { return -EINVAL; } - if (!i2c) - return -EPROBE_DEFER; - - err = sfp_i2c_configure(sfp, i2c); - if (err < 0) { - i2c_put_adapter(i2c); + err = sfp_i2c_get(sfp); + if (err) return err; - } for (i = 0; i < GPIO_MAX; i++) if (sff->gpios & BIT(i)) { diff --git a/drivers/net/plip/plip.c b/drivers/net/plip/plip.c index c8791e9b451d..40ce8abe6999 100644 --- a/drivers/net/plip/plip.c +++ b/drivers/net/plip/plip.c @@ -450,12 +450,12 @@ plip_bh_timeout_error(struct net_device *dev, struct net_local *nl, } rcv->state = PLIP_PK_DONE; if (rcv->skb) { - kfree_skb(rcv->skb); + dev_kfree_skb_irq(rcv->skb); rcv->skb = NULL; } snd->state = PLIP_PK_DONE; if (snd->skb) { - dev_kfree_skb(snd->skb); + dev_consume_skb_irq(snd->skb); snd->skb = NULL; } spin_unlock_irq(&nl->lock); diff --git a/drivers/net/thunderbolt.c b/drivers/net/thunderbolt.c index 4ed7f5b547e3..990484776f2d 100644 --- a/drivers/net/thunderbolt.c +++ b/drivers/net/thunderbolt.c @@ -914,6 +914,7 @@ static int tbnet_open(struct net_device *dev) eof_mask, tbnet_start_poll, net); if (!ring) { netdev_err(dev, "failed to allocate Rx ring\n"); + tb_xdomain_release_out_hopid(xd, hopid); tb_ring_free(net->tx_ring.ring); net->tx_ring.ring = NULL; return -ENOMEM; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 0fe3773c5bca..743cbf5d662c 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -1350,6 +1350,20 @@ static const struct driver_info ax88772b_info = { .data = FLAG_EEPROM_MAC, }; +static const struct driver_info lxausb_t1l_info = { + .description = "Linux Automation GmbH USB 10Base-T1L", + .bind = ax88772_bind, + .unbind = ax88772_unbind, + .status = asix_status, + .reset = ax88772_reset, + .stop = ax88772_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | + FLAG_MULTI_PACKET, + .rx_fixup = asix_rx_fixup_common, + .tx_fixup = asix_tx_fixup, + .data = FLAG_EEPROM_MAC, +}; + static const struct driver_info ax88178_info = { .description = "ASIX AX88178 USB 2.0 Ethernet", .bind = ax88178_bind, @@ -1538,6 +1552,10 @@ static const struct usb_device_id products [] = { */ USB_DEVICE(0x066b, 0x20f9), .driver_info = (unsigned long) &hg20f9_info, +}, { + // Linux Automation GmbH USB 10Base-T1L + USB_DEVICE(0x33f7, 0x0004), + .driver_info = (unsigned long) &lxausb_t1l_info, }, { }, // END }; diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index d3e7b27eb933..6f1e560fb15c 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -75,8 +75,14 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter) for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_enable_intr(adapter, i); - adapter->shared->devRead.intrConf.intrCtrl &= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl &= + cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl &= cpu_to_le32(~VMXNET3_IC_DISABLE_ALL); + } } @@ 
-85,8 +91,14 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter) { int i; - adapter->shared->devRead.intrConf.intrCtrl |= + if (!VMXNET3_VERSION_GE_6(adapter) || + !adapter->queuesExtEnabled) { + adapter->shared->devRead.intrConf.intrCtrl |= + cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } else { + adapter->shared->devReadExt.intrConfExt.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL); + } for (i = 0; i < adapter->intr.num_intrs; i++) vmxnet3_disable_intr(adapter, i); } @@ -1396,6 +1408,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, }; u32 num_pkts = 0; bool skip_page_frags = false; + bool encap_lro = false; struct Vmxnet3_RxCompDesc *rcd; struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx; u16 segCnt = 0, mss = 0; @@ -1556,13 +1569,18 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, if (VMXNET3_VERSION_GE_2(adapter) && rcd->type == VMXNET3_CDTYPE_RXCOMP_LRO) { struct Vmxnet3_RxCompDescExt *rcdlro; + union Vmxnet3_GenericDesc *gdesc; + rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd; + gdesc = (union Vmxnet3_GenericDesc *)rcd; segCnt = rcdlro->segCnt; WARN_ON_ONCE(segCnt == 0); mss = rcdlro->mss; if (unlikely(segCnt <= 1)) segCnt = 0; + encap_lro = (le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)); } else { segCnt = 0; } @@ -1630,7 +1648,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, vmxnet3_rx_csum(adapter, skb, (union Vmxnet3_GenericDesc *)rcd); skb->protocol = eth_type_trans(skb, adapter->netdev); - if (!rcd->tcp || + if ((!rcd->tcp && !encap_lro) || !(adapter->netdev->features & NETIF_F_LRO)) goto not_lro; @@ -1639,7 +1657,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, SKB_GSO_TCPV4 : SKB_GSO_TCPV6; skb_shinfo(skb)->gso_size = mss; skb_shinfo(skb)->gso_segs = segCnt; - } else if (segCnt != 0 || skb->len > mtu) { + } else if ((segCnt != 0 || skb->len > mtu) && !encap_lro) { u32 hlen; hlen = vmxnet3_get_hdr_len(adapter, skb, @@ -1668,6 +1686,7 @@ not_lro: napi_gro_receive(&rq->napi, skb); ctx->skb = NULL; + encap_lro = false; num_pkts++; } diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 3f8c0845fcca..f795548562f5 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -648,7 +648,7 @@ mt76_dma_wed_setup(struct mt76_dev *dev, struct mt76_queue *q) q->wed_regs = wed->txfree_ring.reg_base; break; case MT76_WED_Q_RX: - ret = mtk_wed_device_rx_ring_setup(wed, ring, q->regs); + ret = mtk_wed_device_rx_ring_setup(wed, ring, q->regs, false); if (!ret) q->wed_regs = wed->rx_ring[ring].reg_base; break; diff --git a/drivers/net/wwan/iosm/iosm_ipc_mux.c b/drivers/net/wwan/iosm/iosm_ipc_mux.c index 9c7a9a2a1f25..fc928b298a98 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_mux.c +++ b/drivers/net/wwan/iosm/iosm_ipc_mux.c @@ -332,6 +332,7 @@ struct iosm_mux *ipc_mux_init(struct ipc_mux_config *mux_cfg, if (!ipc_mux->ul_adb.pp_qlt[i]) { for (j = i - 1; j >= 0; j--) kfree(ipc_mux->ul_adb.pp_qlt[j]); + kfree(ipc_mux); return NULL; } } diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 1545cbee77a4..3dbfc8a6924e 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -386,7 +386,7 @@ int xenvif_dealloc_kthread(void *data); irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread); -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff 
*skb); void xenvif_carrier_on(struct xenvif *vif); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 650fa180220f..f3f2c07423a6 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -254,14 +254,16 @@ xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vif->hash.alg == XEN_NETIF_CTRL_HASH_ALGORITHM_NONE) skb_clear_hash(skb); - xenvif_rx_queue_tail(queue, skb); + if (!xenvif_rx_queue_tail(queue, skb)) + goto drop; + xenvif_kick_thread(queue); return NETDEV_TX_OK; drop: vif->dev->stats.tx_dropped++; - dev_kfree_skb(skb); + dev_kfree_skb_any(skb); return NETDEV_TX_OK; } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 3d2081bbbc86..bf627af723bf 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -332,10 +332,13 @@ static int xenvif_count_requests(struct xenvif_queue *queue, struct xenvif_tx_cb { - u16 pending_idx; + u16 copy_pending_idx[XEN_NETBK_LEGACY_SLOTS_MAX + 1]; + u8 copy_count; }; #define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb) +#define copy_pending_idx(skb, i) (XENVIF_TX_CB(skb)->copy_pending_idx[i]) +#define copy_count(skb) (XENVIF_TX_CB(skb)->copy_count) static inline void xenvif_tx_create_map_op(struct xenvif_queue *queue, u16 pending_idx, @@ -370,31 +373,93 @@ static inline struct sk_buff *xenvif_alloc_skb(unsigned int size) return skb; } -static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *queue, - struct sk_buff *skb, - struct xen_netif_tx_request *txp, - struct gnttab_map_grant_ref *gop, - unsigned int frag_overflow, - struct sk_buff *nskb) +static void xenvif_get_requests(struct xenvif_queue *queue, + struct sk_buff *skb, + struct xen_netif_tx_request *first, + struct xen_netif_tx_request *txfrags, + unsigned *copy_ops, + unsigned *map_ops, + unsigned int frag_overflow, + struct sk_buff *nskb, + unsigned int extra_count, + unsigned int data_len) { struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; - int start; + u16 pending_idx; pending_ring_idx_t index; unsigned int nr_slots; + struct gnttab_copy *cop = queue->tx_copy_ops + *copy_ops; + struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops; + struct xen_netif_tx_request *txp = first; + + nr_slots = shinfo->nr_frags + 1; + + copy_count(skb) = 0; + + /* Create copy ops for exactly data_len bytes into the skb head. */ + __skb_put(skb, data_len); + while (data_len > 0) { + int amount = data_len > txp->size ? txp->size : data_len; + + cop->source.u.ref = txp->gref; + cop->source.domid = queue->vif->domid; + cop->source.offset = txp->offset; + + cop->dest.domid = DOMID_SELF; + cop->dest.offset = (offset_in_page(skb->data + + skb_headlen(skb) - + data_len)) & ~XEN_PAGE_MASK; + cop->dest.u.gmfn = virt_to_gfn(skb->data + skb_headlen(skb) + - data_len); + + cop->len = amount; + cop->flags = GNTCOPY_source_gref; - nr_slots = shinfo->nr_frags; + index = pending_index(queue->pending_cons); + pending_idx = queue->pending_ring[index]; + callback_param(queue, pending_idx).ctx = NULL; + copy_pending_idx(skb, copy_count(skb)) = pending_idx; + copy_count(skb)++; + + cop++; + data_len -= amount; - /* Skip first skb fragment if it is on same page as header fragment. 
*/ - start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); + if (amount == txp->size) { + /* The copy op covered the full tx_request */ + + memcpy(&queue->pending_tx_info[pending_idx].req, + txp, sizeof(*txp)); + queue->pending_tx_info[pending_idx].extra_count = + (txp == first) ? extra_count : 0; + + if (txp == first) + txp = txfrags; + else + txp++; + queue->pending_cons++; + nr_slots--; + } else { + /* The copy op partially covered the tx_request. + * The remainder will be mapped. + */ + txp->offset += amount; + txp->size -= amount; + } + } - for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots; - shinfo->nr_frags++, txp++, gop++) { + for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots; + shinfo->nr_frags++, gop++) { index = pending_index(queue->pending_cons++); pending_idx = queue->pending_ring[index]; - xenvif_tx_create_map_op(queue, pending_idx, txp, 0, gop); + xenvif_tx_create_map_op(queue, pending_idx, txp, + txp == first ? extra_count : 0, gop); frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx); + + if (txp == first) + txp = txfrags; + else + txp++; } if (frag_overflow) { @@ -415,7 +480,8 @@ static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif_queue *que skb_shinfo(skb)->frag_list = nskb; } - return gop; + (*copy_ops) = cop - queue->tx_copy_ops; + (*map_ops) = gop - queue->tx_map_ops; } static inline void xenvif_grant_handle_set(struct xenvif_queue *queue, @@ -451,7 +517,7 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct gnttab_copy **gopp_copy) { struct gnttab_map_grant_ref *gop_map = *gopp_map; - u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx; + u16 pending_idx; /* This always points to the shinfo of the skb being checked, which * could be either the first or the one on the frag_list */ @@ -462,24 +528,37 @@ static int xenvif_tx_check_gop(struct xenvif_queue *queue, struct skb_shared_info *first_shinfo = NULL; int nr_frags = shinfo->nr_frags; const bool sharedslot = nr_frags && - frag_get_pending_idx(&shinfo->frags[0]) == pending_idx; - int i, err; + frag_get_pending_idx(&shinfo->frags[0]) == + copy_pending_idx(skb, copy_count(skb) - 1); + int i, err = 0; - /* Check status of header. */ - err = (*gopp_copy)->status; - if (unlikely(err)) { - if (net_ratelimit()) - netdev_dbg(queue->vif->dev, - "Grant copy of header failed! status: %d pending_idx: %u ref: %u\n", - (*gopp_copy)->status, - pending_idx, - (*gopp_copy)->source.u.ref); - /* The first frag might still have this slot mapped */ - if (!sharedslot) - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_ERROR); + for (i = 0; i < copy_count(skb); i++) { + int newerr; + + /* Check status of header. */ + pending_idx = copy_pending_idx(skb, i); + + newerr = (*gopp_copy)->status; + if (likely(!newerr)) { + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_OKAY); + } else { + err = newerr; + if (net_ratelimit()) + netdev_dbg(queue->vif->dev, + "Grant copy of header failed! 
status: %d pending_idx: %u ref: %u\n", + (*gopp_copy)->status, + pending_idx, + (*gopp_copy)->source.u.ref); + /* The first frag might still have this slot mapped */ + if (i < copy_count(skb) - 1 || !sharedslot) + xenvif_idx_release(queue, pending_idx, + XEN_NETIF_RSP_ERROR); + } + (*gopp_copy)++; } - (*gopp_copy)++; check_frags: for (i = 0; i < nr_frags; i++, gop_map++) { @@ -526,14 +605,6 @@ check_frags: if (err) continue; - /* First error: if the header haven't shared a slot with the - * first frag, release it as well. - */ - if (!sharedslot) - xenvif_idx_release(queue, - XENVIF_TX_CB(skb)->pending_idx, - XEN_NETIF_RSP_OKAY); - /* Invalidate preceding fragments of this skb. */ for (j = 0; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); @@ -803,7 +874,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, unsigned *copy_ops, unsigned *map_ops) { - struct gnttab_map_grant_ref *gop = queue->tx_map_ops; struct sk_buff *skb, *nskb; int ret; unsigned int frag_overflow; @@ -885,8 +955,12 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, continue; } + data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN) ? + XEN_NETBACK_TX_COPY_LEN : txreq.size; + ret = xenvif_count_requests(queue, &txreq, extra_count, txfrags, work_to_do); + if (unlikely(ret < 0)) break; @@ -912,9 +986,8 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, index = pending_index(queue->pending_cons); pending_idx = queue->pending_ring[index]; - data_len = (txreq.size > XEN_NETBACK_TX_COPY_LEN && - ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? - XEN_NETBACK_TX_COPY_LEN : txreq.size; + if (ret >= XEN_NETBK_LEGACY_SLOTS_MAX - 1 && data_len < txreq.size) + data_len = txreq.size; skb = xenvif_alloc_skb(data_len); if (unlikely(skb == NULL)) { @@ -925,8 +998,6 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, } skb_shinfo(skb)->nr_frags = ret; - if (data_len < txreq.size) - skb_shinfo(skb)->nr_frags++; /* At this point shinfo->nr_frags is in fact the number of * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. 
*/ @@ -988,54 +1059,19 @@ static void xenvif_tx_build_gops(struct xenvif_queue *queue, type); } - XENVIF_TX_CB(skb)->pending_idx = pending_idx; - - __skb_put(skb, data_len); - queue->tx_copy_ops[*copy_ops].source.u.ref = txreq.gref; - queue->tx_copy_ops[*copy_ops].source.domid = queue->vif->domid; - queue->tx_copy_ops[*copy_ops].source.offset = txreq.offset; - - queue->tx_copy_ops[*copy_ops].dest.u.gmfn = - virt_to_gfn(skb->data); - queue->tx_copy_ops[*copy_ops].dest.domid = DOMID_SELF; - queue->tx_copy_ops[*copy_ops].dest.offset = - offset_in_page(skb->data) & ~XEN_PAGE_MASK; - - queue->tx_copy_ops[*copy_ops].len = data_len; - queue->tx_copy_ops[*copy_ops].flags = GNTCOPY_source_gref; - - (*copy_ops)++; - - if (data_len < txreq.size) { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - pending_idx); - xenvif_tx_create_map_op(queue, pending_idx, &txreq, - extra_count, gop); - gop++; - } else { - frag_set_pending_idx(&skb_shinfo(skb)->frags[0], - INVALID_PENDING_IDX); - memcpy(&queue->pending_tx_info[pending_idx].req, - &txreq, sizeof(txreq)); - queue->pending_tx_info[pending_idx].extra_count = - extra_count; - } - - queue->pending_cons++; - - gop = xenvif_get_requests(queue, skb, txfrags, gop, - frag_overflow, nskb); + xenvif_get_requests(queue, skb, &txreq, txfrags, copy_ops, + map_ops, frag_overflow, nskb, extra_count, + data_len); __skb_queue_tail(&queue->tx_queue, skb); queue->tx.req_cons = idx; - if (((gop-queue->tx_map_ops) >= ARRAY_SIZE(queue->tx_map_ops)) || + if ((*map_ops >= ARRAY_SIZE(queue->tx_map_ops)) || (*copy_ops >= ARRAY_SIZE(queue->tx_copy_ops))) break; } - (*map_ops) = gop - queue->tx_map_ops; return; } @@ -1114,9 +1150,8 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) while ((skb = __skb_dequeue(&queue->tx_queue)) != NULL) { struct xen_netif_tx_request *txp; u16 pending_idx; - unsigned data_len; - pending_idx = XENVIF_TX_CB(skb)->pending_idx; + pending_idx = copy_pending_idx(skb, 0); txp = &queue->pending_tx_info[pending_idx].req; /* Check the remap error code. */ @@ -1135,18 +1170,6 @@ static int xenvif_tx_submit(struct xenvif_queue *queue) continue; } - data_len = skb->len; - callback_param(queue, pending_idx).ctx = NULL; - if (data_len < txp->size) { - /* Append the packet payload as a fragment. */ - txp->offset += data_len; - txp->size -= data_len; - } else { - /* Schedule a response immediately. 
*/ - xenvif_idx_release(queue, pending_idx, - XEN_NETIF_RSP_OKAY); - } - if (txp->flags & XEN_NETTXF_csum_blank) skb->ip_summed = CHECKSUM_PARTIAL; else if (txp->flags & XEN_NETTXF_data_validated) @@ -1332,7 +1355,7 @@ static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) /* Called after netfront has transmitted */ int xenvif_tx_action(struct xenvif_queue *queue, int budget) { - unsigned nr_mops, nr_cops = 0; + unsigned nr_mops = 0, nr_cops = 0; int work_done, ret; if (unlikely(!tx_work_todo(queue))) diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index 932762177110..0ba754ebc5ba 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -82,9 +82,10 @@ static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) return false; } -void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) +bool xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) { unsigned long flags; + bool ret = true; spin_lock_irqsave(&queue->rx_queue.lock, flags); @@ -92,8 +93,7 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); - kfree_skb(skb); - queue->vif->dev->stats.rx_dropped++; + ret = false; } else { if (skb_queue_empty(&queue->rx_queue)) xenvif_update_needed_slots(queue, skb); @@ -104,6 +104,8 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); + + return ret; } static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index ef4e53bf5604..14aec417fa06 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -1862,6 +1862,12 @@ static int netfront_resume(struct xenbus_device *dev) netif_tx_unlock_bh(info->netdev); xennet_disconnect_backend(info); + + rtnl_lock(); + if (info->queues) + xennet_destroy_queues(info); + rtnl_unlock(); + return 0; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index da55ce45ac70..69e333922bea 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4304,7 +4304,7 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ - synchronize_rcu(); + synchronize_srcu(&ns->head->srcu); if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 93e2138a8b42..7e025b8948cb 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -174,11 +174,14 @@ void nvme_mpath_revalidate_paths(struct nvme_ns *ns) struct nvme_ns_head *head = ns->head; sector_t capacity = get_capacity(head->disk); int node; + int srcu_idx; + srcu_idx = srcu_read_lock(&head->srcu); list_for_each_entry_rcu(ns, &head->list, siblings) { if (capacity != get_capacity(ns->disk)) clear_bit(NVME_NS_READY, &ns->flags); } + srcu_read_unlock(&head->srcu, srcu_idx); for_each_node(node) rcu_assign_pointer(head->current_path[node], NULL); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f4335519399d..488ad7dabeb8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -797,6 +797,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); if (bv->bv_len > first_prp_len) cmnd->dptr.prp2 = 
cpu_to_le64(iod->first_dma + first_prp_len); + else + cmnd->dptr.prp2 = 0; return BLK_STS_OK; } diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index 52ecd66ce357..047a8374b4fd 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -436,9 +436,14 @@ static void __intel_gpio_set_direction(void __iomem *padcfg0, bool input) writel(value, padcfg0); } +static int __intel_gpio_get_gpio_mode(u32 value) +{ + return (value & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; +} + static int intel_gpio_get_gpio_mode(void __iomem *padcfg0) { - return (readl(padcfg0) & PADCFG0_PMODE_MASK) >> PADCFG0_PMODE_SHIFT; + return __intel_gpio_get_gpio_mode(readl(padcfg0)); } static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) @@ -1674,6 +1679,7 @@ EXPORT_SYMBOL_GPL(intel_pinctrl_get_soc_data); static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int pin) { const struct pin_desc *pd = pin_desc_get(pctrl->pctldev, pin); + u32 value; if (!pd || !intel_pad_usable(pctrl, pin)) return false; @@ -1688,6 +1694,25 @@ static bool intel_pinctrl_should_save(struct intel_pinctrl *pctrl, unsigned int gpiochip_line_is_irq(&pctrl->chip, intel_pin_to_gpio(pctrl, pin))) return true; + /* + * The firmware on some systems may configure GPIO pins to be + * an interrupt source in so called "direct IRQ" mode. In such + * cases the GPIO controller driver has no idea if those pins + * are being used or not. At the same time, there is a known bug + * in the firmwares that don't restore the pin settings correctly + * after suspend, i.e. by an unknown reason the Rx value becomes + * inverted. + * + * Hence, let's save and restore the pins that are configured + * as GPIOs in the input mode with GPIROUTIOXAPIC bit set. + * + * See https://bugzilla.kernel.org/show_bug.cgi?id=214749. 
+ */ + value = readl(intel_get_padcfg(pctrl, pin, PADCFG0)); + if ((value & PADCFG0_GPIROUTIOXAPIC) && (value & PADCFG0_GPIOTXDIS) && + (__intel_gpio_get_gpio_mode(value) == PADCFG0_PMODE_GPIO)) + return true; + return false; } diff --git a/drivers/pinctrl/mediatek/mtk-eint.c b/drivers/pinctrl/mediatek/mtk-eint.c index 65d312967619..27f0a54e12bf 100644 --- a/drivers/pinctrl/mediatek/mtk-eint.c +++ b/drivers/pinctrl/mediatek/mtk-eint.c @@ -303,12 +303,15 @@ static struct irq_chip mtk_eint_irq_chip = { static unsigned int mtk_eint_hw_init(struct mtk_eint *eint) { - void __iomem *reg = eint->base + eint->regs->dom_en; + void __iomem *dom_en = eint->base + eint->regs->dom_en; + void __iomem *mask_set = eint->base + eint->regs->mask_set; unsigned int i; for (i = 0; i < eint->hw->ap_num; i += 32) { - writel(0xffffffff, reg); - reg += 4; + writel(0xffffffff, dom_en); + writel(0xffffffff, mask_set); + dom_en += 4; + mask_set += 4; } return 0; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 67bec7ea0f8b..414ee6bb8ac9 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -727,7 +727,7 @@ static int pcs_allocate_pin_table(struct pcs_device *pcs) mux_bytes = pcs->width / BITS_PER_BYTE; - if (pcs->bits_per_mux) { + if (pcs->bits_per_mux && pcs->fmask) { pcs->bits_per_pin = fls(pcs->fmask); nr_pins = (pcs->size * BITS_PER_BYTE) / pcs->bits_per_pin; } else { diff --git a/drivers/platform/x86/amd/pmc.c b/drivers/platform/x86/amd/pmc.c index ef4ae977b8e0..439d282aafd1 100644 --- a/drivers/platform/x86/amd/pmc.c +++ b/drivers/platform/x86/amd/pmc.c @@ -739,8 +739,14 @@ static void amd_pmc_s2idle_prepare(void) static void amd_pmc_s2idle_check(void) { struct amd_pmc_dev *pdev = &pmc; + struct smu_metrics table; int rc; + /* CZN: Ensure that future s0i3 entry attempts at least 10ms passed */ + if (pdev->cpu_id == AMD_CPU_ID_CZN && !get_metrics_table(pdev, &table) && + table.s0i3_last_entry_status) + usleep_range(10000, 20000); + /* Dump the IdleMask before we add to the STB */ amd_pmc_idlemask_read(pdev, pdev->dev, NULL); diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 9dc935886e9f..c6ded3fdd715 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -758,7 +758,6 @@ static void qeth_l2_br2dev_worker(struct work_struct *work) struct list_head *iter; int err = 0; - kfree(br2dev_event_work); QETH_CARD_TEXT_(card, 4, "b2dw%04lx", event); QETH_CARD_TEXT_(card, 4, "ma%012llx", ether_addr_to_u64(addr)); @@ -815,6 +814,7 @@ unlock: dev_put(brdev); dev_put(lsyncdev); dev_put(dstdev); + kfree(br2dev_event_work); } static int qeth_l2_br2dev_queue_work(struct net_device *brdev, diff --git a/fs/afs/server.c b/fs/afs/server.c index 4981baf97835..b5237206eac3 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -406,7 +406,7 @@ void afs_put_server(struct afs_net *net, struct afs_server *server, if (!server) return; - a = atomic_inc_return(&server->active); + a = atomic_read(&server->active); zero = __refcount_dec_and_test(&server->ref, &r); trace_afs_server(debug_id, r - 1, a, reason); if (unlikely(zero)) diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index 451d8a077e12..bce2492186d0 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -605,6 +605,14 @@ again: set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags); queue = true; } + /* + * We could race with cookie_lru which may set LRU_DISCARD bit + * but has yet to run the cookie state machine. 
If this happens + * and another thread tries to use the cookie, clear LRU_DISCARD + * so we don't end up withdrawing the cookie while in use. + */ + if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) + fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear); break; case FSCACHE_COOKIE_STATE_FAILED: diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 3b55e239705f..9930fa901039 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -111,6 +111,13 @@ static void nilfs_dat_commit_free(struct inode *dat, kunmap_atomic(kaddr); nilfs_dat_commit_entry(dat, req); + + if (unlikely(req->pr_desc_bh == NULL || req->pr_bitmap_bh == NULL)) { + nilfs_error(dat->i_sb, + "state inconsistency probably due to duplicate use of vblocknr = %llu", + (unsigned long long)req->pr_entry_nr); + return; + } nilfs_palloc_commit_free_entry(dat, req); } diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 492dce43236e..cab7cfebf40b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -222,12 +222,16 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table); #define tlb_needs_table_invalidate() (true) #endif +void tlb_remove_table_sync_one(void); + #else #ifdef tlb_needs_table_invalidate #error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE #endif +static inline void tlb_remove_table_sync_one(void) { } + #endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 528bd44b59e2..2b7d077de7ef 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -68,6 +68,7 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index ef4aea3b356e..65a78773dcca 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,6 +210,20 @@ alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct p return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array); } +static inline void warn_if_node_offline(int this_node, gfp_t gfp_mask) +{ + gfp_t warn_gfp = gfp_mask & (__GFP_THISNODE|__GFP_NOWARN); + + if (warn_gfp != (__GFP_THISNODE|__GFP_NOWARN)) + return; + + if (node_online(this_node)) + return; + + pr_warn("%pGg allocation from offline node %d\n", &gfp_mask, this_node); + dump_stack(); +} + /* * Allocate pages, preferring the node given as nid. The node must be valid and * online. For more general interface, see alloc_pages_node(). 
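Editorial note, not part of the patch: the include/linux/gfp.h hunk above adds warn_if_node_offline(), which only complains when the caller passed both __GFP_THISNODE and __GFP_NOWARN yet targeted an offline node, printing the gfp mask and a stack dump instead of the old bare VM_WARN_ON(). A minimal, self-contained sketch of that decision logic follows; the flag values, stub node test, and printf are illustrative stand-ins, not the kernel's definitions.

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical stand-ins for __GFP_THISNODE / __GFP_NOWARN. */
#define XGFP_THISNODE 0x1u
#define XGFP_NOWARN   0x2u

/* Stub for node_online(): pretend only node 0 is online. */
static bool node_online_stub(int nid) { return nid == 0; }

/* Mirrors the structure of warn_if_node_offline() in the hunk above:
 * warn only when both flags are set and the requested node is offline. */
static void warn_if_node_offline_sketch(int nid, unsigned int gfp_mask)
{
    unsigned int warn_gfp = gfp_mask & (XGFP_THISNODE | XGFP_NOWARN);

    if (warn_gfp != (XGFP_THISNODE | XGFP_NOWARN))
        return;
    if (node_online_stub(nid))
        return;

    printf("allocation from offline node %d (mask 0x%x)\n", nid, gfp_mask);
}

int main(void)
{
    warn_if_node_offline_sketch(1, XGFP_THISNODE | XGFP_NOWARN); /* prints the warning */
    warn_if_node_offline_sketch(1, XGFP_THISNODE);               /* returns quietly */
    return 0;
}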
@@ -218,7 +232,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp_mask); return __alloc_pages(gfp_mask, order, nid, NULL); } @@ -227,7 +241,7 @@ static inline struct folio *__folio_alloc_node(gfp_t gfp, unsigned int order, int nid) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON((gfp & __GFP_THISNODE) && !node_online(nid)); + warn_if_node_offline(nid, gfp); return __folio_alloc(gfp, order, nid, NULL); } diff --git a/include/linux/license.h b/include/linux/license.h index ad937f57f2cb..7cce390f120b 100644 --- a/include/linux/license.h +++ b/include/linux/license.h @@ -2,8 +2,6 @@ #ifndef __LICENSE_H #define __LICENSE_H -#include <linux/string.h> - static inline int license_is_gpl_compatible(const char *license) { return (strcmp(license, "GPL") == 0 diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index c7a91981cd5a..ba6958b49a8e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -50,6 +50,7 @@ enum mlx5_flow_destination_type { MLX5_FLOW_DESTINATION_TYPE_PORT, MLX5_FLOW_DESTINATION_TYPE_COUNTER, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM, + MLX5_FLOW_DESTINATION_TYPE_RANGE, }; enum { @@ -143,6 +144,10 @@ enum { MLX5_FLOW_DEST_VPORT_REFORMAT_ID = BIT(1), }; +enum mlx5_flow_dest_range_field { + MLX5_FLOW_DEST_RANGE_FIELD_PKT_LEN = 0, +}; + struct mlx5_flow_destination { enum mlx5_flow_destination_type type; union { @@ -156,6 +161,13 @@ struct mlx5_flow_destination { struct mlx5_pkt_reformat *pkt_reformat; u8 flags; } vport; + struct { + struct mlx5_flow_table *hit_ft; + struct mlx5_flow_table *miss_ft; + enum mlx5_flow_dest_range_field field; + u32 min; + u32 max; + } range; u32 sampler_id; }; }; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 300b56ea5ff4..152d2d7f8743 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -68,6 +68,7 @@ enum { MLX5_SET_HCA_CAP_OP_MOD_ODP = 0x2, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC = 0x3, MLX5_SET_HCA_CAP_OP_MOD_ROCE = 0x4, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2 = 0x20, MLX5_SET_HCA_CAP_OP_MODE_PORT_SELECTION = 0x25, }; @@ -1880,7 +1881,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { }; struct mlx5_ifc_cmd_hca_cap_2_bits { - u8 reserved_at_0[0xa0]; + u8 reserved_at_0[0x80]; + + u8 migratable[0x1]; + u8 reserved_at_81[0x1f]; u8 max_reformat_insert_size[0x8]; u8 max_reformat_insert_offset[0x8]; @@ -6109,6 +6113,38 @@ struct mlx5_ifc_match_definer_format_32_bits { u8 inner_dmac_15_0[0x10]; }; +enum { + MLX5_IFC_DEFINER_FORMAT_ID_SELECT = 61, +}; + +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_UNUSED 0x0 +#define MLX5_IFC_DEFINER_FORMAT_OFFSET_OUTER_ETH_PKT_LEN 0x48 +#define MLX5_IFC_DEFINER_DW_SELECTORS_NUM 9 +#define MLX5_IFC_DEFINER_BYTE_SELECTORS_NUM 8 + +struct mlx5_ifc_match_definer_match_mask_bits { + u8 reserved_at_1c0[5][0x20]; + u8 match_dw_8[0x20]; + u8 match_dw_7[0x20]; + u8 match_dw_6[0x20]; + u8 match_dw_5[0x20]; + u8 match_dw_4[0x20]; + u8 match_dw_3[0x20]; + u8 match_dw_2[0x20]; + u8 match_dw_1[0x20]; + u8 match_dw_0[0x20]; + + u8 match_byte_7[0x8]; + u8 match_byte_6[0x8]; + u8 match_byte_5[0x8]; + u8 match_byte_4[0x8]; + + u8 match_byte_3[0x8]; + u8 match_byte_2[0x8]; + u8 match_byte_1[0x8]; + u8 match_byte_0[0x8]; +}; + struct mlx5_ifc_match_definer_bits { u8 modify_field_select[0x40]; @@ -6117,9 +6153,41 @@ struct mlx5_ifc_match_definer_bits { u8 reserved_at_80[0x10]; u8 
format_id[0x10]; - u8 reserved_at_a0[0x160]; + u8 reserved_at_a0[0x60]; - u8 match_mask[16][0x20]; + u8 format_select_dw3[0x8]; + u8 format_select_dw2[0x8]; + u8 format_select_dw1[0x8]; + u8 format_select_dw0[0x8]; + + u8 format_select_dw7[0x8]; + u8 format_select_dw6[0x8]; + u8 format_select_dw5[0x8]; + u8 format_select_dw4[0x8]; + + u8 reserved_at_100[0x18]; + u8 format_select_dw8[0x8]; + + u8 reserved_at_120[0x20]; + + u8 format_select_byte3[0x8]; + u8 format_select_byte2[0x8]; + u8 format_select_byte1[0x8]; + u8 format_select_byte0[0x8]; + + u8 format_select_byte7[0x8]; + u8 format_select_byte6[0x8]; + u8 format_select_byte5[0x8]; + u8 format_select_byte4[0x8]; + + u8 reserved_at_180[0x40]; + + union { + struct { + u8 match_mask[16][0x20]; + }; + struct mlx5_ifc_match_definer_match_mask_bits match_mask_format; + }; }; struct mlx5_ifc_general_obj_in_cmd_hdr_bits { diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index aad53cb72f17..7f31432f44c2 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -132,4 +132,6 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev); u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev); +int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 function_id, void *out, + u16 opmod); #endif /* __MLX5_VPORT_H__ */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..974ccca609d2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1852,6 +1852,25 @@ static void __maybe_unused show_free_areas(unsigned int flags, nodemask_t *nodem __show_free_areas(flags, nodemask, MAX_NR_ZONES - 1); } +/* + * Parameter block passed down to zap_pte_range in exceptional cases. + */ +struct zap_details { + struct folio *single_folio; /* Locked folio to be unmapped */ + bool even_cows; /* Zap COWed private pages too? */ + zap_flags_t zap_flags; /* Extra flags for zapping */ +}; + +/* + * Whether to drop the pte markers, for example, the uffd-wp information for + * file-backed memory. This should only be specified when we will completely + * drop the page in the mm, either by truncation or unmapping of the vma. By + * default, the flag is not set. + */ +#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) +/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */ +#define ZAP_FLAG_UNMAP ((__force zap_flags_t) BIT(1)) + #ifdef CONFIG_MMU extern bool can_do_mlock(void); #else @@ -1869,6 +1888,8 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size); +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *details); void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); @@ -3467,12 +3488,4 @@ madvise_set_anon_name(struct mm_struct *mm, unsigned long start, } #endif -/* - * Whether to drop the pte markers, for example, the uffd-wp information for - * file-backed memory. This should only be specified when we will completely - * drop the page in the mm, either by truncation or unmapping of the vma. By - * default, the flag is not set. 
- */ -#define ZAP_FLAG_DROP_MARKER ((__force zap_flags_t) BIT(0)) - #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 9c50bc40f8ff..6f7993803ee7 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -451,7 +451,7 @@ static inline bool mmc_ready_for_data(u32 status) #define MMC_SECURE_TRIM1_ARG 0x80000001 #define MMC_SECURE_TRIM2_ARG 0x80008000 #define MMC_SECURE_ARGS 0x80000000 -#define MMC_TRIM_ARGS 0x00008001 +#define MMC_TRIM_OR_DISCARD_ARGS 0x00008003 #define mmc_driver_type_mask(n) (1 << (n)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 29ae964e3b89..2287cb8eb9e4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -78,6 +78,7 @@ struct xdp_buff; void synchronize_net(void); void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); +void netdev_sw_irq_coalesce_default_on(struct net_device *dev); /* Backlog congestion levels */ #define NET_RX_SUCCESS 0 /* keep 'em coming, baby */ diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index a108b60a6962..5f0d7d0b9471 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -165,6 +165,13 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); } +#ifndef pmd_young +static inline int pmd_young(pmd_t pmd) +{ + return 0; +} +#endif + #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, @@ -260,6 +267,17 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_nonleaf_pmd_young +/* + * Return whether the accessed bit in non-leaf PMD entries is supported on the + * local CPU. + */ +static inline bool arch_has_hw_nonleaf_pmd_young(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG); +} +#endif + #ifndef arch_has_hw_pte_young /* * Return whether the accessed bit is supported on the local CPU. diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 68dab3e08aad..5b5357c0bd8c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -323,29 +323,36 @@ static inline struct rhash_lock_head __rcu **rht_bucket_insert( * When we write to a bucket without unlocking, we use rht_assign_locked(). 
*/ -static inline void rht_lock(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bkt) +static inline unsigned long rht_lock(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bkt) { - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bkt); lock_map_acquire(&tbl->dep_map); + return flags; } -static inline void rht_lock_nested(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bucket, - unsigned int subclass) +static inline unsigned long rht_lock_nested(struct bucket_table *tbl, + struct rhash_lock_head __rcu **bucket, + unsigned int subclass) { - local_bh_disable(); + unsigned long flags; + + local_irq_save(flags); bit_spin_lock(0, (unsigned long *)bucket); lock_acquire_exclusive(&tbl->dep_map, subclass, 0, NULL, _THIS_IP_); + return flags; } static inline void rht_unlock(struct bucket_table *tbl, - struct rhash_lock_head __rcu **bkt) + struct rhash_lock_head __rcu **bkt, + unsigned long flags) { lock_map_release(&tbl->dep_map); bit_spin_unlock(0, (unsigned long *)bkt); - local_bh_enable(); + local_irq_restore(flags); } static inline struct rhash_head *__rht_ptr( @@ -393,7 +400,8 @@ static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, static inline void rht_assign_unlock(struct bucket_table *tbl, struct rhash_lock_head __rcu **bkt, - struct rhash_head *obj) + struct rhash_head *obj, + unsigned long flags) { if (rht_is_a_nulls(obj)) obj = NULL; @@ -401,7 +409,7 @@ static inline void rht_assign_unlock(struct bucket_table *tbl, rcu_assign_pointer(*bkt, (void *)obj); preempt_enable(); __release(bitlock); - local_bh_enable(); + local_irq_restore(flags); } /** @@ -706,6 +714,7 @@ static inline void *__rhashtable_insert_fast( struct rhash_head __rcu **pprev; struct bucket_table *tbl; struct rhash_head *head; + unsigned long flags; unsigned int hash; int elasticity; void *data; @@ -720,11 +729,11 @@ static inline void *__rhashtable_insert_fast( if (!bkt) goto out; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); rcu_read_unlock(); return rhashtable_insert_slow(ht, key, obj); } @@ -756,9 +765,9 @@ slow_path: RCU_INIT_POINTER(list->rhead.next, head); if (pprev) { rcu_assign_pointer(*pprev, obj); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, bkt, obj, flags); data = NULL; goto out; } @@ -785,7 +794,7 @@ slow_path: } atomic_inc(&ht->nelems); - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, bkt, obj, flags); if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); @@ -797,7 +806,7 @@ out: return data; out_unlock: - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); goto out; } @@ -991,6 +1000,7 @@ static inline int __rhashtable_remove_fast_one( struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; + unsigned long flags; unsigned int hash; int err = -ENOENT; @@ -999,7 +1009,7 @@ static inline int __rhashtable_remove_fast_one( if (!bkt) return -ENOENT; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { struct rhlist_head *list; @@ -1043,14 +1053,14 @@ static inline int __rhashtable_remove_fast_one( if (pprev) { rcu_assign_pointer(*pprev, obj); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else { - rht_assign_unlock(tbl, bkt, obj); + rht_assign_unlock(tbl, 
bkt, obj, flags); } goto unlocked; } - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); unlocked: if (err > 0) { atomic_dec(&ht->nelems); @@ -1143,6 +1153,7 @@ static inline int __rhashtable_replace_fast( struct rhash_lock_head __rcu **bkt; struct rhash_head __rcu **pprev; struct rhash_head *he; + unsigned long flags; unsigned int hash; int err = -ENOENT; @@ -1158,7 +1169,7 @@ static inline int __rhashtable_replace_fast( return -ENOENT; pprev = NULL; - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); rht_for_each_from(he, rht_ptr(bkt, tbl, hash), tbl, hash) { if (he != obj_old) { @@ -1169,15 +1180,15 @@ static inline int __rhashtable_replace_fast( rcu_assign_pointer(obj_new->next, obj_old->next); if (pprev) { rcu_assign_pointer(*pprev, obj_new); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } else { - rht_assign_unlock(tbl, bkt, obj_new); + rht_assign_unlock(tbl, bkt, obj_new, flags); } err = 0; goto unlocked; } - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); unlocked: return err; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4e464a27adaf..4c8492401a10 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1255,6 +1255,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb, void skb_attempt_defer_free(struct sk_buff *skb); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); +struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer diff --git a/include/linux/soc/mediatek/mtk_wed.h b/include/linux/soc/mediatek/mtk_wed.h index beb190449704..a0746d4aec20 100644 --- a/include/linux/soc/mediatek/mtk_wed.h +++ b/include/linux/soc/mediatek/mtk_wed.h @@ -160,7 +160,7 @@ struct mtk_wed_ops { int (*tx_ring_setup)(struct mtk_wed_device *dev, int ring, void __iomem *regs, bool reset); int (*rx_ring_setup)(struct mtk_wed_device *dev, int ring, - void __iomem *regs); + void __iomem *regs, bool reset); int (*txfree_ring_setup)(struct mtk_wed_device *dev, void __iomem *regs); int (*msg_update)(struct mtk_wed_device *dev, int cmd_id, @@ -228,8 +228,8 @@ mtk_wed_get_rx_capa(struct mtk_wed_device *dev) (_dev)->ops->irq_get(_dev, _mask) #define mtk_wed_device_irq_set_mask(_dev, _mask) \ (_dev)->ops->irq_set_mask(_dev, _mask) -#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) \ - (_dev)->ops->rx_ring_setup(_dev, _ring, _regs) +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) \ + (_dev)->ops->rx_ring_setup(_dev, _ring, _regs, _reset) #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) \ (_dev)->ops->ppe_check(_dev, _skb, _reason, _hash) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) \ @@ -249,7 +249,7 @@ static inline bool mtk_wed_device_active(struct mtk_wed_device *dev) #define mtk_wed_device_reg_write(_dev, _reg, _val) do {} while (0) #define mtk_wed_device_irq_get(_dev, _mask) 0 #define mtk_wed_device_irq_set_mask(_dev, _mask) do {} while (0) -#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs) -ENODEV +#define mtk_wed_device_rx_ring_setup(_dev, _ring, _regs, _reset) -ENODEV #define mtk_wed_device_ppe_check(_dev, _skb, _reason, _hash) do {} while (0) #define mtk_wed_device_update_msg(_dev, _id, _msg, _len) -ENODEV #define mtk_wed_device_stop(_dev) do {} while (0) diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fb2e88614f5d..83ca2e8eb6b5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -271,5 +271,6 @@ struct plat_stmmacenet_data { int msi_tx_base_vec; bool use_phy_wol; bool sph_disable; + bool serdes_up_after_phy_linkup; 
}; #endif diff --git a/include/net/act_api.h b/include/net/act_api.h index c94ea1a306e0..2a6f443f0ef6 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -101,11 +101,6 @@ static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) return hw_stats; } -#ifdef CONFIG_NET_CLS_ACT - -#define ACT_P_CREATED 1 -#define ACT_P_DELETED 1 - typedef void (*tc_action_priv_destructor)(void *priv); struct tc_action_ops { @@ -140,6 +135,11 @@ struct tc_action_ops { struct netlink_ext_ack *extack); }; +#ifdef CONFIG_NET_CLS_ACT + +#define ACT_P_CREATED 1 +#define ACT_P_DELETED 1 + struct tc_action_net { struct tcf_idrinfo *idrinfo; const struct tc_action_ops *ops; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index b69ca695935c..d5a5ae926380 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -66,10 +66,10 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); -bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *, unsigned long); int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val); +int rxrpc_sock_set_security_keyring(struct sock *, struct key *); #endif /* _NET_RXRPC_H */ diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e004ba04a9ae..684f1cd28730 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -228,6 +228,17 @@ enum { */ HCI_QUIRK_VALID_LE_STATES, + /* When this quirk is set, then erroneous data reporting + * is ignored. This is mainly due to the fact that the HCI + * Read Default Erroneous Data Reporting command is advertised, + * but not supported; these controllers often reply with unknown + * command and tend to lock up randomly. Needing a hard reset. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, + /* * When this quirk is set, then the hci_suspend_notifier is not * registered. 
This is intended for devices which drop completely @@ -1424,7 +1435,6 @@ struct hci_std_codecs_v2 { } __packed; struct hci_vnd_codec_v2 { - __u8 id; __le16 cid; __le16 vid; __u8 transport; diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index e1481f9cf049..d09c393d229f 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -260,6 +260,24 @@ struct ieee802154_addr { }; }; +/** + * struct ieee802154_coord_desc - Coordinator descriptor + * @addr: PAN ID and coordinator address + * @page: page this coordinator is using + * @channel: channel this coordinator is using + * @superframe_spec: SuperFrame specification as received + * @link_quality: link quality indicator at which the beacon was received + * @gts_permit: the coordinator accepts GTS requests + */ +struct ieee802154_coord_desc { + struct ieee802154_addr addr; + u8 page; + u8 channel; + u16 superframe_spec; + u8 link_quality; + bool gts_permit; +}; + struct ieee802154_llsec_key_id { u8 mode; u8 id; diff --git a/include/net/devlink.h b/include/net/devlink.h index 5f6eca5e4a40..0f376a28b9c4 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -1452,6 +1452,45 @@ struct devlink_ops { const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); /** + * @port_fn_roce_get: Port function's roce get function. + * + * Query RoCE state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_roce_set: Port function's roce set function. + * + * Enable/Disable the RoCE state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_set)(struct devlink_port *devlink_port, + bool enable, struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_get: Port function's migratable get function. + * + * Query migratable state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_set: Port function's migratable set function. + * + * Enable/Disable migratable state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_set)(struct devlink_port *devlink_port, + bool enable, + struct netlink_ext_ack *extack); + /** * port_new() - Add a new port function of a specified flavor * @devlink: Devlink instance * @attrs: attributes of the new port diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index 28d0687bf7da..d80c78506f19 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -522,7 +522,14 @@ enum { #define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) -#define GDMA_DRV_CAP_FLAGS1 GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT +/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed, + * so the driver is able to reliably support features like busy_poll. 
+ */ +#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) + +#define GDMA_DRV_CAP_FLAGS1 \ + (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ + GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX) #define GDMA_DRV_CAP_FLAGS2 0 diff --git a/include/net/nl802154.h b/include/net/nl802154.h index f5850b569c52..b79a89d5207c 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -72,6 +72,8 @@ enum nl802154_commands { NL802154_CMD_NEW_SEC_LEVEL, NL802154_CMD_DEL_SEC_LEVEL, + NL802154_CMD_SCAN_EVENT, + /* add new commands above here */ /* used to define NL802154_CMD_MAX below */ @@ -131,6 +133,8 @@ enum nl802154_attrs { NL802154_ATTR_PID, NL802154_ATTR_NETNS_FD, + NL802154_ATTR_COORDINATOR, + /* add attributes here, update the policy in nl802154.c */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -217,6 +221,45 @@ enum nl802154_wpan_phy_capability_attr { }; /** + * enum nl802154_coord - Netlink attributes for a coord + * + * @__NL802154_COORD_INVALID: invalid + * @NL802154_COORD_PANID: PANID of the coordinator (2 bytes) + * @NL802154_COORD_ADDR: coordinator address, (8 bytes or 2 bytes) + * @NL802154_COORD_CHANNEL: channel number, related to @NL802154_COORD_PAGE (u8) + * @NL802154_COORD_PAGE: channel page, related to @NL802154_COORD_CHANNEL (u8) + * @NL802154_COORD_PREAMBLE_CODE: Preamble code used when the beacon was received, + * this is PHY dependent and optional (u8) + * @NL802154_COORD_MEAN_PRF: Mean PRF used when the beacon was received, + * this is PHY dependent and optional (u8) + * @NL802154_COORD_SUPERFRAME_SPEC: superframe specification of the PAN (u16) + * @NL802154_COORD_LINK_QUALITY: signal quality of beacon in unspecified units, + * scaled to 0..255 (u8) + * @NL802154_COORD_GTS_PERMIT: set to true if GTS is permitted on this PAN + * @NL802154_COORD_PAYLOAD_DATA: binary data containing the raw data from the + * frame payload, (only if beacon or probe response had data) + * @NL802154_COORD_PAD: attribute used for padding for 64-bit alignment + * @NL802154_COORD_MAX: highest coordinator attribute + */ +enum nl802154_coord { + __NL802154_COORD_INVALID, + NL802154_COORD_PANID, + NL802154_COORD_ADDR, + NL802154_COORD_CHANNEL, + NL802154_COORD_PAGE, + NL802154_COORD_PREAMBLE_CODE, + NL802154_COORD_MEAN_PRF, + NL802154_COORD_SUPERFRAME_SPEC, + NL802154_COORD_LINK_QUALITY, + NL802154_COORD_GTS_PERMIT, + NL802154_COORD_PAYLOAD_DATA, + NL802154_COORD_PAD, + + /* keep last */ + NL802154_COORD_MAX, +}; + +/** * enum nl802154_cca_modes - cca modes * * @__NL802154_CCA_INVALID: cca mode number 0 is reserved diff --git a/include/net/ping.h b/include/net/ping.h index e4ff3911cbf5..9233ad3de0ad 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -16,9 +16,6 @@ #define PING_HTABLE_SIZE 64 #define PING_HTABLE_MASK (PING_HTABLE_SIZE-1) -#define ping_portaddr_for_each_entry(__sk, node, list) \ - hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) - /* * gid_t is either uint or ushort. 
We want to pass it to * proc_dointvec_minmax(), so it must not be larger than MAX_INT diff --git a/include/net/sock.h b/include/net/sock.h index 6d207e7c4ad0..ecea3dcc2217 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -503,10 +503,10 @@ struct sock { #if BITS_PER_LONG==32 seqlock_t sk_stamp_seq; #endif - u16 sk_tsflags; - u8 sk_shutdown; atomic_t sk_tskey; atomic_t sk_zckey; + u32 sk_tsflags; + u8 sk_shutdown; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, @@ -1899,7 +1899,7 @@ static inline void sock_replace_proto(struct sock *sk, struct proto *proto) struct sockcm_cookie { u64 transmit_time; u32 mark; - u16 tsflags; + u32 tsflags; }; static inline void sockcm_init(struct sockcm_cookie *sockc, diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h new file mode 100644 index 000000000000..ceed2fc089ff --- /dev/null +++ b/include/net/tc_wrapper.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_TC_WRAPPER_H +#define __NET_TC_WRAPPER_H + +#include <net/pkt_cls.h> + +#if IS_ENABLED(CONFIG_RETPOLINE) + +#include <linux/cpufeature.h> +#include <linux/static_key.h> +#include <linux/indirect_call_wrapper.h> + +#define TC_INDIRECT_SCOPE + +extern struct static_key_false tc_skip_wrapper; + +/* TC Actions */ +#ifdef CONFIG_NET_CLS_ACT + +#define TC_INDIRECT_ACTION_DECLARE(fname) \ + INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ + const struct tc_action *a, \ + struct tcf_result *res)) + +TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act); +TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act); +TC_INDIRECT_ACTION_DECLARE(tcf_csum_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ct_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act); +TC_INDIRECT_ACTION_DECLARE(tcf_gact_act); +TC_INDIRECT_ACTION_DECLARE(tcf_gate_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ife_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act); +TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act); +TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act); +TC_INDIRECT_ACTION_DECLARE(tcf_nat_act); +TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act); +TC_INDIRECT_ACTION_DECLARE(tcf_police_act); +TC_INDIRECT_ACTION_DECLARE(tcf_sample_act); +TC_INDIRECT_ACTION_DECLARE(tcf_simp_act); +TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act); +TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act); +TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act); +TC_INDIRECT_ACTION_DECLARE(tunnel_key_act); + +static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + if (static_branch_likely(&tc_skip_wrapper)) + goto skip; + +#if IS_BUILTIN(CONFIG_NET_ACT_GACT) + if (a->ops->act == tcf_gact_act) + return tcf_gact_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_MIRRED) + if (a->ops->act == tcf_mirred_act) + return tcf_mirred_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT) + if (a->ops->act == tcf_pedit_act) + return tcf_pedit_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) + if (a->ops->act == tcf_skbedit_act) + return tcf_skbedit_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) + if (a->ops->act == tcf_skbmod_act) + return tcf_skbmod_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_POLICE) + if (a->ops->act == tcf_police_act) + return tcf_police_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_BPF) + if (a->ops->act == tcf_bpf_act) + return tcf_bpf_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) + if (a->ops->act == tcf_connmark_act) + return tcf_connmark_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CSUM) + if (a->ops->act == tcf_csum_act) + return 
tcf_csum_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CT) + if (a->ops->act == tcf_ct_act) + return tcf_ct_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CTINFO) + if (a->ops->act == tcf_ctinfo_act) + return tcf_ctinfo_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_GATE) + if (a->ops->act == tcf_gate_act) + return tcf_gate_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_MPLS) + if (a->ops->act == tcf_mpls_act) + return tcf_mpls_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_NAT) + if (a->ops->act == tcf_nat_act) + return tcf_nat_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) + if (a->ops->act == tunnel_key_act) + return tunnel_key_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_VLAN) + if (a->ops->act == tcf_vlan_act) + return tcf_vlan_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_IFE) + if (a->ops->act == tcf_ife_act) + return tcf_ife_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_IPT) + if (a->ops->act == tcf_ipt_act) + return tcf_ipt_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SIMP) + if (a->ops->act == tcf_simp_act) + return tcf_simp_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE) + if (a->ops->act == tcf_sample_act) + return tcf_sample_act(skb, a, res); +#endif + +skip: + return a->ops->act(skb, a, res); +} + +#endif /* CONFIG_NET_CLS_ACT */ + +/* TC Filters */ +#ifdef CONFIG_NET_CLS + +#define TC_INDIRECT_FILTER_DECLARE(fname) \ + INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ + const struct tcf_proto *tp, \ + struct tcf_result *res)) + +TC_INDIRECT_FILTER_DECLARE(basic_classify); +TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify); +TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify); +TC_INDIRECT_FILTER_DECLARE(fl_classify); +TC_INDIRECT_FILTER_DECLARE(flow_classify); +TC_INDIRECT_FILTER_DECLARE(fw_classify); +TC_INDIRECT_FILTER_DECLARE(mall_classify); +TC_INDIRECT_FILTER_DECLARE(route4_classify); +TC_INDIRECT_FILTER_DECLARE(rsvp_classify); +TC_INDIRECT_FILTER_DECLARE(rsvp6_classify); +TC_INDIRECT_FILTER_DECLARE(tcindex_classify); +TC_INDIRECT_FILTER_DECLARE(u32_classify); + +static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + if (static_branch_likely(&tc_skip_wrapper)) + goto skip; + +#if IS_BUILTIN(CONFIG_NET_CLS_BPF) + if (tp->classify == cls_bpf_classify) + return cls_bpf_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_U32) + if (tp->classify == u32_classify) + return u32_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_FLOWER) + if (tp->classify == fl_classify) + return fl_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_FW) + if (tp->classify == fw_classify) + return fw_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) + if (tp->classify == mall_classify) + return mall_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_BASIC) + if (tp->classify == basic_classify) + return basic_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) + if (tp->classify == cls_cgroup_classify) + return cls_cgroup_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_FLOW) + if (tp->classify == flow_classify) + return flow_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4) + if (tp->classify == route4_classify) + return route4_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_RSVP) + if (tp->classify == rsvp_classify) + return rsvp_classify(skb, tp, res); +#endif +#if 
IS_BUILTIN(CONFIG_NET_CLS_RSVP6) + if (tp->classify == rsvp6_classify) + return rsvp6_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX) + if (tp->classify == tcindex_classify) + return tcindex_classify(skb, tp, res); +#endif + +skip: + return tp->classify(skb, tp, res); +} + +static inline void tc_wrapper_init(void) +{ +#ifdef CONFIG_X86 + if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + static_branch_enable(&tc_skip_wrapper); +#endif +} + +#endif /* CONFIG_NET_CLS */ + +#else + +#define TC_INDIRECT_SCOPE static + +static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + return a->ops->act(skb, a, res); +} + +static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + return tp->classify(skb, tp, res); +} + +static inline void tc_wrapper_init(void) +{ +} + +#endif + +#endif /* __NET_TC_WRAPPER_H */ diff --git a/include/trace/events/fscache.h b/include/trace/events/fscache.h index c078c48a8e6d..a6190aa1b406 100644 --- a/include/trace/events/fscache.h +++ b/include/trace/events/fscache.h @@ -66,6 +66,7 @@ enum fscache_cookie_trace { fscache_cookie_put_work, fscache_cookie_see_active, fscache_cookie_see_lru_discard, + fscache_cookie_see_lru_discard_clear, fscache_cookie_see_lru_do_one, fscache_cookie_see_relinquish, fscache_cookie_see_withdraw, @@ -149,6 +150,7 @@ enum fscache_access_trace { EM(fscache_cookie_put_work, "PQ work ") \ EM(fscache_cookie_see_active, "- activ") \ EM(fscache_cookie_see_lru_discard, "- x-lru") \ + EM(fscache_cookie_see_lru_discard_clear,"- lrudc") \ EM(fscache_cookie_see_lru_do_one, "- lrudo") \ EM(fscache_cookie_see_relinquish, "- x-rlq") \ EM(fscache_cookie_see_withdraw, "- x-wth") \ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index b9886d1df825..049b52e7aa6a 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -16,44 +16,121 @@ /* * Declare tracing information enums and their string mappings for display. 
*/ +#define rxrpc_call_poke_traces \ + EM(rxrpc_call_poke_error, "Error") \ + EM(rxrpc_call_poke_idle, "Idle") \ + EM(rxrpc_call_poke_start, "Start") \ + EM(rxrpc_call_poke_timer, "Timer") \ + E_(rxrpc_call_poke_timer_now, "Timer-now") + #define rxrpc_skb_traces \ - EM(rxrpc_skb_ack, "ACK") \ - EM(rxrpc_skb_cleaned, "CLN") \ - EM(rxrpc_skb_cloned_jumbo, "CLJ") \ - EM(rxrpc_skb_freed, "FRE") \ - EM(rxrpc_skb_got, "GOT") \ - EM(rxrpc_skb_lost, "*L*") \ - EM(rxrpc_skb_new, "NEW") \ - EM(rxrpc_skb_purged, "PUR") \ - EM(rxrpc_skb_received, "RCV") \ - EM(rxrpc_skb_rotated, "ROT") \ - EM(rxrpc_skb_seen, "SEE") \ - EM(rxrpc_skb_unshared, "UNS") \ - E_(rxrpc_skb_unshared_nomem, "US0") + EM(rxrpc_skb_eaten_by_unshare, "ETN unshare ") \ + EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ + EM(rxrpc_skb_get_conn_work, "GET conn-work") \ + EM(rxrpc_skb_get_local_work, "GET locl-work") \ + EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ + EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ + EM(rxrpc_skb_get_to_recvmsg_oos, "GET to-recv-o") \ + EM(rxrpc_skb_new_encap_rcv, "NEW encap-rcv") \ + EM(rxrpc_skb_new_error_report, "NEW error-rpt") \ + EM(rxrpc_skb_new_jumbo_subpacket, "NEW jumbo-sub") \ + EM(rxrpc_skb_new_unshared, "NEW unshared ") \ + EM(rxrpc_skb_put_conn_work, "PUT conn-work") \ + EM(rxrpc_skb_put_error_report, "PUT error-rep") \ + EM(rxrpc_skb_put_input, "PUT input ") \ + EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ + EM(rxrpc_skb_put_purge, "PUT purge ") \ + EM(rxrpc_skb_put_rotate, "PUT rotate ") \ + EM(rxrpc_skb_put_unknown, "PUT unknown ") \ + EM(rxrpc_skb_see_conn_work, "SEE conn-work") \ + EM(rxrpc_skb_see_recvmsg, "SEE recvmsg ") \ + EM(rxrpc_skb_see_reject, "SEE reject ") \ + EM(rxrpc_skb_see_rotate, "SEE rotate ") \ + E_(rxrpc_skb_see_version, "SEE version ") #define rxrpc_local_traces \ - EM(rxrpc_local_got, "GOT") \ - EM(rxrpc_local_new, "NEW") \ - EM(rxrpc_local_processing, "PRO") \ - EM(rxrpc_local_put, "PUT") \ - EM(rxrpc_local_queued, "QUE") \ - E_(rxrpc_local_tx_ack, "TAK") + EM(rxrpc_local_free, "FREE ") \ + EM(rxrpc_local_get_call, "GET call ") \ + EM(rxrpc_local_get_client_conn, "GET conn-cln") \ + EM(rxrpc_local_get_for_use, "GET for-use ") \ + EM(rxrpc_local_get_peer, "GET peer ") \ + EM(rxrpc_local_get_prealloc_conn, "GET conn-pre") \ + EM(rxrpc_local_new, "NEW ") \ + EM(rxrpc_local_put_bind, "PUT bind ") \ + EM(rxrpc_local_put_call, "PUT call ") \ + EM(rxrpc_local_put_for_use, "PUT for-use ") \ + EM(rxrpc_local_put_kill_conn, "PUT conn-kil") \ + EM(rxrpc_local_put_peer, "PUT peer ") \ + EM(rxrpc_local_put_prealloc_conn, "PUT conn-pre") \ + EM(rxrpc_local_put_release_sock, "PUT rel-sock") \ + EM(rxrpc_local_stop, "STOP ") \ + EM(rxrpc_local_stopped, "STOPPED ") \ + EM(rxrpc_local_unuse_bind, "UNU bind ") \ + EM(rxrpc_local_unuse_conn_work, "UNU conn-wrk") \ + EM(rxrpc_local_unuse_peer_keepalive, "UNU peer-kpa") \ + EM(rxrpc_local_unuse_release_sock, "UNU rel-sock") \ + EM(rxrpc_local_use_conn_work, "USE conn-wrk") \ + EM(rxrpc_local_use_lookup, "USE lookup ") \ + E_(rxrpc_local_use_peer_keepalive, "USE peer-kpa") #define rxrpc_peer_traces \ - EM(rxrpc_peer_got, "GOT") \ - EM(rxrpc_peer_new, "NEW") \ - EM(rxrpc_peer_processing, "PRO") \ - E_(rxrpc_peer_put, "PUT") + EM(rxrpc_peer_free, "FREE ") \ + EM(rxrpc_peer_get_accept, "GET accept ") \ + EM(rxrpc_peer_get_activate_call, "GET act-call") \ + EM(rxrpc_peer_get_bundle, "GET bundle ") \ + EM(rxrpc_peer_get_client_conn, "GET cln-conn") \ + EM(rxrpc_peer_get_input, "GET input ") \ + 
EM(rxrpc_peer_get_input_error, "GET inpt-err") \ + EM(rxrpc_peer_get_keepalive, "GET keepaliv") \ + EM(rxrpc_peer_get_lookup_client, "GET look-cln") \ + EM(rxrpc_peer_get_service_conn, "GET srv-conn") \ + EM(rxrpc_peer_new_client, "NEW client ") \ + EM(rxrpc_peer_new_prealloc, "NEW prealloc") \ + EM(rxrpc_peer_put_bundle, "PUT bundle ") \ + EM(rxrpc_peer_put_call, "PUT call ") \ + EM(rxrpc_peer_put_conn, "PUT conn ") \ + EM(rxrpc_peer_put_discard_tmp, "PUT disc-tmp") \ + EM(rxrpc_peer_put_input, "PUT input ") \ + EM(rxrpc_peer_put_input_error, "PUT inpt-err") \ + E_(rxrpc_peer_put_keepalive, "PUT keepaliv") + +#define rxrpc_bundle_traces \ + EM(rxrpc_bundle_free, "FREE ") \ + EM(rxrpc_bundle_get_client_call, "GET clt-call") \ + EM(rxrpc_bundle_get_client_conn, "GET clt-conn") \ + EM(rxrpc_bundle_get_service_conn, "GET svc-conn") \ + EM(rxrpc_bundle_put_conn, "PUT conn ") \ + EM(rxrpc_bundle_put_discard, "PUT discard ") \ + E_(rxrpc_bundle_new, "NEW ") #define rxrpc_conn_traces \ - EM(rxrpc_conn_got, "GOT") \ - EM(rxrpc_conn_new_client, "NWc") \ - EM(rxrpc_conn_new_service, "NWs") \ - EM(rxrpc_conn_put_client, "PTc") \ - EM(rxrpc_conn_put_service, "PTs") \ - EM(rxrpc_conn_queued, "QUE") \ - EM(rxrpc_conn_reap_service, "RPs") \ - E_(rxrpc_conn_seen, "SEE") + EM(rxrpc_conn_free, "FREE ") \ + EM(rxrpc_conn_get_activate_call, "GET act-call") \ + EM(rxrpc_conn_get_call_input, "GET inp-call") \ + EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ + EM(rxrpc_conn_get_idle, "GET idle ") \ + EM(rxrpc_conn_get_poke, "GET poke ") \ + EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ + EM(rxrpc_conn_new_client, "NEW client ") \ + EM(rxrpc_conn_new_service, "NEW service ") \ + EM(rxrpc_conn_put_call, "PUT call ") \ + EM(rxrpc_conn_put_call_input, "PUT inp-call") \ + EM(rxrpc_conn_put_conn_input, "PUT inp-conn") \ + EM(rxrpc_conn_put_discard, "PUT discard ") \ + EM(rxrpc_conn_put_discard_idle, "PUT disc-idl") \ + EM(rxrpc_conn_put_local_dead, "PUT loc-dead") \ + EM(rxrpc_conn_put_noreuse, "PUT noreuse ") \ + EM(rxrpc_conn_put_poke, "PUT poke ") \ + EM(rxrpc_conn_put_service_reaped, "PUT svc-reap") \ + EM(rxrpc_conn_put_unbundle, "PUT unbundle") \ + EM(rxrpc_conn_put_unidle, "PUT unidle ") \ + EM(rxrpc_conn_queue_challenge, "QUE chall ") \ + EM(rxrpc_conn_queue_retry_work, "QUE retry-wk") \ + EM(rxrpc_conn_queue_rx_work, "QUE rx-work ") \ + EM(rxrpc_conn_queue_timer, "QUE timer ") \ + EM(rxrpc_conn_see_new_service_conn, "SEE new-svc ") \ + EM(rxrpc_conn_see_reap_service, "SEE reap-svc") \ + E_(rxrpc_conn_see_work, "SEE work ") #define rxrpc_client_traces \ EM(rxrpc_client_activate_chans, "Activa") \ @@ -71,26 +148,36 @@ E_(rxrpc_client_to_idle, "->Idle") #define rxrpc_call_traces \ - EM(rxrpc_call_connected, "CON") \ - EM(rxrpc_call_error, "*E*") \ - EM(rxrpc_call_got, "GOT") \ - EM(rxrpc_call_got_kernel, "Gke") \ - EM(rxrpc_call_got_timer, "GTM") \ - EM(rxrpc_call_got_tx, "Gtx") \ - EM(rxrpc_call_got_userid, "Gus") \ - EM(rxrpc_call_new_client, "NWc") \ - EM(rxrpc_call_new_service, "NWs") \ - EM(rxrpc_call_put, "PUT") \ - EM(rxrpc_call_put_kernel, "Pke") \ - EM(rxrpc_call_put_noqueue, "PnQ") \ - EM(rxrpc_call_put_notimer, "PnT") \ - EM(rxrpc_call_put_timer, "PTM") \ - EM(rxrpc_call_put_tx, "Ptx") \ - EM(rxrpc_call_put_userid, "Pus") \ - EM(rxrpc_call_queued, "QUE") \ - EM(rxrpc_call_queued_ref, "QUR") \ - EM(rxrpc_call_release, "RLS") \ - E_(rxrpc_call_seen, "SEE") + EM(rxrpc_call_get_input, "GET input ") \ + EM(rxrpc_call_get_kernel_service, "GET krnl-srv") \ + EM(rxrpc_call_get_notify_socket, "GET notify 
") \ + EM(rxrpc_call_get_poke, "GET poke ") \ + EM(rxrpc_call_get_recvmsg, "GET recvmsg ") \ + EM(rxrpc_call_get_release_sock, "GET rel-sock") \ + EM(rxrpc_call_get_sendmsg, "GET sendmsg ") \ + EM(rxrpc_call_get_userid, "GET user-id ") \ + EM(rxrpc_call_new_client, "NEW client ") \ + EM(rxrpc_call_new_prealloc_service, "NEW prealloc") \ + EM(rxrpc_call_put_discard_prealloc, "PUT disc-pre") \ + EM(rxrpc_call_put_discard_error, "PUT disc-err") \ + EM(rxrpc_call_put_input, "PUT input ") \ + EM(rxrpc_call_put_kernel, "PUT kernel ") \ + EM(rxrpc_call_put_poke, "PUT poke ") \ + EM(rxrpc_call_put_recvmsg, "PUT recvmsg ") \ + EM(rxrpc_call_put_release_sock, "PUT rls-sock") \ + EM(rxrpc_call_put_release_sock_tba, "PUT rls-sk-a") \ + EM(rxrpc_call_put_sendmsg, "PUT sendmsg ") \ + EM(rxrpc_call_put_unnotify, "PUT unnotify") \ + EM(rxrpc_call_put_userid_exists, "PUT u-exists") \ + EM(rxrpc_call_see_accept, "SEE accept ") \ + EM(rxrpc_call_see_activate_client, "SEE act-clnt") \ + EM(rxrpc_call_see_connect_failed, "SEE con-fail") \ + EM(rxrpc_call_see_connected, "SEE connect ") \ + EM(rxrpc_call_see_distribute_error, "SEE dist-err") \ + EM(rxrpc_call_see_input, "SEE input ") \ + EM(rxrpc_call_see_release, "SEE release ") \ + EM(rxrpc_call_see_userid_exists, "SEE u-exists") \ + E_(rxrpc_call_see_zap, "SEE zap ") #define rxrpc_txqueue_traces \ EM(rxrpc_txqueue_await_reply, "AWR") \ @@ -179,6 +266,7 @@ EM(rxrpc_propose_ack_respond_to_ping, "Rsp2Png") \ EM(rxrpc_propose_ack_retry_tx, "RetryTx") \ EM(rxrpc_propose_ack_rotate_rx, "RxAck ") \ + EM(rxrpc_propose_ack_rx_idle, "RxIdle ") \ E_(rxrpc_propose_ack_terminal_ack, "ClTerm ") #define rxrpc_congest_modes \ @@ -273,6 +361,7 @@ EM(rxrpc_txbuf_put_rotated, "PUT ROTATED") \ EM(rxrpc_txbuf_put_send_aborted, "PUT SEND-X ") \ EM(rxrpc_txbuf_put_trans, "PUT TRANS ") \ + EM(rxrpc_txbuf_see_out_of_step, "OUT-OF-STEP") \ EM(rxrpc_txbuf_see_send_more, "SEE SEND+ ") \ E_(rxrpc_txbuf_see_unacked, "SEE UNACKED") @@ -287,6 +376,8 @@ #define EM(a, b) a, #define E_(a, b) a +enum rxrpc_bundle_trace { rxrpc_bundle_traces } __mode(byte); +enum rxrpc_call_poke_trace { rxrpc_call_poke_traces } __mode(byte); enum rxrpc_call_trace { rxrpc_call_traces } __mode(byte); enum rxrpc_client_trace { rxrpc_client_traces } __mode(byte); enum rxrpc_congest_change { rxrpc_congest_changes } __mode(byte); @@ -316,6 +407,8 @@ enum rxrpc_txqueue_trace { rxrpc_txqueue_traces } __mode(byte); #define EM(a, b) TRACE_DEFINE_ENUM(a); #define E_(a, b) TRACE_DEFINE_ENUM(a); +rxrpc_bundle_traces; +rxrpc_call_poke_traces; rxrpc_call_traces; rxrpc_client_traces; rxrpc_congest_changes; @@ -345,83 +438,98 @@ rxrpc_txqueue_traces; TRACE_EVENT(rxrpc_local, TP_PROTO(unsigned int local_debug_id, enum rxrpc_local_trace op, - int usage, const void *where), + int ref, int usage), - TP_ARGS(local_debug_id, op, usage, where), + TP_ARGS(local_debug_id, op, ref, usage), TP_STRUCT__entry( __field(unsigned int, local ) __field(int, op ) + __field(int, ref ) __field(int, usage ) - __field(const void *, where ) ), TP_fast_assign( __entry->local = local_debug_id; __entry->op = op; + __entry->ref = ref; __entry->usage = usage; - __entry->where = where; ), - TP_printk("L=%08x %s u=%d sp=%pSR", + TP_printk("L=%08x %s r=%d u=%d", __entry->local, __print_symbolic(__entry->op, rxrpc_local_traces), - __entry->usage, - __entry->where) + __entry->ref, + __entry->usage) ); TRACE_EVENT(rxrpc_peer, - TP_PROTO(unsigned int peer_debug_id, enum rxrpc_peer_trace op, - int usage, const void *where), + TP_PROTO(unsigned int peer_debug_id, 
int ref, enum rxrpc_peer_trace why), - TP_ARGS(peer_debug_id, op, usage, where), + TP_ARGS(peer_debug_id, ref, why), TP_STRUCT__entry( __field(unsigned int, peer ) - __field(int, op ) - __field(int, usage ) - __field(const void *, where ) + __field(int, ref ) + __field(int, why ) ), TP_fast_assign( __entry->peer = peer_debug_id; - __entry->op = op; - __entry->usage = usage; - __entry->where = where; + __entry->ref = ref; + __entry->why = why; ), - TP_printk("P=%08x %s u=%d sp=%pSR", + TP_printk("P=%08x %s r=%d", __entry->peer, - __print_symbolic(__entry->op, rxrpc_peer_traces), - __entry->usage, - __entry->where) + __print_symbolic(__entry->why, rxrpc_peer_traces), + __entry->ref) + ); + +TRACE_EVENT(rxrpc_bundle, + TP_PROTO(unsigned int bundle_debug_id, int ref, enum rxrpc_bundle_trace why), + + TP_ARGS(bundle_debug_id, ref, why), + + TP_STRUCT__entry( + __field(unsigned int, bundle ) + __field(int, ref ) + __field(int, why ) + ), + + TP_fast_assign( + __entry->bundle = bundle_debug_id; + __entry->ref = ref; + __entry->why = why; + ), + + TP_printk("CB=%08x %s r=%d", + __entry->bundle, + __print_symbolic(__entry->why, rxrpc_bundle_traces), + __entry->ref) ); TRACE_EVENT(rxrpc_conn, - TP_PROTO(unsigned int conn_debug_id, enum rxrpc_conn_trace op, - int usage, const void *where), + TP_PROTO(unsigned int conn_debug_id, int ref, enum rxrpc_conn_trace why), - TP_ARGS(conn_debug_id, op, usage, where), + TP_ARGS(conn_debug_id, ref, why), TP_STRUCT__entry( __field(unsigned int, conn ) - __field(int, op ) - __field(int, usage ) - __field(const void *, where ) + __field(int, ref ) + __field(int, why ) ), TP_fast_assign( __entry->conn = conn_debug_id; - __entry->op = op; - __entry->usage = usage; - __entry->where = where; + __entry->ref = ref; + __entry->why = why; ), - TP_printk("C=%08x %s u=%d sp=%pSR", + TP_printk("C=%08x %s r=%d", __entry->conn, - __print_symbolic(__entry->op, rxrpc_conn_traces), - __entry->usage, - __entry->where) + __print_symbolic(__entry->why, rxrpc_conn_traces), + __entry->ref) ); TRACE_EVENT(rxrpc_client, @@ -455,63 +563,57 @@ TRACE_EVENT(rxrpc_client, ); TRACE_EVENT(rxrpc_call, - TP_PROTO(unsigned int call_debug_id, enum rxrpc_call_trace op, - int usage, const void *where, const void *aux), + TP_PROTO(unsigned int call_debug_id, int ref, unsigned long aux, + enum rxrpc_call_trace why), - TP_ARGS(call_debug_id, op, usage, where, aux), + TP_ARGS(call_debug_id, ref, aux, why), TP_STRUCT__entry( __field(unsigned int, call ) - __field(int, op ) - __field(int, usage ) - __field(const void *, where ) - __field(const void *, aux ) + __field(int, ref ) + __field(int, why ) + __field(unsigned long, aux ) ), TP_fast_assign( __entry->call = call_debug_id; - __entry->op = op; - __entry->usage = usage; - __entry->where = where; + __entry->ref = ref; + __entry->why = why; __entry->aux = aux; ), - TP_printk("c=%08x %s u=%d sp=%pSR a=%p", + TP_printk("c=%08x %s r=%d a=%lx", __entry->call, - __print_symbolic(__entry->op, rxrpc_call_traces), - __entry->usage, - __entry->where, + __print_symbolic(__entry->why, rxrpc_call_traces), + __entry->ref, __entry->aux) ); TRACE_EVENT(rxrpc_skb, - TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op, - int usage, int mod_count, const void *where), + TP_PROTO(struct sk_buff *skb, int usage, int mod_count, + enum rxrpc_skb_trace why), - TP_ARGS(skb, op, usage, mod_count, where), + TP_ARGS(skb, usage, mod_count, why), TP_STRUCT__entry( __field(struct sk_buff *, skb ) - __field(enum rxrpc_skb_trace, op ) __field(int, usage ) __field(int, mod_count ) - 
__field(const void *, where ) + __field(enum rxrpc_skb_trace, why ) ), TP_fast_assign( __entry->skb = skb; - __entry->op = op; __entry->usage = usage; __entry->mod_count = mod_count; - __entry->where = where; + __entry->why = why; ), - TP_printk("s=%p Rx %s u=%d m=%d p=%pSR", + TP_printk("s=%p Rx %s u=%d m=%d", __entry->skb, - __print_symbolic(__entry->op, rxrpc_skb_traces), + __print_symbolic(__entry->why, rxrpc_skb_traces), __entry->usage, - __entry->mod_count, - __entry->where) + __entry->mod_count) ); TRACE_EVENT(rxrpc_rx_packet, @@ -623,6 +725,7 @@ TRACE_EVENT(rxrpc_txqueue, __field(rxrpc_seq_t, acks_hard_ack ) __field(rxrpc_seq_t, tx_bottom ) __field(rxrpc_seq_t, tx_top ) + __field(rxrpc_seq_t, tx_prepared ) __field(int, tx_winsize ) ), @@ -632,16 +735,18 @@ TRACE_EVENT(rxrpc_txqueue, __entry->acks_hard_ack = call->acks_hard_ack; __entry->tx_bottom = call->tx_bottom; __entry->tx_top = call->tx_top; + __entry->tx_prepared = call->tx_prepared; __entry->tx_winsize = call->tx_winsize; ), - TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u", + TP_printk("c=%08x %s f=%08x h=%08x n=%u/%u/%u/%u", __entry->call, __print_symbolic(__entry->why, rxrpc_txqueue_traces), __entry->tx_bottom, __entry->acks_hard_ack, __entry->tx_top - __entry->tx_bottom, __entry->tx_top - __entry->acks_hard_ack, + __entry->tx_prepared - __entry->tx_bottom, __entry->tx_winsize) ); @@ -733,6 +838,66 @@ TRACE_EVENT(rxrpc_rx_abort, __entry->abort_code) ); +TRACE_EVENT(rxrpc_rx_challenge, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, + u32 version, u32 nonce, u32 min_level), + + TP_ARGS(conn, serial, version, nonce, min_level), + + TP_STRUCT__entry( + __field(unsigned int, conn ) + __field(rxrpc_serial_t, serial ) + __field(u32, version ) + __field(u32, nonce ) + __field(u32, min_level ) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = serial; + __entry->version = version; + __entry->nonce = nonce; + __entry->min_level = min_level; + ), + + TP_printk("C=%08x CHALLENGE %08x v=%x n=%x ml=%x", + __entry->conn, + __entry->serial, + __entry->version, + __entry->nonce, + __entry->min_level) + ); + +TRACE_EVENT(rxrpc_rx_response, + TP_PROTO(struct rxrpc_connection *conn, rxrpc_serial_t serial, + u32 version, u32 kvno, u32 ticket_len), + + TP_ARGS(conn, serial, version, kvno, ticket_len), + + TP_STRUCT__entry( + __field(unsigned int, conn ) + __field(rxrpc_serial_t, serial ) + __field(u32, version ) + __field(u32, kvno ) + __field(u32, ticket_len ) + ), + + TP_fast_assign( + __entry->conn = conn->debug_id; + __entry->serial = serial; + __entry->version = version; + __entry->kvno = kvno; + __entry->ticket_len = ticket_len; + ), + + TP_printk("C=%08x RESPONSE %08x v=%x kvno=%x tl=%x", + __entry->conn, + __entry->serial, + __entry->version, + __entry->kvno, + __entry->ticket_len) + ); + TRACE_EVENT(rxrpc_rx_rwind_change, TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, u32 rwind, bool wake), @@ -1278,6 +1443,44 @@ TRACE_EVENT(rxrpc_congest, __entry->sum.retrans_timeo ? 
" rTxTo" : "") ); +TRACE_EVENT(rxrpc_reset_cwnd, + TP_PROTO(struct rxrpc_call *call, ktime_t now), + + TP_ARGS(call, now), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(enum rxrpc_congest_mode, mode ) + __field(unsigned short, cwnd ) + __field(unsigned short, extra ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, prepared ) + __field(ktime_t, since_last_tx ) + __field(bool, has_data ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->mode = call->cong_mode; + __entry->cwnd = call->cong_cwnd; + __entry->extra = call->cong_extra; + __entry->hard_ack = call->acks_hard_ack; + __entry->prepared = call->tx_prepared - call->tx_bottom; + __entry->since_last_tx = ktime_sub(now, call->tx_last_sent); + __entry->has_data = !list_empty(&call->tx_sendmsg); + ), + + TP_printk("c=%08x q=%08x %s cw=%u+%u pr=%u tm=%llu d=%u", + __entry->call, + __entry->hard_ack, + __print_symbolic(__entry->mode, rxrpc_congest_modes), + __entry->cwnd, + __entry->extra, + __entry->prepared, + ktime_to_ns(__entry->since_last_tx), + __entry->has_data) + ); + TRACE_EVENT(rxrpc_disconnect_call, TP_PROTO(struct rxrpc_call *call), @@ -1352,6 +1555,7 @@ TRACE_EVENT(rxrpc_connect_call, __field(unsigned long, user_call_ID ) __field(u32, cid ) __field(u32, call_id ) + __field_struct(struct sockaddr_rxrpc, srx ) ), TP_fast_assign( @@ -1359,33 +1563,42 @@ TRACE_EVENT(rxrpc_connect_call, __entry->user_call_ID = call->user_call_ID; __entry->cid = call->cid; __entry->call_id = call->call_id; + __entry->srx = call->dest_srx; ), - TP_printk("c=%08x u=%p %08x:%08x", + TP_printk("c=%08x u=%p %08x:%08x dst=%pISp", __entry->call, (void *)__entry->user_call_ID, __entry->cid, - __entry->call_id) + __entry->call_id, + &__entry->srx.transport) ); TRACE_EVENT(rxrpc_resend, - TP_PROTO(struct rxrpc_call *call), + TP_PROTO(struct rxrpc_call *call, struct sk_buff *ack), - TP_ARGS(call), + TP_ARGS(call, ack), TP_STRUCT__entry( __field(unsigned int, call ) __field(rxrpc_seq_t, seq ) + __field(rxrpc_seq_t, transmitted ) + __field(rxrpc_serial_t, ack_serial ) ), TP_fast_assign( + struct rxrpc_skb_priv *sp = ack ? rxrpc_skb(ack) : NULL; __entry->call = call->debug_id; __entry->seq = call->acks_hard_ack; + __entry->transmitted = call->tx_transmitted; + __entry->ack_serial = sp ? sp->hdr.serial : 0; ), - TP_printk("c=%08x q=%x", + TP_printk("c=%08x r=%x q=%x tq=%x", __entry->call, - __entry->seq) + __entry->ack_serial, + __entry->seq, + __entry->transmitted) ); TRACE_EVENT(rxrpc_rx_icmp, @@ -1586,6 +1799,47 @@ TRACE_EVENT(rxrpc_txbuf, __entry->ref) ); +TRACE_EVENT(rxrpc_poke_call, + TP_PROTO(struct rxrpc_call *call, bool busy, + enum rxrpc_call_poke_trace what), + + TP_ARGS(call, busy, what), + + TP_STRUCT__entry( + __field(unsigned int, call_debug_id ) + __field(bool, busy ) + __field(enum rxrpc_call_poke_trace, what ) + ), + + TP_fast_assign( + __entry->call_debug_id = call->debug_id; + __entry->busy = busy; + __entry->what = what; + ), + + TP_printk("c=%08x %s%s", + __entry->call_debug_id, + __print_symbolic(__entry->what, rxrpc_call_poke_traces), + __entry->busy ? "!" 
: "") + ); + +TRACE_EVENT(rxrpc_call_poked, + TP_PROTO(struct rxrpc_call *call), + + TP_ARGS(call), + + TP_STRUCT__entry( + __field(unsigned int, call_debug_id ) + ), + + TP_fast_assign( + __entry->call_debug_id = call->debug_id; + ), + + TP_printk("c=%08x", + __entry->call_debug_id) + ); + #undef EM #undef E_ #endif /* _TRACE_RXRPC_H */ diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 70191d96af89..3782d4219ac9 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -658,11 +658,24 @@ enum devlink_resource_unit { DEVLINK_RESOURCE_UNIT_ENTRY, }; +enum devlink_port_fn_attr_cap { + DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT, + DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT, + + /* Add new caps above */ + __DEVLINK_PORT_FN_ATTR_CAPS_MAX, +}; + +#define DEVLINK_PORT_FN_CAP_ROCE _BITUL(DEVLINK_PORT_FN_ATTR_CAP_ROCE_BIT) +#define DEVLINK_PORT_FN_CAP_MIGRATABLE \ + _BITUL(DEVLINK_PORT_FN_ATTR_CAP_MIGRATABLE_BIT) + enum devlink_port_function_attr { DEVLINK_PORT_FUNCTION_ATTR_UNSPEC, DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR, /* binary */ DEVLINK_PORT_FN_ATTR_STATE, /* u8 */ DEVLINK_PORT_FN_ATTR_OPSTATE, /* u8 */ + DEVLINK_PORT_FN_ATTR_CAPS, /* bitfield32 */ __DEVLINK_PORT_FUNCTION_ATTR_MAX, DEVLINK_PORT_FUNCTION_ATTR_MAX = __DEVLINK_PORT_FUNCTION_ATTR_MAX - 1 diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index aaf7c6963d61..5799a9db034e 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -51,6 +51,7 @@ enum { ETHTOOL_MSG_MODULE_SET, ETHTOOL_MSG_PSE_GET, ETHTOOL_MSG_PSE_SET, + ETHTOOL_MSG_RSS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -97,6 +98,7 @@ enum { ETHTOOL_MSG_MODULE_GET_REPLY, ETHTOOL_MSG_MODULE_NTF, ETHTOOL_MSG_PSE_GET_REPLY, + ETHTOOL_MSG_RSS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -880,6 +882,18 @@ enum { ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1) }; +enum { + ETHTOOL_A_RSS_UNSPEC, + ETHTOOL_A_RSS_HEADER, + ETHTOOL_A_RSS_CONTEXT, /* u32 */ + ETHTOOL_A_RSS_HFUNC, /* u32 */ + ETHTOOL_A_RSS_INDIR, /* binary */ + ETHTOOL_A_RSS_HKEY, /* binary */ + + __ETHTOOL_A_RSS_CNT, + ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 55501e5e7ac8..a2c66b3d7f0f 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -31,8 +31,9 @@ enum { SOF_TIMESTAMPING_OPT_PKTINFO = (1<<13), SOF_TIMESTAMPING_OPT_TX_SWHW = (1<<14), SOF_TIMESTAMPING_BIND_PHC = (1 << 15), + SOF_TIMESTAMPING_OPT_ID_TCP = (1 << 16), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_BIND_PHC, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_ID_TCP, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 94066f87e9ee..c5d62ee82567 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -277,11 +277,25 @@ enum ovs_vport_attr { OVS_VPORT_ATTR_PAD, OVS_VPORT_ATTR_IFINDEX, OVS_VPORT_ATTR_NETNSID, + OVS_VPORT_ATTR_UPCALL_STATS, __OVS_VPORT_ATTR_MAX }; #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) +/** + * enum ovs_vport_upcall_attr - attributes for %OVS_VPORT_UPCALL* commands + * @OVS_VPORT_UPCALL_SUCCESS: 64-bit upcall success packets. + * @OVS_VPORT_UPCALL_FAIL: 64-bit upcall fail packets. 
+ */ +enum ovs_vport_upcall_attr { + OVS_VPORT_UPCALL_ATTR_SUCCESS, + OVS_VPORT_UPCALL_ATTR_FAIL, + __OVS_VPORT_UPCALL_ATTR_MAX +}; + +#define OVS_VPORT_UPCALL_ATTR_MAX (__OVS_VPORT_UPCALL_ATTR_MAX - 1) + enum { OVS_VXLAN_EXT_UNSPEC, OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ diff --git a/ipc/sem.c b/ipc/sem.c index c8496f98b139..00f88aa01ac5 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -2179,14 +2179,15 @@ long __do_semtimedop(int semid, struct sembuf *sops, * scenarios where we were awakened externally, during the * window between wake_q_add() and wake_up_q(). */ + rcu_read_lock(); error = READ_ONCE(queue.status); if (error != -EINTR) { /* see SEM_BARRIER_2 for purpose/pairing */ smp_acquire__after_ctrl_dep(); + rcu_read_unlock(); goto out; } - rcu_read_lock(); locknum = sem_lock(sma, sops, nsops); if (!ipc_valid_object(&sma->sem_perm)) diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index fd4020835ec6..367b0a42ada9 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -167,7 +167,6 @@ struct cgroup_mgctx { extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; -extern struct file_system_type cgroup_fs_type; /* iterate across the hierarchies */ #define for_each_root(root) \ diff --git a/kernel/events/core.c b/kernel/events/core.c index 7091bbf88ee7..7f04f995c975 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2291,6 +2291,7 @@ event_sched_out(struct perf_event *event, !event->pending_work) { event->pending_work = 1; dec = false; + WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount)); task_work_add(current, &event->pending_task, TWA_RESUME); } if (dec) @@ -2336,6 +2337,7 @@ group_sched_out(struct perf_event *group_event, #define DETACH_GROUP 0x01UL #define DETACH_CHILD 0x02UL +#define DETACH_DEAD 0x04UL /* * Cross CPU call to remove a performance event @@ -2356,12 +2358,20 @@ __perf_remove_from_context(struct perf_event *event, update_cgrp_time_from_cpuctx(cpuctx, false); } + /* + * Ensure event_sched_out() switches to OFF, at the very least + * this avoids raising perf_pending_task() at this time. + */ + if (flags & DETACH_DEAD) + event->pending_disable = 1; event_sched_out(event, cpuctx, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); if (flags & DETACH_CHILD) perf_child_detach(event); list_del_event(event, ctx); + if (flags & DETACH_DEAD) + event->state = PERF_EVENT_STATE_DEAD; if (!ctx->nr_events && ctx->is_active) { if (ctx == &cpuctx->ctx) @@ -5121,9 +5131,7 @@ int perf_event_release_kernel(struct perf_event *event) ctx = perf_event_ctx_lock(event); WARN_ON_ONCE(ctx->parent_ctx); - perf_remove_from_context(event, DETACH_GROUP); - raw_spin_lock_irq(&ctx->lock); /* * Mark this event as STATE_DEAD, there is no external reference to it * anymore. @@ -5135,8 +5143,7 @@ int perf_event_release_kernel(struct perf_event *event) * Thus this guarantees that we will in fact observe and kill _ALL_ * child events. 
*/ - event->state = PERF_EVENT_STATE_DEAD; - raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event, DETACH_GROUP|DETACH_DEAD); perf_event_ctx_unlock(event, ctx); @@ -6577,6 +6584,8 @@ static void perf_pending_task(struct callback_head *head) if (rctx >= 0) perf_swevent_put_recursion_context(rctx); preempt_enable_notrace(); + + put_event(event); } #ifdef CONFIG_GUEST_PERF_EVENTS diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 188c305aeb8b..c6d9dec11b74 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -267,13 +267,14 @@ int proc_dostring(struct ctl_table *table, int write, ppos); } -static size_t proc_skip_spaces(char **buf) +static void proc_skip_spaces(char **buf, size_t *size) { - size_t ret; - char *tmp = skip_spaces(*buf); - ret = tmp - *buf; - *buf = tmp; - return ret; + while (*size) { + if (!isspace(**buf)) + break; + (*size)--; + (*buf)++; + } } static void proc_skip_char(char **buf, size_t *size, const char v) @@ -342,13 +343,12 @@ static int proc_get_long(char **buf, size_t *size, unsigned long *val, bool *neg, const char *perm_tr, unsigned perm_tr_len, char *tr) { - int len; char *p, tmp[TMPBUFLEN]; + ssize_t len = *size; - if (!*size) + if (len <= 0) return -EINVAL; - len = *size; if (len > TMPBUFLEN - 1) len = TMPBUFLEN - 1; @@ -521,7 +521,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, bool neg; if (write) { - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -548,7 +548,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; @@ -590,7 +590,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) { err = -EINVAL; goto out_free; @@ -610,7 +610,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, } if (!err && left) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); out_free: if (err) @@ -1075,7 +1075,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (write) { bool neg; - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (!left) break; @@ -1104,7 +1104,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, if (!write && !first && left && !err) proc_put_char(&buffer, &left, '\n'); if (write && !err) - left -= proc_skip_spaces(&p); + proc_skip_spaces(&p, &left); if (write && first) return err ? : -EINVAL; *lenp -= left; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a1005415f0f4..3638b3424be5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -399,6 +399,7 @@ config FRAME_WARN default 2048 if GCC_PLUGIN_LATENT_ENTROPY default 2048 if PARISC default 1536 if (!64BIT && XTENSA) + default 1280 if KASAN && !64BIT default 1024 if !64BIT default 2048 if 64BIT help @@ -1874,8 +1875,14 @@ config NETDEV_NOTIFIER_ERROR_INJECT If unsure, say N. config FUNCTION_ERROR_INJECTION - def_bool y + bool "Fault-injections of functions" depends on HAVE_FUNCTION_ERROR_INJECTION && KPROBES + help + Add fault injections into various functions that are annotated with + ALLOW_ERROR_INJECTION() in the kernel. BPF may also modify the return + value of theses functions. This is useful to test error paths of code. 
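For readers unfamiliar with the facility this new prompt describes: error injection is opt-in per function via the ALLOW_ERROR_INJECTION() annotation. A hedged kernel-side sketch of what the help text refers to, with the function, structure and buffer size below invented purely for illustration and not taken from this series:

#include <linux/error-injection.h>
#include <linux/slab.h>

#define EXAMPLE_BUF_SIZE 256		/* illustrative only */

struct example_dev {			/* illustrative only */
	void *buf;
};

/* With CONFIG_FUNCTION_ERROR_INJECTION enabled, the fail_function debugfs
 * interface or bpf_override_return() may force this function to return an
 * arbitrary errno instead of running, exercising its callers' error paths.
 */
static noinline int example_alloc_resources(struct example_dev *dev)
{
	dev->buf = kzalloc(EXAMPLE_BUF_SIZE, GFP_KERNEL);
	if (!dev->buf)
		return -ENOMEM;
	return 0;
}
ALLOW_ERROR_INJECTION(example_alloc_resources, ERRNO);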
+ + If unsure, say N config FAULT_INJECTION bool "Fault-injection framework" diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e12bbfb240b8..6ae2ba8e06a2 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -231,6 +231,7 @@ static int rhashtable_rehash_one(struct rhashtable *ht, struct rhash_head *head, *next, *entry; struct rhash_head __rcu **pprev = NULL; unsigned int new_hash; + unsigned long flags; if (new_tbl->nest) goto out; @@ -253,13 +254,14 @@ static int rhashtable_rehash_one(struct rhashtable *ht, new_hash = head_hashfn(ht, new_tbl, entry); - rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], SINGLE_DEPTH_NESTING); + flags = rht_lock_nested(new_tbl, &new_tbl->buckets[new_hash], + SINGLE_DEPTH_NESTING); head = rht_ptr(new_tbl->buckets + new_hash, new_tbl, new_hash); RCU_INIT_POINTER(entry->next, head); - rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry); + rht_assign_unlock(new_tbl, &new_tbl->buckets[new_hash], entry, flags); if (pprev) rcu_assign_pointer(*pprev, next); @@ -276,18 +278,19 @@ static int rhashtable_rehash_chain(struct rhashtable *ht, { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); + unsigned long flags; int err; if (!bkt) return 0; - rht_lock(old_tbl, bkt); + flags = rht_lock(old_tbl, bkt); while (!(err = rhashtable_rehash_one(ht, bkt, old_hash))) ; if (err == -ENOENT) err = 0; - rht_unlock(old_tbl, bkt); + rht_unlock(old_tbl, bkt, flags); return err; } @@ -590,6 +593,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, struct bucket_table *new_tbl; struct bucket_table *tbl; struct rhash_lock_head __rcu **bkt; + unsigned long flags; unsigned int hash; void *data; @@ -607,7 +611,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); data = ERR_PTR(-EAGAIN); } else { - rht_lock(tbl, bkt); + flags = rht_lock(tbl, bkt); data = rhashtable_lookup_one(ht, bkt, tbl, hash, key, obj); new_tbl = rhashtable_insert_one(ht, bkt, tbl, @@ -615,7 +619,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, if (PTR_ERR(new_tbl) != -EEXIST) data = ERR_CAST(new_tbl); - rht_unlock(tbl, bkt); + rht_unlock(tbl, bkt, flags); } } while (!IS_ERR_OR_NULL(new_tbl)); diff --git a/mm/compaction.c b/mm/compaction.c index c51f7f545afe..1f6da31dd9a5 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -985,28 +985,28 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, } /* + * Be careful not to clear PageLRU until after we're + * sure the page is not being freed elsewhere -- the + * page release code relies on it. + */ + if (unlikely(!get_page_unless_zero(page))) + goto isolate_fail; + + /* * Migration will fail if an anonymous page is pinned in memory, * so avoid taking lru_lock and isolating it unnecessarily in an * admittedly racy check. */ mapping = page_mapping(page); - if (!mapping && page_count(page) > page_mapcount(page)) - goto isolate_fail; + if (!mapping && (page_count(page) - 1) > total_mapcount(page)) + goto isolate_fail_put; /* * Only allow to migrate anonymous pages in GFP_NOFS context * because those do not depend on fs locks. */ if (!(cc->gfp_mask & __GFP_FS) && mapping) - goto isolate_fail; - - /* - * Be careful not to clear PageLRU until after we're - * sure the page is not being freed elsewhere -- the - * page release code relies on it. 
- */ - if (unlikely(!get_page_unless_zero(page))) - goto isolate_fail; + goto isolate_fail_put; /* Only take pages on LRU: a check now makes later tests safe */ if (!PageLRU(page)) diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c index 5ce403378c20..07e5f1bdf025 100644 --- a/mm/damon/sysfs.c +++ b/mm/damon/sysfs.c @@ -2283,12 +2283,54 @@ static struct damos *damon_sysfs_mk_scheme( &wmarks); } +static void damon_sysfs_update_scheme(struct damos *scheme, + struct damon_sysfs_scheme *sysfs_scheme) +{ + struct damon_sysfs_access_pattern *access_pattern = + sysfs_scheme->access_pattern; + struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas; + struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights; + struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks; + + scheme->pattern.min_sz_region = access_pattern->sz->min; + scheme->pattern.max_sz_region = access_pattern->sz->max; + scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min; + scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max; + scheme->pattern.min_age_region = access_pattern->age->min; + scheme->pattern.max_age_region = access_pattern->age->max; + + scheme->action = sysfs_scheme->action; + + scheme->quota.ms = sysfs_quotas->ms; + scheme->quota.sz = sysfs_quotas->sz; + scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms; + scheme->quota.weight_sz = sysfs_weights->sz; + scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses; + scheme->quota.weight_age = sysfs_weights->age; + + scheme->wmarks.metric = sysfs_wmarks->metric; + scheme->wmarks.interval = sysfs_wmarks->interval_us; + scheme->wmarks.high = sysfs_wmarks->high; + scheme->wmarks.mid = sysfs_wmarks->mid; + scheme->wmarks.low = sysfs_wmarks->low; +} + static int damon_sysfs_set_schemes(struct damon_ctx *ctx, struct damon_sysfs_schemes *sysfs_schemes) { - int i; + struct damos *scheme, *next; + int i = 0; + + damon_for_each_scheme_safe(scheme, next, ctx) { + if (i < sysfs_schemes->nr) + damon_sysfs_update_scheme(scheme, + sysfs_schemes->schemes_arr[i]); + else + damon_destroy_scheme(scheme); + i++; + } - for (i = 0; i < sysfs_schemes->nr; i++) { + for (; i < sysfs_schemes->nr; i++) { struct damos *scheme, *next; scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f1385c3b6c96..e36ca75311a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5206,17 +5206,22 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb, __unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags); - /* - * Unlock and free the vma lock before releasing i_mmap_rwsem. When - * the vma_lock is freed, this makes the vma ineligible for pmd - * sharing. And, i_mmap_rwsem is required to set up pmd sharing. - * This is important as page tables for this unmapped range will - * be asynchrously deleted. If the page tables are shared, there - * will be issues when accessed by someone else. - */ - __hugetlb_vma_unlock_write_free(vma); - - i_mmap_unlock_write(vma->vm_file->f_mapping); + if (zap_flags & ZAP_FLAG_UNMAP) { /* final unmap */ + /* + * Unlock and free the vma lock before releasing i_mmap_rwsem. + * When the vma_lock is freed, this makes the vma ineligible + * for pmd sharing. And, i_mmap_rwsem is required to set up + * pmd sharing. This is important as page tables for this + * unmapped range will be asynchrously deleted. If the page + * tables are shared, there will be issues when accessed by + * someone else. 
+ */ + __hugetlb_vma_unlock_write_free(vma); + i_mmap_unlock_write(vma->vm_file->f_mapping); + } else { + i_mmap_unlock_write(vma->vm_file->f_mapping); + hugetlb_vma_unlock_write(vma); + } } void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a8d5ef2a77d2..3703a56571c1 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1051,6 +1051,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, _pmd = pmdp_collapse_flush(vma, address, pmd); spin_unlock(pmd_ptl); mmu_notifier_invalidate_range_end(&range); + tlb_remove_table_sync_one(); spin_lock(pte_ptl); result = __collapse_huge_page_isolate(vma, address, pte, cc, @@ -1379,16 +1380,43 @@ static int set_huge_pmd(struct vm_area_struct *vma, unsigned long addr, return SCAN_SUCCEED; } +/* + * A note about locking: + * Trying to take the page table spinlocks would be useless here because those + * are only used to synchronize: + * + * - modifying terminal entries (ones that point to a data page, not to another + * page table) + * - installing *new* non-terminal entries + * + * Instead, we need roughly the same kind of protection as free_pgtables() or + * mm_take_all_locks() (but only for a single VMA): + * The mmap lock together with this VMA's rmap locks covers all paths towards + * the page table entries we're messing with here, except for hardware page + * table walks and lockless_pages_from_mm(). + */ static void collapse_and_free_pmd(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - spinlock_t *ptl; pmd_t pmd; + struct mmu_notifier_range range; mmap_assert_write_locked(mm); - ptl = pmd_lock(vma->vm_mm, pmdp); + if (vma->vm_file) + lockdep_assert_held_write(&vma->vm_file->f_mapping->i_mmap_rwsem); + /* + * All anon_vmas attached to the VMA have the same root and are + * therefore locked by the same lock. + */ + if (vma->anon_vma) + lockdep_assert_held_write(&vma->anon_vma->root->rwsem); + + mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, NULL, mm, addr, + addr + HPAGE_PMD_SIZE); + mmu_notifier_invalidate_range_start(&range); pmd = pmdp_collapse_flush(vma, addr, pmdp); - spin_unlock(ptl); + tlb_remove_table_sync_one(); + mmu_notifier_invalidate_range_end(&range); mm_dec_nr_ptes(mm); page_table_check_pte_clear_range(mm, addr, pmd); pte_free(mm, pmd_pgtable(pmd)); @@ -1439,6 +1467,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; + /* + * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings + * that got written to. Without this, we'd have to also lock the + * anon_vma if one exists. + */ + if (vma->anon_vma) + return SCAN_VMA_CHECK; + /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1472,6 +1508,20 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, goto drop_hpage; } + /* + * We need to lock the mapping so that from here on, only GUP-fast and + * hardware page walks can access the parts of the page tables that + * we're operating on. + * See collapse_and_free_pmd(). 
+ */ + i_mmap_lock_write(vma->vm_file->f_mapping); + + /* + * This spinlock should be unnecessary: Nobody else should be accessing + * the page tables under spinlock protection here, only + * lockless_pages_from_mm() and the hardware page walker can access page + * tables while all the high-level locks are held in write mode. + */ start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); result = SCAN_FAIL; @@ -1526,6 +1576,8 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, /* step 4: remove pte entries */ collapse_and_free_pmd(mm, vma, haddr, pmd); + i_mmap_unlock_write(vma->vm_file->f_mapping); + maybe_install_pmd: /* step 5: install pmd entry */ result = install_pmd @@ -1539,6 +1591,7 @@ drop_hpage: abort: pte_unmap_unlock(start_pte, ptl); + i_mmap_unlock_write(vma->vm_file->f_mapping); goto drop_hpage; } @@ -1595,7 +1648,8 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * An alternative would be drop the check, but check that page * table is clear before calling pmdp_collapse_flush() under * ptl. It has higher chance to recover THP for the VMA, but - * has higher cost too. + * has higher cost too. It would also probably require locking + * the anon_vma. */ if (vma->anon_vma) { result = SCAN_PAGE_ANON; diff --git a/mm/madvise.c b/mm/madvise.c index c7105ec6d08c..b913ba6efc10 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -772,8 +772,8 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, * Application no longer needs these pages. If the pages are dirty, * it's OK to just throw them away. The app will be more careful about * data it wants to keep. Be sure to free swap resources too. The - * zap_page_range call sets things up for shrink_active_list to actually free - * these pages later if no one else has touched them in the meantime, + * zap_page_range_single call sets things up for shrink_active_list to actually + * free these pages later if no one else has touched them in the meantime, * although we could add these pages to a global reuse list for * shrink_active_list to pick up before reclaiming other pages. * @@ -790,7 +790,7 @@ static int madvise_free_single_vma(struct vm_area_struct *vma, static long madvise_dontneed_single_vma(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - zap_page_range(vma, start, end - start); + zap_page_range_single(vma, start, end - start, NULL); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a1a35c12635e..266a1ab05434 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4832,6 +4832,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, unsigned int efd, cfd; struct fd efile; struct fd cfile; + struct dentry *cdentry; const char *name; char *endp; int ret; @@ -4886,6 +4887,16 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, goto out_put_cfile; /* + * The control file must be a regular cgroup1 file. As a regular cgroup + * file can't be renamed, it's safe to access its name afterwards. + */ + cdentry = cfile.file->f_path.dentry; + if (cdentry->d_sb->s_type != &cgroup_fs_type || !d_is_reg(cdentry)) { + ret = -EINVAL; + goto out_put_cfile; + } + + /* * Determine the event callbacks and set them in @event. This used * to be done via struct cftype but cgroup core no longer knows * about these events. The following is crude but the whole thing @@ -4893,7 +4904,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * * DO NOT ADD NEW FILES. 
*/ - name = cfile.file->f_path.dentry->d_name.name; + name = cdentry->d_name.name; if (!strcmp(name, "memory.usage_in_bytes")) { event->register_event = mem_cgroup_usage_register_event; @@ -4917,7 +4928,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, * automatically removed on cgroup destruction but the removal is * asynchronous, so take an extra ref on @css. */ - cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent, + cfile_css = css_tryget_online_from_dir(cdentry->d_parent, &memory_cgrp_subsys); ret = -EINVAL; if (IS_ERR(cfile_css)) diff --git a/mm/memory.c b/mm/memory.c index 8a6d5c823f91..8c8420934d60 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1341,15 +1341,6 @@ copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) return ret; } -/* - * Parameter block passed down to zap_pte_range in exceptional cases. - */ -struct zap_details { - struct folio *single_folio; /* Locked folio to be unmapped */ - bool even_cows; /* Zap COWed private pages too? */ - zap_flags_t zap_flags; /* Extra flags for zapping */ -}; - /* Whether we should zap all COWed (private) pages too */ static inline bool should_zap_cows(struct zap_details *details) { @@ -1720,7 +1711,7 @@ void unmap_vmas(struct mmu_gather *tlb, struct maple_tree *mt, { struct mmu_notifier_range range; struct zap_details details = { - .zap_flags = ZAP_FLAG_DROP_MARKER, + .zap_flags = ZAP_FLAG_DROP_MARKER | ZAP_FLAG_UNMAP, /* Careful - we need to zap private pages too! */ .even_cows = true, }; @@ -1774,19 +1765,27 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, * * The range must fit into one VMA. */ -static void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, +void zap_page_range_single(struct vm_area_struct *vma, unsigned long address, unsigned long size, struct zap_details *details) { + const unsigned long end = address + size; struct mmu_notifier_range range; struct mmu_gather tlb; lru_add_drain(); mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm, - address, address + size); + address, end); + if (is_vm_hugetlb_page(vma)) + adjust_range_if_pmd_sharing_possible(vma, &range.start, + &range.end); tlb_gather_mmu(&tlb, vma->vm_mm); update_hiwater_rss(vma->vm_mm); mmu_notifier_invalidate_range_start(&range); - unmap_single_vma(&tlb, vma, address, range.end, details); + /* + * unmap 'address-end' not 'range.start-range.end' as range + * could have been expanded for hugetlb pmd sharing. + */ + unmap_single_vma(&tlb, vma, address, end, details); mmu_notifier_invalidate_range_end(&range); tlb_finish_mmu(&tlb); } diff --git a/mm/mmap.c b/mm/mmap.c index 74a84eb33b90..a5eb2f175da0 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1779,9 +1779,6 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, */ pgoff = 0; get_area = shmem_get_unmapped_area; - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { - /* Ensures that larger anonymous mappings are THP aligned. 
*/ - get_area = thp_get_unmapped_area; } addr = get_area(file, addr, len, pgoff, flags); diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index add4244e5790..3a2c3f8cad2f 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -153,7 +153,7 @@ static void tlb_remove_table_smp_sync(void *arg) /* Simply deliver the interrupt */ } -static void tlb_remove_table_sync_one(void) +void tlb_remove_table_sync_one(void) { /* * This isn't an RCU grace period and hence the page-tables cannot be @@ -177,8 +177,6 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch) #else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */ -static void tlb_remove_table_sync_one(void) { } - static void tlb_remove_table_free(struct mmu_table_batch *batch) { __tlb_remove_table_free(batch); diff --git a/mm/vmscan.c b/mm/vmscan.c index 026199c047e0..8fcc5fa768c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3987,7 +3987,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area goto next; if (!pmd_trans_huge(pmd[i])) { - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; @@ -4085,14 +4085,14 @@ restart: #endif walk->mm_stats[MM_NONLEAF_TOTAL]++; -#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG - if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (arch_has_hw_nonleaf_pmd_young() && + get_cap(LRU_GEN_NONLEAF_YOUNG)) { if (!pmd_young(val)) continue; walk_pmd_range_locked(pud, addr, vma, args, bitmap, &pos); } -#endif + if (!walk->force_scan && !test_bloom_filter(walk->lruvec, walk->max_seq, pmd + i)) continue; @@ -5392,7 +5392,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) caps |= BIT(LRU_GEN_MM_WALK); - if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG)) caps |= BIT(LRU_GEN_NONLEAF_YOUNG); return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 215af9b3b589..c57d643afb10 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -972,6 +972,7 @@ static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type, hci_dev_lock(hdev); hcon = hci_conn_hash_lookup_le(hdev, addr, *addr_type); hci_dev_unlock(hdev); + hci_dev_put(hdev); if (!hcon) return -ENOENT; diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index dc65974f5adb..1c3c7ff5c3c6 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -737,7 +737,7 @@ static int __init bt_init(void) err = bt_sysfs_init(); if (err < 0) - return err; + goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) @@ -773,6 +773,8 @@ unregister_socket: sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); +cleanup_led: + bt_leds_cleanup(); return err; } diff --git a/net/bluetooth/hci_codec.c b/net/bluetooth/hci_codec.c index 38201532f58e..3cc135bb1d30 100644 --- a/net/bluetooth/hci_codec.c +++ b/net/bluetooth/hci_codec.c @@ -72,9 +72,8 @@ static void hci_read_codec_capabilities(struct hci_dev *hdev, __u8 transport, continue; } - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, - sizeof(*cmd), cmd, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODEC_CAPS, + sizeof(*cmd), cmd, 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read codec capabilities (%ld)", 
PTR_ERR(skb)); @@ -127,8 +126,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) struct hci_op_read_local_codec_caps caps; __u8 i; - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, + 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", @@ -158,7 +157,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) for (i = 0; i < std_codecs->num; i++) { caps.id = std_codecs->codec[i]; caps.direction = 0x00; - hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + hci_read_codec_capabilities(hdev, + LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } skb_pull(skb, flex_array_size(std_codecs, codec, std_codecs->num) @@ -178,7 +178,8 @@ void hci_read_supported_codecs(struct hci_dev *hdev) caps.cid = vnd_codecs->codec[i].cid; caps.vid = vnd_codecs->codec[i].vid; caps.direction = 0x00; - hci_read_codec_capabilities(hdev, LOCAL_CODEC_ACL_MASK, &caps); + hci_read_codec_capabilities(hdev, + LOCAL_CODEC_ACL_MASK | LOCAL_CODEC_SCO_MASK, &caps); } error: @@ -194,8 +195,8 @@ void hci_read_supported_codecs_v2(struct hci_dev *hdev) struct hci_op_read_local_codec_caps caps; __u8 i; - skb = __hci_cmd_sync(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, - HCI_CMD_TIMEOUT); + skb = __hci_cmd_sync_sk(hdev, HCI_OP_READ_LOCAL_CODECS_V2, 0, NULL, + 0, HCI_CMD_TIMEOUT, NULL); if (IS_ERR(skb)) { bt_dev_err(hdev, "Failed to read local supported codecs (%ld)", diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 0540555b3704..d97fac4f7130 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2764,7 +2764,8 @@ int hci_register_suspend_notifier(struct hci_dev *hdev) { int ret = 0; - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + if (!hdev->suspend_notifier.notifier_call && + !test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { hdev->suspend_notifier.notifier_call = hci_suspend_notifier; ret = register_pm_notifier(&hdev->suspend_notifier); } @@ -2776,8 +2777,11 @@ int hci_unregister_suspend_notifier(struct hci_dev *hdev) { int ret = 0; - if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) + if (hdev->suspend_notifier.notifier_call) { ret = unregister_pm_notifier(&hdev->suspend_notifier); + if (!ret) + hdev->suspend_notifier.notifier_call = NULL; + } return ret; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 5a0296a4352e..f7e006a36382 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -269,7 +269,7 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, const void *param) { - bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode); + bt_dev_dbg(req->hdev, "HCI_REQ-0x%4.4x", opcode); hci_req_add_ev(req, opcode, plen, param, 0); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 76c3107c9f91..1fc693122a47 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -12,6 +12,7 @@ #include <net/bluetooth/mgmt.h> #include "hci_request.h" +#include "hci_codec.h" #include "hci_debugfs.h" #include "smp.h" #include "eir.h" @@ -3780,7 +3781,8 @@ static int hci_read_page_scan_activity_sync(struct hci_dev *hdev) static int hci_read_def_err_data_reporting_sync(struct hci_dev *hdev) { if (!(hdev->commands[18] & 0x04) || - !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || + 
test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; return __hci_cmd_sync_status(hdev, HCI_OP_READ_DEF_ERR_DATA_REPORTING, @@ -4238,11 +4240,12 @@ static int hci_set_event_mask_page_2_sync(struct hci_dev *hdev) /* Read local codec list if the HCI command is supported */ static int hci_read_local_codecs_sync(struct hci_dev *hdev) { - if (!(hdev->commands[29] & 0x20)) - return 0; + if (hdev->commands[45] & 0x04) + hci_read_supported_codecs_v2(hdev); + else if (hdev->commands[29] & 0x20) + hci_read_supported_codecs(hdev); - return __hci_cmd_sync_status(hdev, HCI_OP_READ_LOCAL_CODECS, 0, NULL, - HCI_CMD_TIMEOUT); + return 0; } /* Read local pairing options if the HCI command is supported */ @@ -4298,7 +4301,8 @@ static int hci_set_err_data_report_sync(struct hci_dev *hdev) bool enabled = hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED); if (!(hdev->commands[18] & 0x08) || - !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING)) + !(hdev->features[0][6] & LMP_ERR_DATA_REPORTING) || + test_bit(HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, &hdev->quirks)) return 0; if (enabled == hdev->err_data_reporting) @@ -4457,6 +4461,9 @@ static const struct { HCI_QUIRK_BROKEN(STORED_LINK_KEY, "HCI Delete Stored Link Key command is advertised, " "but not supported."), + HCI_QUIRK_BROKEN(ERR_DATA_REPORTING, + "HCI Read Default Erroneous Data Reporting command is " + "advertised, but not supported."), HCI_QUIRK_BROKEN(READ_TRANSMIT_POWER, "HCI Read Transmit Power Level command is advertised, " "but not supported."), diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index f825857db6d0..26db929b97c4 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -879,6 +879,7 @@ static int iso_listen_bis(struct sock *sk) iso_pi(sk)->bc_sid); hci_dev_unlock(hdev); + hci_dev_put(hdev); return err; } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 9c24947aa41e..9fdede5fe71c 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4453,7 +4453,8 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, chan->ident = cmd->ident; l2cap_send_cmd(conn, cmd->ident, L2CAP_CONF_RSP, len, rsp); - chan->num_conf_rsp++; + if (chan->num_conf_rsp < L2CAP_CONF_MAX_CONF_RSP) + chan->num_conf_rsp++; /* Reset config buffer. 
*/ chan->conf_len = 0; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6094ef7cffcd..c9bfd263dcef 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1128,7 +1128,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, } sock_init_data(NULL, sk); - skb = build_skb(data, 0); + skb = slab_build_skb(data); if (!skb) { kfree(data); kfree(ctx); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 321be94c445a..ae7d93c08880 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -754,73 +754,6 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = { sizeof(struct in6_addr)), }; -static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, - struct net_device **pdev, struct br_mdb_entry **pentry, - struct nlattr **mdb_attrs, struct netlink_ext_ack *extack) -{ - struct net *net = sock_net(skb->sk); - struct br_mdb_entry *entry; - struct br_port_msg *bpm; - struct nlattr *tb[MDBA_SET_ENTRY_MAX+1]; - struct net_device *dev; - int err; - - err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, - MDBA_SET_ENTRY_MAX, NULL, NULL); - if (err < 0) - return err; - - bpm = nlmsg_data(nlh); - if (bpm->ifindex == 0) { - NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex"); - return -EINVAL; - } - - dev = __dev_get_by_index(net, bpm->ifindex); - if (dev == NULL) { - NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist"); - return -ENODEV; - } - - if (!netif_is_bridge_master(dev)) { - NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge"); - return -EOPNOTSUPP; - } - - *pdev = dev; - - if (!tb[MDBA_SET_ENTRY]) { - NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute"); - return -EINVAL; - } - if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) { - NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length"); - return -EINVAL; - } - - entry = nla_data(tb[MDBA_SET_ENTRY]); - if (!is_valid_mdb_entry(entry, extack)) - return -EINVAL; - *pentry = entry; - - if (tb[MDBA_SET_ENTRY_ATTRS]) { - err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX, - tb[MDBA_SET_ENTRY_ATTRS], - br_mdbe_attrs_pol, extack); - if (err) - return err; - if (mdb_attrs[MDBE_ATTR_SOURCE] && - !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE], - entry->addr.proto, extack)) - return -EINVAL; - } else { - memset(mdb_attrs, 0, - sizeof(struct nlattr *) * (MDBE_ATTR_MAX + 1)); - } - - return 0; -} - static struct net_bridge_mcast * __br_mdb_choose_context(struct net_bridge *br, const struct br_mdb_entry *entry, @@ -853,44 +786,26 @@ out: return brmctx; } -static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_mdb_entry *entry, - struct nlattr **mdb_attrs, +static int br_mdb_add_group(const struct br_mdb_config *cfg, struct netlink_ext_ack *extack) { struct net_bridge_mdb_entry *mp, *star_mp; struct net_bridge_port_group __rcu **pp; + struct br_mdb_entry *entry = cfg->entry; + struct net_bridge_port *port = cfg->p; + struct net_bridge *br = cfg->br; struct net_bridge_port_group *p; struct net_bridge_mcast *brmctx; - struct br_ip group, star_group; + struct br_ip group = cfg->group; unsigned long now = jiffies; unsigned char flags = 0; + struct br_ip star_group; u8 filter_mode; - __mdb_entry_to_br_ip(entry, &group, mdb_attrs); - brmctx = __br_mdb_choose_context(br, entry, extack); if (!brmctx) return -EINVAL; - /* host join errors which can happen before creating the group */ - if (!port && !br_group_is_l2(&group)) { - /* don't allow any flags for host-joined IP groups */ - if (entry->state) { - 
NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); - return -EINVAL; - } - if (!br_multicast_is_star_g(&group)) { - NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined"); - return -EINVAL; - } - } - - if (br_group_is_l2(&group) && entry->state != MDB_PERMANENT) { - NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed"); - return -EINVAL; - } - mp = br_multicast_new_group(br, &group); if (IS_ERR(mp)) return PTR_ERR(mp); @@ -959,107 +874,197 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return 0; } -static int __br_mdb_add(struct net *net, struct net_bridge *br, - struct net_bridge_port *p, - struct br_mdb_entry *entry, - struct nlattr **mdb_attrs, +static int __br_mdb_add(const struct br_mdb_config *cfg, struct netlink_ext_ack *extack) { int ret; - spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, entry, mdb_attrs, extack); - spin_unlock_bh(&br->multicast_lock); + spin_lock_bh(&cfg->br->multicast_lock); + ret = br_mdb_add_group(cfg, extack); + spin_unlock_bh(&cfg->br->multicast_lock); return ret; } -static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, - struct netlink_ext_ack *extack) +static int br_mdb_config_attrs_init(struct nlattr *set_attrs, + struct br_mdb_config *cfg, + struct netlink_ext_ack *extack) { struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1]; - struct net *net = sock_net(skb->sk); - struct net_bridge_vlan_group *vg; - struct net_bridge_port *p = NULL; - struct net_device *dev, *pdev; - struct br_mdb_entry *entry; - struct net_bridge_vlan *v; - struct net_bridge *br; int err; - err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack); - if (err < 0) + err = nla_parse_nested(mdb_attrs, MDBE_ATTR_MAX, set_attrs, + br_mdbe_attrs_pol, extack); + if (err) return err; - br = netdev_priv(dev); + if (mdb_attrs[MDBE_ATTR_SOURCE] && + !is_valid_mdb_source(mdb_attrs[MDBE_ATTR_SOURCE], + cfg->entry->addr.proto, extack)) + return -EINVAL; + + __mdb_entry_to_br_ip(cfg->entry, &cfg->group, mdb_attrs); - if (!netif_running(br->dev)) { + return 0; +} + +static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh, + struct br_mdb_config *cfg, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; + struct br_port_msg *bpm; + struct net_device *dev; + int err; + + err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, + MDBA_SET_ENTRY_MAX, NULL, extack); + if (err) + return err; + + memset(cfg, 0, sizeof(*cfg)); + + bpm = nlmsg_data(nlh); + if (!bpm->ifindex) { + NL_SET_ERR_MSG_MOD(extack, "Invalid bridge ifindex"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (!dev) { + NL_SET_ERR_MSG_MOD(extack, "Bridge device doesn't exist"); + return -ENODEV; + } + + if (!netif_is_bridge_master(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Device is not a bridge"); + return -EOPNOTSUPP; + } + + cfg->br = netdev_priv(dev); + + if (!netif_running(cfg->br->dev)) { NL_SET_ERR_MSG_MOD(extack, "Bridge device is not running"); return -EINVAL; } - if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { + if (!br_opt_get(cfg->br, BROPT_MULTICAST_ENABLED)) { NL_SET_ERR_MSG_MOD(extack, "Bridge's multicast processing is disabled"); return -EINVAL; } - if (entry->ifindex != br->dev->ifindex) { - pdev = __dev_get_by_index(net, entry->ifindex); + if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { + NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute"); + return -EINVAL; + } + if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct 
br_mdb_entry)) { + NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length"); + return -EINVAL; + } + + cfg->entry = nla_data(tb[MDBA_SET_ENTRY]); + if (!is_valid_mdb_entry(cfg->entry, extack)) + return -EINVAL; + + if (cfg->entry->ifindex != cfg->br->dev->ifindex) { + struct net_device *pdev; + + pdev = __dev_get_by_index(net, cfg->entry->ifindex); if (!pdev) { NL_SET_ERR_MSG_MOD(extack, "Port net device doesn't exist"); return -ENODEV; } - p = br_port_get_rtnl(pdev); - if (!p) { + cfg->p = br_port_get_rtnl(pdev); + if (!cfg->p) { NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port"); return -EINVAL; } - if (p->br != br) { + if (cfg->p->br != cfg->br) { NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); return -EINVAL; } - if (p->state == BR_STATE_DISABLED && entry->state != MDB_PERMANENT) { + } + + if (tb[MDBA_SET_ENTRY_ATTRS]) + return br_mdb_config_attrs_init(tb[MDBA_SET_ENTRY_ATTRS], cfg, + extack); + else + __mdb_entry_to_br_ip(cfg->entry, &cfg->group, NULL); + + return 0; +} + +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, + struct netlink_ext_ack *extack) +{ + struct net *net = sock_net(skb->sk); + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct br_mdb_config cfg; + int err; + + err = br_mdb_config_init(net, nlh, &cfg, extack); + if (err) + return err; + + /* host join errors which can happen before creating the group */ + if (!cfg.p && !br_group_is_l2(&cfg.group)) { + /* don't allow any flags for host-joined IP groups */ + if (cfg.entry->state) { + NL_SET_ERR_MSG_MOD(extack, "Flags are not allowed for host groups"); + return -EINVAL; + } + if (!br_multicast_is_star_g(&cfg.group)) { + NL_SET_ERR_MSG_MOD(extack, "Groups with sources cannot be manually host joined"); + return -EINVAL; + } + } + + if (br_group_is_l2(&cfg.group) && cfg.entry->state != MDB_PERMANENT) { + NL_SET_ERR_MSG_MOD(extack, "Only permanent L2 entries allowed"); + return -EINVAL; + } + + if (cfg.p) { + if (cfg.p->state == BR_STATE_DISABLED && cfg.entry->state != MDB_PERMANENT) { NL_SET_ERR_MSG_MOD(extack, "Port is in disabled state and entry is not permanent"); return -EINVAL; } - vg = nbp_vlan_group(p); + vg = nbp_vlan_group(cfg.p); } else { - vg = br_vlan_group(br); + vg = br_vlan_group(cfg.br); } /* If vlan filtering is enabled and VLAN is not specified * install mdb entry on all vlans configured on the port. 
*/ - if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { + if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { - entry->vid = v->vid; - err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack); + cfg.entry->vid = v->vid; + cfg.group.vid = v->vid; + err = __br_mdb_add(&cfg, extack); if (err) break; } } else { - err = __br_mdb_add(net, br, p, entry, mdb_attrs, extack); + err = __br_mdb_add(&cfg, extack); } return err; } -static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry, - struct nlattr **mdb_attrs) +static int __br_mdb_del(const struct br_mdb_config *cfg) { + struct br_mdb_entry *entry = cfg->entry; + struct net_bridge *br = cfg->br; struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - struct br_ip ip; + struct br_ip ip = cfg->group; int err = -EINVAL; - if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) - return -EINVAL; - - __mdb_entry_to_br_ip(entry, &ip, mdb_attrs); - spin_lock_bh(&br->multicast_lock); mp = br_mdb_ip_get(br, &ip); if (!mp) @@ -1094,51 +1099,32 @@ unlock: static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { - struct nlattr *mdb_attrs[MDBE_ATTR_MAX + 1]; struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; - struct net_bridge_port *p = NULL; - struct net_device *dev, *pdev; - struct br_mdb_entry *entry; struct net_bridge_vlan *v; - struct net_bridge *br; + struct br_mdb_config cfg; int err; - err = br_mdb_parse(skb, nlh, &dev, &entry, mdb_attrs, extack); - if (err < 0) + err = br_mdb_config_init(net, nlh, &cfg, extack); + if (err) return err; - br = netdev_priv(dev); - - if (entry->ifindex != br->dev->ifindex) { - pdev = __dev_get_by_index(net, entry->ifindex); - if (!pdev) - return -ENODEV; - - p = br_port_get_rtnl(pdev); - if (!p) { - NL_SET_ERR_MSG_MOD(extack, "Net device is not a bridge port"); - return -EINVAL; - } - if (p->br != br) { - NL_SET_ERR_MSG_MOD(extack, "Port belongs to a different bridge device"); - return -EINVAL; - } - vg = nbp_vlan_group(p); - } else { - vg = br_vlan_group(br); - } + if (cfg.p) + vg = nbp_vlan_group(cfg.p); + else + vg = br_vlan_group(cfg.br); /* If vlan filtering is enabled and VLAN is not specified * delete mdb entry on all vlans configured on the port. 
*/ - if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { + if (br_vlan_enabled(cfg.br->dev) && vg && cfg.entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { - entry->vid = v->vid; - err = __br_mdb_del(br, entry, mdb_attrs); + cfg.entry->vid = v->vid; + cfg.group.vid = v->vid; + err = __br_mdb_del(&cfg); } } else { - err = __br_mdb_del(br, entry, mdb_attrs); + err = __br_mdb_del(&cfg); } return err; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 5e988f0ed2c0..db4c3900ae95 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1273,7 +1273,7 @@ br_multicast_new_group_src(struct net_bridge_port_group *pg, struct br_ip *src_i struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, - struct br_ip *group, + const struct br_ip *group, struct net_bridge_port_group __rcu *next, unsigned char flags, const unsigned char *src, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 4c4fda930068..3997e16c15fc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -92,6 +92,13 @@ struct bridge_mcast_stats { struct br_mcast_stats mstats; struct u64_stats_sync syncp; }; + +struct br_mdb_config { + struct net_bridge *br; + struct net_bridge_port *p; + struct br_mdb_entry *entry; + struct br_ip group; +}; #endif /* net_bridge_mcast_port must be always defined due to forwarding stubs */ @@ -934,7 +941,8 @@ br_mdb_ip_get(struct net_bridge *br, struct br_ip *dst); struct net_bridge_mdb_entry * br_multicast_new_group(struct net_bridge *br, struct br_ip *group); struct net_bridge_port_group * -br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group, +br_multicast_new_port_group(struct net_bridge_port *port, + const struct br_ip *group, struct net_bridge_port_group __rcu *next, unsigned char flags, const unsigned char *src, u8 filter_mode, u8 rt_protocol); diff --git a/net/can/af_can.c b/net/can/af_can.c index 27dcdcc0b808..c69168f11e44 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -677,7 +677,7 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_can_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); @@ -692,7 +692,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canfd_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); @@ -707,7 +707,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, static int canxl_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) { + if (unlikely(dev->type != ARPHRD_CAN || !can_get_ml_priv(dev) || !can_is_canxl_skb(skb))) { pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n", dev->type, skb->len); diff --git a/net/core/dev.c b/net/core/dev.c index 7627c475d991..b76fb37b381e 100644 --- 
a/net/core/dev.c +++ b/net/core/dev.c @@ -10517,6 +10517,22 @@ void netdev_set_default_ethtool_ops(struct net_device *dev, } EXPORT_SYMBOL_GPL(netdev_set_default_ethtool_ops); +/** + * netdev_sw_irq_coalesce_default_on() - enable SW IRQ coalescing by default + * @dev: netdev to enable the IRQ coalescing on + * + * Sets a conservative default for SW IRQ coalescing. Users can use + * sysfs attributes to override the default values. + */ +void netdev_sw_irq_coalesce_default_on(struct net_device *dev) +{ + WARN_ON(dev->reg_state == NETREG_REGISTERED); + + dev->gro_flush_timeout = 20000; + dev->napi_defer_hard_irqs = 1; +} +EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on); + void netdev_freemem(struct net_device *dev) { char *addr = (char *)dev - dev->padded; diff --git a/net/core/devlink.c b/net/core/devlink.c index 907df7124157..6004bd0ccee4 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -195,11 +195,16 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwerr); EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report); +#define DEVLINK_PORT_FN_CAPS_VALID_MASK \ + (_BITUL(__DEVLINK_PORT_FN_ATTR_CAPS_MAX) - 1) + static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_ATTR_MAX + 1] = { [DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] = { .type = NLA_BINARY }, [DEVLINK_PORT_FN_ATTR_STATE] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_PORT_FN_STATE_INACTIVE, DEVLINK_PORT_FN_STATE_ACTIVE), + [DEVLINK_PORT_FN_ATTR_CAPS] = + NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK), }; static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = { @@ -680,6 +685,87 @@ devlink_sb_tc_index_get_from_attrs(struct devlink_sb *devlink_sb, return 0; } +static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps, + u32 cap, bool is_enable) +{ + caps->selector |= cap; + if (is_enable) + caps->value |= cap; +} + +static int devlink_port_fn_roce_fill(const struct devlink_ops *ops, + struct devlink_port *devlink_port, + struct nla_bitfield32 *caps, + struct netlink_ext_ack *extack) +{ + bool is_enable; + int err; + + if (!ops->port_fn_roce_get) + return 0; + + err = ops->port_fn_roce_get(devlink_port, &is_enable, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_ROCE, is_enable); + return 0; +} + +static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops, + struct devlink_port *devlink_port, + struct nla_bitfield32 *caps, + struct netlink_ext_ack *extack) +{ + bool is_enable; + int err; + + if (!ops->port_fn_migratable_get || + devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) + return 0; + + err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + devlink_port_fn_cap_fill(caps, DEVLINK_PORT_FN_CAP_MIGRATABLE, is_enable); + return 0; +} + +static int devlink_port_fn_caps_fill(const struct devlink_ops *ops, + struct devlink_port *devlink_port, + struct sk_buff *msg, + struct netlink_ext_ack *extack, + bool *msg_updated) +{ + struct nla_bitfield32 caps = {}; + int err; + + err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack); + if (err) + return err; + + err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack); + if (err) + return err; + + if (!caps.selector) + return 0; + err = nla_put_bitfield32(msg, DEVLINK_PORT_FN_ATTR_CAPS, caps.value, + caps.selector); + if (err) + return err; + + *msg_updated = true; + return 0; 
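The capability fill/set plumbing around this point dispatches to new devlink_ops callbacks whose shape can be read off the call sites (port_fn_roce_get/set, port_fn_migratable_get/set). A sketch of how a driver might wire up the RoCE pair; the driver structure, field names and the -EBUSY policy are invented for illustration:

#include <net/devlink.h>

struct hyp_port {				/* hypothetical driver port */
	struct devlink_port dl_port;
	bool roce_enabled;
	bool in_use;
};

static int hyp_port_fn_roce_get(struct devlink_port *port, bool *is_enable,
				struct netlink_ext_ack *extack)
{
	struct hyp_port *hp = container_of(port, struct hyp_port, dl_port);

	*is_enable = hp->roce_enabled;
	return 0;
}

static int hyp_port_fn_roce_set(struct devlink_port *port, bool enable,
				struct netlink_ext_ack *extack)
{
	struct hyp_port *hp = container_of(port, struct hyp_port, dl_port);

	if (hp->in_use) {		/* hypothetical constraint */
		NL_SET_ERR_MSG_MOD(extack, "Function must be inactive to toggle RoCE");
		return -EBUSY;
	}
	hp->roce_enabled = enable;
	return 0;
}

static const struct devlink_ops hyp_devlink_ops = {
	.port_fn_roce_get	= hyp_port_fn_roce_get,
	.port_fn_roce_set	= hyp_port_fn_roce_set,
	/* .port_fn_migratable_get/_set follow the same pattern, PCI VFs only */
};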
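Earlier in this hunk, net/core/dev.c gains netdev_sw_irq_coalesce_default_on(); per its kerneldoc it must run before the device is registered. A minimal sketch of the intended call site in a hypothetical driver probe path, with the usual netdev_ops and address setup elided:

#include <linux/etherdevice.h>
#include <linux/netdevice.h>

static int hyp_probe(struct device *parent)
{
	struct net_device *ndev;
	int err;

	ndev = alloc_etherdev(0);
	if (!ndev)
		return -ENOMEM;
	SET_NETDEV_DEV(ndev, parent);
	/* ... assign ndev->netdev_ops, MAC address, features ... */

	/* Opt in to the conservative GRO flush timeout / deferred hard IRQ
	 * defaults; calling this after register_netdev() would trigger the
	 * WARN_ON() in the helper.
	 */
	netdev_sw_irq_coalesce_default_on(ndev);

	err = register_netdev(ndev);
	if (err)
		free_netdev(ndev);
	return err;
}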
+} + static int devlink_sb_tc_index_get_from_info(struct devlink_sb *devlink_sb, struct genl_info *info, @@ -1264,6 +1350,51 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops, } static int +devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable, + struct netlink_ext_ack *extack) +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + return ops->port_fn_migratable_set(devlink_port, enable, extack); +} + +static int +devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable, + struct netlink_ext_ack *extack) +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + + return ops->port_fn_roce_set(devlink_port, enable, extack); +} + +static int devlink_port_fn_caps_set(struct devlink_port *devlink_port, + const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + struct nla_bitfield32 caps; + u32 caps_value; + int err; + + caps = nla_get_bitfield32(attr); + caps_value = caps.value & caps.selector; + if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE) { + err = devlink_port_fn_roce_set(devlink_port, + caps_value & DEVLINK_PORT_FN_CAP_ROCE, + extack); + if (err) + return err; + } + if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) { + err = devlink_port_fn_mig_set(devlink_port, caps_value & + DEVLINK_PORT_FN_CAP_MIGRATABLE, + extack); + if (err) + return err; + } + return 0; +} + +static int devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port, struct netlink_ext_ack *extack) { @@ -1281,6 +1412,10 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por &msg_updated); if (err) goto out; + err = devlink_port_fn_caps_fill(ops, port, msg, extack, + &msg_updated); + if (err) + goto out; err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated); out: if (err || !msg_updated) @@ -1632,11 +1767,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port, } } - if (!ops->port_function_hw_addr_set) { - NL_SET_ERR_MSG_MOD(extack, "Port doesn't support function attributes"); - return -EOPNOTSUPP; - } - return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len, extack); } @@ -1650,12 +1780,52 @@ static int devlink_port_fn_state_set(struct devlink_port *port, state = nla_get_u8(attr); ops = port->devlink->ops; - if (!ops->port_fn_state_set) { - NL_SET_ERR_MSG_MOD(extack, - "Function does not support state setting"); + return ops->port_fn_state_set(port, state, extack); +} + +static int devlink_port_function_validate(struct devlink_port *devlink_port, + struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + const struct devlink_ops *ops = devlink_port->devlink->ops; + struct nlattr *attr; + + if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] && + !ops->port_function_hw_addr_set) { + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], + "Port doesn't support function attributes"); return -EOPNOTSUPP; } - return ops->port_fn_state_set(port, state, extack); + if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) { + NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR], + "Function does not support state setting"); + return -EOPNOTSUPP; + } + attr = tb[DEVLINK_PORT_FN_ATTR_CAPS]; + if (attr) { + struct nla_bitfield32 caps; + + caps = nla_get_bitfield32(attr); + if (caps.selector & DEVLINK_PORT_FN_CAP_ROCE && + !ops->port_fn_roce_set) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Port doesn't support RoCE function attribute"); + return -EOPNOTSUPP; + } + if (caps.selector & DEVLINK_PORT_FN_CAP_MIGRATABLE) { + if 
(!ops->port_fn_migratable_set) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "Port doesn't support migratable function attribute"); + return -EOPNOTSUPP; + } + if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF) { + NL_SET_ERR_MSG_ATTR(extack, attr, + "migratable function attribute supported for VFs only"); + return -EOPNOTSUPP; + } + } + } + return 0; } static int devlink_port_function_set(struct devlink_port *port, @@ -1672,12 +1842,24 @@ static int devlink_port_function_set(struct devlink_port *port, return err; } + err = devlink_port_function_validate(port, tb, extack); + if (err) + return err; + attr = tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR]; if (attr) { err = devlink_port_function_hw_addr_set(port, attr, extack); if (err) return err; } + + attr = tb[DEVLINK_PORT_FN_ATTR_CAPS]; + if (attr) { + err = devlink_port_fn_caps_set(port, attr, extack); + if (err) + return err; + } + /* Keep this as the last function attribute set, so that when * multiple port function attributes are set along with state, * Those can be applied first before activating the state. @@ -4259,9 +4441,10 @@ static int devlink_resource_put(struct devlink *devlink, struct sk_buff *skb, nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_ID, resource->id, DEVLINK_ATTR_PAD)) goto nla_put_failure; - if (resource->size != resource->size_new) - nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, - resource->size_new, DEVLINK_ATTR_PAD); + if (resource->size != resource->size_new && + nla_put_u64_64bit(skb, DEVLINK_ATTR_RESOURCE_SIZE_NEW, + resource->size_new, DEVLINK_ATTR_PAD)) + goto nla_put_failure; if (devlink_resource_occ_put(resource, skb)) goto nla_put_failure; if (devlink_resource_size_params_put(resource, skb)) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 4bf95e36ed16..3cbba7099c0f 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -270,12 +270,10 @@ static struct sk_buff *napi_skb_cache_get(void) return skb; } -/* Caller must provide SKB that is memset cleared */ -static void __build_skb_around(struct sk_buff *skb, void *data, - unsigned int frag_size) +static inline void __finalize_skb_around(struct sk_buff *skb, void *data, + unsigned int size) { struct skb_shared_info *shinfo; - unsigned int size = frag_size ? : ksize(data); size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -297,15 +295,71 @@ static void __build_skb_around(struct sk_buff *skb, void *data, skb_set_kcov_handle(skb, kcov_common_handle()); } +static inline void *__slab_build_skb(struct sk_buff *skb, void *data, + unsigned int *size) +{ + void *resized; + + /* Must find the allocation size (and grow it to match). */ + *size = ksize(data); + /* krealloc() will immediately return "data" when + * "ksize(data)" is requested: it is the existing upper + * bounds. As a result, GFP_ATOMIC will be ignored. Note + * that this "new" pointer needs to be passed back to the + * caller for use so the __alloc_size hinting will be + * tracked correctly. + */ + resized = krealloc(data, *size, GFP_ATOMIC); + WARN_ON_ONCE(resized != data); + return resized; +} + +/* build_skb() variant which can operate on slab buffers. + * Note that this should be used sparingly as slab buffers + * cannot be combined efficiently by GRO! 
+ */ +struct sk_buff *slab_build_skb(void *data) +{ + struct sk_buff *skb; + unsigned int size; + + skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + data = __slab_build_skb(skb, data, &size); + __finalize_skb_around(skb, data, size); + + return skb; +} +EXPORT_SYMBOL(slab_build_skb); + +/* Caller must provide SKB that is memset cleared */ +static void __build_skb_around(struct sk_buff *skb, void *data, + unsigned int frag_size) +{ + unsigned int size = frag_size; + + /* frag_size == 0 is considered deprecated now. Callers + * using slab buffer should use slab_build_skb() instead. + */ + if (WARN_ONCE(size == 0, "Use slab_build_skb() instead")) + data = __slab_build_skb(skb, data, &size); + + __finalize_skb_around(skb, data, size); +} + /** * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of data, or 0 if head was kmalloced + * @frag_size: size of data (must not be 0) * * Allocate a new &sk_buff. Caller provides space holding head and - * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator - * or vmalloc() + * skb_shared_info. @data must have been allocated from the page + * allocator or vmalloc(). (A @frag_size of 0 to indicate a kmalloc() + * allocation is deprecated, and callers should use slab_build_skb() + * instead.) * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : diff --git a/net/core/sock.c b/net/core/sock.c index 4571914a4aa8..b0ab841e0aed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -901,13 +901,20 @@ int sock_set_timestamping(struct sock *sk, int optname, if (val & ~SOF_TIMESTAMPING_MASK) return -EINVAL; + if (val & SOF_TIMESTAMPING_OPT_ID_TCP && + !(val & SOF_TIMESTAMPING_OPT_ID)) + return -EINVAL; + if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk_is_tcp(sk)) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) return -EINVAL; - atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una); + if (val & SOF_TIMESTAMPING_OPT_ID_TCP) + atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq); + else + atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una); } else { atomic_set(&sk->sk_tskey, 0); } diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index 71884296fc70..03a1fb9c87a9 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -51,7 +51,8 @@ static struct sk_buff *hellcreek_rcv(struct sk_buff *skb, return NULL; } - pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN); + if (pskb_trim_rcsum(skb, skb->len - HELLCREEK_TAG_LEN)) + return NULL; dsa_default_offload_fwd_mark(skb); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 0f6ae143afc9..080e5c369f5b 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -27,7 +27,8 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, if (!skb->dev) return NULL; - pskb_trim_rcsum(skb, skb->len - len); + if (pskb_trim_rcsum(skb, skb->len - len)) + return NULL; dsa_default_offload_fwd_mark(skb); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index f14f51b41491..1c2ceba4771b 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -670,7 +670,8 @@ static struct sk_buff *sja1110_rcv_inband_control_extension(struct sk_buff *skb, * padding and trailer we need to account for the fact that * skb->data points to skb_mac_header(skb) + ETH_HLEN. 
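The skbuff rework above deprecates the old build_skb(data, 0) convention for kmalloc'ed heads: passing frag_size == 0 now triggers a one-time warning and callers are expected to switch to slab_build_skb(). A minimal sketch of that conversion in a hypothetical driver receive path; my_rx_to_skb and the kmalloc'ed buf are assumptions for illustration::

    #include <linux/skbuff.h>
    #include <linux/slab.h>

    /* Hypothetical RX-path helper; "buf" is a kmalloc'ed receive buffer. */
    static struct sk_buff *my_rx_to_skb(void *buf)
    {
            struct sk_buff *skb;

            /* Previously: skb = build_skb(buf, 0); (frag_size == 0 meant slab) */
            skb = slab_build_skb(buf);
            if (!skb)
                    kfree(buf);     /* on failure the helper does not free the data */
            return skb;
    }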
*/ - pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN); + if (pskb_trim_rcsum(skb, start_of_padding - ETH_HLEN)) + return NULL; /* Trap-to-host frame, no timestamp trailer */ } else { *source_port = SJA1110_RX_HEADER_SRC_PORT(rx_header); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index e02daa74e833..2edc8b796a4e 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -398,7 +398,7 @@ EXPORT_SYMBOL(alloc_etherdev_mqs); ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) { - return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); + return sysfs_emit(buf, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 72ab0944262a..228f13df2e18 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -4,7 +4,7 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o -ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ +ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \ diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 21cfe8557205..6f399afc2ff2 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -417,6 +417,7 @@ const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo", [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", + [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 1a4c11356c96..aee98be6237f 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -287,6 +287,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_PHC_VCLOCKS_GET] = ðnl_phc_vclocks_request_ops, [ETHTOOL_MSG_MODULE_GET] = ðnl_module_request_ops, [ETHTOOL_MSG_PSE_GET] = ðnl_pse_request_ops, + [ETHTOOL_MSG_RSS_GET] = ðnl_rss_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1040,6 +1041,12 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_pse_set_policy, .maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_RSS_GET, + .doit = ethnl_default_doit, + .policy = ethnl_rss_get_policy, + .maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 1bfd374f9718..3753787ba233 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -346,6 +346,7 @@ extern const struct ethnl_request_ops ethnl_stats_request_ops; extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops; extern const struct ethnl_request_ops ethnl_module_request_ops; extern const struct ethnl_request_ops ethnl_pse_request_ops; +extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -386,6 +387,7 @@ extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1]; extern const struct nla_policy 
ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1]; extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1]; +extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1]; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c new file mode 100644 index 000000000000..ebe6145aed3f --- /dev/null +++ b/net/ethtool/rss.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" + +struct rss_req_info { + struct ethnl_req_info base; + u32 rss_context; +}; + +struct rss_reply_data { + struct ethnl_reply_data base; + u32 indir_size; + u32 hkey_size; + u32 hfunc; + u32 *indir_table; + u8 *hkey; +}; + +#define RSS_REQINFO(__req_base) \ + container_of(__req_base, struct rss_req_info, base) + +#define RSS_REPDATA(__reply_base) \ + container_of(__reply_base, struct rss_reply_data, base) + +const struct nla_policy ethnl_rss_get_policy[] = { + [ETHTOOL_A_RSS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_RSS_CONTEXT] = { .type = NLA_U32 }, +}; + +static int +rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct rss_req_info *request = RSS_REQINFO(req_info); + + if (tb[ETHTOOL_A_RSS_CONTEXT]) + request->rss_context = nla_get_u32(tb[ETHTOOL_A_RSS_CONTEXT]); + + return 0; +} + +static int +rss_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, struct genl_info *info) +{ + struct rss_reply_data *data = RSS_REPDATA(reply_base); + struct rss_req_info *request = RSS_REQINFO(req_base); + struct net_device *dev = reply_base->dev; + const struct ethtool_ops *ops; + u32 total_size, indir_bytes; + u8 dev_hfunc = 0; + u8 *rss_config; + int ret; + + ops = dev->ethtool_ops; + if (!ops->get_rxfh) + return -EOPNOTSUPP; + + /* Some drivers don't handle rss_context */ + if (request->rss_context && !ops->get_rxfh_context) + return -EOPNOTSUPP; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + data->indir_size = 0; + data->hkey_size = 0; + if (ops->get_rxfh_indir_size) + data->indir_size = ops->get_rxfh_indir_size(dev); + if (ops->get_rxfh_key_size) + data->hkey_size = ops->get_rxfh_key_size(dev); + + indir_bytes = data->indir_size * sizeof(u32); + total_size = indir_bytes + data->hkey_size; + rss_config = kzalloc(total_size, GFP_KERNEL); + if (!rss_config) { + ret = -ENOMEM; + goto out_ops; + } + + if (data->indir_size) + data->indir_table = (u32 *)rss_config; + + if (data->hkey_size) + data->hkey = rss_config + indir_bytes; + + if (request->rss_context) + ret = ops->get_rxfh_context(dev, data->indir_table, data->hkey, + &dev_hfunc, request->rss_context); + else + ret = ops->get_rxfh(dev, data->indir_table, data->hkey, + &dev_hfunc); + + if (ret) + goto out_ops; + + data->hfunc = dev_hfunc; +out_ops: + ethnl_ops_complete(dev); + return ret; +} + +static int +rss_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct rss_reply_data *data = RSS_REPDATA(reply_base); + int len; + + len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ + nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ + nla_total_size(data->hkey_size); /* _RSS_HKEY */ + + return len; +} + +static int +rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct 
rss_reply_data *data = RSS_REPDATA(reply_base); + + if (nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc) || + nla_put(skb, ETHTOOL_A_RSS_INDIR, + sizeof(u32) * data->indir_size, data->indir_table) || + nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)) + return -EMSGSIZE; + + return 0; +} + +static void rss_cleanup_data(struct ethnl_reply_data *reply_base) +{ + const struct rss_reply_data *data = RSS_REPDATA(reply_base); + + kfree(data->indir_table); +} + +const struct ethnl_request_ops ethnl_rss_request_ops = { + .request_cmd = ETHTOOL_MSG_RSS_GET, + .reply_cmd = ETHTOOL_MSG_RSS_GET_REPLY, + .hdr_attr = ETHTOOL_A_RSS_HEADER, + .req_info_size = sizeof(struct rss_req_info), + .reply_data_size = sizeof(struct rss_reply_data), + + .parse_request = rss_parse_request, + .prepare_data = rss_prepare_data, + .reply_size = rss_reply_size, + .fill_reply = rss_fill_reply, + .cleanup_data = rss_cleanup_data, +}; diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index b33d1b5eda87..248ad5e46969 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -26,10 +26,12 @@ static struct genl_family nl802154_fam; /* multicast groups */ enum nl802154_multicast_groups { NL802154_MCGRP_CONFIG, + NL802154_MCGRP_SCAN, }; static const struct genl_multicast_group nl802154_mcgrps[] = { [NL802154_MCGRP_CONFIG] = { .name = "config", }, + [NL802154_MCGRP_SCAN] = { .name = "scan", }, }; /* returns ERR_PTR values */ @@ -216,6 +218,9 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_PID] = { .type = NLA_U32 }, [NL802154_ATTR_NETNS_FD] = { .type = NLA_U32 }, + + [NL802154_ATTR_COORDINATOR] = { .type = NLA_NESTED }, + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, }, [NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, }, @@ -1281,6 +1286,104 @@ static int nl802154_wpan_phy_netns(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl802154_prep_scan_event_msg(struct sk_buff *msg, + struct cfg802154_registered_device *rdev, + struct wpan_dev *wpan_dev, + u32 portid, u32 seq, int flags, u8 cmd, + struct ieee802154_coord_desc *desc) +{ + struct nlattr *nla; + void *hdr; + + hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); + if (!hdr) + return -ENOBUFS; + + if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx)) + goto nla_put_failure; + + if (wpan_dev->netdev && + nla_put_u32(msg, NL802154_ATTR_IFINDEX, wpan_dev->netdev->ifindex)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV, + wpan_dev_id(wpan_dev), NL802154_ATTR_PAD)) + goto nla_put_failure; + + nla = nla_nest_start_noflag(msg, NL802154_ATTR_COORDINATOR); + if (!nla) + goto nla_put_failure; + + if (nla_put(msg, NL802154_COORD_PANID, IEEE802154_PAN_ID_LEN, + &desc->addr.pan_id)) + goto nla_put_failure; + + if (desc->addr.mode == IEEE802154_ADDR_SHORT) { + if (nla_put(msg, NL802154_COORD_ADDR, + IEEE802154_SHORT_ADDR_LEN, + &desc->addr.short_addr)) + goto nla_put_failure; + } else { + if (nla_put(msg, NL802154_COORD_ADDR, + IEEE802154_EXTENDED_ADDR_LEN, + &desc->addr.extended_addr)) + goto nla_put_failure; + } + + if (nla_put_u8(msg, NL802154_COORD_CHANNEL, desc->channel)) + goto nla_put_failure; + + if (nla_put_u8(msg, NL802154_COORD_PAGE, desc->page)) + goto nla_put_failure; + + if (nla_put_u16(msg, NL802154_COORD_SUPERFRAME_SPEC, + desc->superframe_spec)) + goto nla_put_failure; + + if (nla_put_u8(msg, NL802154_COORD_LINK_QUALITY, desc->link_quality)) + goto nla_put_failure; + + 
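nl802154_scan_event() takes a filled ieee802154_coord_desc and multicasts it to the new "scan" group via the message builder above. A rough sketch of how a MAC-layer beacon/scan handler might call it for a short-address coordinator; the reporting function, its parameters and the includes are hypothetical, only the descriptor fields and the helper call are taken from the patch::

    #include <net/cfg802154.h>
    #include "nl802154.h"

    /* Hypothetical beacon handler inside the ieee802154 stack. */
    static void my_report_coordinator(struct wpan_phy *phy,
                                      struct wpan_dev *wpan_dev,
                                      __le16 pan_id, __le16 short_addr,
                                      u8 page, u8 channel, u8 lqi)
    {
            struct ieee802154_coord_desc desc = {};

            desc.addr.pan_id = pan_id;
            desc.addr.mode = IEEE802154_ADDR_SHORT;
            desc.addr.short_addr = short_addr;
            desc.page = page;
            desc.channel = channel;
            desc.link_quality = lqi;
            desc.gts_permit = false;

            nl802154_scan_event(phy, wpan_dev, &desc);
    }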
if (desc->gts_permit && nla_put_flag(msg, NL802154_COORD_GTS_PERMIT)) + goto nla_put_failure; + + /* TODO: NL802154_COORD_PAYLOAD_DATA if any */ + + nla_nest_end(msg, nla); + + genlmsg_end(msg, hdr); + + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + + return -EMSGSIZE; +} + +int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + struct ieee802154_coord_desc *desc) +{ + struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(wpan_phy); + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!msg) + return -ENOMEM; + + ret = nl802154_prep_scan_event_msg(msg, rdev, wpan_dev, 0, 0, 0, + NL802154_CMD_SCAN_EVENT, + desc); + if (ret < 0) { + nlmsg_free(msg); + return ret; + } + + return genlmsg_multicast_netns(&nl802154_fam, wpan_phy_net(wpan_phy), + msg, 0, NL802154_MCGRP_SCAN, GFP_ATOMIC); +} +EXPORT_SYMBOL_GPL(nl802154_scan_event); + #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = { [NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 }, diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h index 8c4b6d08954c..89b805500032 100644 --- a/net/ieee802154/nl802154.h +++ b/net/ieee802154/nl802154.h @@ -4,5 +4,7 @@ int nl802154_init(void); void nl802154_exit(void); +int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, + struct ieee802154_coord_desc *desc); #endif /* __IEEE802154_NL802154_H */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f361d3d56be2..b5736ef16ed2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -841,6 +841,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, return -EINVAL; } + if (!cfg->fc_table) + cfg->fc_table = RT_TABLE_MAIN; + return 0; errout: return err; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 19a662003eef..ce9ff3c62e84 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -423,6 +423,7 @@ static struct fib_info *fib_find_info(struct fib_info *nfi) nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && nfi->fib_type == fi->fib_type && + nfi->fib_tb_id == fi->fib_tb_id && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) && diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a4ccef3e6935..ffff46cdcb58 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1492,24 +1492,6 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *p = &t->parms; __be16 o_flags = p->o_flags; - if (t->erspan_ver <= 2) { - if (t->erspan_ver != 0 && !t->collect_md) - o_flags |= TUNNEL_KEY; - - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) - goto nla_put_failure; - - if (t->erspan_ver == 1) { - if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) - goto nla_put_failure; - } else if (t->erspan_ver == 2) { - if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) - goto nla_put_failure; - if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) - goto nla_put_failure; - } - } - if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || @@ -1550,6 +1532,34 @@ nla_put_failure: return -EMSGSIZE; } +static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + + if (t->erspan_ver <= 2) { + if 
(t->erspan_ver != 0 && !t->collect_md) + t->parms.o_flags |= TUNNEL_KEY; + + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver)) + goto nla_put_failure; + + if (t->erspan_ver == 1) { + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) + goto nla_put_failure; + } else if (t->erspan_ver == 2) { + if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid)) + goto nla_put_failure; + } + } + + return ipgre_fill_info(skb, dev); + +nla_put_failure: + return -EMSGSIZE; +} + static void erspan_setup(struct net_device *dev) { struct ip_tunnel *t = netdev_priv(dev); @@ -1628,7 +1638,7 @@ static struct rtnl_link_ops erspan_link_ops __read_mostly = { .changelink = erspan_changelink, .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, - .fill_info = ipgre_fill_info, + .fill_info = erspan_fill_info, .get_link_net = ip_tunnel_get_link_net, }; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bb9854c2b7a1..409ec2a1f95b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -49,6 +49,11 @@ #include <net/transp_v6.h> #endif +#define ping_portaddr_for_each_entry(__sk, node, list) \ + hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node) +#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \ + hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node) + struct ping_table { struct hlist_nulls_head hash[PING_HTABLE_SIZE]; spinlock_t lock; @@ -192,7 +197,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) return NULL; } - ping_portaddr_for_each_entry(sk, hnode, hslot) { + ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) { isk = inet_sk(sk); pr_debug("iterate\n"); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index e19507614f64..60fd91bb5171 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -920,6 +920,9 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err < 0) goto fail; + /* We prevent @rt from being freed. */ + rcu_read_lock(); + for (;;) { /* Prepare header of the next frame, * before previous one went down. */ @@ -943,6 +946,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, if (err == 0) { IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); + rcu_read_unlock(); return 0; } @@ -950,6 +954,7 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); + rcu_read_unlock(); return err; slow_path_clean: diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index d9b50884d34e..ac0b28025fb0 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -254,7 +254,6 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, enum nl802154_iftype iftype) { struct ieee802154_local *local = sdata->local; - struct wpan_dev *wpan_dev = &sdata->wpan_dev; struct ieee802154_sub_if_data *nsdata; /* we hold the RTNL here so can safely walk the list */ @@ -262,13 +261,13 @@ ieee802154_check_concurrent_iface(struct ieee802154_sub_if_data *sdata, if (nsdata != sdata && ieee802154_sdata_running(nsdata)) { int ret; - /* TODO currently we don't support multiple node types - * we need to run skb_clone at rx path. Check if there - * exist really an use case if we need to support - * multiple node types at the same time. + /* TODO currently we don't support multiple node/coord + * types we need to run skb_clone at rx path. 
Check if + * there exist really an use case if we need to support + * multiple node/coord types at the same time. */ - if (wpan_dev->iftype == NL802154_IFTYPE_NODE && - nsdata->wpan_dev.iftype == NL802154_IFTYPE_NODE) + if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR && + nsdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR) return -EBUSY; /* check all phy mac sublayer settings are the same. @@ -565,6 +564,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata, wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST); switch (type) { + case NL802154_IFTYPE_COORD: case NL802154_IFTYPE_NODE: ieee802154_be64_to_le64(&wpan_dev->extended_addr, sdata->dev->dev_addr); @@ -624,6 +624,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, ieee802154_le64_to_be64(ndev->perm_addr, &local->hw.phy->perm_extended_addr); switch (type) { + case NL802154_IFTYPE_COORD: case NL802154_IFTYPE_NODE: ndev->type = ARPHRD_IEEE802154; if (ieee802154_is_valid_extended_unicast_addr(extended_addr)) { @@ -650,6 +651,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, sdata->dev = ndev; sdata->wpan_dev.wpan_phy = local->hw.phy; sdata->local = local; + INIT_LIST_HEAD(&sdata->wpan_dev.list); /* setup type-dependent data */ ret = ieee802154_setup_sdata(sdata, type); diff --git a/net/mac802154/main.c b/net/mac802154/main.c index 40fab08df24b..3ed31daf7b9c 100644 --- a/net/mac802154/main.c +++ b/net/mac802154/main.c @@ -107,7 +107,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops) phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE; /* always supported */ - phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE); + phy->supported.iftypes = BIT(NL802154_IFTYPE_NODE) | BIT(NL802154_IFTYPE_COORD); return &local->hw; } diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 0724aac8f48c..c2aae2a6d6a6 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -208,6 +208,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, int ret; struct ieee802154_sub_if_data *sdata; struct ieee802154_hdr hdr; + struct sk_buff *skb2; ret = ieee802154_parse_frame_start(skb, &hdr); if (ret) { @@ -217,7 +218,7 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, } list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->wpan_dev.iftype != NL802154_IFTYPE_NODE) + if (sdata->wpan_dev.iftype == NL802154_IFTYPE_MONITOR) continue; if (!ieee802154_sdata_running(sdata)) @@ -230,12 +231,12 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local, sdata->required_filtering == IEEE802154_FILTERING_4_FRAME_FIELDS) continue; - ieee802154_subif_frame(sdata, skb, &hdr); - skb = NULL; - break; + skb2 = skb_clone(skb, GFP_ATOMIC); + if (skb2) { + skb2->dev = sdata->dev; + ieee802154_subif_frame(sdata, skb2, &hdr); + } } - - kfree_skb(skb); } static void @@ -274,7 +275,7 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb) WARN_ON_ONCE(softirq_count() == 0); if (local->suspended) - goto drop; + goto free_skb; /* TODO: When a transceiver omits the checksum here, we * add an own calculated one. 
This is currently an ugly @@ -292,20 +293,17 @@ void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb) /* Level 1 filtering: Check the FCS by software when relevant */ if (local->hw.phy->filtering == IEEE802154_FILTERING_NONE) { crc = crc_ccitt(0, skb->data, skb->len); - if (crc) { - rcu_read_unlock(); + if (crc) goto drop; - } } /* remove crc */ skb_trim(skb, skb->len - 2); __ieee802154_rx_handle_packet(local, skb); - rcu_read_unlock(); - - return; drop: + rcu_read_unlock(); +free_skb: kfree_skb(skb); } diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h index df855c33daf2..689396d6c76a 100644 --- a/net/mac802154/trace.h +++ b/net/mac802154/trace.h @@ -264,6 +264,31 @@ TRACE_EVENT(802154_drv_set_promiscuous_mode, BOOL_TO_STR(__entry->on)) ); +TRACE_EVENT(802154_new_scan_event, + TP_PROTO(struct ieee802154_coord_desc *desc), + TP_ARGS(desc), + TP_STRUCT__entry( + __field(__le16, pan_id) + __field(__le64, addr) + __field(u8, channel) + __field(u8, page) + ), + TP_fast_assign( + __entry->page = desc->page; + __entry->channel = desc->channel; + __entry->pan_id = desc->addr.pan_id; + __entry->addr = desc->addr.extended_addr; + ), + TP_printk("panid: %u, coord_addr: 0x%llx, page: %u, channel: %u", + __le16_to_cpu(__entry->pan_id), __le64_to_cpu(__entry->addr), + __entry->page, __entry->channel) +); + +DEFINE_EVENT(802154_new_scan_event, 802154_scan_event, + TP_PROTO(struct ieee802154_coord_desc *desc), + TP_ARGS(desc) +); + #endif /* !__MAC802154_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index eef69d0e44ec..2ea7eae43bdb 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -1190,7 +1190,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], if (!tb[MPTCP_PM_ADDR_ATTR_FAMILY]) { if (!require_family) - return err; + return 0; NL_SET_ERR_MSG_ATTR(info->extack, attr, "missing family"); @@ -1224,7 +1224,7 @@ static int mptcp_pm_parse_pm_addr_attr(struct nlattr *tb[], if (tb[MPTCP_PM_ADDR_ATTR_PORT]) addr->port = htons(nla_get_u16(tb[MPTCP_PM_ADDR_ATTR_PORT])); - return err; + return 0; } int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info, @@ -2094,7 +2094,7 @@ void mptcp_event_addr_removed(const struct mptcp_sock *msk, uint8_t id) return; nla_put_failure: - kfree_skb(skb); + nlmsg_free(skb); } void mptcp_event_addr_announced(const struct sock *ssk, @@ -2151,7 +2151,7 @@ void mptcp_event_addr_announced(const struct sock *ssk, return; nla_put_failure: - kfree_skb(skb); + nlmsg_free(skb); } void mptcp_event_pm_listener(const struct sock *ssk, @@ -2203,7 +2203,7 @@ void mptcp_event_pm_listener(const struct sock *ssk, return; nla_put_failure: - kfree_skb(skb); + nlmsg_free(skb); } void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, @@ -2261,7 +2261,7 @@ void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, return; nla_put_failure: - kfree_skb(skb); + nlmsg_free(skb); } static const struct genl_small_ops mptcp_pm_ops[] = { diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index a47423ebb33a..d4b1e6ec1b36 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -740,7 +740,7 @@ static int mptcp_setsockopt_v4_set_tos(struct mptcp_sock *msk, int optname, } release_sock(sk); - return err; + return 0; } static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname, diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index dda8b76b7798..fd2236ee9a79 100644 --- a/net/ncsi/ncsi-cmd.c +++ 
b/net/ncsi/ncsi-cmd.c @@ -228,7 +228,8 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb, len += max(payload, padding_bytes); cmd = skb_put_zero(skb, len); - memcpy(&cmd->mfr_id, nca->data, nca->payload); + unsafe_memcpy(&cmd->mfr_id, nca->data, nca->payload, + /* skb allocated with enough to load the payload */); ncsi_cmd_build_header(&cmd->cmd.common, nca); return 0; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index b96338b4bf36..5c3cf0834af0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -887,7 +887,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone = nf_ct_zone(ct); if (!nf_ct_ext_valid_pre(ct->ext)) { - NF_CT_STAT_INC(net, insert_failed); + NF_CT_STAT_INC_ATOMIC(net, insert_failed); return -ETIMEDOUT; } @@ -934,7 +934,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) if (!nf_ct_ext_valid_post(ct->ext)) { nf_ct_kill(ct); - NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return -ETIMEDOUT; } @@ -1271,7 +1271,7 @@ chaintoolong: */ if (!nf_ct_ext_valid_post(ct->ext)) { nf_ct_kill(ct); - NF_CT_STAT_INC(net, drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d71150a40fb0..1286ae7d4609 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -328,8 +328,13 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_MARK -static int ctnetlink_dump_mark(struct sk_buff *skb, u32 mark) +static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) { + u32 mark = READ_ONCE(ct->mark); + + if (!mark) + return 0; + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) goto nla_put_failure; return 0; @@ -543,7 +548,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) { if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_mark(skb, READ_ONCE(ct->mark)) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || ctnetlink_dump_secctx(skb, ct) < 0 || ctnetlink_dump_id(skb, ct) < 0 || ctnetlink_dump_use(skb, ct) < 0 || @@ -722,7 +727,6 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) struct sk_buff *skb; unsigned int type; unsigned int flags = 0, group; - u32 mark; int err; if (events & (1 << IPCT_DESTROY)) { @@ -827,9 +831,8 @@ ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) } #ifdef CONFIG_NF_CONNTRACK_MARK - mark = READ_ONCE(ct->mark); - if ((events & (1 << IPCT_MARK) || mark) && - ctnetlink_dump_mark(skb, mark) < 0) + if (events & (1 << IPCT_MARK) && + ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif nlmsg_end(skb, nlh); @@ -2671,7 +2674,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) { const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; - u32 mark; zone = nf_ct_zone(ct); @@ -2733,8 +2735,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_MARK - mark = READ_ONCE(ct->mark); - if (mark && ctnetlink_dump_mark(skb, mark) < 0) + if (ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif if (ctnetlink_dump_labels(skb, ct) < 0) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 00b522890d77..0fdcdb2c9ae4 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -997,13 +997,13 @@ static void 
flow_offload_queue_work(struct flow_offload_work *offload) struct net *net = read_pnet(&offload->flowtable->net); if (offload->cmd == FLOW_CLS_REPLACE) { - NF_FLOW_TABLE_STAT_INC(net, count_wq_add); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_add); queue_work(nf_flow_offload_add_wq, &offload->work); } else if (offload->cmd == FLOW_CLS_DESTROY) { - NF_FLOW_TABLE_STAT_INC(net, count_wq_del); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_del); queue_work(nf_flow_offload_del_wq, &offload->work); } else { - NF_FLOW_TABLE_STAT_INC(net, count_wq_stats); + NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count_wq_stats); queue_work(nf_flow_offload_stats_wq, &offload->work); } } diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 4f9299b9dcdd..06d46d182634 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1162,6 +1162,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, struct nft_pipapo_match *m = priv->clone; u8 genmask = nft_genmask_next(net); struct nft_pipapo_field *f; + const u8 *start_p, *end_p; int i, bsize_max, err = 0; if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END)) @@ -1202,9 +1203,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set, } /* Validate */ + start_p = start; + end_p = end; nft_pipapo_for_each_field(f, i, m) { - const u8 *start_p = start, *end_p = end; - if (f->rules >= (unsigned long)NFT_PIPAPO_RULE0_MAX) return -ENOSPC; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 282c51051dcc..994a0a1efb58 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -240,6 +240,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, target->sens_res = nfca_poll->sens_res; target->sel_res = nfca_poll->sel_res; target->nfcid1_len = nfca_poll->nfcid1_len; + if (target->nfcid1_len > ARRAY_SIZE(target->nfcid1)) + return -EPROTO; if (target->nfcid1_len > 0) { memcpy(target->nfcid1, nfca_poll->nfcid1, target->nfcid1_len); @@ -248,6 +250,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcb_poll = (struct rf_tech_specific_params_nfcb_poll *)params; target->sensb_res_len = nfcb_poll->sensb_res_len; + if (target->sensb_res_len > ARRAY_SIZE(target->sensb_res)) + return -EPROTO; if (target->sensb_res_len > 0) { memcpy(target->sensb_res, nfcb_poll->sensb_res, target->sensb_res_len); @@ -256,6 +260,8 @@ static int nci_add_new_protocol(struct nci_dev *ndev, nfcf_poll = (struct rf_tech_specific_params_nfcf_poll *)params; target->sensf_res_len = nfcf_poll->sensf_res_len; + if (target->sensf_res_len > ARRAY_SIZE(target->sensf_res)) + return -EPROTO; if (target->sensf_res_len > 0) { memcpy(target->sensf_res, nfcf_poll->sensf_res, target->sensf_res_len); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 861dfb8daf4a..932bcf766d63 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -209,6 +209,26 @@ static struct vport *new_vport(const struct vport_parms *parms) return vport; } +static void ovs_vport_update_upcall_stats(struct sk_buff *skb, + const struct dp_upcall_info *upcall_info, + bool upcall_result) +{ + struct vport *p = OVS_CB(skb)->input_vport; + struct vport_upcall_stats_percpu *stats; + + if (upcall_info->cmd != OVS_PACKET_CMD_MISS && + upcall_info->cmd != OVS_PACKET_CMD_ACTION) + return; + + stats = this_cpu_ptr(p->upcall_stats); + u64_stats_update_begin(&stats->syncp); + if (upcall_result) + u64_stats_inc(&stats->n_success); + else + u64_stats_inc(&stats->n_fail); + u64_stats_update_end(&stats->syncp); +} + void 
ovs_dp_detach_port(struct vport *p) { ASSERT_OVSL(); @@ -216,6 +236,9 @@ void ovs_dp_detach_port(struct vport *p) /* First drop references to device. */ hlist_del_rcu(&p->dp_hash_node); + /* Free percpu memory */ + free_percpu(p->upcall_stats); + /* Then destroy it. */ ovs_vport_del(p); } @@ -305,6 +328,8 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); else err = queue_gso_packets(dp, skb, key, upcall_info, cutlen); + + ovs_vport_update_upcall_stats(skb, upcall_info, !err); if (err) goto err; @@ -1826,6 +1851,12 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_portids; } + vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); + if (!vport->upcall_stats) { + err = -ENOMEM; + goto err_destroy_portids; + } + err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_NEW); BUG_ON(err < 0); @@ -2098,6 +2129,9 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, OVS_VPORT_ATTR_PAD)) goto nla_put_failure; + if (ovs_vport_get_upcall_stats(vport, skb)) + goto nla_put_failure; + if (ovs_vport_get_upcall_portids(vport, skb)) goto nla_put_failure; @@ -2279,6 +2313,12 @@ restart: goto exit_unlock_free; } + vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu); + if (!vport->upcall_stats) { + err = -ENOMEM; + goto exit_unlock_free; + } + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), info->snd_portid, info->snd_seq, 0, OVS_VPORT_CMD_NEW, GFP_KERNEL); @@ -2508,6 +2548,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, + [OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED }, }; static const struct genl_small_ops dp_vport_genl_ops[] = { diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 82a74f998966..7e0f5c45b512 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -285,6 +285,56 @@ void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats) } /** + * ovs_vport_get_upcall_stats - retrieve upcall stats + * + * @vport: vport from which to retrieve the stats. + * @skb: sk_buff where upcall stats should be appended. + * + * Retrieves upcall stats for the given device. + * + * Must be called with ovs_mutex or rcu_read_lock. 
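ovs_vport_update_upcall_stats() above bumps per-CPU counters under u64_stats_update_begin()/end(), and the getter that follows sums them with the matching fetch_begin/fetch_retry loop. A condensed, self-contained sketch of that writer/reader pairing, with hypothetical names (my_stats, my_count_hit, my_read_total)::

    #include <linux/percpu.h>
    #include <linux/u64_stats_sync.h>

    /* Hypothetical counter mirroring vport_upcall_stats_percpu. */
    struct my_stats {
            struct u64_stats_sync syncp;
            u64_stats_t hits;
    };

    static void my_count_hit(struct my_stats __percpu *pcpu)
    {
            struct my_stats *s = this_cpu_ptr(pcpu);

            u64_stats_update_begin(&s->syncp);
            u64_stats_inc(&s->hits);
            u64_stats_update_end(&s->syncp);
    }

    static u64 my_read_total(struct my_stats __percpu *pcpu)
    {
            u64 total = 0;
            int cpu;

            for_each_possible_cpu(cpu) {
                    const struct my_stats *s = per_cpu_ptr(pcpu, cpu);
                    unsigned int start;
                    u64 v;

                    do {
                            start = u64_stats_fetch_begin(&s->syncp);
                            v = u64_stats_read(&s->hits);
                    } while (u64_stats_fetch_retry(&s->syncp, start));
                    total += v;
            }
            return total;
    }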
+ */ +int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb) +{ + struct nlattr *nla; + int i; + + __u64 tx_success = 0; + __u64 tx_fail = 0; + + for_each_possible_cpu(i) { + const struct vport_upcall_stats_percpu *stats; + unsigned int start; + + stats = per_cpu_ptr(vport->upcall_stats, i); + do { + start = u64_stats_fetch_begin(&stats->syncp); + tx_success += u64_stats_read(&stats->n_success); + tx_fail += u64_stats_read(&stats->n_fail); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + } + + nla = nla_nest_start_noflag(skb, OVS_VPORT_ATTR_UPCALL_STATS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_SUCCESS, tx_success, + OVS_VPORT_ATTR_PAD)) { + nla_nest_cancel(skb, nla); + return -EMSGSIZE; + } + + if (nla_put_u64_64bit(skb, OVS_VPORT_UPCALL_ATTR_FAIL, tx_fail, + OVS_VPORT_ATTR_PAD)) { + nla_nest_cancel(skb, nla); + return -EMSGSIZE; + } + nla_nest_end(skb, nla); + + return 0; +} + +/** * ovs_vport_get_options - retrieve device options * * @vport: vport from which to retrieve the options. diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 6ff45e8a0868..3e71ca8ad8a7 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -32,6 +32,8 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name); void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *); +int ovs_vport_get_upcall_stats(struct vport *vport, struct sk_buff *skb); + int ovs_vport_set_options(struct vport *, struct nlattr *options); int ovs_vport_get_options(const struct vport *, struct sk_buff *); @@ -65,6 +67,7 @@ struct vport_portids { * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. * @ops: Class structure. + * @upcall_stats: Upcall stats of every ports. * @detach_list: list used for detaching vport in net-exit call. * @rcu: RCU callback head for deferred destruction. */ @@ -78,6 +81,7 @@ struct vport { struct hlist_node hash_node; struct hlist_node dp_hash_node; const struct vport_ops *ops; + struct vport_upcall_stats_percpu __percpu *upcall_stats; struct list_head detach_list; struct rcu_head rcu; @@ -137,6 +141,18 @@ struct vport_ops { struct list_head list; }; +/** + * struct vport_upcall_stats_percpu - per-cpu packet upcall statistics for + * a given vport. + * @n_success: Number of packets that upcall to userspace succeed. + * @n_fail: Number of packets that upcall to userspace failed. + */ +struct vport_upcall_stats_percpu { + struct u64_stats_sync syncp; + u64_stats_t n_success; + u64_stats_t n_fail; +}; + struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *, const struct vport_parms *); void ovs_vport_free(struct vport *); diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index accd35c05577..7ae023b37a83 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -58,4 +58,11 @@ config RXKAD See Documentation/networking/rxrpc.rst. +config RXPERF + tristate "RxRPC test service" + help + Provide an rxperf service tester. This listens on UDP port 7009 for + incoming calls from the rxperf program (an example of which can be + found in OpenAFS). 
+ endif diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index fdeba488fc6e..e76d3459d78e 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -16,6 +16,7 @@ rxrpc-y := \ conn_service.o \ input.o \ insecure.o \ + io_thread.o \ key.o \ local_event.o \ local_object.o \ @@ -36,3 +37,6 @@ rxrpc-y := \ rxrpc-$(CONFIG_PROC_FS) += proc.o rxrpc-$(CONFIG_RXKAD) += rxkad.o rxrpc-$(CONFIG_SYSCTL) += sysctl.o + + +obj-$(CONFIG_RXPERF) += rxperf.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index aacdd96a9886..7ea576f6ba4b 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -194,8 +194,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) service_in_use: write_unlock(&local->services_lock); - rxrpc_unuse_local(local); - rxrpc_put_local(local); + rxrpc_unuse_local(local, rxrpc_local_unuse_bind); + rxrpc_put_local(local, rxrpc_local_put_bind); ret = -EADDRINUSE; error_unlock: release_sock(&rx->sk); @@ -328,7 +328,7 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, mutex_unlock(&call->user_mutex); } - rxrpc_put_peer(cp.peer); + rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p", call); return call; } @@ -359,9 +359,9 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) /* Make sure we're not going to call back into a kernel service */ if (call->notify_rx) { - spin_lock_bh(&call->notify_lock); + spin_lock(&call->notify_lock); call->notify_rx = rxrpc_dummy_notify_rx; - spin_unlock_bh(&call->notify_lock); + spin_unlock(&call->notify_lock); } mutex_unlock(&call->user_mutex); @@ -812,14 +812,12 @@ static int rxrpc_shutdown(struct socket *sock, int flags) lock_sock(sk); - spin_lock_bh(&sk->sk_receive_queue.lock); if (sk->sk_state < RXRPC_CLOSE) { sk->sk_state = RXRPC_CLOSE; sk->sk_shutdown = SHUTDOWN_MASK; } else { ret = -ESHUTDOWN; } - spin_unlock_bh(&sk->sk_receive_queue.lock); rxrpc_discard_prealloc(rx); @@ -872,9 +870,7 @@ static int rxrpc_release_sock(struct sock *sk) break; } - spin_lock_bh(&sk->sk_receive_queue.lock); sk->sk_state = RXRPC_CLOSE; - spin_unlock_bh(&sk->sk_receive_queue.lock); if (rx->local && rcu_access_pointer(rx->local->service) == rx) { write_lock(&rx->local->services_lock); @@ -888,8 +884,8 @@ static int rxrpc_release_sock(struct sock *sk) flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); - rxrpc_unuse_local(rx->local); - rxrpc_put_local(rx->local); + rxrpc_unuse_local(rx->local, rxrpc_local_unuse_release_sock); + rxrpc_put_local(rx->local, rxrpc_local_put_release_sock); rx->local = NULL; key_put(rx->key); rx->key = NULL; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index f5c538ce3e23..e7dccab7b741 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -36,6 +36,8 @@ struct rxrpc_txbuf; * to pass supplementary information. 
*/ enum rxrpc_skb_mark { + RXRPC_SKB_MARK_PACKET, /* Received packet */ + RXRPC_SKB_MARK_ERROR, /* Error notification */ RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */ RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */ }; @@ -76,7 +78,7 @@ struct rxrpc_net { bool kill_all_client_conns; atomic_t nr_client_conns; spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */ - spinlock_t client_conn_discard_lock; /* Prevent multiple discarders */ + struct mutex client_conn_discard_lock; /* Prevent multiple discarders */ struct list_head idle_client_conns; struct work_struct client_conn_reaper; struct timer_list client_conn_reap_timer; @@ -99,6 +101,9 @@ struct rxrpc_net { atomic_t stat_tx_data_retrans; atomic_t stat_tx_data_send; atomic_t stat_tx_data_send_frag; + atomic_t stat_tx_data_send_fail; + atomic_t stat_tx_data_underflow; + atomic_t stat_tx_data_cwnd_reset; atomic_t stat_rx_data; atomic_t stat_rx_data_reqack; atomic_t stat_rx_data_jumbo; @@ -110,6 +115,8 @@ struct rxrpc_net { atomic_t stat_rx_acks[256]; atomic_t stat_why_req_ack[8]; + + atomic_t stat_io_loop; }; /* @@ -279,13 +286,11 @@ struct rxrpc_local { struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct hlist_node link; struct socket *socket; /* my UDP socket */ - struct work_struct processor; - struct list_head ack_tx_queue; /* List of ACKs that need sending */ - spinlock_t ack_tx_lock; /* ACK list lock */ + struct task_struct *io_thread; struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ - struct sk_buff_head reject_queue; /* packets awaiting rejection */ - struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ + struct sk_buff_head rx_queue; /* Received packets */ + struct list_head call_attend_q; /* Calls requiring immediate attention */ struct rb_root client_bundles; /* Client connection bundles by socket params */ spinlock_t client_bundles_lock; /* Lock for client_bundles */ spinlock_t lock; /* access lock */ @@ -403,12 +408,18 @@ enum rxrpc_conn_proto_state { * RxRPC client connection bundle. 
*/ struct rxrpc_bundle { - struct rxrpc_conn_parameters params; + struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Remote endpoint */ + struct key *key; /* Security details */ refcount_t ref; atomic_t active; /* Number of active users */ unsigned int debug_id; + u32 security_level; /* Security level selected */ + u16 service_id; /* Service ID for this connection */ bool try_upgrade; /* True if the bundle is attempting upgrade */ bool alloc_conn; /* True if someone's getting a conn */ + bool exclusive; /* T if conn is exclusive */ + bool upgrade; /* T if service ID can be upgraded */ short alloc_error; /* Error from last conn allocation */ spinlock_t channel_lock; struct rb_node local_node; /* Node in local->client_conns */ @@ -424,9 +435,13 @@ struct rxrpc_bundle { */ struct rxrpc_connection { struct rxrpc_conn_proto proto; - struct rxrpc_conn_parameters params; + struct rxrpc_local *local; /* Representation of local endpoint */ + struct rxrpc_peer *peer; /* Remote endpoint */ + struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ + struct key *key; /* Security details */ refcount_t ref; + atomic_t active; /* Active count for service conns */ struct rcu_head rcu; struct list_head cache_link; @@ -447,6 +462,7 @@ struct rxrpc_connection { struct timer_list timer; /* Conn event timer */ struct work_struct processor; /* connection event processor */ + struct work_struct destructor; /* In-process-context destroyer */ struct rxrpc_bundle *bundle; /* Client connection bundle */ struct rb_node service_node; /* Node in peer->service_conns */ struct list_head proc_link; /* link in procfs list */ @@ -471,9 +487,13 @@ struct rxrpc_connection { atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ u32 service_id; /* Service ID, possibly upgraded */ + u32 security_level; /* Security level selected */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ u8 bundle_shift; /* Index into bundle->avail_chans */ + bool exclusive; /* T if conn is exclusive */ + bool upgrade; /* T if service ID can be upgraded */ + u16 orig_service_id; /* Originally requested service ID */ short error; /* Local error code */ }; @@ -502,22 +522,19 @@ enum rxrpc_call_flag { RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */ RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */ - RXRPC_CALL_RX_UNDERRUN, /* Got data underrun */ RXRPC_CALL_DISCONNECTED, /* The call has been disconnected */ RXRPC_CALL_KERNEL, /* The call was made by the kernel */ RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */ - RXRPC_CALL_DELAY_ACK_PENDING, /* DELAY ACK generation is pending */ - RXRPC_CALL_IDLE_ACK_PENDING, /* IDLE ACK generation is pending */ + RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ + RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */ }; /* * Events that can be raised on a call. 
*/ enum rxrpc_call_event { - RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_RESEND, /* Tx resend required */ - RXRPC_CALL_EV_EXPIRED, /* Expiry occurred */ RXRPC_CALL_EV_ACK_LOST, /* ACK may be lost, send ping */ + RXRPC_CALL_EV_INITIAL_PING, /* Send initial ping for a new service call */ }; /* @@ -570,10 +587,13 @@ struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ struct rxrpc_peer *peer; /* Peer record for remote address */ + struct rxrpc_local *local; /* Representation of local endpoint */ struct rxrpc_sock __rcu *socket; /* socket responsible */ struct rxrpc_net *rxnet; /* Network namespace to which call belongs */ + struct key *key; /* Security details */ const struct rxrpc_security *security; /* applied security module */ struct mutex user_mutex; /* User access mutex */ + struct sockaddr_rxrpc dest_srx; /* Destination address */ unsigned long delay_ack_at; /* When DELAY ACK needs to happen */ unsigned long ack_lost_at; /* When ACK is figured as lost */ unsigned long resend_at; /* When next resend needs to happen */ @@ -585,7 +605,7 @@ struct rxrpc_call { u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ struct timer_list timer; /* Combined event timer */ - struct work_struct processor; /* Event processor */ + struct work_struct destroyer; /* In-process-context destroyer */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */ @@ -594,6 +614,7 @@ struct rxrpc_call { struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ struct list_head sock_link; /* Link in rx->sock_calls */ struct rb_node sock_node; /* Node in rx->calls */ + struct list_head attend_link; /* Link in local->call_attend_q */ struct rxrpc_txbuf *tx_pending; /* Tx buffer being filled */ wait_queue_head_t waitq; /* Wait queue for channel or Tx */ s64 tx_total_len; /* Total length left to be transmitted (or -1) */ @@ -607,20 +628,22 @@ struct rxrpc_call { enum rxrpc_call_state state; /* current state of call */ enum rxrpc_call_completion completion; /* Call completion condition */ refcount_t ref; - u16 service_id; /* service ID */ u8 security_ix; /* Security type */ enum rxrpc_interruptibility interruptibility; /* At what point call may be interrupted */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ + u32 security_level; /* Security level selected */ int debug_id; /* debug ID for printks */ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ unsigned short rx_pkt_len; /* Current recvmsg packet len */ /* Transmitted data tracking. */ spinlock_t tx_lock; /* Transmit queue lock */ + struct list_head tx_sendmsg; /* Sendmsg prepared packets */ struct list_head tx_buffer; /* Buffer of transmissible packets */ rxrpc_seq_t tx_bottom; /* First packet in buffer */ rxrpc_seq_t tx_transmitted; /* Highest packet transmitted */ + rxrpc_seq_t tx_prepared; /* Highest Tx slot prepared. */ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. 
*/ u16 tx_backoff; /* Delay to insert due to Tx failure */ u8 tx_winsize; /* Maximum size of Tx window */ @@ -635,13 +658,13 @@ struct rxrpc_call { rxrpc_seq_t rx_consumed; /* Highest packet consumed */ rxrpc_serial_t rx_serial; /* Highest serial received for this call */ u8 rx_winsize; /* Size of Rx window */ - spinlock_t input_lock; /* Lock for packet input to this call */ /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS * is fixed, we keep these numbers in terms of segments (ie. DATA * packets) rather than bytes. */ #define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN +#define RXRPC_MIN_CWND (RXRPC_TX_SMSS > 2190 ? 2 : RXRPC_TX_SMSS > 1095 ? 3 : 4) u8 cong_cwnd; /* Congestion window size */ u8 cong_extra; /* Extra to send for congestion management */ u8 cong_ssthresh; /* Slow-start threshold */ @@ -676,11 +699,7 @@ struct rxrpc_call { rxrpc_seq_t acks_prev_seq; /* Highest previousPacket received */ rxrpc_seq_t acks_hard_ack; /* Latest hard-ack point */ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ - rxrpc_seq_t acks_lost_top; /* tx_top at the time lost-ack ping sent */ - rxrpc_serial_t acks_lost_ping; /* Serial number of probe ACK */ rxrpc_serial_t acks_highest_serial; /* Highest serial number ACK'd */ - struct sk_buff *acks_soft_tbl; /* The last ACK packet with NAKs in it */ - spinlock_t acks_ack_lock; /* Access to ->acks_last_ack */ }; /* @@ -739,9 +758,8 @@ struct rxrpc_send_params { */ struct rxrpc_txbuf { struct rcu_head rcu; - struct list_head call_link; /* Link in call->tx_queue */ + struct list_head call_link; /* Link in call->tx_sendmsg/tx_buffer */ struct list_head tx_link; /* Link in live Enc queue or Tx queue */ - struct rxrpc_call *call; /* Call to which belongs */ ktime_t last_sent; /* Time at which last transmitted */ refcount_t ref; rxrpc_seq_t seq; /* Sequence number of this packet */ @@ -793,9 +811,9 @@ extern struct workqueue_struct *rxrpc_workqueue; */ int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); void rxrpc_discard_prealloc(struct rxrpc_sock *); -struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, - struct rxrpc_sock *, - struct sk_buff *); +bool rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *, + struct rxrpc_connection *, struct sockaddr_rxrpc *, + struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long); @@ -808,14 +826,14 @@ void rxrpc_send_ACK(struct rxrpc_call *, u8, rxrpc_serial_t, enum rxrpc_propose_ void rxrpc_propose_delay_ACK(struct rxrpc_call *, rxrpc_serial_t, enum rxrpc_propose_ack_trace); void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *); -void rxrpc_process_call(struct work_struct *); +void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb); void rxrpc_reduce_call_timer(struct rxrpc_call *call, unsigned long expire_at, unsigned long now, enum rxrpc_timer_trace why); -void rxrpc_delete_call_timer(struct rxrpc_call *call); +void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb); /* * call_object.c @@ -824,6 +842,7 @@ extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; extern struct kmem_cache *rxrpc_call_jar; +void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what); struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *, gfp_t, unsigned int); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, @@ 
-835,10 +854,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); -bool __rxrpc_queue_call(struct rxrpc_call *); -bool rxrpc_queue_call(struct rxrpc_call *); -void rxrpc_see_call(struct rxrpc_call *); -bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op); +void rxrpc_see_call(struct rxrpc_call *, enum rxrpc_call_trace); +struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); void rxrpc_cleanup_call(struct rxrpc_call *); @@ -863,14 +880,14 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); -struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *); -void rxrpc_put_bundle(struct rxrpc_bundle *); +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); +void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace); int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, gfp_t); void rxrpc_expose_client_call(struct rxrpc_call *); void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *); -void rxrpc_put_client_conn(struct rxrpc_connection *); +void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_discard_expired_client_conns(struct work_struct *); void rxrpc_destroy_all_client_connections(struct rxrpc_net *); void rxrpc_clean_up_local_conns(struct rxrpc_local *); @@ -880,6 +897,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *); */ void rxrpc_process_connection(struct work_struct *); void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); +int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb); /* * conn_object.c @@ -887,18 +905,20 @@ void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool); extern unsigned int rxrpc_connection_expiry; extern unsigned int rxrpc_closed_conn_expiry; -struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); -struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, - struct sk_buff *, - struct rxrpc_peer **); +struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t); +struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *, + struct sockaddr_rxrpc *, + struct sk_buff *); void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); -void rxrpc_kill_connection(struct rxrpc_connection *); -bool rxrpc_queue_conn(struct rxrpc_connection *); -void rxrpc_see_connection(struct rxrpc_connection *); -struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *); -struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); -void rxrpc_put_service_conn(struct rxrpc_connection *); +void rxrpc_kill_client_conn(struct rxrpc_connection *); +void rxrpc_queue_conn(struct rxrpc_connection *, enum rxrpc_conn_trace); +void rxrpc_see_connection(struct rxrpc_connection *, enum rxrpc_conn_trace); +struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *, + enum rxrpc_conn_trace); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *, + enum 
rxrpc_conn_trace); +void rxrpc_put_connection(struct rxrpc_connection *, enum rxrpc_conn_trace); void rxrpc_service_connection_reaper(struct work_struct *); void rxrpc_destroy_all_connections(struct rxrpc_net *); @@ -912,17 +932,6 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) return !rxrpc_conn_is_client(conn); } -static inline void rxrpc_put_connection(struct rxrpc_connection *conn) -{ - if (!conn) - return; - - if (rxrpc_conn_is_client(conn)) - rxrpc_put_client_conn(conn); - else - rxrpc_put_service_conn(conn); -} - static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn, unsigned long expire_at) { @@ -942,7 +951,20 @@ void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ -int rxrpc_input_packet(struct sock *, struct sk_buff *); +void rxrpc_congestion_degrade(struct rxrpc_call *); +void rxrpc_input_call_packet(struct rxrpc_call *, struct sk_buff *); +void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *); + +/* + * io_thread.c + */ +int rxrpc_encap_rcv(struct sock *, struct sk_buff *); +void rxrpc_error_report(struct sock *); +int rxrpc_io_thread(void *data); +static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local) +{ + wake_up_process(local->io_thread); +} /* * insecure.c @@ -961,28 +983,41 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, /* * local_event.c */ -extern void rxrpc_process_local_events(struct rxrpc_local *); +void rxrpc_send_version_request(struct rxrpc_local *local, + struct rxrpc_host_header *hdr, + struct sk_buff *skb); /* * local_object.c */ struct rxrpc_local *rxrpc_lookup_local(struct net *, const struct sockaddr_rxrpc *); -struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); -struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); -void rxrpc_put_local(struct rxrpc_local *); -struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); -void rxrpc_unuse_local(struct rxrpc_local *); -void rxrpc_queue_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *, enum rxrpc_local_trace); +struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *, enum rxrpc_local_trace); +void rxrpc_put_local(struct rxrpc_local *, enum rxrpc_local_trace); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *, enum rxrpc_local_trace); +void rxrpc_unuse_local(struct rxrpc_local *, enum rxrpc_local_trace); +void rxrpc_destroy_local(struct rxrpc_local *local); void rxrpc_destroy_all_locals(struct rxrpc_net *); -static inline bool __rxrpc_unuse_local(struct rxrpc_local *local) +static inline bool __rxrpc_use_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - return atomic_dec_return(&local->active_users) == 0; + int r, u; + + r = refcount_read(&local->ref); + u = atomic_fetch_add_unless(&local->active_users, 1, 0); + trace_rxrpc_local(local->debug_id, why, r, u); + return u != 0; } -static inline bool __rxrpc_use_local(struct rxrpc_local *local) +static inline void rxrpc_see_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - return atomic_fetch_add_unless(&local->active_users, 1, 0) != 0; + int r, u; + + r = refcount_read(&local->ref); + u = atomic_read(&local->active_users); + trace_rxrpc_local(local->debug_id, why, r, u); } /* @@ -1009,16 +1044,17 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net) /* * output.c */ -void rxrpc_transmit_ack_packets(struct rxrpc_local *); +int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb); int 
rxrpc_send_abort_packet(struct rxrpc_call *); int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *); -void rxrpc_reject_packets(struct rxrpc_local *); +void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb); void rxrpc_send_keepalive(struct rxrpc_peer *); +void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb); /* * peer_event.c */ -void rxrpc_error_report(struct sock *); +void rxrpc_input_error(struct rxrpc_local *, struct sk_buff *); void rxrpc_peer_keepalive_worker(struct work_struct *); /* @@ -1028,14 +1064,15 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, const struct sockaddr_rxrpc *); struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); -struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t, + enum rxrpc_peer_trace); void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *, struct rxrpc_peer *); void rxrpc_destroy_all_peers(struct rxrpc_net *); -struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *); -struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *); -void rxrpc_put_peer(struct rxrpc_peer *); -void rxrpc_put_peer_locked(struct rxrpc_peer *); +struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); +struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace); +void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace); +void rxrpc_put_peer_locked(struct rxrpc_peer *, enum rxrpc_peer_trace); /* * proc.c @@ -1097,6 +1134,7 @@ extern const struct rxrpc_security rxkad; int __init rxrpc_init_security(void); const struct rxrpc_security *rxrpc_security_lookup(u8); void rxrpc_exit_security(void); +int rxrpc_init_client_call_security(struct rxrpc_call *); int rxrpc_init_client_conn_security(struct rxrpc_connection *); const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *, struct sk_buff *); @@ -1119,7 +1157,6 @@ int rxrpc_server_keyring(struct rxrpc_sock *, sockptr_t, int); * skbuff.c */ void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); -void rxrpc_packet_destructor(struct sk_buff *); void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_eaten_skb(struct sk_buff *, enum rxrpc_skb_trace); @@ -1190,23 +1227,17 @@ extern unsigned int rxrpc_debug; #define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__) -#define kproto(FMT,...) dbgprintk("### "FMT ,##__VA_ARGS__) -#define knet(FMT,...) dbgprintk("@@@ "FMT ,##__VA_ARGS__) #if defined(__KDEBUG) #define _enter(FMT,...) kenter(FMT,##__VA_ARGS__) #define _leave(FMT,...) kleave(FMT,##__VA_ARGS__) #define _debug(FMT,...) kdebug(FMT,##__VA_ARGS__) -#define _proto(FMT,...) kproto(FMT,##__VA_ARGS__) -#define _net(FMT,...) knet(FMT,##__VA_ARGS__) #elif defined(CONFIG_AF_RXRPC_DEBUG) #define RXRPC_DEBUG_KENTER 0x01 #define RXRPC_DEBUG_KLEAVE 0x02 #define RXRPC_DEBUG_KDEBUG 0x04 -#define RXRPC_DEBUG_KPROTO 0x08 -#define RXRPC_DEBUG_KNET 0x10 #define _enter(FMT,...) \ do { \ @@ -1226,24 +1257,10 @@ do { \ kdebug(FMT,##__VA_ARGS__); \ } while (0) -#define _proto(FMT,...) \ -do { \ - if (unlikely(rxrpc_debug & RXRPC_DEBUG_KPROTO)) \ - kproto(FMT,##__VA_ARGS__); \ -} while (0) - -#define _net(FMT,...) 
\ -do { \ - if (unlikely(rxrpc_debug & RXRPC_DEBUG_KNET)) \ - knet(FMT,##__VA_ARGS__); \ -} while (0) - #else #define _enter(FMT,...) no_printk("==> %s("FMT")",__func__ ,##__VA_ARGS__) #define _leave(FMT,...) no_printk("<== %s()"FMT"",__func__ ,##__VA_ARGS__) #define _debug(FMT,...) no_printk(" "FMT ,##__VA_ARGS__) -#define _proto(FMT,...) no_printk("### "FMT ,##__VA_ARGS__) -#define _net(FMT,...) no_printk("@@@ "FMT ,##__VA_ARGS__) #endif /* diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 48790ee77019..d1850863507f 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -38,7 +38,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, unsigned long user_call_ID, gfp_t gfp, unsigned int debug_id) { - const void *here = __builtin_return_address(0); struct rxrpc_call *call, *xcall; struct rxrpc_net *rxnet = rxrpc_net(sock_net(&rx->sk)); struct rb_node *parent, **pp; @@ -70,7 +69,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, head = b->peer_backlog_head; tail = READ_ONCE(b->peer_backlog_tail); if (CIRC_CNT(head, tail, size) < max) { - struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + struct rxrpc_peer *peer; + + peer = rxrpc_alloc_peer(rx->local, gfp, rxrpc_peer_new_prealloc); if (!peer) return -ENOMEM; b->peer_backlog[head] = peer; @@ -89,9 +90,6 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, b->conn_backlog[head] = conn; smp_store_release(&b->conn_backlog_head, (head + 1) & (size - 1)); - - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - refcount_read(&conn->ref), here); } /* Now it gets complicated, because calls get registered with the @@ -102,10 +100,10 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, return -ENOMEM; call->flags |= (1 << RXRPC_CALL_IS_SERVICE); call->state = RXRPC_CALL_SERVER_PREALLOC; + __set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events); - trace_rxrpc_call(call->debug_id, rxrpc_call_new_service, - refcount_read(&call->ref), - here, (const void *)user_call_ID); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), + user_call_ID, rxrpc_call_new_prealloc_service); write_lock(&rx->call_lock); @@ -126,11 +124,11 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, call->user_call_ID = user_call_ID; call->notify_rx = notify_rx; if (user_attach_call) { - rxrpc_get_call(call, rxrpc_call_got_kernel); + rxrpc_get_call(call, rxrpc_call_get_kernel_service); user_attach_call(call, user_call_ID); } - rxrpc_get_call(call, rxrpc_call_got_userid); + rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); set_bit(RXRPC_CALL_HAS_USERID, &call->flags); @@ -140,9 +138,9 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); b->call_backlog[call_head] = call; smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); @@ -190,14 +188,14 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) /* Make sure that there aren't any incoming calls in progress before we * clear the preallocation buffers. 
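[Editor's note] The prealloc backlog above is a head/tail ring: CIRC_CNT() gives the occupancy, the size is a power of two so wrap is a mask, and the producer publishes the new head with smp_store_release(). A compact userspace analogue of that indexing scheme, using C11 release/acquire atomics in place of the kernel primitives (names and sizes are illustrative):

#include <stdatomic.h>
#include <stdio.h>

#define RING_SIZE 8			/* must be a power of two */
#define CIRC_CNT(head, tail, size) (((head) - (tail)) & ((size) - 1))

static int slots[RING_SIZE];
static _Atomic unsigned int head, tail;

static int ring_produce(int value)
{
	unsigned int h = atomic_load_explicit(&head, memory_order_relaxed);
	unsigned int t = atomic_load_explicit(&tail, memory_order_acquire);

	if (CIRC_CNT(h, t, RING_SIZE) >= RING_SIZE - 1)
		return -1;		/* ring full */
	slots[h & (RING_SIZE - 1)] = value;
	atomic_store_explicit(&head, h + 1, memory_order_release);
	return 0;
}

static int ring_consume(int *value)
{
	unsigned int t = atomic_load_explicit(&tail, memory_order_relaxed);
	unsigned int h = atomic_load_explicit(&head, memory_order_acquire);

	if (CIRC_CNT(h, t, RING_SIZE) == 0)
		return -1;		/* ring empty */
	*value = slots[t & (RING_SIZE - 1)];
	atomic_store_explicit(&tail, t + 1, memory_order_release);
	return 0;
}

int main(void)
{
	int v;

	ring_produce(42);
	if (ring_consume(&v) == 0)
		printf("got %d\n", v);
	return 0;
}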
*/ - spin_lock_bh(&rx->incoming_lock); - spin_unlock_bh(&rx->incoming_lock); + spin_lock(&rx->incoming_lock); + spin_unlock(&rx->incoming_lock); head = b->peer_backlog_head; tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; - rxrpc_put_local(peer->local); + rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn); kfree(peer); tail = (tail + 1) & (size - 1); } @@ -230,7 +228,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } rxrpc_call_completed(call); rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_discard_prealloc); tail = (tail + 1) & (size - 1); } @@ -238,21 +236,6 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) } /* - * Ping the other end to fill our RTT cache and to retrieve the rwind - * and MTU parameters. - */ -static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - ktime_t now = skb->tstamp; - - if (call->peer->rtt_count < 3 || - ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) - rxrpc_send_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, - rxrpc_propose_ack_ping_for_params); -} - -/* * Allocate a new incoming call from the prealloc pool, along with a connection * and a peer as necessary. */ @@ -261,6 +244,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, struct rxrpc_peer *peer, struct rxrpc_connection *conn, const struct rxrpc_security *sec, + struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { struct rxrpc_backlog *b = rx->backlog; @@ -286,12 +270,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, return NULL; if (!conn) { - if (peer && !rxrpc_get_peer_maybe(peer)) + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_service_conn)) peer = NULL; if (!peer) { peer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(&peer->srx, skb) < 0) - return NULL; + peer->srx = *peer_srx; b->peer_backlog[peer_tail] = NULL; smp_store_release(&b->peer_backlog_tail, (peer_tail + 1) & @@ -305,12 +288,13 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, b->conn_backlog[conn_tail] = NULL; smp_store_release(&b->conn_backlog_tail, (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - conn->params.local = rxrpc_get_local(local); - conn->params.peer = peer; - rxrpc_see_connection(conn); + conn->local = rxrpc_get_local(local, rxrpc_local_get_prealloc_conn); + conn->peer = peer; + rxrpc_see_connection(conn, rxrpc_conn_see_new_service_conn); rxrpc_new_incoming_connection(rx, conn, sec, skb); } else { - rxrpc_get_connection(conn); + rxrpc_get_connection(conn, rxrpc_conn_get_service_conn); + atomic_inc(&conn->active); } /* And now we can allocate and set up a new call */ @@ -319,43 +303,69 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, smp_store_release(&b->call_backlog_tail, (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_accept); + call->local = rxrpc_get_local(conn->local, rxrpc_local_get_call); call->conn = conn; call->security = conn->security; call->security_ix = conn->security_ix; - call->peer = rxrpc_get_peer(conn->params.peer); + call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_accept); + call->dest_srx = peer->srx; call->cong_ssthresh = call->peer->cong_ssthresh; call->tx_last_sent = ktime_get_real(); return call; } /* - * Set up a new incoming call. 
Called in BH context with the RCU read lock - * held. + * Set up a new incoming call. Called from the I/O thread. * * If this is for a kernel service, when we allocate the call, it will have * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the * retainer ref obtained from the backlog buffer. Prealloc calls for userspace - * services only have the ref from the backlog buffer. We want to pass this - * ref to non-BH context to dispose of. + * services only have the ref from the backlog buffer. * * If we want to report an error, we mark the skb with the packet type and - * abort code and return NULL. - * - * The call is returned with the user access mutex held. + * abort code and return false. */ -struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, - struct rxrpc_sock *rx, - struct sk_buff *skb) +bool rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_peer *peer, + struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); const struct rxrpc_security *sec = NULL; - struct rxrpc_connection *conn; - struct rxrpc_peer *peer = NULL; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_call *call = NULL; + struct rxrpc_sock *rx; _enter(""); + /* Don't set up a call for anything other than the first DATA packet. */ + if (sp->hdr.seq != 1 || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + return true; /* Just discard */ + + rcu_read_lock(); + + /* Weed out packets to services we're not offering. Packets that would + * begin a call are explicitly rejected and the rest are just + * discarded. + */ + rx = rcu_dereference(local->service); + if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && + sp->hdr.serviceId != rx->second_service) + ) { + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && + sp->hdr.seq == 1) + goto unsupported_service; + goto discard; + } + + if (!conn) { + sec = rxrpc_get_incoming_security(rx, skb); + if (!sec) + goto reject; + } + spin_lock(&rx->incoming_lock); if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED || rx->sk.sk_state == RXRPC_CLOSE) { @@ -366,20 +376,8 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, goto no_call; } - /* The peer, connection and call may all have sprung into existence due - * to a duplicate packet being handled on another CPU in parallel, so - * we have to recheck the routing. However, we're now holding - * rx->incoming_lock, so the values should remain stable. 
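[Editor's note] With call setup moved to the I/O thread, the code above triages an unrouted packet before anything is allocated: only the first DATA packet of a call may create one, packets for services not offered are rejected, and everything else is quietly discarded. A self-contained sketch of that decision, with illustrative constants and field names:

#include <stdio.h>

enum triage { TRIAGE_NEW_CALL, TRIAGE_REJECT, TRIAGE_DISCARD };

struct pkt {
	unsigned char type;		/* 1 = DATA in this sketch */
	unsigned int seq;
	unsigned short service_id;
};

#define PKT_DATA 1

static enum triage triage_packet(const struct pkt *p,
				 unsigned short service,
				 unsigned short second_service)
{
	/* Anything other than the first DATA packet can't start a call. */
	if (p->type != PKT_DATA || p->seq != 1)
		return TRIAGE_DISCARD;

	/* Would start a call, but for a service we don't offer. */
	if (p->service_id != service && p->service_id != second_service)
		return TRIAGE_REJECT;

	return TRIAGE_NEW_CALL;
}

int main(void)
{
	struct pkt p = { .type = PKT_DATA, .seq = 1, .service_id = 52 };

	printf("%d\n", triage_packet(&p, 52, 0));
	return 0;
}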
- */ - conn = rxrpc_find_connection_rcu(local, skb, &peer); - - if (!conn) { - sec = rxrpc_get_incoming_security(rx, skb); - if (!sec) - goto no_call; - } - - call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, skb); + call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, peer_srx, + skb); if (!call) { skb->mark = RXRPC_SKB_MARK_REJECT_BUSY; goto no_call; @@ -396,50 +394,41 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, rx->notify_new_call(&rx->sk, call, call->user_call_ID); spin_lock(&conn->state_lock); - switch (conn->state) { - case RXRPC_CONN_SERVICE_UNSECURED: + if (conn->state == RXRPC_CONN_SERVICE_UNSECURED) { conn->state = RXRPC_CONN_SERVICE_CHALLENGING; set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); - rxrpc_queue_conn(call->conn); - break; - - case RXRPC_CONN_SERVICE: - write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - call->state = RXRPC_CALL_SERVER_RECV_REQUEST; - write_unlock(&call->state_lock); - break; - - case RXRPC_CONN_REMOTELY_ABORTED: - rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - conn->abort_code, conn->error); - break; - case RXRPC_CONN_LOCALLY_ABORTED: - rxrpc_abort_call("CON", call, sp->hdr.seq, - conn->abort_code, conn->error); - break; - default: - BUG(); + rxrpc_queue_conn(call->conn, rxrpc_conn_queue_challenge); } spin_unlock(&conn->state_lock); - spin_unlock(&rx->incoming_lock); - rxrpc_send_ping(call, skb); + spin_unlock(&rx->incoming_lock); + rcu_read_unlock(); - /* We have to discard the prealloc queue's ref here and rely on a - * combination of the RCU read lock and refs held either by the socket - * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel - * service to prevent the call from being deallocated too early. - */ - rxrpc_put_call(call, rxrpc_call_put); + if (hlist_unhashed(&call->error_link)) { + spin_lock(&call->peer->lock); + hlist_add_head(&call->error_link, &call->peer->error_targets); + spin_unlock(&call->peer->lock); + } _leave(" = %p{%d}", call, call->debug_id); - return call; - + rxrpc_input_call_event(call, skb); + rxrpc_put_call(call, rxrpc_call_put_input); + return true; + +unsupported_service: + trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->priority = RX_INVALID_OPERATION; + goto reject; no_call: spin_unlock(&rx->incoming_lock); - _leave(" = NULL [%u]", skb->mark); - return NULL; +reject: + rcu_read_unlock(); + _leave(" = f [%u]", skb->mark); + return false; +discard: + rcu_read_unlock(); + return true; } /* diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1e21a708390e..b2cf448fb02c 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -69,21 +69,15 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, rxrpc_serial_t serial, enum rxrpc_propose_ack_trace why) { - struct rxrpc_local *local = call->conn->params.local; struct rxrpc_txbuf *txb; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return; - if (ack_reason == RXRPC_ACK_DELAY && - test_and_set_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags)) { - trace_rxrpc_drop_ack(call, why, ack_reason, serial, false); - return; - } rxrpc_inc_stat(call->rxnet, stat_tx_acks[ack_reason]); txb = rxrpc_alloc_txbuf(call, RXRPC_PACKET_TYPE_ACK, - in_softirq() ? GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS); + rcu_read_lock_held() ? 
GFP_ATOMIC | __GFP_NOWARN : GFP_NOFS); if (!txb) { kleave(" = -ENOMEM"); return; @@ -101,22 +95,9 @@ void rxrpc_send_ACK(struct rxrpc_call *call, u8 ack_reason, txb->ack.reason = ack_reason; txb->ack.nAcks = 0; - if (!rxrpc_try_get_call(call, rxrpc_call_got)) { - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_nomem); - return; - } - - spin_lock_bh(&local->ack_tx_lock); - list_add_tail(&txb->tx_link, &local->ack_tx_queue); - spin_unlock_bh(&local->ack_tx_lock); trace_rxrpc_send_ack(call, why, ack_reason, serial); - - if (in_task()) { - rxrpc_transmit_ack_packets(call->peer->local); - } else { - rxrpc_get_local(local); - rxrpc_queue_local(local); - } + rxrpc_send_ack_packet(call, txb); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); } /* @@ -130,11 +111,10 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) /* * Perform retransmission of NAK'd and unack'd packets. */ -static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) +void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) { struct rxrpc_ackpacket *ack = NULL; struct rxrpc_txbuf *txb; - struct sk_buff *ack_skb = NULL; unsigned long resend_at; rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted); ktime_t now, max_age, oldest, ack_ts; @@ -148,32 +128,21 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j)); oldest = now; - /* See if there's an ACK saved with a soft-ACK table in it. */ - if (call->acks_soft_tbl) { - spin_lock_bh(&call->acks_ack_lock); - ack_skb = call->acks_soft_tbl; - if (ack_skb) { - rxrpc_get_skb(ack_skb, rxrpc_skb_ack); - ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); - } - spin_unlock_bh(&call->acks_ack_lock); - } - if (list_empty(&call->tx_buffer)) goto no_resend; - spin_lock(&call->tx_lock); - if (list_empty(&call->tx_buffer)) goto no_further_resend; - trace_rxrpc_resend(call); + trace_rxrpc_resend(call, ack_skb); txb = list_first_entry(&call->tx_buffer, struct rxrpc_txbuf, call_link); /* Scan the soft ACK table without dropping the lock and resend any * explicitly NAK'd packets. */ - if (ack) { + if (ack_skb) { + ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); + for (i = 0; i < ack->nAcks; i++) { rxrpc_seq_t seq; @@ -197,8 +166,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_see_txbuf(txb, rxrpc_txbuf_see_unacked); if (list_empty(&txb->tx_link)) { - rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans); - rxrpc_get_call(call, rxrpc_call_got_tx); list_add_tail(&txb->tx_link, &retrans_queue); set_bit(RXRPC_TXBUF_RESENT, &txb->flags); } @@ -242,7 +209,6 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) do_resend: unacked = true; if (list_empty(&txb->tx_link)) { - rxrpc_get_txbuf(txb, rxrpc_txbuf_get_retrans); list_add_tail(&txb->tx_link, &retrans_queue); set_bit(RXRPC_TXBUF_RESENT, &txb->flags); rxrpc_inc_stat(call->rxnet, stat_tx_data_retrans); @@ -250,10 +216,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } no_further_resend: - spin_unlock(&call->tx_lock); no_resend: - rxrpc_free_skb(ack_skb, rxrpc_skb_freed); - resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, !list_empty(&retrans_queue)); @@ -267,7 +230,7 @@ no_resend: * retransmitting data. 
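[Editor's note] The resend path above no longer stashes a soft-ACK skb on the call; the ACK is passed straight in and its per-packet ACK/NACK table is scanned to pick explicitly NAK'd packets for retransmission. A simplified userspace sketch of that scan, with a plain array standing in for the txbuf list:

#include <stdio.h>

#define ACK_TYPE_ACK  1
#define ACK_TYPE_NACK 0

struct soft_acks {
	unsigned int first_seq;		/* seq covered by acks[0] */
	unsigned char nr;		/* number of entries */
	unsigned char acks[16];		/* ACK/NACK per packet */
};

/* Collect every buffered packet that the table explicitly NAKs. */
static int pick_resends(const struct soft_acks *sa,
			const unsigned int *buffered, int nr_buffered,
			unsigned int *resend, int max)
{
	int n = 0;

	for (int i = 0; i < nr_buffered && n < max; i++) {
		unsigned int seq = buffered[i];

		if (seq < sa->first_seq || seq >= sa->first_seq + sa->nr)
			continue;	/* not covered by this ACK */
		if (sa->acks[seq - sa->first_seq] == ACK_TYPE_NACK)
			resend[n++] = seq;
	}
	return n;
}

int main(void)
{
	struct soft_acks sa = { .first_seq = 5, .nr = 3,
				.acks = { ACK_TYPE_ACK, ACK_TYPE_NACK,
					  ACK_TYPE_ACK } };
	unsigned int buffered[] = { 5, 6, 7 }, resend[4];
	int n = pick_resends(&sa, buffered, 3, resend, 4);

	for (int i = 0; i < n; i++)
		printf("resend %u\n", resend[i]);
	return 0;
}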
*/ if (list_empty(&retrans_queue)) { - rxrpc_reduce_call_timer(call, resend_at, now_j, + rxrpc_reduce_call_timer(call, resend_at, jiffies, rxrpc_timer_set_for_resend); ack_ts = ktime_sub(now, call->acks_latest_ts); if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) @@ -277,76 +240,134 @@ no_resend: goto out; } + /* Retransmit the queue */ while ((txb = list_first_entry_or_null(&retrans_queue, struct rxrpc_txbuf, tx_link))) { list_del_init(&txb->tx_link); - rxrpc_send_data_packet(call, txb); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans); - - trace_rxrpc_retransmit(call, txb->seq, - ktime_to_ns(ktime_sub(txb->last_sent, - max_age))); + rxrpc_transmit_one(call, txb); } out: _leave(""); } +static bool rxrpc_tx_window_has_space(struct rxrpc_call *call) +{ + unsigned int winsize = min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra); + rxrpc_seq_t window = call->acks_hard_ack, wtop = window + winsize; + rxrpc_seq_t tx_top = call->tx_top; + int space; + + space = wtop - tx_top; + return space > 0; +} + +/* + * Decant some if the sendmsg prepared queue into the transmission buffer. + */ +static void rxrpc_decant_prepared_tx(struct rxrpc_call *call) +{ + struct rxrpc_txbuf *txb; + + if (rxrpc_is_client_call(call) && + !test_bit(RXRPC_CALL_EXPOSED, &call->flags)) + rxrpc_expose_client_call(call); + + while ((txb = list_first_entry_or_null(&call->tx_sendmsg, + struct rxrpc_txbuf, call_link))) { + spin_lock(&call->tx_lock); + list_del(&txb->call_link); + spin_unlock(&call->tx_lock); + + call->tx_top = txb->seq; + list_add_tail(&txb->call_link, &call->tx_buffer); + + rxrpc_transmit_one(call, txb); + + if (!rxrpc_tx_window_has_space(call)) + break; + } +} + +static void rxrpc_transmit_some_data(struct rxrpc_call *call) +{ + switch (call->state) { + case RXRPC_CALL_SERVER_ACK_REQUEST: + if (list_empty(&call->tx_sendmsg)) + return; + fallthrough; + + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + if (!rxrpc_tx_window_has_space(call)) + return; + if (list_empty(&call->tx_sendmsg)) { + rxrpc_inc_stat(call->rxnet, stat_tx_data_underflow); + return; + } + rxrpc_decant_prepared_tx(call); + break; + default: + return; + } +} + +/* + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. + */ +static void rxrpc_send_initial_ping(struct rxrpc_call *call) +{ + if (call->peer->rtt_count < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_params); +} + /* * Handle retransmission and deferred ACK/abort generation. 
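[Editor's note] The new helpers above split transmission into two queues: sendmsg prepares packets onto one list, and they are only decanted into the in-flight buffer (and sent) while the window test passes; because sequence numbers wrap, the space check subtracts rather than compares. A small runnable sketch of that check and the decant loop, with simplified data structures:

#include <stdio.h>

struct txq { unsigned int seqs[64]; int n; };

static int window_has_space(unsigned int hard_ack, unsigned int tx_top,
			    unsigned int winsize)
{
	unsigned int wtop = hard_ack + winsize;	/* wraps naturally */

	return (int)(wtop - tx_top) > 0;
}

/* Move packets from the prepared queue to the transmit queue while
 * window space remains; return how many were "transmitted".
 */
static int decant(struct txq *prepared, struct txq *in_flight,
		  unsigned int hard_ack, unsigned int *tx_top,
		  unsigned int winsize)
{
	int sent = 0;

	while (prepared->n > 0 &&
	       window_has_space(hard_ack, *tx_top, winsize)) {
		unsigned int seq = prepared->seqs[--prepared->n];

		*tx_top = seq;
		in_flight->seqs[in_flight->n++] = seq;
		sent++;
	}
	return sent;
}

int main(void)
{
	struct txq prepared = { .seqs = { 3, 2, 1 }, .n = 3 };
	struct txq in_flight = { .n = 0 };
	unsigned int tx_top = 0;

	/* Window of 2 lets only the first two prepared packets go out. */
	printf("sent %d\n", decant(&prepared, &in_flight, 0, &tx_top, 2));
	return 0;
}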
*/ -void rxrpc_process_call(struct work_struct *work) +void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) { - struct rxrpc_call *call = - container_of(work, struct rxrpc_call, processor); unsigned long now, next, t; - unsigned int iterations = 0; rxrpc_serial_t ackr_serial; + bool resend = false, expired = false; - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_input); //printk("\n--------------------\n"); _enter("{%d,%s,%lx}", call->debug_id, rxrpc_call_states[call->state], call->events); -recheck_state: - /* Limit the number of times we do this before returning to the manager */ - iterations++; - if (iterations > 5) - goto requeue; - - if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_send_abort_packet(call); - goto recheck_state; - } - - if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) - rxrpc_shrink_call_tx_buffer(call); + if (call->state == RXRPC_CALL_COMPLETE) + goto out; - if (call->state == RXRPC_CALL_COMPLETE) { - rxrpc_delete_call_timer(call); - goto out_put; - } + if (skb && skb->mark == RXRPC_SKB_MARK_ERROR) + goto out; - /* Work out if any timeouts tripped */ + /* If we see our async-event poke, check for timeout trippage. */ now = jiffies; t = READ_ONCE(call->expect_rx_by); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_normal, now); - set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + expired = true; } t = READ_ONCE(call->expect_req_by); if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST && time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now); - set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + expired = true; } t = READ_ONCE(call->expect_term_by); if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_hard, now); - set_bit(RXRPC_CALL_EV_EXPIRED, &call->events); + expired = true; } t = READ_ONCE(call->delay_ack_at); @@ -385,11 +406,26 @@ recheck_state: if (time_after_eq(now, t)) { trace_rxrpc_timer(call, rxrpc_timer_exp_resend, now); cmpxchg(&call->resend_at, t, now + MAX_JIFFY_OFFSET); - set_bit(RXRPC_CALL_EV_RESEND, &call->events); + resend = true; } + if (skb) + rxrpc_input_call_packet(call, skb); + + rxrpc_transmit_some_data(call); + + if (skb) { + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) + rxrpc_congestion_degrade(call); + } + + if (test_and_clear_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events)) + rxrpc_send_initial_ping(call); + /* Process events */ - if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) { + if (expired) { if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) && (int)call->conn->hi_serial - (int)call->rx_serial > 0) { trace_rxrpc_call_reset(call); @@ -397,52 +433,50 @@ recheck_state: } else { rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); } - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - goto recheck_state; + rxrpc_send_abort_packet(call); + goto out; } - if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) { - call->acks_lost_top = call->tx_top; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events)) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_lost_ack); - } - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events) && - call->state != RXRPC_CALL_CLIENT_RECV_REPLY) { - rxrpc_resend(call, now); - goto recheck_state; - } + if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY) + rxrpc_resend(call, NULL); + + if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) + rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, + 
rxrpc_propose_ack_rx_idle); + + if (atomic_read(&call->ackr_nr_unacked) > 2) + rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, + rxrpc_propose_ack_input_data); /* Make sure the timer is restarted */ - next = call->expect_rx_by; + if (call->state != RXRPC_CALL_COMPLETE) { + next = call->expect_rx_by; #define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; } - set(call->expect_req_by); - set(call->expect_term_by); - set(call->delay_ack_at); - set(call->ack_lost_at); - set(call->resend_at); - set(call->keepalive_at); - set(call->ping_at); - - now = jiffies; - if (time_after_eq(now, next)) - goto recheck_state; + set(call->expect_req_by); + set(call->expect_term_by); + set(call->delay_ack_at); + set(call->ack_lost_at); + set(call->resend_at); + set(call->keepalive_at); + set(call->ping_at); - rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); + now = jiffies; + if (time_after_eq(now, next)) + rxrpc_poke_call(call, rxrpc_call_poke_timer_now); - /* other events may have been raised since we started checking */ - if (call->events && call->state < RXRPC_CALL_COMPLETE) - goto requeue; + rxrpc_reduce_call_timer(call, next, now, rxrpc_timer_restart); + } -out_put: - rxrpc_put_call(call, rxrpc_call_put); out: + if (call->state == RXRPC_CALL_COMPLETE) + del_timer_sync(&call->timer); + if (call->acks_hard_ack != call->tx_bottom) + rxrpc_shrink_call_tx_buffer(call); _leave(""); - return; - -requeue: - __rxrpc_queue_call(call); - goto out; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 1befe22cd301..be5eb8cdf549 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -45,6 +45,24 @@ static struct semaphore rxrpc_call_limiter = static struct semaphore rxrpc_kernel_call_limiter = __SEMAPHORE_INITIALIZER(rxrpc_kernel_call_limiter, 1000); +void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) +{ + struct rxrpc_local *local = call->local; + bool busy; + + if (call->state < RXRPC_CALL_COMPLETE) { + spin_lock_bh(&local->lock); + busy = !list_empty(&call->attend_link); + trace_rxrpc_poke_call(call, busy, what); + if (!busy) { + rxrpc_get_call(call, rxrpc_call_get_poke); + list_add_tail(&call->attend_link, &local->call_attend_q); + } + spin_unlock_bh(&local->lock); + rxrpc_wake_up_io_thread(local); + } +} + static void rxrpc_call_timer_expired(struct timer_list *t) { struct rxrpc_call *call = from_timer(call, t, timer); @@ -53,9 +71,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t) if (call->state < RXRPC_CALL_COMPLETE) { trace_rxrpc_timer_expired(call, jiffies); - __rxrpc_queue_call(call); - } else { - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_poke_call(call, rxrpc_call_poke_timer); } } @@ -64,21 +80,14 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call, unsigned long now, enum rxrpc_timer_trace why) { - if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) { - trace_rxrpc_timer(call, why, now); - if (timer_reduce(&call->timer, expire_at)) - rxrpc_put_call(call, rxrpc_call_put_notimer); - } -} - -void rxrpc_delete_call_timer(struct rxrpc_call *call) -{ - if (del_timer_sync(&call->timer)) - rxrpc_put_call(call, rxrpc_call_put_timer); + trace_rxrpc_timer(call, why, now); + timer_reduce(&call->timer, expire_at); } static struct lock_class_key rxrpc_call_user_mutex_lock_class_key; +static void rxrpc_destroy_call(struct work_struct *); + /* * find an extant server call * - called in process context with IRQs enabled @@ -110,7 +119,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, return NULL; 
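[Editor's note] rxrpc_poke_call() above replaces the old work-queue requeueing: a call is placed on the local endpoint's attention queue at most once (an empty list_head doubles as the "not queued" test), the queue holds its own reference, and the I/O thread is woken to service it. A userspace sketch of the same pattern, with locking and the wakeup reduced to stand-ins:

#include <stdio.h>
#include <stdbool.h>

struct call {
	int refs;
	bool queued;			/* stands in for the list_empty() test */
	struct call *next;		/* attention queue linkage */
};

struct local_endpoint {
	struct call *attend_head, *attend_tail;
};

static void wake_io_thread(struct local_endpoint *local)
{
	(void)local;
	printf("io thread woken\n");	/* wake_up_process() in the kernel */
}

static void poke_call(struct local_endpoint *local, struct call *call)
{
	if (!call->queued) {
		call->refs++;		/* the queue holds its own reference */
		call->queued = true;
		call->next = NULL;
		if (local->attend_tail)
			local->attend_tail->next = call;
		else
			local->attend_head = call;
		local->attend_tail = call;
	}
	wake_io_thread(local);		/* harmless if already queued */
}

int main(void)
{
	struct local_endpoint local = { 0 };
	struct call c = { .refs = 1 };

	poke_call(&local, &c);
	poke_call(&local, &c);		/* second poke does not re-queue */
	printf("refs=%d\n", c.refs);	/* 2: one caller ref, one queue ref */
	return 0;
}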
found_extant_call: - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_get_sendmsg); read_unlock(&rx->call_lock); _leave(" = %p [%d]", call, refcount_read(&call->ref)); return call; @@ -139,20 +148,20 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, &rxrpc_call_user_mutex_lock_class_key); timer_setup(&call->timer, rxrpc_call_timer_expired, 0); - INIT_WORK(&call->processor, &rxrpc_process_call); + INIT_WORK(&call->destroyer, rxrpc_destroy_call); INIT_LIST_HEAD(&call->link); INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); INIT_LIST_HEAD(&call->recvmsg_link); INIT_LIST_HEAD(&call->sock_link); + INIT_LIST_HEAD(&call->attend_link); + INIT_LIST_HEAD(&call->tx_sendmsg); INIT_LIST_HEAD(&call->tx_buffer); skb_queue_head_init(&call->recvmsg_queue); skb_queue_head_init(&call->rx_oos_queue); init_waitqueue_head(&call->waitq); spin_lock_init(&call->notify_lock); spin_lock_init(&call->tx_lock); - spin_lock_init(&call->input_lock); - spin_lock_init(&call->acks_ack_lock); rwlock_init(&call->state_lock); refcount_set(&call->ref, 1); call->debug_id = debug_id; @@ -185,22 +194,45 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, */ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, struct sockaddr_rxrpc *srx, + struct rxrpc_conn_parameters *cp, + struct rxrpc_call_params *p, gfp_t gfp, unsigned int debug_id) { struct rxrpc_call *call; ktime_t now; + int ret; _enter(""); call = rxrpc_alloc_call(rx, gfp, debug_id); if (!call) return ERR_PTR(-ENOMEM); - call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - call->service_id = srx->srx_service; now = ktime_get_real(); - call->acks_latest_ts = now; - call->cong_tstamp = now; + call->acks_latest_ts = now; + call->cong_tstamp = now; + call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; + call->dest_srx = *srx; + call->interruptibility = p->interruptibility; + call->tx_total_len = p->tx_total_len; + call->key = key_get(cp->key); + call->local = rxrpc_get_local(cp->local, rxrpc_local_get_call); + if (p->kernel) + __set_bit(RXRPC_CALL_KERNEL, &call->flags); + if (cp->upgrade) + __set_bit(RXRPC_CALL_UPGRADE, &call->flags); + if (cp->exclusive) + __set_bit(RXRPC_CALL_EXCLUSIVE, &call->flags); + + ret = rxrpc_init_client_call_security(call); + if (ret < 0) { + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret); + rxrpc_put_call(call, rxrpc_call_put_discard_error); + return ERR_PTR(ret); + } + + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), + p->user_call_ID, rxrpc_call_new_client); _leave(" = %p", call); return call; @@ -218,6 +250,7 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call) call->ack_lost_at = j; call->resend_at = j; call->ping_at = j; + call->keepalive_at = j; call->expect_rx_by = j; call->expect_req_by = j; call->expect_term_by = j; @@ -270,7 +303,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, struct rxrpc_net *rxnet; struct semaphore *limiter; struct rb_node *parent, **pp; - const void *here = __builtin_return_address(0); int ret; _enter("%p,%lx", rx, p->user_call_ID); @@ -281,7 +313,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return ERR_PTR(-ERESTARTSYS); } - call = rxrpc_alloc_client_call(rx, srx, gfp, debug_id); + call = rxrpc_alloc_client_call(rx, srx, cp, p, gfp, debug_id); if (IS_ERR(call)) { release_sock(&rx->sk); up(limiter); @@ -289,14 +321,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, return call; } - call->interruptibility = 
p->interruptibility; - call->tx_total_len = p->tx_total_len; - trace_rxrpc_call(call->debug_id, rxrpc_call_new_client, - refcount_read(&call->ref), - here, (const void *)p->user_call_ID); - if (p->kernel) - __set_bit(RXRPC_CALL_KERNEL, &call->flags); - /* We need to protect a partially set up call against the user as we * will be acting outside the socket lock. */ @@ -322,7 +346,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, rcu_assign_pointer(call->socket, rx); call->user_call_ID = p->user_call_ID; __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); - rxrpc_get_call(call, rxrpc_call_got_userid); + rxrpc_get_call(call, rxrpc_call_get_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); list_add(&call->sock_link, &rx->sock_calls); @@ -330,9 +354,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, write_unlock(&rx->call_lock); rxnet = call->rxnet; - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); list_add_tail_rcu(&call->link, &rxnet->calls); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); /* From this point on, the call is protected by its own lock. */ release_sock(&rx->sk); @@ -344,13 +368,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, if (ret < 0) goto error_attached_to_socket; - trace_rxrpc_call(call->debug_id, rxrpc_call_connected, - refcount_read(&call->ref), here, NULL); + rxrpc_see_call(call, rxrpc_call_see_connected); rxrpc_start_call_timer(call); - _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); - _leave(" = %p [new]", call); return call; @@ -364,11 +385,11 @@ error_dup_user_ID: release_sock(&rx->sk); __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, -EEXIST); - trace_rxrpc_call(call->debug_id, rxrpc_call_error, - refcount_read(&call->ref), here, ERR_PTR(-EEXIST)); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0, + rxrpc_call_see_userid_exists); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_userid_exists); _leave(" = -EEXIST"); return ERR_PTR(-EEXIST); @@ -378,8 +399,8 @@ error_dup_user_ID: * leave the error to recvmsg() to deal with. 
*/ error_attached_to_socket: - trace_rxrpc_call(call->debug_id, rxrpc_call_error, - refcount_read(&call->ref), here, ERR_PTR(ret)); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret, + rxrpc_call_see_connect_failed); set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, RX_CALL_DEAD, ret); @@ -403,11 +424,34 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, rcu_assign_pointer(call->socket, rx); call->call_id = sp->hdr.callNumber; - call->service_id = sp->hdr.serviceId; + call->dest_srx.srx_service = sp->hdr.serviceId; call->cid = sp->hdr.cid; call->state = RXRPC_CALL_SERVER_SECURING; call->cong_tstamp = skb->tstamp; + spin_lock(&conn->state_lock); + + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + case RXRPC_CONN_SERVICE_CHALLENGING: + call->state = RXRPC_CALL_SERVER_SECURING; + break; + case RXRPC_CONN_SERVICE: + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + break; + + case RXRPC_CONN_REMOTELY_ABORTED: + __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->abort_code, conn->error); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + __rxrpc_abort_call("CON", call, 1, + conn->abort_code, conn->error); + break; + default: + BUG(); + } + /* Set the channel for this call. We don't get channel_lock as we're * only defending against the data_ready handler (which we're called * from) and the RESPONSE packet parser (which is only really @@ -418,86 +462,48 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, conn->channels[chan].call_counter = call->call_id; conn->channels[chan].call_id = call->call_id; rcu_assign_pointer(conn->channels[chan].call, call); + spin_unlock(&conn->state_lock); - spin_lock(&conn->params.peer->lock); - hlist_add_head_rcu(&call->error_link, &conn->params.peer->error_targets); - spin_unlock(&conn->params.peer->lock); - - _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); + spin_lock(&conn->peer->lock); + hlist_add_head(&call->error_link, &conn->peer->error_targets); + spin_unlock(&conn->peer->lock); rxrpc_start_call_timer(call); _leave(""); } /* - * Queue a call's work processor, getting a ref to pass to the work queue. - */ -bool rxrpc_queue_call(struct rxrpc_call *call) -{ - const void *here = __builtin_return_address(0); - int n; - - if (!__refcount_inc_not_zero(&call->ref, &n)) - return false; - if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call->debug_id, rxrpc_call_queued, n + 1, - here, NULL); - else - rxrpc_put_call(call, rxrpc_call_put_noqueue); - return true; -} - -/* - * Queue a call's work processor, passing the callers ref to the work queue. - */ -bool __rxrpc_queue_call(struct rxrpc_call *call) -{ - const void *here = __builtin_return_address(0); - int n = refcount_read(&call->ref); - ASSERTCMP(n, >=, 1); - if (rxrpc_queue_work(&call->processor)) - trace_rxrpc_call(call->debug_id, rxrpc_call_queued_ref, n, - here, NULL); - else - rxrpc_put_call(call, rxrpc_call_put_noqueue); - return true; -} - -/* * Note the re-emergence of a call. 
*/ -void rxrpc_see_call(struct rxrpc_call *call) +void rxrpc_see_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { - const void *here = __builtin_return_address(0); if (call) { - int n = refcount_read(&call->ref); + int r = refcount_read(&call->ref); - trace_rxrpc_call(call->debug_id, rxrpc_call_seen, n, - here, NULL); + trace_rxrpc_call(call->debug_id, r, 0, why); } } -bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +struct rxrpc_call *rxrpc_try_get_call(struct rxrpc_call *call, + enum rxrpc_call_trace why) { - const void *here = __builtin_return_address(0); - int n; + int r; - if (!__refcount_inc_not_zero(&call->ref, &n)) - return false; - trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); - return true; + if (!call || !__refcount_inc_not_zero(&call->ref, &r)) + return NULL; + trace_rxrpc_call(call->debug_id, r + 1, 0, why); + return call; } /* * Note the addition of a ref on a call. */ -void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { - const void *here = __builtin_return_address(0); - int n; + int r; - __refcount_inc(&call->ref, &n); - trace_rxrpc_call(call->debug_id, op, n + 1, here, NULL); + __refcount_inc(&call->ref, &r); + trace_rxrpc_call(call->debug_id, r + 1, 0, why); } /* @@ -514,15 +520,13 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call) */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) { - const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; _enter("{%d,%d}", call->debug_id, refcount_read(&call->ref)); - trace_rxrpc_call(call->debug_id, rxrpc_call_release, - refcount_read(&call->ref), - here, (const void *)call->flags); + trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), + call->flags, rxrpc_call_see_release); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); @@ -530,10 +534,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) BUG(); rxrpc_put_call_slot(call); - rxrpc_delete_call_timer(call); + del_timer_sync(&call->timer); /* Make sure we don't get any more notifications */ - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", @@ -546,16 +550,16 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) call->recvmsg_link.next = NULL; call->recvmsg_link.prev = NULL; - write_unlock_bh(&rx->recvmsg_lock); + write_unlock(&rx->recvmsg_lock); if (put) - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_unnotify); write_lock(&rx->call_lock); if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - rxrpc_put_call(call, rxrpc_call_put_userid); + rxrpc_put_call(call, rxrpc_call_put_userid_exists); } list_del(&call->sock_link); @@ -584,17 +588,17 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) struct rxrpc_call, accept_link); list_del(&call->accept_link); rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_release_sock_tba); } while (!list_empty(&rx->sock_calls)) { call = list_entry(rx->sock_calls.next, struct rxrpc_call, sock_link); - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_get_release_sock); rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, 
-ECONNRESET); rxrpc_send_abort_packet(call); rxrpc_release_call(rx, call); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_release_sock); } _leave(""); @@ -603,26 +607,24 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) /* * release a call */ -void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why) { struct rxrpc_net *rxnet = call->rxnet; - const void *here = __builtin_return_address(0); unsigned int debug_id = call->debug_id; bool dead; - int n; + int r; ASSERT(call != NULL); - dead = __refcount_dec_and_test(&call->ref, &n); - trace_rxrpc_call(debug_id, op, n, here, NULL); + dead = __refcount_dec_and_test(&call->ref, &r); + trace_rxrpc_call(debug_id, r - 1, 0, why); if (dead) { - _debug("call %d dead", call->debug_id); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); if (!list_empty(&call->link)) { - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); list_del_init(&call->link); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); } rxrpc_cleanup_call(call); @@ -630,36 +632,45 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) } /* - * Final call destruction - but must be done in process context. + * Free up the call under RCU. */ -static void rxrpc_destroy_call(struct work_struct *work) +static void rxrpc_rcu_free_call(struct rcu_head *rcu) { - struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - struct rxrpc_net *rxnet = call->rxnet; - - rxrpc_delete_call_timer(call); + struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + struct rxrpc_net *rxnet = READ_ONCE(call->rxnet); - rxrpc_put_connection(call->conn); - rxrpc_put_peer(call->peer); kmem_cache_free(rxrpc_call_jar, call); if (atomic_dec_and_test(&rxnet->nr_calls)) wake_up_var(&rxnet->nr_calls); } /* - * Final call destruction under RCU. + * Final call destruction - but must be done in process context. 
*/ -static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) +static void rxrpc_destroy_call(struct work_struct *work) { - struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); + struct rxrpc_call *call = container_of(work, struct rxrpc_call, destroyer); + struct rxrpc_txbuf *txb; - if (in_softirq()) { - INIT_WORK(&call->processor, rxrpc_destroy_call); - if (!rxrpc_queue_work(&call->processor)) - BUG(); - } else { - rxrpc_destroy_call(&call->processor); + del_timer_sync(&call->timer); + + rxrpc_cleanup_ring(call); + while ((txb = list_first_entry_or_null(&call->tx_sendmsg, + struct rxrpc_txbuf, call_link))) { + list_del(&txb->call_link); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); } + while ((txb = list_first_entry_or_null(&call->tx_buffer, + struct rxrpc_txbuf, call_link))) { + list_del(&txb->call_link); + rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); + } + + rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); + rxrpc_put_connection(call->conn, rxrpc_conn_put_call); + rxrpc_put_peer(call->peer, rxrpc_peer_put_call); + rxrpc_put_local(call->local, rxrpc_local_put_call); + call_rcu(&call->rcu, rxrpc_rcu_free_call); } /* @@ -667,25 +678,20 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - struct rxrpc_txbuf *txb; - - _net("DESTROY CALL %d", call->debug_id); - memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - rxrpc_cleanup_ring(call); - while ((txb = list_first_entry_or_null(&call->tx_buffer, - struct rxrpc_txbuf, call_link))) { - list_del(&txb->call_link); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_cleaned); - } - rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned); - rxrpc_free_skb(call->acks_soft_tbl, rxrpc_skb_cleaned); + del_timer(&call->timer); - call_rcu(&call->rcu, rxrpc_rcu_destroy_call); + if (rcu_read_lock_held()) + /* Can't use the rxrpc workqueue as we need to cancel/flush + * something that may be running/waiting there. 
+ */ + schedule_work(&call->destroyer); + else + rxrpc_destroy_call(&call->destroyer); } /* @@ -700,14 +706,14 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) _enter(""); if (!list_empty(&rxnet->calls)) { - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); while (!list_empty(&rxnet->calls)) { call = list_entry(rxnet->calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_zap); list_del_init(&call->link); pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", @@ -715,12 +721,12 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet) rxrpc_call_states[call->state], call->flags, call->events); - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); cond_resched(); - spin_lock_bh(&rxnet->call_lock); + spin_lock(&rxnet->call_lock); } - spin_unlock_bh(&rxnet->call_lock); + spin_unlock(&rxnet->call_lock); } atomic_dec(&rxnet->nr_calls); diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index f11c97e28d2a..a08e33c9e54b 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -51,7 +51,7 @@ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle); static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { - struct rxrpc_net *rxnet = conn->params.local->rxnet; + struct rxrpc_net *rxnet = conn->rxnet; int id; _enter(""); @@ -122,37 +122,47 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp, bundle = kzalloc(sizeof(*bundle), gfp); if (bundle) { - bundle->params = *cp; - rxrpc_get_peer(bundle->params.peer); + bundle->local = cp->local; + bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle); + bundle->key = cp->key; + bundle->exclusive = cp->exclusive; + bundle->upgrade = cp->upgrade; + bundle->service_id = cp->service_id; + bundle->security_level = cp->security_level; refcount_set(&bundle->ref, 1); atomic_set(&bundle->active, 1); spin_lock_init(&bundle->channel_lock); INIT_LIST_HEAD(&bundle->waiting_calls); + trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new); } return bundle; } -struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) +struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle, + enum rxrpc_bundle_trace why) { - refcount_inc(&bundle->ref); + int r; + + __refcount_inc(&bundle->ref, &r); + trace_rxrpc_bundle(bundle->debug_id, r + 1, why); return bundle; } static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) { - rxrpc_put_peer(bundle->params.peer); + trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free); + rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle); kfree(bundle); } -void rxrpc_put_bundle(struct rxrpc_bundle *bundle) +void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why) { - unsigned int d = bundle->debug_id; + unsigned int id = bundle->debug_id; bool dead; int r; dead = __refcount_dec_and_test(&bundle->ref, &r); - - _debug("PUT B=%x %d", d, r - 1); + trace_rxrpc_bundle(id, r - 1, why); if (dead) rxrpc_free_bundle(bundle); } @@ -164,12 +174,12 @@ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) { struct rxrpc_connection *conn; - struct rxrpc_net *rxnet = bundle->params.local->rxnet; + struct rxrpc_net *rxnet = bundle->local->rxnet; int ret; _enter(""); - conn = rxrpc_alloc_connection(gfp); + conn = rxrpc_alloc_connection(rxnet, gfp); if (!conn) { _leave(" = -ENOMEM"); return ERR_PTR(-ENOMEM); @@ -177,10 +187,16 @@ rxrpc_alloc_client_connection(struct 
rxrpc_bundle *bundle, gfp_t gfp) refcount_set(&conn->ref, 1); conn->bundle = bundle; - conn->params = bundle->params; + conn->local = bundle->local; + conn->peer = bundle->peer; + conn->key = bundle->key; + conn->exclusive = bundle->exclusive; + conn->upgrade = bundle->upgrade; + conn->orig_service_id = bundle->service_id; + conn->security_level = bundle->security_level; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; - conn->service_id = conn->params.service_id; + conn->service_id = conn->orig_service_id; ret = rxrpc_get_client_connection_id(conn, gfp); if (ret < 0) @@ -195,14 +211,13 @@ rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp) list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - rxrpc_get_bundle(bundle); - rxrpc_get_peer(conn->params.peer); - rxrpc_get_local(conn->params.local); - key_get(conn->params.key); + rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn); + rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn); + rxrpc_get_local(conn->local, rxrpc_local_get_client_conn); + key_get(conn->key); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_client, - refcount_read(&conn->ref), - __builtin_return_address(0)); + trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), + rxrpc_conn_new_client); atomic_inc(&rxnet->nr_client_conns); trace_rxrpc_client(conn, -1, rxrpc_client_alloc); @@ -228,7 +243,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) if (!conn) goto dont_reuse; - rxnet = conn->params.local->rxnet; + rxnet = conn->rxnet; if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) goto dont_reuse; @@ -285,7 +300,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c while (p) { bundle = rb_entry(p, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->params.X - (long)cp->X) +#define cmp(X) ((long)bundle->X - (long)cp->X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level) ?: @@ -314,7 +329,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c parent = *pp; bundle = rb_entry(parent, struct rxrpc_bundle, local_node); -#define cmp(X) ((long)bundle->params.X - (long)cp->X) +#define cmp(X) ((long)bundle->X - (long)cp->X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level) ?: @@ -332,7 +347,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id); rb_link_node(&candidate->local_node, parent, pp); rb_insert_color(&candidate->local_node, &local->client_bundles); - rxrpc_get_bundle(candidate); + rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call); spin_unlock(&local->client_bundles_lock); _leave(" = %u [new]", candidate->debug_id); return candidate; @@ -340,7 +355,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c found_bundle_free: rxrpc_free_bundle(candidate); found_bundle: - rxrpc_get_bundle(bundle); + rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call); atomic_inc(&bundle->active); spin_unlock(&local->client_bundles_lock); _leave(" = %u [found]", bundle->debug_id); @@ -456,10 +471,10 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp) if (candidate) { _debug("discard C=%x", candidate->debug_id); trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); - rxrpc_put_connection(candidate); + rxrpc_put_connection(candidate, rxrpc_conn_put_discard); } - rxrpc_put_connection(old); + rxrpc_put_connection(old, rxrpc_conn_put_noreuse); 
_leave(""); } @@ -530,23 +545,21 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, clear_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags); clear_bit(conn->bundle_shift + channel, &bundle->avail_chans); - rxrpc_see_call(call); + rxrpc_see_call(call, rxrpc_call_see_activate_client); list_del_init(&call->chan_wait_link); - call->peer = rxrpc_get_peer(conn->params.peer); - call->conn = rxrpc_get_connection(conn); + call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call); + call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call); call->cid = conn->proto.cid | channel; call->call_id = call_id; call->security = conn->security; call->security_ix = conn->security_ix; - call->service_id = conn->service_id; + call->dest_srx.srx_service = conn->service_id; trace_rxrpc_connect_call(call); - _net("CONNECT call %08x:%08x as call %d on conn %d", - call->cid, call->call_id, call->debug_id, conn->debug_id); - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); /* Paired with the read barrier in rxrpc_connect_call(). This orders * cid and epoch in the connection wrt to call_id without the need to @@ -571,7 +584,7 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, */ static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn) { - struct rxrpc_net *rxnet = bundle->params.local->rxnet; + struct rxrpc_net *rxnet = bundle->local->rxnet; bool drop_ref; if (!list_empty(&conn->cache_link)) { @@ -583,7 +596,7 @@ static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connecti } spin_unlock(&rxnet->client_conn_cache_lock); if (drop_ref) - rxrpc_put_connection(conn); + rxrpc_put_connection(conn, rxrpc_conn_put_unidle); } } @@ -732,7 +745,7 @@ granted_channel: out_put_bundle: rxrpc_deactivate_bundle(bundle); - rxrpc_put_bundle(bundle); + rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call); out: _leave(" = %d", ret); return ret; @@ -773,6 +786,10 @@ void rxrpc_expose_client_call(struct rxrpc_call *call) if (chan->call_counter >= INT_MAX) set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); trace_rxrpc_client(conn, channel, rxrpc_client_exposed); + + spin_lock(&call->peer->lock); + hlist_add_head(&call->error_link, &call->peer->error_targets); + spin_unlock(&call->peer->lock); } } @@ -797,7 +814,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call { struct rxrpc_connection *conn; struct rxrpc_channel *chan = NULL; - struct rxrpc_net *rxnet = bundle->params.local->rxnet; + struct rxrpc_net *rxnet = bundle->local->rxnet; unsigned int channel; bool may_reuse; u32 cid; @@ -887,7 +904,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); conn->idle_timestamp = jiffies; - rxrpc_get_connection(conn); + rxrpc_get_connection(conn, rxrpc_conn_get_idle); spin_lock(&rxnet->client_conn_cache_lock); list_move_tail(&conn->cache_link, &rxnet->idle_client_conns); spin_unlock(&rxnet->client_conn_cache_lock); @@ -929,7 +946,7 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) if (need_drop) { rxrpc_deactivate_bundle(bundle); - rxrpc_put_connection(conn); + rxrpc_put_connection(conn, rxrpc_conn_put_unbundle); } } @@ -938,11 +955,11 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn) */ static void rxrpc_deactivate_bundle(struct rxrpc_bundle 
*bundle) { - struct rxrpc_local *local = bundle->params.local; + struct rxrpc_local *local = bundle->local; bool need_put = false; if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) { - if (!bundle->params.exclusive) { + if (!bundle->exclusive) { _debug("erase bundle"); rb_erase(&bundle->local_node, &local->client_bundles); need_put = true; @@ -950,16 +967,16 @@ static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle) spin_unlock(&local->client_bundles_lock); if (need_put) - rxrpc_put_bundle(bundle); + rxrpc_put_bundle(bundle, rxrpc_bundle_put_discard); } } /* * Clean up a dead client connection. */ -static void rxrpc_kill_client_conn(struct rxrpc_connection *conn) +void rxrpc_kill_client_conn(struct rxrpc_connection *conn) { - struct rxrpc_local *local = conn->params.local; + struct rxrpc_local *local = conn->local; struct rxrpc_net *rxnet = local->rxnet; _enter("C=%x", conn->debug_id); @@ -968,23 +985,6 @@ static void rxrpc_kill_client_conn(struct rxrpc_connection *conn) atomic_dec(&rxnet->nr_client_conns); rxrpc_put_client_connection_id(conn); - rxrpc_kill_connection(conn); -} - -/* - * Clean up a dead client connections. - */ -void rxrpc_put_client_conn(struct rxrpc_connection *conn) -{ - const void *here = __builtin_return_address(0); - unsigned int debug_id = conn->debug_id; - bool dead; - int r; - - dead = __refcount_dec_and_test(&conn->ref, &r); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_client, r - 1, here); - if (dead) - rxrpc_kill_client_conn(conn); } /* @@ -1010,7 +1010,7 @@ void rxrpc_discard_expired_client_conns(struct work_struct *work) } /* Don't double up on the discarding */ - if (!spin_trylock(&rxnet->client_conn_discard_lock)) { + if (!mutex_trylock(&rxnet->client_conn_discard_lock)) { _leave(" [already]"); return; } @@ -1038,7 +1038,7 @@ next: expiry = rxrpc_conn_idle_client_expiry; if (nr_conns > rxrpc_reap_client_connections) expiry = rxrpc_conn_idle_client_fast_expiry; - if (conn->params.local->service_closed) + if (conn->local->service_closed) expiry = rxrpc_closed_conn_expiry * HZ; conn_expires_at = conn->idle_timestamp + expiry; @@ -1048,13 +1048,15 @@ next: goto not_yet_expired; } + atomic_dec(&conn->active); trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_del_init(&conn->cache_link); spin_unlock(&rxnet->client_conn_cache_lock); rxrpc_unbundle_conn(conn); - rxrpc_put_connection(conn); /* Drop the ->cache_link ref */ + /* Drop the ->cache_link ref */ + rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle); nr_conns--; goto next; @@ -1073,7 +1075,7 @@ not_yet_expired: out: spin_unlock(&rxnet->client_conn_cache_lock); - spin_unlock(&rxnet->client_conn_discard_lock); + mutex_unlock(&rxnet->client_conn_discard_lock); _leave(""); } @@ -1112,7 +1114,8 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns, cache_link) { - if (conn->params.local == local) { + if (conn->local == local) { + atomic_dec(&conn->active); trace_rxrpc_client(conn, -1, rxrpc_client_discard); list_move(&conn->cache_link, &graveyard); } @@ -1125,7 +1128,7 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *local) struct rxrpc_connection, cache_link); list_del_init(&conn->cache_link); rxrpc_unbundle_conn(conn); - rxrpc_put_connection(conn); + rxrpc_put_connection(conn, rxrpc_conn_put_local_dead); } _leave(" [culled]"); diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index aab069701398..480364bcbf85 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -52,8 
+52,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, if (skb && call_id != sp->hdr.callNumber) return; - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -86,8 +86,8 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, break; case RXRPC_PACKET_TYPE_ACK: - mtu = conn->params.peer->if_mtu; - mtu -= conn->params.peer->hdrsize; + mtu = conn->peer->if_mtu; + mtu -= conn->peer->hdrsize; pkt.ack.bufferSpace = 0; pkt.ack.maxSkew = htons(skb ? skb->priority : 0); pkt.ack.firstPacket = htonl(chan->last_seq + 1); @@ -122,19 +122,17 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: - _proto("Tx ABORT %%%u { %d } [re]", serial, conn->abort_code); break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), pkt.ack.reason, 0); - _proto("Tx ACK %%%u [re]", serial); break; } - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, ioc, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len); + conn->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(chan->call_debug_id, serial, ret, rxrpc_tx_point_call_final_resend); @@ -200,9 +198,9 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, _enter("%d,,%u,%u", conn->debug_id, error, abort_code); /* generate a connection-level abort */ - spin_lock_bh(&conn->state_lock); + spin_lock(&conn->state_lock); if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); _leave(" = 0 [already dead]"); return 0; } @@ -211,10 +209,10 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, conn->abort_code = abort_code; conn->state = RXRPC_CONN_LOCALLY_ABORTED; set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -242,9 +240,8 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial); whdr.serial = htonl(serial); - _proto("Tx CONN ABORT %%%u { %d }", serial, conn->abort_code); - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_conn_abort); @@ -254,7 +251,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort); - conn->params.peer->last_tx_at = ktime_get_seconds(); + conn->peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; @@ -268,12 +265,12 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); if (call->state == RXRPC_CALL_SERVER_SECURING) { call->state = 
RXRPC_CALL_SERVER_RECV_REQUEST; rxrpc_notify_socket(call); } - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); } } @@ -285,8 +282,6 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, u32 *_abort_code) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - __be32 wtmp; - u32 abort_code; int loop, ret; if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { @@ -308,17 +303,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return 0; case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &wtmp, sizeof(wtmp)) < 0) { - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_abort")); - return -EPROTO; - } - abort_code = ntohl(wtmp); - _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); - conn->error = -ECONNABORTED; - conn->abort_code = abort_code; + conn->abort_code = skb->priority; conn->state = RXRPC_CONN_REMOTELY_ABORTED; set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); @@ -334,23 +320,23 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, return ret; ret = conn->security->init_connection_security( - conn, conn->params.key->payload.data[0]); + conn, conn->key->payload.data[0]); if (ret < 0) return ret; spin_lock(&conn->bundle->channel_lock); - spin_lock_bh(&conn->state_lock); + spin_lock(&conn->state_lock); if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->bundle->channel_lock))); } else { - spin_unlock_bh(&conn->state_lock); + spin_unlock(&conn->state_lock); } spin_unlock(&conn->bundle->channel_lock); @@ -451,7 +437,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_seen); + rxrpc_see_skb(skb, rxrpc_skb_see_conn_work); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -463,7 +449,7 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); break; } } @@ -477,7 +463,7 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_free_skb(skb, rxrpc_skb_put_conn_work); return; } @@ -486,14 +472,70 @@ void rxrpc_process_connection(struct work_struct *work) struct rxrpc_connection *conn = container_of(work, struct rxrpc_connection, processor); - rxrpc_see_connection(conn); + rxrpc_see_connection(conn, rxrpc_conn_see_work); - if (__rxrpc_use_local(conn->params.local)) { + if (__rxrpc_use_local(conn->local, rxrpc_local_use_conn_work)) { rxrpc_do_process_connection(conn); - rxrpc_unuse_local(conn->params.local); + rxrpc_unuse_local(conn->local, rxrpc_local_unuse_conn_work); } +} - rxrpc_put_connection(conn); - _leave(""); - return; +/* + * post connection-level events to the connection + * - this includes challenges, responses, some aborts and call terminal packet + * retransmission. 
+ */ +static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + _enter("%p,%p", conn, skb); + + rxrpc_get_skb(skb, rxrpc_skb_get_conn_work); + skb_queue_tail(&conn->rx_queue, skb); + rxrpc_queue_conn(conn, rxrpc_conn_queue_rx_work); +} + +/* + * Input a connection-level packet. + */ +int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { + _leave(" = -ECONNABORTED [%u]", conn->state); + return -ECONNABORTED; + } + + _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_conn_retransmit_call(conn, skb, + sp->hdr.cid & RXRPC_CHANNELMASK); + return 0; + + case RXRPC_PACKET_TYPE_BUSY: + /* Just ignore BUSY packets for now. */ + return 0; + + case RXRPC_PACKET_TYPE_ABORT: + conn->error = -ECONNABORTED; + conn->abort_code = skb->priority; + conn->state = RXRPC_CONN_REMOTELY_ABORTED; + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial); + return -ECONNABORTED; + + case RXRPC_PACKET_TYPE_CHALLENGE: + case RXRPC_PACKET_TYPE_RESPONSE: + rxrpc_post_packet_to_conn(conn, skb); + return 0; + + default: + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_conn_pkt")); + return -EPROTO; + } } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 156bd26daf74..3c8f83dacb2b 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -19,20 +19,23 @@ unsigned int __read_mostly rxrpc_connection_expiry = 10 * 60; unsigned int __read_mostly rxrpc_closed_conn_expiry = 10; -static void rxrpc_destroy_connection(struct rcu_head *); +static void rxrpc_clean_up_connection(struct work_struct *work); +static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, + unsigned long reap_at); static void rxrpc_connection_timer(struct timer_list *timer) { struct rxrpc_connection *conn = container_of(timer, struct rxrpc_connection, timer); - rxrpc_queue_conn(conn); + rxrpc_queue_conn(conn, rxrpc_conn_queue_timer); } /* * allocate a new connection */ -struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) +struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, + gfp_t gfp) { struct rxrpc_connection *conn; @@ -42,10 +45,12 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) if (conn) { INIT_LIST_HEAD(&conn->cache_link); timer_setup(&conn->timer, &rxrpc_connection_timer, 0); - INIT_WORK(&conn->processor, &rxrpc_process_connection); + INIT_WORK(&conn->processor, rxrpc_process_connection); + INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); skb_queue_head_init(&conn->rx_queue); + conn->rxnet = rxnet; conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); @@ -67,89 +72,55 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) * * The caller must be holding the RCU read lock. 
*/ -struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, - struct sk_buff *skb, - struct rxrpc_peer **_peer) +struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, + struct sk_buff *skb) { struct rxrpc_connection *conn; - struct rxrpc_conn_proto k; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct sockaddr_rxrpc srx; struct rxrpc_peer *peer; _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) - goto not_found; - - if (srx.transport.family != local->srx.transport.family && - (srx.transport.family == AF_INET && - local->srx.transport.family != AF_INET6)) { - pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", - srx.transport.family, - local->srx.transport.family); + /* Look up client connections by connection ID alone as their IDs are + * unique for this machine. + */ + conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT); + if (!conn || refcount_read(&conn->ref) == 0) { + _debug("no conn"); goto not_found; } - k.epoch = sp->hdr.epoch; - k.cid = sp->hdr.cid & RXRPC_CIDMASK; - - if (rxrpc_to_server(sp)) { - /* We need to look up service connections by the full protocol - * parameter set. We look up the peer first as an intermediate - * step and then the connection from the peer's tree. - */ - peer = rxrpc_lookup_peer_rcu(local, &srx); - if (!peer) - goto not_found; - *_peer = peer; - conn = rxrpc_find_service_conn_rcu(peer, skb); - if (!conn || refcount_read(&conn->ref) == 0) - goto not_found; - _leave(" = %p", conn); - return conn; - } else { - /* Look up client connections by connection ID alone as their - * IDs are unique for this machine. - */ - conn = idr_find(&rxrpc_client_conn_ids, - sp->hdr.cid >> RXRPC_CIDSHIFT); - if (!conn || refcount_read(&conn->ref) == 0) { - _debug("no conn"); - goto not_found; - } + if (conn->proto.epoch != sp->hdr.epoch || + conn->local != local) + goto not_found; - if (conn->proto.epoch != k.epoch || - conn->params.local != local) + peer = conn->peer; + switch (srx->transport.family) { + case AF_INET: + if (peer->srx.transport.sin.sin_port != + srx->transport.sin.sin_port || + peer->srx.transport.sin.sin_addr.s_addr != + srx->transport.sin.sin_addr.s_addr) goto not_found; - - peer = conn->params.peer; - switch (srx.transport.family) { - case AF_INET: - if (peer->srx.transport.sin.sin_port != - srx.transport.sin.sin_port || - peer->srx.transport.sin.sin_addr.s_addr != - srx.transport.sin.sin_addr.s_addr) - goto not_found; - break; + break; #ifdef CONFIG_AF_RXRPC_IPV6 - case AF_INET6: - if (peer->srx.transport.sin6.sin6_port != - srx.transport.sin6.sin6_port || - memcmp(&peer->srx.transport.sin6.sin6_addr, - &srx.transport.sin6.sin6_addr, - sizeof(struct in6_addr)) != 0) - goto not_found; - break; + case AF_INET6: + if (peer->srx.transport.sin6.sin6_port != + srx->transport.sin6.sin6_port || + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)) != 0) + goto not_found; + break; #endif - default: - BUG(); - } - - _leave(" = %p", conn); - return conn; + default: + BUG(); } + _leave(" = %p", conn); + return conn; + not_found: _leave(" = NULL"); return NULL; @@ -210,9 +181,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_ssthresh = call->cong_ssthresh; if (!hlist_unhashed(&call->error_link)) { - spin_lock_bh(&call->peer->lock); - hlist_del_rcu(&call->error_link); - spin_unlock_bh(&call->peer->lock); + spin_lock(&call->peer->lock); + 
hlist_del_init(&call->error_link); + spin_unlock(&call->peer->lock); } if (rxrpc_is_client_call(call)) @@ -224,79 +195,45 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); conn->idle_timestamp = jiffies; -} - -/* - * Kill off a connection. - */ -void rxrpc_kill_connection(struct rxrpc_connection *conn) -{ - struct rxrpc_net *rxnet = conn->params.local->rxnet; - - ASSERT(!rcu_access_pointer(conn->channels[0].call) && - !rcu_access_pointer(conn->channels[1].call) && - !rcu_access_pointer(conn->channels[2].call) && - !rcu_access_pointer(conn->channels[3].call)); - ASSERT(list_empty(&conn->cache_link)); - - write_lock(&rxnet->conn_lock); - list_del_init(&conn->proc_link); - write_unlock(&rxnet->conn_lock); - - /* Drain the Rx queue. Note that even though we've unpublished, an - * incoming packet could still be being added to our Rx queue, so we - * will need to drain it again in the RCU cleanup handler. - */ - rxrpc_purge_queue(&conn->rx_queue); - - /* Leave final destruction to RCU. The connection processor work item - * must carry a ref on the connection to prevent us getting here whilst - * it is queued or running. - */ - call_rcu(&conn->rcu, rxrpc_destroy_connection); + if (atomic_dec_and_test(&conn->active)) + rxrpc_set_service_reap_timer(conn->rxnet, + jiffies + rxrpc_connection_expiry); } /* * Queue a connection's work processor, getting a ref to pass to the work * queue. */ -bool rxrpc_queue_conn(struct rxrpc_connection *conn) +void rxrpc_queue_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); - int r; - - if (!__refcount_inc_not_zero(&conn->ref, &r)) - return false; - if (rxrpc_queue_work(&conn->processor)) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_queued, r + 1, here); - else - rxrpc_put_connection(conn); - return true; + if (atomic_read(&conn->active) >= 0 && + rxrpc_queue_work(&conn->processor)) + rxrpc_see_connection(conn, why); } /* * Note the re-emergence of a connection. */ -void rxrpc_see_connection(struct rxrpc_connection *conn) +void rxrpc_see_connection(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); if (conn) { - int n = refcount_read(&conn->ref); + int r = refcount_read(&conn->ref); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_seen, n, here); + trace_rxrpc_conn(conn->debug_id, r, why); } } /* * Get a ref on a connection. */ -struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) +struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); int r; __refcount_inc(&conn->ref, &r); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r, here); + trace_rxrpc_conn(conn->debug_id, r + 1, why); return conn; } @@ -304,14 +241,14 @@ struct rxrpc_connection *rxrpc_get_connection(struct rxrpc_connection *conn) * Try to get a ref on a connection. 
*/ struct rxrpc_connection * -rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +rxrpc_get_connection_maybe(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) { - const void *here = __builtin_return_address(0); int r; if (conn) { if (__refcount_inc_not_zero(&conn->ref, &r)) - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_got, r + 1, here); + trace_rxrpc_conn(conn->debug_id, r + 1, why); else conn = NULL; } @@ -329,49 +266,95 @@ static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet, } /* - * Release a service connection + * destroy a virtual connection */ -void rxrpc_put_service_conn(struct rxrpc_connection *conn) +static void rxrpc_rcu_free_connection(struct rcu_head *rcu) { - const void *here = __builtin_return_address(0); - unsigned int debug_id = conn->debug_id; - int r; + struct rxrpc_connection *conn = + container_of(rcu, struct rxrpc_connection, rcu); + struct rxrpc_net *rxnet = conn->rxnet; - __refcount_dec(&conn->ref, &r); - trace_rxrpc_conn(debug_id, rxrpc_conn_put_service, r - 1, here); - if (r - 1 == 1) - rxrpc_set_service_reap_timer(conn->params.local->rxnet, - jiffies + rxrpc_connection_expiry); + _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); + + trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref), + rxrpc_conn_free); + kfree(conn); + + if (atomic_dec_and_test(&rxnet->nr_conns)) + wake_up_var(&rxnet->nr_conns); } /* - * destroy a virtual connection + * Clean up a dead connection. */ -static void rxrpc_destroy_connection(struct rcu_head *rcu) +static void rxrpc_clean_up_connection(struct work_struct *work) { struct rxrpc_connection *conn = - container_of(rcu, struct rxrpc_connection, rcu); + container_of(work, struct rxrpc_connection, destructor); + struct rxrpc_net *rxnet = conn->rxnet; - _enter("{%d,u=%d}", conn->debug_id, refcount_read(&conn->ref)); + ASSERT(!rcu_access_pointer(conn->channels[0].call) && + !rcu_access_pointer(conn->channels[1].call) && + !rcu_access_pointer(conn->channels[2].call) && + !rcu_access_pointer(conn->channels[3].call)); + ASSERT(list_empty(&conn->cache_link)); - ASSERTCMP(refcount_read(&conn->ref), ==, 0); + del_timer_sync(&conn->timer); + cancel_work_sync(&conn->processor); /* Processing may restart the timer */ + del_timer_sync(&conn->timer); - _net("DESTROY CONN %d", conn->debug_id); + write_lock(&rxnet->conn_lock); + list_del_init(&conn->proc_link); + write_unlock(&rxnet->conn_lock); - del_timer_sync(&conn->timer); rxrpc_purge_queue(&conn->rx_queue); + rxrpc_kill_client_conn(conn); + conn->security->clear(conn); - key_put(conn->params.key); - rxrpc_put_bundle(conn->bundle); - rxrpc_put_peer(conn->params.peer); + key_put(conn->key); + rxrpc_put_bundle(conn->bundle, rxrpc_bundle_put_conn); + rxrpc_put_peer(conn->peer, rxrpc_peer_put_conn); + rxrpc_put_local(conn->local, rxrpc_local_put_kill_conn); + + /* Drain the Rx queue. Note that even though we've unpublished, an + * incoming packet could still be being added to our Rx queue, so we + * will need to drain it again in the RCU cleanup handler. + */ + rxrpc_purge_queue(&conn->rx_queue); - if (atomic_dec_and_test(&conn->params.local->rxnet->nr_conns)) - wake_up_var(&conn->params.local->rxnet->nr_conns); - rxrpc_put_local(conn->params.local); + call_rcu(&conn->rcu, rxrpc_rcu_free_connection); +} - kfree(conn); - _leave(""); +/* + * Drop a ref on a connection. 
+ */ +void rxrpc_put_connection(struct rxrpc_connection *conn, + enum rxrpc_conn_trace why) +{ + unsigned int debug_id; + bool dead; + int r; + + if (!conn) + return; + + debug_id = conn->debug_id; + dead = __refcount_dec_and_test(&conn->ref, &r); + trace_rxrpc_conn(debug_id, r - 1, why); + if (dead) { + del_timer(&conn->timer); + cancel_work(&conn->processor); + + if (in_softirq() || work_busy(&conn->processor) || + timer_pending(&conn->timer)) + /* Can't use the rxrpc workqueue as we need to cancel/flush + * something that may be running/waiting there. + */ + schedule_work(&conn->destructor); + else + rxrpc_clean_up_connection(&conn->destructor); + } } /* @@ -383,6 +366,7 @@ void rxrpc_service_connection_reaper(struct work_struct *work) struct rxrpc_net *rxnet = container_of(work, struct rxrpc_net, service_conn_reaper); unsigned long expire_at, earliest, idle_timestamp, now; + int active; LIST_HEAD(graveyard); @@ -393,20 +377,20 @@ void rxrpc_service_connection_reaper(struct work_struct *work) write_lock(&rxnet->conn_lock); list_for_each_entry_safe(conn, _p, &rxnet->service_conns, link) { - ASSERTCMP(refcount_read(&conn->ref), >, 0); - if (likely(refcount_read(&conn->ref) > 1)) + ASSERTCMP(atomic_read(&conn->active), >=, 0); + if (likely(atomic_read(&conn->active) > 0)) continue; if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) continue; - if (rxnet->live && !conn->params.local->dead) { + if (rxnet->live && !conn->local->dead) { idle_timestamp = READ_ONCE(conn->idle_timestamp); expire_at = idle_timestamp + rxrpc_connection_expiry * HZ; - if (conn->params.local->service_closed) + if (conn->local->service_closed) expire_at = idle_timestamp + rxrpc_closed_conn_expiry * HZ; - _debug("reap CONN %d { u=%d,t=%ld }", - conn->debug_id, refcount_read(&conn->ref), + _debug("reap CONN %d { a=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->active), (long)expire_at - (long)now); if (time_before(now, expire_at)) { @@ -416,12 +400,13 @@ void rxrpc_service_connection_reaper(struct work_struct *work) } } - /* The usage count sits at 1 whilst the object is unused on the - * list; we reduce that to 0 to make the object unavailable. + /* The activity count sits at 0 whilst the conn is unused on + * the list; we reduce that to -1 to make the conn unavailable. 
*/ - if (!refcount_dec_if_one(&conn->ref)) + active = 0; + if (!atomic_try_cmpxchg(&conn->active, &active, -1)) continue; - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_reap_service, 0, NULL); + rxrpc_see_connection(conn, rxrpc_conn_see_reap_service); if (rxrpc_conn_is_client(conn)) BUG(); @@ -443,8 +428,8 @@ void rxrpc_service_connection_reaper(struct work_struct *work) link); list_del_init(&conn->link); - ASSERTCMP(refcount_read(&conn->ref), ==, 0); - rxrpc_kill_connection(conn); + ASSERTCMP(atomic_read(&conn->active), ==, -1); + rxrpc_put_connection(conn, rxrpc_conn_put_service_reaped); } _leave(""); diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index 6e6aa02c6f9e..2a55a88b2a5b 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -73,7 +73,7 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, struct rxrpc_conn_proto k = conn->proto; struct rb_node **pp, *parent; - write_seqlock_bh(&peer->service_conn_lock); + write_seqlock(&peer->service_conn_lock); pp = &peer->service_conns.rb_node; parent = NULL; @@ -94,14 +94,14 @@ static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, rb_insert_color(&conn->service_node, &peer->service_conns); conn_published: set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); - write_sequnlock_bh(&peer->service_conn_lock); + write_sequnlock(&peer->service_conn_lock); _leave(" = %d [new]", conn->debug_id); return; found_extant_conn: if (refcount_read(&cursor->ref) == 0) goto replace_old_connection; - write_sequnlock_bh(&peer->service_conn_lock); + write_sequnlock(&peer->service_conn_lock); /* We should not be able to get here. rxrpc_incoming_connection() is * called in a non-reentrant context, so there can't be a race to * insert a new connection. @@ -125,7 +125,7 @@ replace_old_connection: struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxnet, gfp_t gfp) { - struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); + struct rxrpc_connection *conn = rxrpc_alloc_connection(rxnet, gfp); if (conn) { /* We maintain an extra ref on the connection whilst it is on @@ -133,7 +133,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn */ conn->state = RXRPC_CONN_SERVICE_PREALLOC; refcount_set(&conn->ref, 2); - conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle); + conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle, + rxrpc_bundle_get_service_conn); atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); @@ -141,9 +142,7 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn list_add_tail(&conn->proc_link, &rxnet->conn_proc_list); write_unlock(&rxnet->conn_lock); - trace_rxrpc_conn(conn->debug_id, rxrpc_conn_new_service, - refcount_read(&conn->ref), - __builtin_return_address(0)); + rxrpc_see_connection(conn, rxrpc_conn_new_service); } return conn; @@ -164,7 +163,7 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; - conn->params.service_id = sp->hdr.serviceId; + conn->orig_service_id = sp->hdr.serviceId; conn->service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; @@ -182,10 +181,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, conn->service_id == rx->service_upgrade.from) conn->service_id = rx->service_upgrade.to; - /* Make the connection a target for incoming packets. 
*/ - rxrpc_publish_service_conn(conn->params.peer, conn); + atomic_set(&conn->active, 1); - _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); + /* Make the connection a target for incoming packets. */ + rxrpc_publish_service_conn(conn->peer, conn); } /* @@ -194,10 +193,10 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx, */ void rxrpc_unpublish_service_conn(struct rxrpc_connection *conn) { - struct rxrpc_peer *peer = conn->params.peer; + struct rxrpc_peer *peer = conn->peer; - write_seqlock_bh(&peer->service_conn_lock); + write_seqlock(&peer->service_conn_lock); if (test_and_clear_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags)) rb_erase(&conn->service_node, &peer->service_conns); - write_sequnlock_bh(&peer->service_conn_lock); + write_sequnlock(&peer->service_conn_lock); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index bdf70b81addc..d0e20e946e48 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* RxRPC packet reception +/* Processing of received RxRPC packets * - * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells ([email protected]) */ @@ -12,10 +12,8 @@ static void rxrpc_proto_abort(const char *why, struct rxrpc_call *call, rxrpc_seq_t seq) { - if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) { - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); - } + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG)) + rxrpc_send_abort_packet(call); } /* @@ -58,25 +56,6 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, summary->cumulative_acks = cumulative_acks; summary->dup_acks = call->cong_dup_acks; - /* If we haven't transmitted anything for >1RTT, we should reset the - * congestion management state. - */ - if ((call->cong_mode == RXRPC_CALL_SLOW_START || - call->cong_mode == RXRPC_CALL_CONGEST_AVOIDANCE) && - ktime_before(ktime_add_us(call->tx_last_sent, - call->peer->srtt_us >> 3), - ktime_get_real()) - ) { - change = rxrpc_cong_idle_reset; - summary->mode = RXRPC_CALL_SLOW_START; - if (RXRPC_TX_SMSS > 2190) - summary->cwnd = 2; - else if (RXRPC_TX_SMSS > 1095) - summary->cwnd = 3; - else - summary->cwnd = 4; - } - switch (call->cong_mode) { case RXRPC_CALL_SLOW_START: if (summary->saw_nacks) @@ -174,8 +153,8 @@ out_no_clear_ca: call->cong_cwnd = cwnd; call->cong_cumul_acks = cumulative_acks; trace_rxrpc_congest(call, summary, acked_serial, change); - if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); + if (resend) + rxrpc_resend(call, skb); return; packet_loss_detected: @@ -197,6 +176,33 @@ send_extra_data: } /* + * Degrade the congestion window if we haven't transmitted a packet for >1RTT. 
+ */ +void rxrpc_congestion_degrade(struct rxrpc_call *call) +{ + ktime_t rtt, now; + + if (call->cong_mode != RXRPC_CALL_SLOW_START && + call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE) + return; + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) + return; + + rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); + now = ktime_get_real(); + if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now)) + return; + + trace_rxrpc_reset_cwnd(call, now); + rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); + call->tx_last_sent = now; + call->cong_mode = RXRPC_CALL_SLOW_START; + call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh, + call->cong_cwnd * 3 / 4); + call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND); +} + +/* * Apply a hard ACK by advancing the Tx window. */ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, @@ -338,7 +344,8 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, /* * Process a DATA packet. */ -static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb) +static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, + bool *_notify) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct sk_buff *oos; @@ -361,7 +368,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb) if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) && seq + 1 != wtop) { rxrpc_proto_abort("LSN", call, seq); - goto err_free; + return; } } else { if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && @@ -369,7 +376,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb) pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n", call->debug_id, seq, window, wtop, wlimit); rxrpc_proto_abort("LSA", call, seq); - goto err_free; + return; } } @@ -397,14 +404,18 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb) /* Send an immediate ACK if we fill in a hole */ else if (!skb_queue_empty(&call->rx_oos_queue)) ack_reason = RXRPC_ACK_DELAY; + else + atomic_inc_return(&call->ackr_nr_unacked); window++; if (after(window, wtop)) wtop = window; + rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg); + spin_lock(&call->recvmsg_queue.lock); rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue); - skb = NULL; + *_notify = true; while ((oos = skb_peek(&call->rx_oos_queue))) { struct rxrpc_skb_priv *osp = rxrpc_skb(oos); @@ -456,36 +467,26 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_skb_priv *osp = rxrpc_skb(oos); if (after(osp->hdr.seq, seq)) { + rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos); __skb_queue_before(&call->rx_oos_queue, oos, skb); goto oos_queued; } } + rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg_oos); __skb_queue_tail(&call->rx_oos_queue, skb); oos_queued: trace_rxrpc_receive(call, last ? 
rxrpc_receive_oos_last : rxrpc_receive_oos, sp->hdr.serial, sp->hdr.seq); - skb = NULL; } send_ack: - if (ack_reason < 0 && - atomic_inc_return(&call->ackr_nr_unacked) > 2 && - test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) { - ack_reason = RXRPC_ACK_IDLE; - } else if (ack_reason >= 0) { - set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags); - } - if (ack_reason >= 0) rxrpc_send_ACK(call, ack_reason, serial, rxrpc_propose_ack_input_data); else rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_input_data); - -err_free: - rxrpc_free_skb(skb, rxrpc_skb_freed); } /* @@ -498,6 +499,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb struct sk_buff *jskb; unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len - offset; + bool notify = false; while (sp->hdr.flags & RXRPC_JUMBO_PACKET) { if (len < RXRPC_JUMBO_SUBPKTLEN) @@ -508,16 +510,17 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb &jhdr, sizeof(jhdr)) < 0) goto protocol_error; - jskb = skb_clone(skb, GFP_ATOMIC); + jskb = skb_clone(skb, GFP_NOFS); if (!jskb) { kdebug("couldn't clone"); return false; } - rxrpc_new_skb(jskb, rxrpc_skb_cloned_jumbo); + rxrpc_new_skb(jskb, rxrpc_skb_new_jumbo_subpacket); jsp = rxrpc_skb(jskb); jsp->offset = offset; jsp->len = RXRPC_JUMBO_DATALEN; - rxrpc_input_data_one(call, jskb); + rxrpc_input_data_one(call, jskb, ¬ify); + rxrpc_free_skb(jskb, rxrpc_skb_put_jumbo_subpacket); sp->hdr.flags = jhdr.flags; sp->hdr._rsvd = ntohs(jhdr._rsvd); @@ -529,7 +532,11 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb sp->offset = offset; sp->len = len; - rxrpc_input_data_one(call, skb); + rxrpc_input_data_one(call, skb, ¬ify); + if (notify) { + trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); + rxrpc_notify_socket(call); + } return true; protocol_error: @@ -551,32 +558,9 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) atomic64_read(&call->ackr_window), call->rx_highest_seq, skb->len, seq0); - _proto("Rx DATA %%%u { #%u f=%02x }", - sp->hdr.serial, seq0, sp->hdr.flags); - state = READ_ONCE(call->state); - if (state >= RXRPC_CALL_COMPLETE) { - rxrpc_free_skb(skb, rxrpc_skb_freed); + if (state >= RXRPC_CALL_COMPLETE) return; - } - - /* Unshare the packet so that it can be modified for in-place - * decryption. - */ - if (sp->hdr.securityIndex != 0) { - struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC); - if (!nskb) { - rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem); - return; - } - - if (nskb != skb) { - rxrpc_eaten_skb(skb, rxrpc_skb_received); - skb = nskb; - rxrpc_new_skb(skb, rxrpc_skb_unshared); - sp = rxrpc_skb(skb); - } - } if (state == RXRPC_CALL_SERVER_RECV_REQUEST) { unsigned long timo = READ_ONCE(call->next_req_timo); @@ -591,28 +575,23 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) } } - spin_lock(&call->input_lock); - /* Received data implicitly ACKs all of the request packets we sent * when we're acting as a client. 
*/ if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST || state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && !rxrpc_receiving_reply(call)) - goto out; + goto out_notify; if (!rxrpc_input_split_jumbo(call, skb)) { rxrpc_proto_abort("VLD", call, sp->hdr.seq); - goto out; + goto out_notify; } skb = NULL; -out: +out_notify: trace_rxrpc_notify_socket(call->debug_id, serial); rxrpc_notify_socket(call); - - spin_unlock(&call->input_lock); - rxrpc_free_skb(skb, rxrpc_skb_freed); _leave(" [queued]"); } @@ -671,32 +650,6 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, } /* - * Process the response to a ping that we sent to find out if we lost an ACK. - * - * If we got back a ping response that indicates a lower tx_top than what we - * had at the time of the ping transmission, we adjudge all the DATA packets - * sent between the response tx_top and the ping-time tx_top to have been lost. - */ -static void rxrpc_input_check_for_lost_ack(struct rxrpc_call *call) -{ - if (after(call->acks_lost_top, call->acks_prev_seq) && - !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_queue_call(call); -} - -/* - * Process a ping response. - */ -static void rxrpc_input_ping_response(struct rxrpc_call *call, - ktime_t resp_time, - rxrpc_serial_t acked_serial, - rxrpc_serial_t ack_serial) -{ - if (acked_serial == call->acks_lost_ping) - rxrpc_input_check_for_lost_ack(call); -} - -/* * Process the extra information that may be appended to an ACK packet */ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, @@ -708,11 +661,6 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, bool wake = false; u32 rwind = ntohl(ackinfo->rwind); - _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", - sp->hdr.serial, - ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), - rwind, ntohl(ackinfo->jumbo_max)); - if (rwind > RXRPC_TX_MAX_WINDOW) rwind = RXRPC_TX_MAX_WINDOW; if (call->tx_winsize != rwind) { @@ -729,11 +677,10 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, peer = call->peer; if (mtu < peer->maxdata) { - spin_lock_bh(&peer->lock); + spin_lock(&peer->lock); peer->maxdata = mtu; peer->mtu = mtu + peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + spin_unlock(&peer->lock); } if (wake) @@ -810,7 +757,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_ackpacket ack; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_ackinfo info; - struct sk_buff *skb_old = NULL, *skb_put = skb; rxrpc_serial_t ack_serial, acked_serial; rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; @@ -818,10 +764,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) _enter(""); offset = sizeof(struct rxrpc_wire_header); - if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) { - rxrpc_proto_abort("XAK", call, 0); - goto out_not_locked; - } + if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0) + return rxrpc_proto_abort("XAK", call, 0); offset += sizeof(ack); ack_serial = sp->hdr.serial; @@ -855,7 +799,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } if (ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", ack_serial); rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, rxrpc_propose_ack_respond_to_ping); } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { @@ -895,41 +838,25 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) 
trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, first_soft_ack, call->acks_first_seq, prev_pkt, call->acks_prev_seq); - goto out_not_locked; + return; } info.rxMTU = 0; ioffset = offset + nr_acks + 3; if (skb->len >= ioffset + sizeof(info) && - skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) { - rxrpc_proto_abort("XAI", call, 0); - goto out_not_locked; - } + skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); if (nr_acks > 0) skb_condense(skb); - spin_lock(&call->input_lock); - - /* Discard any out-of-order or duplicate ACKs (inside lock). */ - if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { - trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->acks_first_seq, - prev_pkt, call->acks_prev_seq); - goto out; - } call->acks_latest_ts = skb->tstamp; - call->acks_first_seq = first_soft_ack; call->acks_prev_seq = prev_pkt; switch (ack.reason) { case RXRPC_ACK_PING: break; - case RXRPC_ACK_PING_RESPONSE: - rxrpc_input_ping_response(call, skb->tstamp, acked_serial, - ack_serial); - fallthrough; default: if (after(acked_serial, call->acks_highest_serial)) call->acks_highest_serial = acked_serial; @@ -940,10 +867,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (info.rxMTU) rxrpc_input_ackinfo(call, skb, &info); - if (first_soft_ack == 0) { - rxrpc_proto_abort("AK0", call, 0); - goto out; - } + if (first_soft_ack == 0) + return rxrpc_proto_abort("AK0", call, 0); /* Ignore ACKs unless we are or have just been transmitting. */ switch (READ_ONCE(call->state)) { @@ -953,45 +878,27 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) case RXRPC_CALL_SERVER_AWAIT_ACK: break; default: - goto out; + return; } if (before(hard_ack, call->acks_hard_ack) || - after(hard_ack, call->tx_top)) { - rxrpc_proto_abort("AKW", call, 0); - goto out; - } - if (nr_acks > call->tx_top - hard_ack) { - rxrpc_proto_abort("AKN", call, 0); - goto out; - } + after(hard_ack, call->tx_top)) + return rxrpc_proto_abort("AKW", call, 0); + if (nr_acks > call->tx_top - hard_ack) + return rxrpc_proto_abort("AKN", call, 0); if (after(hard_ack, call->acks_hard_ack)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, "ETA"); - goto out; + return; } } if (nr_acks > 0) { - if (offset > (int)skb->len - nr_acks) { - rxrpc_proto_abort("XSA", call, 0); - goto out; - } - - spin_lock(&call->acks_ack_lock); - skb_old = call->acks_soft_tbl; - call->acks_soft_tbl = skb; - spin_unlock(&call->acks_ack_lock); - + if (offset > (int)skb->len - nr_acks) + return rxrpc_proto_abort("XSA", call, 0); rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, nr_acks, &summary); - skb_put = NULL; - } else if (call->acks_soft_tbl) { - spin_lock(&call->acks_ack_lock); - skb_old = call->acks_soft_tbl; - call->acks_soft_tbl = NULL; - spin_unlock(&call->acks_ack_lock); } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && @@ -1001,11 +908,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_propose_ack_ping_for_lost_reply); rxrpc_congestion_management(call, skb, &summary, acked_serial); -out: - spin_unlock(&call->input_lock); -out_not_locked: - rxrpc_free_skb(skb_put, rxrpc_skb_freed); - rxrpc_free_skb(skb_old, rxrpc_skb_freed); } /* @@ -1014,16 +916,9 @@ out_not_locked: static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - 
_proto("Rx ACKALL %%%u", sp->hdr.serial); - - spin_lock(&call->input_lock); if (rxrpc_rotate_tx_window(call, call->tx_top, &summary)) rxrpc_end_tx_phase(call, false, "ETL"); - - spin_unlock(&call->input_lock); } /* @@ -1032,35 +927,30 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - __be32 wtmp; - u32 abort_code = RX_CALL_DEAD; - - _enter(""); - - if (skb->len >= 4 && - skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &wtmp, sizeof(wtmp)) >= 0) - abort_code = ntohl(wtmp); - trace_rxrpc_rx_abort(call, sp->hdr.serial, abort_code); - - _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); + trace_rxrpc_rx_abort(call, sp->hdr.serial, skb->priority); rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, - abort_code, -ECONNABORTED); + skb->priority, -ECONNABORTED); } /* * Process an incoming call packet. */ -static void rxrpc_input_call_packet(struct rxrpc_call *call, - struct sk_buff *skb) +void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); unsigned long timo; _enter("%p,%p", call, skb); + if (sp->hdr.serviceId != call->dest_srx.srx_service) + call->dest_srx.srx_service = sp->hdr.serviceId; + if ((int)sp->hdr.serial - (int)call->rx_serial > 0) + call->rx_serial = sp->hdr.serial; + if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) + set_bit(RXRPC_CALL_RX_HEARD, &call->flags); + timo = READ_ONCE(call->next_rx_timo); if (timo) { unsigned long now = jiffies, expect_rx_by; @@ -1074,15 +964,13 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_DATA: rxrpc_input_data(call, skb); - goto no_free; + break; case RXRPC_PACKET_TYPE_ACK: rxrpc_input_ack(call, skb); - goto no_free; + break; case RXRPC_PACKET_TYPE_BUSY: - _proto("Rx BUSY %%%u", sp->hdr.serial); - /* Just ignore BUSY packets from the server; the retry and * lifespan timers will take care of business. BUSY packets * from the client don't make sense. @@ -1100,10 +988,6 @@ static void rxrpc_input_call_packet(struct rxrpc_call *call, default: break; } - - rxrpc_free_skb(skb, rxrpc_skb_freed); -no_free: - _leave(""); } /* @@ -1112,10 +996,10 @@ no_free: * * TODO: If callNumber > call_id + 1, renegotiate security. */ -static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, - struct rxrpc_connection *conn, - struct rxrpc_call *call) +void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) { + struct rxrpc_connection *conn = call->conn; + switch (READ_ONCE(call->state)) { case RXRPC_CALL_SERVER_AWAIT_ACK: rxrpc_call_completed(call); @@ -1123,360 +1007,15 @@ static void rxrpc_input_implicit_end_call(struct rxrpc_sock *rx, case RXRPC_CALL_COMPLETE: break; default: - if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) { - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); - } + if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN)) + rxrpc_send_abort_packet(call); trace_rxrpc_improper_term(call); break; } - spin_lock(&rx->incoming_lock); - __rxrpc_disconnect_call(conn, call); - spin_unlock(&rx->incoming_lock); -} - -/* - * post connection-level events to the connection - * - this includes challenges, responses, some aborts and call terminal packet - * retransmission. 
- */ -static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, - struct sk_buff *skb) -{ - _enter("%p,%p", conn, skb); - - skb_queue_tail(&conn->rx_queue, skb); - rxrpc_queue_conn(conn); -} - -/* - * post endpoint-level events to the local endpoint - * - this includes debug and version messages - */ -static void rxrpc_post_packet_to_local(struct rxrpc_local *local, - struct sk_buff *skb) -{ - _enter("%p,%p", local, skb); - - if (rxrpc_get_local_maybe(local)) { - skb_queue_tail(&local->event_queue, skb); - rxrpc_queue_local(local); - } else { - rxrpc_free_skb(skb, rxrpc_skb_freed); - } -} - -/* - * put a packet up for transport-level abort - */ -static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) -{ - if (rxrpc_get_local_maybe(local)) { - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); - } else { - rxrpc_free_skb(skb, rxrpc_skb_freed); - } -} - -/* - * Extract the wire header from a packet and translate the byte order. - */ -static noinline -int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - - /* dig out the RxRPC connection details */ - if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { - trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, - tracepoint_string("bad_hdr")); - return -EBADMSG; - } - - memset(sp, 0, sizeof(*sp)); - sp->hdr.epoch = ntohl(whdr.epoch); - sp->hdr.cid = ntohl(whdr.cid); - sp->hdr.callNumber = ntohl(whdr.callNumber); - sp->hdr.seq = ntohl(whdr.seq); - sp->hdr.serial = ntohl(whdr.serial); - sp->hdr.flags = whdr.flags; - sp->hdr.type = whdr.type; - sp->hdr.userStatus = whdr.userStatus; - sp->hdr.securityIndex = whdr.securityIndex; - sp->hdr._rsvd = ntohs(whdr._rsvd); - sp->hdr.serviceId = ntohs(whdr.serviceId); - return 0; -} - -/* - * handle data received on the local endpoint - * - may be called in interrupt context - * - * [!] Note that as this is called from the encap_rcv hook, the socket is not - * held locked by the caller and nothing prevents sk_user_data on the UDP from - * being cleared in the middle of processing this function. - * - * Called with the RCU read lock held from the IP layer via UDP. - */ -int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb) -{ - struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); - struct rxrpc_connection *conn; - struct rxrpc_channel *chan; - struct rxrpc_call *call = NULL; - struct rxrpc_skb_priv *sp; - struct rxrpc_peer *peer = NULL; - struct rxrpc_sock *rx = NULL; - unsigned int channel; - - _enter("%p", udp_sk); - - if (unlikely(!local)) { - kfree_skb(skb); - return 0; - } - if (skb->tstamp == 0) - skb->tstamp = ktime_get_real(); - - rxrpc_new_skb(skb, rxrpc_skb_received); - - skb_pull(skb, sizeof(struct udphdr)); - - /* The UDP protocol already released all skb resources; - * we are free to add our own data there. 
- */ - sp = rxrpc_skb(skb); - - /* dig out the RxRPC connection details */ - if (rxrpc_extract_header(sp, skb) < 0) - goto bad_message; - - if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { - static int lose; - if ((lose++ & 7) == 7) { - trace_rxrpc_rx_lose(sp); - rxrpc_free_skb(skb, rxrpc_skb_lost); - return 0; - } - } - - if (skb->tstamp == 0) - skb->tstamp = ktime_get_real(); - trace_rxrpc_rx_packet(sp); - - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_VERSION: - if (rxrpc_to_client(sp)) - goto discard; - rxrpc_post_packet_to_local(local, skb); - goto out; - - case RXRPC_PACKET_TYPE_BUSY: - if (rxrpc_to_server(sp)) - goto discard; - fallthrough; - case RXRPC_PACKET_TYPE_ACK: - case RXRPC_PACKET_TYPE_ACKALL: - if (sp->hdr.callNumber == 0) - goto bad_message; - fallthrough; - case RXRPC_PACKET_TYPE_ABORT: - break; - - case RXRPC_PACKET_TYPE_DATA: - if (sp->hdr.callNumber == 0 || - sp->hdr.seq == 0) - goto bad_message; - - /* Unshare the packet so that it can be modified for in-place - * decryption. - */ - if (sp->hdr.securityIndex != 0) { - struct sk_buff *nskb = skb_unshare(skb, GFP_ATOMIC); - if (!nskb) { - rxrpc_eaten_skb(skb, rxrpc_skb_unshared_nomem); - goto out; - } - - if (nskb != skb) { - rxrpc_eaten_skb(skb, rxrpc_skb_received); - skb = nskb; - rxrpc_new_skb(skb, rxrpc_skb_unshared); - sp = rxrpc_skb(skb); - } - } - break; - - case RXRPC_PACKET_TYPE_CHALLENGE: - if (rxrpc_to_server(sp)) - goto discard; - break; - case RXRPC_PACKET_TYPE_RESPONSE: - if (rxrpc_to_client(sp)) - goto discard; - break; - - /* Packet types 9-11 should just be ignored. */ - case RXRPC_PACKET_TYPE_PARAMS: - case RXRPC_PACKET_TYPE_10: - case RXRPC_PACKET_TYPE_11: - goto discard; - - default: - _proto("Rx Bad Packet Type %u", sp->hdr.type); - goto bad_message; - } - - if (sp->hdr.serviceId == 0) - goto bad_message; - - if (rxrpc_to_server(sp)) { - /* Weed out packets to services we're not offering. Packets - * that would begin a call are explicitly rejected and the rest - * are just discarded. - */ - rx = rcu_dereference(local->service); - if (!rx || (sp->hdr.serviceId != rx->srx.srx_service && - sp->hdr.serviceId != rx->second_service)) { - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.seq == 1) - goto unsupported_service; - goto discard; - } - } - - conn = rxrpc_find_connection_rcu(local, skb, &peer); - if (conn) { - if (sp->hdr.securityIndex != conn->security_ix) - goto wrong_security; - - if (sp->hdr.serviceId != conn->service_id) { - int old_id; - - if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) - goto reupgrade; - old_id = cmpxchg(&conn->service_id, conn->params.service_id, - sp->hdr.serviceId); - - if (old_id != conn->params.service_id && - old_id != sp->hdr.serviceId) - goto reupgrade; - } - - if (sp->hdr.callNumber == 0) { - /* Connection-level packet */ - _debug("CONN %p {%d}", conn, conn->debug_id); - rxrpc_post_packet_to_conn(conn, skb); - goto out; - } - - if ((int)sp->hdr.serial - (int)conn->hi_serial > 0) - conn->hi_serial = sp->hdr.serial; - - /* Call-bound packets are routed by connection channel. */ - channel = sp->hdr.cid & RXRPC_CHANNELMASK; - chan = &conn->channels[channel]; - - /* Ignore really old calls */ - if (sp->hdr.callNumber < chan->last_call) - goto discard; - - if (sp->hdr.callNumber == chan->last_call) { - if (chan->call || - sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) - goto discard; - - /* For the previous service call, if completed - * successfully, we discard all further packets. 
- */ - if (rxrpc_conn_is_service(conn) && - chan->last_type == RXRPC_PACKET_TYPE_ACK) - goto discard; - - /* But otherwise we need to retransmit the final packet - * from data cached in the connection record. - */ - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) - trace_rxrpc_rx_data(chan->call_debug_id, - sp->hdr.seq, - sp->hdr.serial, - sp->hdr.flags); - rxrpc_post_packet_to_conn(conn, skb); - goto out; - } - - call = rcu_dereference(chan->call); - - if (sp->hdr.callNumber > chan->call_id) { - if (rxrpc_to_client(sp)) - goto reject_packet; - if (call) - rxrpc_input_implicit_end_call(rx, conn, call); - call = NULL; - } - - if (call) { - if (sp->hdr.serviceId != call->service_id) - call->service_id = sp->hdr.serviceId; - if ((int)sp->hdr.serial - (int)call->rx_serial > 0) - call->rx_serial = sp->hdr.serial; - if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags)) - set_bit(RXRPC_CALL_RX_HEARD, &call->flags); - } - } - - if (!call || refcount_read(&call->ref) == 0) { - if (rxrpc_to_client(sp) || - sp->hdr.type != RXRPC_PACKET_TYPE_DATA) - goto bad_message; - if (sp->hdr.seq != 1) - goto discard; - call = rxrpc_new_incoming_call(local, rx, skb); - if (!call) - goto reject_packet; - } - - /* Process a call packet; this either discards or passes on the ref - * elsewhere. - */ - rxrpc_input_call_packet(call, skb); - goto out; + rxrpc_input_call_event(call, skb); -discard: - rxrpc_free_skb(skb, rxrpc_skb_freed); -out: - trace_rxrpc_rx_done(0, 0); - return 0; - -wrong_security: - trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RXKADINCONSISTENCY, EBADMSG); - skb->priority = RXKADINCONSISTENCY; - goto post_abort; - -unsupported_service: - trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_INVALID_OPERATION, EOPNOTSUPP); - skb->priority = RX_INVALID_OPERATION; - goto post_abort; - -reupgrade: - trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); - goto protocol_error; - -bad_message: - trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, - RX_PROTOCOL_ERROR, EBADMSG); -protocol_error: - skb->priority = RX_PROTOCOL_ERROR; -post_abort: - skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; -reject_packet: - trace_rxrpc_rx_done(skb->mark, skb->priority); - rxrpc_reject_packet(local, skb); - _leave(" [badmsg]"); - return 0; + spin_lock(&conn->bundle->channel_lock); + __rxrpc_disconnect_call(conn, call); + spin_unlock(&conn->bundle->channel_lock); } diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c new file mode 100644 index 000000000000..d83ae3193032 --- /dev/null +++ b/net/rxrpc/io_thread.c @@ -0,0 +1,496 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* RxRPC packet reception + * + * Copyright (C) 2007, 2016, 2022 Red Hat, Inc. All Rights Reserved. + * Written by David Howells ([email protected]) + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "ar-internal.h" + +static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb); + +/* + * handle data received on the local endpoint + * - may be called in interrupt context + * + * [!] Note that as this is called from the encap_rcv hook, the socket is not + * held locked by the caller and nothing prevents sk_user_data on the UDP from + * being cleared in the middle of processing this function. + * + * Called with the RCU read lock held from the IP layer via UDP. 
+ */ +int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb) +{ + struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); + + if (unlikely(!local)) { + kfree_skb(skb); + return 0; + } + if (skb->tstamp == 0) + skb->tstamp = ktime_get_real(); + + skb->mark = RXRPC_SKB_MARK_PACKET; + rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv); + skb_queue_tail(&local->rx_queue, skb); + rxrpc_wake_up_io_thread(local); + return 0; +} + +/* + * Handle an error received on the local endpoint. + */ +void rxrpc_error_report(struct sock *sk) +{ + struct rxrpc_local *local; + struct sk_buff *skb; + + rcu_read_lock(); + local = rcu_dereference_sk_user_data(sk); + if (unlikely(!local)) { + rcu_read_unlock(); + return; + } + + while ((skb = skb_dequeue(&sk->sk_error_queue))) { + skb->mark = RXRPC_SKB_MARK_ERROR; + rxrpc_new_skb(skb, rxrpc_skb_new_error_report); + skb_queue_tail(&local->rx_queue, skb); + } + + rxrpc_wake_up_io_thread(local); + rcu_read_unlock(); +} + +/* + * Process event packets targeted at a local endpoint. + */ +static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + char v; + + _enter(""); + + rxrpc_see_skb(skb, rxrpc_skb_see_version); + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &v, 1) >= 0) { + if (v == 0) + rxrpc_send_version_request(local, &sp->hdr, skb); + } +} + +/* + * Extract the wire header from a packet and translate the byte order. + */ +static noinline +int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) +{ + struct rxrpc_wire_header whdr; + + /* dig out the RxRPC connection details */ + if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) { + trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, + tracepoint_string("bad_hdr")); + return -EBADMSG; + } + + memset(sp, 0, sizeof(*sp)); + sp->hdr.epoch = ntohl(whdr.epoch); + sp->hdr.cid = ntohl(whdr.cid); + sp->hdr.callNumber = ntohl(whdr.callNumber); + sp->hdr.seq = ntohl(whdr.seq); + sp->hdr.serial = ntohl(whdr.serial); + sp->hdr.flags = whdr.flags; + sp->hdr.type = whdr.type; + sp->hdr.userStatus = whdr.userStatus; + sp->hdr.securityIndex = whdr.securityIndex; + sp->hdr._rsvd = ntohs(whdr._rsvd); + sp->hdr.serviceId = ntohs(whdr.serviceId); + return 0; +} + +/* + * Extract the abort code from an ABORT packet and stash it in skb->priority. 
+ */ +static bool rxrpc_extract_abort(struct sk_buff *skb) +{ + __be32 wtmp; + + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) + return false; + skb->priority = ntohl(wtmp); + return true; +} + +/* + * Process packets received on the local endpoint + */ +static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) +{ + struct rxrpc_connection *conn; + struct sockaddr_rxrpc peer_srx; + struct rxrpc_skb_priv *sp; + struct rxrpc_peer *peer = NULL; + struct sk_buff *skb = *_skb; + int ret = 0; + + skb_pull(skb, sizeof(struct udphdr)); + + sp = rxrpc_skb(skb); + + /* dig out the RxRPC connection details */ + if (rxrpc_extract_header(sp, skb) < 0) + goto bad_message; + + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); + return 0; + } + } + + trace_rxrpc_rx_packet(sp); + + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: + if (rxrpc_to_client(sp)) + return 0; + rxrpc_input_version(local, skb); + return 0; + + case RXRPC_PACKET_TYPE_BUSY: + if (rxrpc_to_server(sp)) + return 0; + fallthrough; + case RXRPC_PACKET_TYPE_ACK: + case RXRPC_PACKET_TYPE_ACKALL: + if (sp->hdr.callNumber == 0) + goto bad_message; + break; + case RXRPC_PACKET_TYPE_ABORT: + if (!rxrpc_extract_abort(skb)) + return 0; /* Just discard if malformed */ + break; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0 || + sp->hdr.seq == 0) + goto bad_message; + + /* Unshare the packet so that it can be modified for in-place + * decryption. + */ + if (sp->hdr.securityIndex != 0) { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); + *_skb = NULL; + return 0; + } + + if (skb != *_skb) { + rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare); + *_skb = skb; + rxrpc_new_skb(skb, rxrpc_skb_new_unshared); + sp = rxrpc_skb(skb); + } + } + break; + + case RXRPC_PACKET_TYPE_CHALLENGE: + if (rxrpc_to_server(sp)) + return 0; + break; + case RXRPC_PACKET_TYPE_RESPONSE: + if (rxrpc_to_client(sp)) + return 0; + break; + + /* Packet types 9-11 should just be ignored. */ + case RXRPC_PACKET_TYPE_PARAMS: + case RXRPC_PACKET_TYPE_10: + case RXRPC_PACKET_TYPE_11: + return 0; + + default: + goto bad_message; + } + + if (sp->hdr.serviceId == 0) + goto bad_message; + + if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0)) + return true; /* Unsupported address type - discard. */ + + if (peer_srx.transport.family != local->srx.transport.family && + (peer_srx.transport.family == AF_INET && + local->srx.transport.family != AF_INET6)) { + pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", + peer_srx.transport.family, + local->srx.transport.family); + return true; /* Wrong address type - discard. */ + } + + if (rxrpc_to_client(sp)) { + rcu_read_lock(); + conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb); + conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); + rcu_read_unlock(); + if (!conn) { + trace_rxrpc_abort(0, "NCC", sp->hdr.cid, + sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + goto protocol_error; + } + + ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); + rxrpc_put_connection(conn, rxrpc_conn_put_call_input); + return ret; + } + + /* We need to look up service connections by the full protocol + * parameter set. We look up the peer first as an intermediate step + * and then the connection from the peer's tree. 
+ */ + rcu_read_lock(); + + peer = rxrpc_lookup_peer_rcu(local, &peer_srx); + if (!peer) { + rcu_read_unlock(); + return rxrpc_new_incoming_call(local, NULL, NULL, &peer_srx, skb); + } + + conn = rxrpc_find_service_conn_rcu(peer, skb); + conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); + if (conn) { + rcu_read_unlock(); + ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); + rxrpc_put_connection(conn, rxrpc_conn_put_call_input); + return ret; + } + + peer = rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input); + rcu_read_unlock(); + + ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb); + rxrpc_put_peer(peer, rxrpc_peer_put_input); + if (ret < 0) + goto reject_packet; + return 0; + +bad_message: + trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); +protocol_error: + skb->priority = RX_PROTOCOL_ERROR; + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; +reject_packet: + rxrpc_reject_packet(local, skb); + return ret; +} + +/* + * Deal with a packet that's associated with an extant connection. + */ +static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, + struct sockaddr_rxrpc *peer_srx, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_channel *chan; + struct rxrpc_call *call = NULL; + unsigned int channel; + + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; + + if (sp->hdr.serviceId != conn->service_id) { + int old_id; + + if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) + goto reupgrade; + old_id = cmpxchg(&conn->service_id, conn->orig_service_id, + sp->hdr.serviceId); + + if (old_id != conn->orig_service_id && + old_id != sp->hdr.serviceId) + goto reupgrade; + } + + if (after(sp->hdr.serial, conn->hi_serial)) + conn->hi_serial = sp->hdr.serial; + + /* It's a connection-level packet if the call number is 0. */ + if (sp->hdr.callNumber == 0) + return rxrpc_input_conn_packet(conn, skb); + + /* Call-bound packets are routed by connection channel. */ + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + + /* Ignore really old calls */ + if (sp->hdr.callNumber < chan->last_call) + return 0; + + if (sp->hdr.callNumber == chan->last_call) { + if (chan->call || + sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) + return 0; + + /* For the previous service call, if completed successfully, we + * discard all further packets. + */ + if (rxrpc_conn_is_service(conn) && + chan->last_type == RXRPC_PACKET_TYPE_ACK) + return 0; + + /* But otherwise we need to retransmit the final packet from + * data cached in the connection record. 
+ */ + if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) + trace_rxrpc_rx_data(chan->call_debug_id, + sp->hdr.seq, + sp->hdr.serial, + sp->hdr.flags); + rxrpc_input_conn_packet(conn, skb); + return 0; + } + + rcu_read_lock(); + call = rxrpc_try_get_call(rcu_dereference(chan->call), + rxrpc_call_get_input); + rcu_read_unlock(); + + if (sp->hdr.callNumber > chan->call_id) { + if (rxrpc_to_client(sp)) { + rxrpc_put_call(call, rxrpc_call_put_input); + goto reject_packet; + } + + if (call) { + rxrpc_implicit_end_call(call, skb); + rxrpc_put_call(call, rxrpc_call_put_input); + call = NULL; + } + } + + if (!call) { + if (rxrpc_to_client(sp)) + goto bad_message; + if (rxrpc_new_incoming_call(conn->local, conn->peer, conn, + peer_srx, skb)) + return 0; + goto reject_packet; + } + + rxrpc_input_call_event(call, skb); + rxrpc_put_call(call, rxrpc_call_put_input); + return 0; + +wrong_security: + trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; + +reupgrade: + trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); + goto protocol_error; + +bad_message: + trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); +protocol_error: + skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; +reject_packet: + rxrpc_reject_packet(conn->local, skb); + return 0; +} + +/* + * I/O and event handling thread. + */ +int rxrpc_io_thread(void *data) +{ + struct sk_buff_head rx_queue; + struct rxrpc_local *local = data; + struct rxrpc_call *call; + struct sk_buff *skb; + + skb_queue_head_init(&rx_queue); + + set_user_nice(current, MIN_NICE); + + for (;;) { + rxrpc_inc_stat(local->rxnet, stat_io_loop); + + /* Deal with calls that want immediate attention. */ + if ((call = list_first_entry_or_null(&local->call_attend_q, + struct rxrpc_call, + attend_link))) { + spin_lock_bh(&local->lock); + list_del_init(&call->attend_link); + spin_unlock_bh(&local->lock); + + trace_rxrpc_call_poked(call); + rxrpc_input_call_event(call, NULL); + rxrpc_put_call(call, rxrpc_call_put_poke); + continue; + } + + /* Process received packets and errors. 
*/ + if ((skb = __skb_dequeue(&rx_queue))) { + switch (skb->mark) { + case RXRPC_SKB_MARK_PACKET: + skb->priority = 0; + rxrpc_input_packet(local, &skb); + trace_rxrpc_rx_done(skb->mark, skb->priority); + rxrpc_free_skb(skb, rxrpc_skb_put_input); + break; + case RXRPC_SKB_MARK_ERROR: + rxrpc_input_error(local, skb); + rxrpc_free_skb(skb, rxrpc_skb_put_error_report); + break; + default: + WARN_ON_ONCE(1); + rxrpc_free_skb(skb, rxrpc_skb_put_unknown); + break; + } + continue; + } + + if (!skb_queue_empty(&local->rx_queue)) { + spin_lock_irq(&local->rx_queue.lock); + skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); + spin_unlock_irq(&local->rx_queue.lock); + continue; + } + + set_current_state(TASK_INTERRUPTIBLE); + if (!skb_queue_empty(&local->rx_queue) || + !list_empty(&local->call_attend_q)) { + __set_current_state(TASK_RUNNING); + continue; + } + + if (kthread_should_stop()) + break; + schedule(); + } + + __set_current_state(TASK_RUNNING); + rxrpc_see_local(local, rxrpc_local_stop); + rxrpc_destroy_local(local); + local->io_thread = NULL; + rxrpc_see_local(local, rxrpc_local_stopped); + return 0; +} diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 8d2073e0e3da..8d53aded09c4 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -513,7 +513,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, if (ret < 0) goto error; - conn->params.key = key; + conn->key = key; _leave(" = 0 [%d]", key_serial(key)); return 0; @@ -602,7 +602,8 @@ static long rxrpc_read(const struct key *key, } _debug("token[%u]: toksize=%u", ntoks, toksize); - ASSERTCMP(toksize, <=, AFSTOKEN_LENGTH_MAX); + if (WARN_ON(toksize > AFSTOKEN_LENGTH_MAX)) + return -EIO; toksizes[ntoks++] = toksize; size += toksize + 4; /* each token has a length word */ @@ -679,8 +680,9 @@ static long rxrpc_read(const struct key *key, return -ENOPKG; } - ASSERTCMP((unsigned long)xdr - (unsigned long)oldxdr, ==, - toksize); + if (WARN_ON((unsigned long)xdr - (unsigned long)oldxdr == + toksize)) + return -EIO; } #undef ENCODE_STR @@ -688,8 +690,10 @@ static long rxrpc_read(const struct key *key, #undef ENCODE64 #undef ENCODE - ASSERTCMP(tok, ==, ntoks); - ASSERTCMP((char __user *) xdr - buffer, ==, size); + if (WARN_ON(tok != ntoks)) + return -EIO; + if (WARN_ON((unsigned long)xdr - (unsigned long)buffer != size)) + return -EIO; _leave(" = %zu", size); return size; } diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 19e929c7c38b..5e69ea6b233d 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -21,9 +21,9 @@ static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC"; /* * Reply to a version request */ -static void rxrpc_send_version_request(struct rxrpc_local *local, - struct rxrpc_host_header *hdr, - struct sk_buff *skb) +void rxrpc_send_version_request(struct rxrpc_local *local, + struct rxrpc_host_header *hdr, + struct sk_buff *skb) { struct rxrpc_wire_header whdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); @@ -63,8 +63,6 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, len = iov[0].iov_len + iov[1].iov_len; - _proto("Tx VERSION (reply)"); - ret = kernel_sendmsg(local->socket, &msg, iov, 2, len); if (ret < 0) trace_rxrpc_tx_fail(local->debug_id, 0, ret, @@ -75,41 +73,3 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _leave(""); } - -/* - * Process event packets targeted at a local endpoint. 
- */ -void rxrpc_process_local_events(struct rxrpc_local *local) -{ - struct sk_buff *skb; - char v; - - _enter(""); - - skb = skb_dequeue(&local->event_queue); - if (skb) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - rxrpc_see_skb(skb, rxrpc_skb_seen); - _debug("{%d},{%u}", local->debug_id, sp->hdr.type); - - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), - &v, 1) < 0) - return; - _proto("Rx VERSION { %02x }", v); - if (v == 0) - rxrpc_send_version_request(local, &sp->hdr, skb); - break; - - default: - /* Just ignore anything we don't understand */ - break; - } - - rxrpc_free_skb(skb, rxrpc_skb_freed); - } - - _leave(""); -} diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a943fdf91e24..44222923c0d1 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -20,7 +20,6 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -static void rxrpc_local_processor(struct work_struct *); static void rxrpc_local_rcu(struct rcu_head *); /* @@ -97,12 +96,9 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, atomic_set(&local->active_users, 1); local->rxnet = rxnet; INIT_HLIST_NODE(&local->link); - INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_LIST_HEAD(&local->ack_tx_queue); - spin_lock_init(&local->ack_tx_lock); init_rwsem(&local->defrag_sem); - skb_queue_head_init(&local->reject_queue); - skb_queue_head_init(&local->event_queue); + skb_queue_head_init(&local->rx_queue); + INIT_LIST_HEAD(&local->call_attend_q); local->client_bundles = RB_ROOT; spin_lock_init(&local->client_bundles_lock); spin_lock_init(&local->lock); @@ -110,7 +106,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet, local->debug_id = atomic_inc_return(&rxrpc_debug_id); memcpy(&local->srx, srx, sizeof(*srx)); local->srx.srx_service = 0; - trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, NULL); + trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1); } _leave(" = %p", local); @@ -126,6 +122,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) struct udp_tunnel_sock_cfg tuncfg = {NULL}; struct sockaddr_rxrpc *srx = &local->srx; struct udp_port_cfg udp_conf = {0}; + struct task_struct *io_thread; struct sock *usk; int ret; @@ -152,7 +149,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) } tuncfg.encap_type = UDP_ENCAP_RXRPC; - tuncfg.encap_rcv = rxrpc_input_packet; + tuncfg.encap_rcv = rxrpc_encap_rcv; tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; tuncfg.sk_user_data = local; setup_udp_tunnel_sock(net, local->socket, &tuncfg); @@ -185,8 +182,23 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) BUG(); } + io_thread = kthread_run(rxrpc_io_thread, local, + "krxrpcio/%u", ntohs(udp_conf.local_udp_port)); + if (IS_ERR(io_thread)) { + ret = PTR_ERR(io_thread); + goto error_sock; + } + + local->io_thread = io_thread; _leave(" = 0"); return 0; + +error_sock: + kernel_sock_shutdown(local->socket, SHUT_RDWR); + local->socket->sk->sk_user_data = NULL; + sock_release(local->socket); + local->socket = NULL; + return ret; } /* @@ -198,7 +210,6 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, struct rxrpc_local *local; struct rxrpc_net *rxnet = rxrpc_net(net); struct hlist_node *cursor; - const char *age; long diff; int ret; @@ -229,10 +240,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, * we're attempting to use a local address that the dying * object is still using. 
*/ - if (!rxrpc_use_local(local)) + if (!rxrpc_use_local(local, rxrpc_local_use_lookup)) break; - age = "old"; goto found; } @@ -250,14 +260,9 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net, } else { hlist_add_head_rcu(&local->link, &rxnet->local_endpoints); } - age = "new"; found: mutex_unlock(&rxnet->local_mutex); - - _net("LOCAL %s %d {%pISp}", - age, local->debug_id, &local->srx.transport); - _leave(" = %p", local); return local; @@ -279,64 +284,49 @@ addr_in_use: /* * Get a ref on a local endpoint. */ -struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local) +struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - const void *here = __builtin_return_address(0); - int r; + int r, u; + u = atomic_read(&local->active_users); __refcount_inc(&local->ref, &r); - trace_rxrpc_local(local->debug_id, rxrpc_local_got, r + 1, here); + trace_rxrpc_local(local->debug_id, why, r + 1, u); return local; } /* * Get a ref on a local endpoint unless its usage has already reached 0. */ -struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local) +struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - const void *here = __builtin_return_address(0); - int r; + int r, u; - if (local) { - if (__refcount_inc_not_zero(&local->ref, &r)) - trace_rxrpc_local(local->debug_id, rxrpc_local_got, - r + 1, here); - else - local = NULL; + if (local && __refcount_inc_not_zero(&local->ref, &r)) { + u = atomic_read(&local->active_users); + trace_rxrpc_local(local->debug_id, why, r + 1, u); + return local; } - return local; -} -/* - * Queue a local endpoint and pass the caller's reference to the work item. - */ -void rxrpc_queue_local(struct rxrpc_local *local) -{ - const void *here = __builtin_return_address(0); - unsigned int debug_id = local->debug_id; - int r = refcount_read(&local->ref); - - if (rxrpc_queue_work(&local->processor)) - trace_rxrpc_local(debug_id, rxrpc_local_queued, r + 1, here); - else - rxrpc_put_local(local); + return NULL; } /* * Drop a ref on a local endpoint. */ -void rxrpc_put_local(struct rxrpc_local *local) +void rxrpc_put_local(struct rxrpc_local *local, enum rxrpc_local_trace why) { - const void *here = __builtin_return_address(0); unsigned int debug_id; bool dead; - int r; + int r, u; if (local) { debug_id = local->debug_id; + u = atomic_read(&local->active_users); dead = __refcount_dec_and_test(&local->ref, &r); - trace_rxrpc_local(debug_id, rxrpc_local_put, r, here); + trace_rxrpc_local(debug_id, why, r, u); if (dead) call_rcu(&local->rcu, rxrpc_local_rcu); @@ -346,14 +336,15 @@ void rxrpc_put_local(struct rxrpc_local *local) /* * Start using a local endpoint. */ -struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local, + enum rxrpc_local_trace why) { - local = rxrpc_get_local_maybe(local); + local = rxrpc_get_local_maybe(local, rxrpc_local_get_for_use); if (!local) return NULL; - if (!__rxrpc_use_local(local)) { - rxrpc_put_local(local); + if (!__rxrpc_use_local(local, why)) { + rxrpc_put_local(local, rxrpc_local_put_for_use); return NULL; } @@ -362,15 +353,19 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local) /* * Cease using a local endpoint. Once the number of active users reaches 0, we - * start the closure of the transport in the work processor. + * start the closure of the transport in the I/O thread.. 
*/ -void rxrpc_unuse_local(struct rxrpc_local *local) +void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why) { + unsigned int debug_id = local->debug_id; + int r, u; + if (local) { - if (__rxrpc_unuse_local(local)) { - rxrpc_get_local(local); - rxrpc_queue_local(local); - } + r = refcount_read(&local->ref); + u = atomic_dec_return(&local->active_users); + trace_rxrpc_local(debug_id, why, r, u); + if (u == 0) + kthread_stop(local->io_thread); } } @@ -381,7 +376,7 @@ void rxrpc_unuse_local(struct rxrpc_local *local) * Closing the socket cannot be done from bottom half context or RCU callback * context because it might sleep. */ -static void rxrpc_local_destroyer(struct rxrpc_local *local) +void rxrpc_destroy_local(struct rxrpc_local *local) { struct socket *socket = local->socket; struct rxrpc_net *rxnet = local->rxnet; @@ -408,52 +403,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ - rxrpc_purge_queue(&local->reject_queue); - rxrpc_purge_queue(&local->event_queue); -} - -/* - * Process events on an endpoint. The work item carries a ref which - * we must release. - */ -static void rxrpc_local_processor(struct work_struct *work) -{ - struct rxrpc_local *local = - container_of(work, struct rxrpc_local, processor); - bool again; - - if (local->dead) - return; - - trace_rxrpc_local(local->debug_id, rxrpc_local_processing, - refcount_read(&local->ref), NULL); - - do { - again = false; - if (!__rxrpc_use_local(local)) { - rxrpc_local_destroyer(local); - break; - } - - if (!list_empty(&local->ack_tx_queue)) { - rxrpc_transmit_ack_packets(local); - again = true; - } - - if (!skb_queue_empty(&local->reject_queue)) { - rxrpc_reject_packets(local); - again = true; - } - - if (!skb_queue_empty(&local->event_queue)) { - rxrpc_process_local_events(local); - again = true; - } - - __rxrpc_unuse_local(local); - } while (again); - - rxrpc_put_local(local); + rxrpc_purge_queue(&local->rx_queue); } /* @@ -463,13 +413,8 @@ static void rxrpc_local_rcu(struct rcu_head *rcu) { struct rxrpc_local *local = container_of(rcu, struct rxrpc_local, rcu); - _enter("%d", local->debug_id); - - ASSERT(!work_pending(&local->processor)); - - _net("DESTROY LOCAL %d", local->debug_id); + rxrpc_see_local(local, rxrpc_local_free); kfree(local); - _leave(""); } /* diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c index 84242c0e467c..5905530e2f33 100644 --- a/net/rxrpc/net_ns.c +++ b/net/rxrpc/net_ns.c @@ -65,7 +65,7 @@ static __net_init int rxrpc_init_net(struct net *net) atomic_set(&rxnet->nr_client_conns, 0); rxnet->kill_all_client_conns = false; spin_lock_init(&rxnet->client_conn_cache_lock); - spin_lock_init(&rxnet->client_conn_discard_lock); + mutex_init(&rxnet->client_conn_discard_lock); INIT_LIST_HEAD(&rxnet->idle_client_conns); INIT_WORK(&rxnet->client_conn_reaper, rxrpc_discard_expired_client_conns); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index c5eed0e83e47..3d8c9f830ee0 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -142,8 +142,8 @@ retry: txb->ack.reason = RXRPC_ACK_IDLE; } - mtu = conn->params.peer->if_mtu; - mtu -= conn->params.peer->hdrsize; + mtu = conn->peer->if_mtu; + mtu -= conn->peer->hdrsize; jmax = rxrpc_rx_jumbo_max; qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); @@ -203,12 +203,11 @@ static void rxrpc_cancel_rtt_probe(struct rxrpc_call *call, } /* - * Send an ACK call packet. + * Transmit an ACK packet. 
*/ -static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf *txb) +int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) { struct rxrpc_connection *conn; - struct rxrpc_call *call = txb->call; struct msghdr msg; struct kvec iov[1]; rxrpc_serial_t serial; @@ -229,11 +228,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf * if (txb->ack.reason == RXRPC_ACK_PING) txb->wire.flags |= RXRPC_REQUEST_ACK; - if (txb->ack.reason == RXRPC_ACK_DELAY) - clear_bit(RXRPC_CALL_DELAY_ACK_PENDING, &call->flags); - if (txb->ack.reason == RXRPC_ACK_IDLE) - clear_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags); - n = rxrpc_fill_out_ack(conn, call, txb); if (n == 0) return 0; @@ -247,8 +241,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf * trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(txb->ack.firstPacket), ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks); - if (txb->ack_why == rxrpc_propose_ack_ping_for_lost_ack) - call->acks_lost_ping = serial; if (txb->ack.reason == RXRPC_ACK_PING) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping); @@ -259,7 +251,7 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf * txb->ack.previousPacket = htonl(call->rx_highest_seq); iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); - ret = do_udp_sendmsg(conn->params.local->socket, &msg, len); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); call->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, @@ -279,44 +271,6 @@ static int rxrpc_send_ack_packet(struct rxrpc_local *local, struct rxrpc_txbuf * } /* - * ACK transmitter for a local endpoint. The UDP socket locks around each - * transmission, so we can only transmit one packet at a time, ACK, DATA or - * otherwise. - */ -void rxrpc_transmit_ack_packets(struct rxrpc_local *local) -{ - LIST_HEAD(queue); - int ret; - - trace_rxrpc_local(local->debug_id, rxrpc_local_tx_ack, - refcount_read(&local->ref), NULL); - - if (list_empty(&local->ack_tx_queue)) - return; - - spin_lock_bh(&local->ack_tx_lock); - list_splice_tail_init(&local->ack_tx_queue, &queue); - spin_unlock_bh(&local->ack_tx_lock); - - while (!list_empty(&queue)) { - struct rxrpc_txbuf *txb = - list_entry(queue.next, struct rxrpc_txbuf, tx_link); - - ret = rxrpc_send_ack_packet(local, txb); - if (ret < 0 && ret != -ECONNRESET) { - spin_lock_bh(&local->ack_tx_lock); - list_splice_init(&queue, &local->ack_tx_queue); - spin_unlock_bh(&local->ack_tx_lock); - break; - } - - list_del_init(&txb->tx_link); - rxrpc_put_call(txb->call, rxrpc_call_put); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_ack_tx); - } -} - -/* * Send an ABORT call packet. 
*/ int rxrpc_send_abort_packet(struct rxrpc_call *call) @@ -358,7 +312,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) pkt.whdr.userStatus = 0; pkt.whdr.securityIndex = call->security_ix; pkt.whdr._rsvd = 0; - pkt.whdr.serviceId = htons(call->service_id); + pkt.whdr.serviceId = htons(call->dest_srx.srx_service); pkt.abort_code = htonl(call->abort_code); iov[0].iov_base = &pkt; @@ -368,8 +322,8 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) pkt.whdr.serial = htonl(serial); iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); - ret = do_udp_sendmsg(conn->params.local->socket, &msg, sizeof(pkt)); - conn->params.peer->last_tx_at = ktime_get_seconds(); + ret = do_udp_sendmsg(conn->local->socket, &msg, sizeof(pkt)); + conn->peer->last_tx_at = ktime_get_seconds(); if (ret < 0) trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_abort); @@ -395,12 +349,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) _enter("%x,{%d}", txb->seq, txb->len); - if (hlist_unhashed(&call->error_link)) { - spin_lock_bh(&call->peer->lock); - hlist_add_head_rcu(&call->error_link, &call->peer->error_targets); - spin_unlock_bh(&call->peer->lock); - } - /* Each transmission of a Tx packet needs a new serial number */ serial = atomic_inc_return(&conn->serial); txb->wire.serial = htonl(serial); @@ -466,6 +414,14 @@ dont_set_request_ack: trace_rxrpc_tx_data(call, txb->seq, serial, txb->wire.flags, test_bit(RXRPC_TXBUF_RESENT, &txb->flags), false); + + /* Track what we've attempted to transmit at least once so that the + * retransmission algorithm doesn't try to resend what we haven't sent + * yet. However, this can race as we can receive an ACK before we get + * to this point. But, OTOH, if we won't get an ACK mentioning this + * packet unless the far side received it (though it could have + * discarded it anyway and NAK'd it). 
+ */ cmpxchg(&call->tx_transmitted, txb->seq - 1, txb->seq); /* send the packet with the don't fragment bit set if we currently @@ -473,7 +429,7 @@ dont_set_request_ack: if (txb->len >= call->peer->maxdata) goto send_fragmentable; - down_read(&conn->params.local->defrag_sem); + down_read(&conn->local->defrag_sem); txb->last_sent = ktime_get_real(); if (txb->wire.flags & RXRPC_REQUEST_ACK) @@ -486,11 +442,12 @@ dont_set_request_ack: * message and update the peer record */ rxrpc_inc_stat(call->rxnet, stat_tx_data_send); - ret = do_udp_sendmsg(conn->params.local->socket, &msg, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + conn->peer->last_tx_at = ktime_get_seconds(); - up_read(&conn->params.local->defrag_sem); + up_read(&conn->local->defrag_sem); if (ret < 0) { + rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); rxrpc_cancel_rtt_probe(call, serial, rtt_slot); trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_data_nofrag); @@ -549,22 +506,22 @@ send_fragmentable: /* attempt to send this message with fragmentation enabled */ _debug("send fragment"); - down_write(&conn->params.local->defrag_sem); + down_write(&conn->local->defrag_sem); txb->last_sent = ktime_get_real(); if (txb->wire.flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); - switch (conn->params.local->srx.transport.family) { + switch (conn->local->srx.transport.family) { case AF_INET6: case AF_INET: - ip_sock_set_mtu_discover(conn->params.local->socket->sk, + ip_sock_set_mtu_discover(conn->local->socket->sk, IP_PMTUDISC_DONT); rxrpc_inc_stat(call->rxnet, stat_tx_data_send_frag); - ret = do_udp_sendmsg(conn->params.local->socket, &msg, len); - conn->params.peer->last_tx_at = ktime_get_seconds(); + ret = do_udp_sendmsg(conn->local->socket, &msg, len); + conn->peer->last_tx_at = ktime_get_seconds(); - ip_sock_set_mtu_discover(conn->params.local->socket->sk, + ip_sock_set_mtu_discover(conn->local->socket->sk, IP_PMTUDISC_DO); break; @@ -573,6 +530,7 @@ send_fragmentable: } if (ret < 0) { + rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); rxrpc_cancel_rtt_probe(call, serial, rtt_slot); trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_data_frag); @@ -582,26 +540,25 @@ send_fragmentable: } rxrpc_tx_backoff(call, ret); - up_write(&conn->params.local->defrag_sem); + up_write(&conn->local->defrag_sem); goto done; } /* - * reject packets through the local endpoint + * Reject a packet through the local endpoint. 
*/ -void rxrpc_reject_packets(struct rxrpc_local *local) +void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) { - struct sockaddr_rxrpc srx; - struct rxrpc_skb_priv *sp; struct rxrpc_wire_header whdr; - struct sk_buff *skb; + struct sockaddr_rxrpc srx; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; struct kvec iov[2]; size_t size; __be32 code; int ret, ioc; - _enter("%d", local->debug_id); + rxrpc_see_skb(skb, rxrpc_skb_see_reject); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); @@ -615,52 +572,42 @@ void rxrpc_reject_packets(struct rxrpc_local *local) memset(&whdr, 0, sizeof(whdr)); - while ((skb = skb_dequeue(&local->reject_queue))) { - rxrpc_see_skb(skb, rxrpc_skb_seen); - sp = rxrpc_skb(skb); + switch (skb->mark) { + case RXRPC_SKB_MARK_REJECT_BUSY: + whdr.type = RXRPC_PACKET_TYPE_BUSY; + size = sizeof(whdr); + ioc = 1; + break; + case RXRPC_SKB_MARK_REJECT_ABORT: + whdr.type = RXRPC_PACKET_TYPE_ABORT; + code = htonl(skb->priority); + size = sizeof(whdr) + sizeof(code); + ioc = 2; + break; + default: + return; + } - switch (skb->mark) { - case RXRPC_SKB_MARK_REJECT_BUSY: - whdr.type = RXRPC_PACKET_TYPE_BUSY; - size = sizeof(whdr); - ioc = 1; - break; - case RXRPC_SKB_MARK_REJECT_ABORT: - whdr.type = RXRPC_PACKET_TYPE_ABORT; - code = htonl(skb->priority); - size = sizeof(whdr) + sizeof(code); - ioc = 2; - break; - default: - rxrpc_free_skb(skb, rxrpc_skb_freed); - continue; - } + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; - if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { - msg.msg_namelen = srx.transport_len; - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - - iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size); - ret = do_udp_sendmsg(local->socket, &msg, size); - if (ret < 0) - trace_rxrpc_tx_fail(local->debug_id, 0, ret, - rxrpc_tx_point_reject); - else - trace_rxrpc_tx_packet(local->debug_id, &whdr, - rxrpc_tx_point_reject); - } + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; - rxrpc_free_skb(skb, rxrpc_skb_freed); + iov_iter_kvec(&msg.msg_iter, WRITE, iov, ioc, size); + ret = do_udp_sendmsg(local->socket, &msg, size); + if (ret < 0) + trace_rxrpc_tx_fail(local->debug_id, 0, ret, + rxrpc_tx_point_reject); + else + trace_rxrpc_tx_packet(local->debug_id, &whdr, + rxrpc_tx_point_reject); } - - _leave(""); } /* @@ -701,8 +648,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) len = iov[0].iov_len + iov[1].iov_len; - _proto("Tx VERSION (keepalive)"); - iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); ret = do_udp_sendmsg(peer->local->socket, &msg, len); if (ret < 0) @@ -715,3 +660,43 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer) peer->last_tx_at = ktime_get_seconds(); _leave(""); } + +/* + * Schedule an instant Tx resend. + */ +static inline void rxrpc_instant_resend(struct rxrpc_call *call, + struct rxrpc_txbuf *txb) +{ + if (call->state < RXRPC_CALL_COMPLETE) + kdebug("resend"); +} + +/* + * Transmit one packet. 
+ */ +void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb) +{ + int ret; + + ret = rxrpc_send_data_packet(call, txb); + if (ret < 0) { + switch (ret) { + case -ENETUNREACH: + case -EHOSTUNREACH: + case -ECONNREFUSED: + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + 0, ret); + break; + default: + _debug("need instant resend %d", ret); + rxrpc_instant_resend(call, txb); + } + } else { + unsigned long now = jiffies; + unsigned long resend_at = now + call->peer->rto_j; + + WRITE_ONCE(call->resend_at, resend_at); + rxrpc_reduce_call_timer(call, resend_at, now, + rxrpc_timer_set_for_send); + } +} diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index cda3890657a9..6685bf917aa6 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -18,9 +18,9 @@ #include <net/ip.h> #include "ar-internal.h" -static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); -static void rxrpc_distribute_error(struct rxrpc_peer *, int, - enum rxrpc_call_completion); +static void rxrpc_store_error(struct rxrpc_peer *, struct sk_buff *); +static void rxrpc_distribute_error(struct rxrpc_peer *, struct sk_buff *, + enum rxrpc_call_completion, int); /* * Find the peer associated with a local error. @@ -48,13 +48,11 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, srx->transport.sin.sin_port = serr->port; switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP: - _net("Rx ICMP"); memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); break; case SO_EE_ORIGIN_ICMP6: - _net("Rx ICMP6 on v4 sock"); memcpy(&srx->transport.sin.sin_addr, skb_network_header(skb) + serr->addr_offset + 12, sizeof(struct in_addr)); @@ -70,14 +68,12 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, case AF_INET6: switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP6: - _net("Rx ICMP6"); srx->transport.sin6.sin6_port = serr->port; memcpy(&srx->transport.sin6.sin6_addr, skb_network_header(skb) + serr->addr_offset, sizeof(struct in6_addr)); break; case SO_EE_ORIGIN_ICMP: - _net("Rx ICMP on v6 sock"); srx->transport_len = sizeof(srx->transport.sin); srx->transport.family = AF_INET; srx->transport.sin.sin_port = serr->port; @@ -106,13 +102,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, */ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) { - _net("Rx ICMP Fragmentation Needed (%d)", mtu); - /* wind down the local interface MTU */ - if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) { + if (mtu > 0 && peer->if_mtu == 65535 && mtu < peer->if_mtu) peer->if_mtu = mtu; - _net("I/F MTU %u", mtu); - } if (mtu == 0) { /* they didn't give us a size, estimate one */ @@ -129,63 +121,36 @@ static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) } if (mtu < peer->mtu) { - spin_lock_bh(&peer->lock); + spin_lock(&peer->lock); peer->mtu = mtu; peer->maxdata = peer->mtu - peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", - peer->mtu, peer->maxdata); + spin_unlock(&peer->lock); } } /* * Handle an error received on the local endpoint. 
*/ -void rxrpc_error_report(struct sock *sk) +void rxrpc_input_error(struct rxrpc_local *local, struct sk_buff *skb) { - struct sock_exterr_skb *serr; + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); struct sockaddr_rxrpc srx; - struct rxrpc_local *local; struct rxrpc_peer *peer = NULL; - struct sk_buff *skb; - rcu_read_lock(); - local = rcu_dereference_sk_user_data(sk); - if (unlikely(!local)) { - rcu_read_unlock(); - return; - } - _enter("%p{%d}", sk, local->debug_id); - - /* Clear the outstanding error value on the socket so that it doesn't - * cause kernel_sendmsg() to return it later. - */ - sock_error(sk); + _enter("L=%x", local->debug_id); - skb = sock_dequeue_err_skb(sk); - if (!skb) { - rcu_read_unlock(); - _leave("UDP socket errqueue empty"); - return; - } - rxrpc_new_skb(skb, rxrpc_skb_received); - serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); return; } + rcu_read_lock(); peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); - if (peer && !rxrpc_get_peer_maybe(peer)) + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input_error)) peer = NULL; - if (!peer) { - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - _leave(" [no peer]"); + rcu_read_unlock(); + if (!peer) return; - } trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); @@ -196,72 +161,26 @@ void rxrpc_error_report(struct sock *sk) goto out; } - rxrpc_store_error(peer, serr); + rxrpc_store_error(peer, skb); out: - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - rxrpc_put_peer(peer); - - _leave(""); + rxrpc_put_peer(peer, rxrpc_peer_put_input_error); } /* * Map an error report to error codes on the peer record. */ -static void rxrpc_store_error(struct rxrpc_peer *peer, - struct sock_exterr_skb *serr) +static void rxrpc_store_error(struct rxrpc_peer *peer, struct sk_buff *skb) { enum rxrpc_call_completion compl = RXRPC_CALL_NETWORK_ERROR; - struct sock_extended_err *ee; - int err; + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + struct sock_extended_err *ee = &serr->ee; + int err = ee->ee_errno; _enter(""); - ee = &serr->ee; - - err = ee->ee_errno; - switch (ee->ee_origin) { - case SO_EE_ORIGIN_ICMP: - switch (ee->ee_type) { - case ICMP_DEST_UNREACH: - switch (ee->ee_code) { - case ICMP_NET_UNREACH: - _net("Rx Received ICMP Network Unreachable"); - break; - case ICMP_HOST_UNREACH: - _net("Rx Received ICMP Host Unreachable"); - break; - case ICMP_PORT_UNREACH: - _net("Rx Received ICMP Port Unreachable"); - break; - case ICMP_NET_UNKNOWN: - _net("Rx Received ICMP Unknown Network"); - break; - case ICMP_HOST_UNKNOWN: - _net("Rx Received ICMP Unknown Host"); - break; - default: - _net("Rx Received ICMP DestUnreach code=%u", - ee->ee_code); - break; - } - break; - - case ICMP_TIME_EXCEEDED: - _net("Rx Received ICMP TTL Exceeded"); - break; - - default: - _proto("Rx Received ICMP error { type=%u code=%u }", - ee->ee_type, ee->ee_code); - break; - } - break; - case SO_EE_ORIGIN_NONE: case SO_EE_ORIGIN_LOCAL: - _proto("Rx Received local error { error=%d }", err); compl = RXRPC_CALL_LOCAL_ERROR; break; @@ -269,26 +188,40 @@ static void rxrpc_store_error(struct rxrpc_peer *peer, if (err == EACCES) err = EHOSTUNREACH; fallthrough; + case SO_EE_ORIGIN_ICMP: default: - _proto("Rx Received error report { orig=%u }", ee->ee_origin); break; } - rxrpc_distribute_error(peer, err, compl); + rxrpc_distribute_error(peer, skb, compl, err); } /* * Distribute an error that occurred on 
a peer. */ -static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, - enum rxrpc_call_completion compl) +static void rxrpc_distribute_error(struct rxrpc_peer *peer, struct sk_buff *skb, + enum rxrpc_call_completion compl, int err) { struct rxrpc_call *call; + HLIST_HEAD(error_targets); - hlist_for_each_entry_rcu(call, &peer->error_targets, error_link) { - rxrpc_see_call(call); - rxrpc_set_call_completion(call, compl, 0, -error); + spin_lock(&peer->lock); + hlist_move_list(&peer->error_targets, &error_targets); + + while (!hlist_empty(&error_targets)) { + call = hlist_entry(error_targets.first, + struct rxrpc_call, error_link); + hlist_del_init(&call->error_link); + spin_unlock(&peer->lock); + + rxrpc_see_call(call, rxrpc_call_see_distribute_error); + rxrpc_set_call_completion(call, compl, 0, -err); + rxrpc_input_call_event(call, skb); + + spin_lock(&peer->lock); } + + spin_unlock(&peer->lock); } /* @@ -304,18 +237,18 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, time64_t keepalive_at; int slot; - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); while (!list_empty(collector)) { peer = list_entry(collector->next, struct rxrpc_peer, keepalive_link); list_del_init(&peer->keepalive_link); - if (!rxrpc_get_peer_maybe(peer)) + if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive)) continue; - if (__rxrpc_use_local(peer->local)) { - spin_unlock_bh(&rxnet->peer_hash_lock); + if (__rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive)) { + spin_unlock(&rxnet->peer_hash_lock); keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME; slot = keepalive_at - base; @@ -334,15 +267,15 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, */ slot += cursor; slot &= mask; - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); list_add_tail(&peer->keepalive_link, &rxnet->peer_keepalive[slot & mask]); - rxrpc_unuse_local(peer->local); + rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive); } - rxrpc_put_peer_locked(peer); + rxrpc_put_peer_locked(peer, rxrpc_peer_put_keepalive); } - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); } /* @@ -372,7 +305,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) * second; the bucket at cursor + 1 goes at now + 1s and so * on... */ - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); list_splice_init(&rxnet->peer_keepalive_new, &collector); stop = cursor + ARRAY_SIZE(rxnet->peer_keepalive); @@ -384,7 +317,7 @@ void rxrpc_peer_keepalive_worker(struct work_struct *work) } base = now; - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); rxnet->peer_keepalive_base = base; rxnet->peer_keepalive_cursor = cursor; diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 041a51225c5f..608946dcc505 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -138,10 +138,8 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, unsigned long hash_key = rxrpc_peer_hash_key(local, srx); peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); - if (peer) { - _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); + if (peer) _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); - } return peer; } @@ -207,9 +205,9 @@ static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx, /* * Allocate a peer. 
*/ -struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) +struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp, + enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); struct rxrpc_peer *peer; _enter(""); @@ -217,7 +215,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) peer = kzalloc(sizeof(struct rxrpc_peer), gfp); if (peer) { refcount_set(&peer->ref, 1); - peer->local = rxrpc_get_local(local); + peer->local = rxrpc_get_local(local, rxrpc_local_get_peer); INIT_HLIST_HEAD(&peer->error_targets); peer->service_conns = RB_ROOT; seqlock_init(&peer->service_conn_lock); @@ -228,7 +226,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) rxrpc_peer_init_rtt(peer); peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW; - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_new, 1, here); + trace_rxrpc_peer(peer->debug_id, why, 1); } _leave(" = %p", peer); @@ -284,7 +282,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, _enter(""); - peer = rxrpc_alloc_peer(local, gfp); + peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client); if (peer) { memcpy(&peer->srx, srx, sizeof(*srx)); rxrpc_init_peer(rx, peer, hash_key); @@ -296,7 +294,8 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, static void rxrpc_free_peer(struct rxrpc_peer *peer) { - rxrpc_put_local(peer->local); + trace_rxrpc_peer(peer->debug_id, 0, rxrpc_peer_free); + rxrpc_put_local(peer->local, rxrpc_local_put_peer); kfree_rcu(peer, rcu); } @@ -336,7 +335,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, /* search the peer list first */ rcu_read_lock(); peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client)) peer = NULL; rcu_read_unlock(); @@ -350,11 +349,11 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, return NULL; } - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); - if (peer && !rxrpc_get_peer_maybe(peer)) + if (peer && !rxrpc_get_peer_maybe(peer, rxrpc_peer_get_lookup_client)) peer = NULL; if (!peer) { hash_add_rcu(rxnet->peer_hash, @@ -363,7 +362,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, &rxnet->peer_keepalive_new); } - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); if (peer) rxrpc_free_peer(candidate); @@ -371,8 +370,6 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, peer = candidate; } - _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); - _leave(" = %p {u=%d}", peer, refcount_read(&peer->ref)); return peer; } @@ -380,27 +377,26 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, /* * Get a ref on a peer record. */ -struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) +struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); int r; __refcount_inc(&peer->ref, &r); - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + trace_rxrpc_peer(peer->debug_id, why, r + 1); return peer; } /* * Get a ref on a peer record unless its usage has already reached 0. 
*/ -struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer) +struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *peer, + enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); int r; if (peer) { if (__refcount_inc_not_zero(&peer->ref, &r)) - trace_rxrpc_peer(peer->debug_id, rxrpc_peer_got, r + 1, here); + trace_rxrpc_peer(peer->debug_id, r + 1, why); else peer = NULL; } @@ -416,10 +412,10 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) ASSERT(hlist_empty(&peer->error_targets)); - spin_lock_bh(&rxnet->peer_hash_lock); + spin_lock(&rxnet->peer_hash_lock); hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - spin_unlock_bh(&rxnet->peer_hash_lock); + spin_unlock(&rxnet->peer_hash_lock); rxrpc_free_peer(peer); } @@ -427,9 +423,8 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) /* * Drop a ref on a peer record. */ -void rxrpc_put_peer(struct rxrpc_peer *peer) +void rxrpc_put_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); unsigned int debug_id; bool dead; int r; @@ -437,7 +432,7 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) if (peer) { debug_id = peer->debug_id; dead = __refcount_dec_and_test(&peer->ref, &r); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + trace_rxrpc_peer(debug_id, r - 1, why); if (dead) __rxrpc_put_peer(peer); } @@ -447,15 +442,14 @@ void rxrpc_put_peer(struct rxrpc_peer *peer) * Drop a ref on a peer record where the caller already holds the * peer_hash_lock. */ -void rxrpc_put_peer_locked(struct rxrpc_peer *peer) +void rxrpc_put_peer_locked(struct rxrpc_peer *peer, enum rxrpc_peer_trace why) { - const void *here = __builtin_return_address(0); unsigned int debug_id = peer->debug_id; bool dead; int r; dead = __refcount_dec_and_test(&peer->ref, &r); - trace_rxrpc_peer(debug_id, rxrpc_peer_put, r - 1, here); + trace_rxrpc_peer(debug_id, r - 1, why); if (dead) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index fae22a8b38d6..3a59591ec061 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -49,8 +49,6 @@ static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) static int rxrpc_call_seq_show(struct seq_file *seq, void *v) { struct rxrpc_local *local; - struct rxrpc_sock *rx; - struct rxrpc_peer *peer; struct rxrpc_call *call; struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq)); unsigned long timeout = 0; @@ -63,28 +61,19 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) "Proto Local " " Remote " " SvID ConnID CallID End Use State Abort " - " DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n"); + " DebugId TxSeq TW RxSeq RW RxSerial CW RxTimo\n"); return 0; } call = list_entry(v, struct rxrpc_call, link); - rx = rcu_dereference(call->socket); - if (rx) { - local = READ_ONCE(rx->local); - if (local) - sprintf(lbuff, "%pISpc", &local->srx.transport); - else - strcpy(lbuff, "no_local"); - } else { - strcpy(lbuff, "no_socket"); - } - - peer = call->peer; - if (peer) - sprintf(rbuff, "%pISpc", &peer->srx.transport); + local = call->local; + if (local) + sprintf(lbuff, "%pISpc", &local->srx.transport); else - strcpy(rbuff, "no_connection"); + strcpy(lbuff, "no_local"); + + sprintf(rbuff, "%pISpc", &call->dest_srx.transport); if (call->state != RXRPC_CALL_SERVER_PREALLOC) { timeout = READ_ONCE(call->expect_rx_by); @@ -95,10 +84,10 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) wtmp = atomic64_read_acquire(&call->ackr_window); 
seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" - " %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n", + " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n", lbuff, rbuff, - call->service_id, + call->dest_srx.srx_service, call->cid, call->call_id, rxrpc_is_service_call(call) ? "Svc" : "Clt", @@ -109,6 +98,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp), call->rx_serial, + call->cong_cwnd, timeout); return 0; @@ -159,7 +149,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Proto Local " " Remote " - " SvID ConnID End Use State Key " + " SvID ConnID End Ref Act State Key " " Serial ISerial CallId0 CallId1 CallId2 CallId3\n" ); return 0; @@ -172,12 +162,12 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) goto print; } - sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport); + sprintf(lbuff, "%pISpc", &conn->local->srx.transport); - sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport); + sprintf(rbuff, "%pISpc", &conn->peer->srx.transport); print: seq_printf(seq, - "UDP %-47.47s %-47.47s %4x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %s %3u %3d" " %s %08x %08x %08x %08x %08x %08x %08x\n", lbuff, rbuff, @@ -185,8 +175,9 @@ print: conn->proto.cid, rxrpc_conn_is_service(conn) ? "Svc" : "Clt", refcount_read(&conn->ref), + atomic_read(&conn->active), rxrpc_conn_states[conn->state], - key_serial(conn->params.key), + key_serial(conn->key), atomic_read(&conn->serial), conn->hi_serial, conn->channels[0].call_id, @@ -341,7 +332,7 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) { seq_puts(seq, "Proto Local " - " Use Act\n"); + " Use Act RxQ\n"); return 0; } @@ -350,10 +341,11 @@ static int rxrpc_local_seq_show(struct seq_file *seq, void *v) sprintf(lbuff, "%pISpc", &local->srx.transport); seq_printf(seq, - "UDP %-47.47s %3u %3u\n", + "UDP %-47.47s %3u %3u %3u\n", lbuff, refcount_read(&local->ref), - atomic_read(&local->active_users)); + atomic_read(&local->active_users), + local->rx_queue.qlen); return 0; } @@ -407,13 +399,16 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq)); seq_printf(seq, - "Data : send=%u sendf=%u\n", + "Data : send=%u sendf=%u fail=%u\n", atomic_read(&rxnet->stat_tx_data_send), - atomic_read(&rxnet->stat_tx_data_send_frag)); + atomic_read(&rxnet->stat_tx_data_send_frag), + atomic_read(&rxnet->stat_tx_data_send_fail)); seq_printf(seq, - "Data-Tx : nr=%u retrans=%u\n", + "Data-Tx : nr=%u retrans=%u uf=%u cwr=%u\n", atomic_read(&rxnet->stat_tx_data), - atomic_read(&rxnet->stat_tx_data_retrans)); + atomic_read(&rxnet->stat_tx_data_retrans), + atomic_read(&rxnet->stat_tx_data_underflow), + atomic_read(&rxnet->stat_tx_data_cwnd_reset)); seq_printf(seq, "Data-Rx : nr=%u reqack=%u jumbo=%u\n", atomic_read(&rxnet->stat_rx_data), @@ -462,6 +457,9 @@ int rxrpc_stats_show(struct seq_file *seq, void *v) "Buffers : txb=%u rxb=%u\n", atomic_read(&rxrpc_nr_txbuf), atomic_read(&rxrpc_n_rx_skbs)); + seq_printf(seq, + "IO-thread: loops=%u\n", + atomic_read(&rxnet->stat_io_loop)); return 0; } @@ -478,8 +476,11 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size) atomic_set(&rxnet->stat_tx_data, 0); atomic_set(&rxnet->stat_tx_data_retrans, 0); + atomic_set(&rxnet->stat_tx_data_underflow, 0); + atomic_set(&rxnet->stat_tx_data_cwnd_reset, 0); 
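/*
 * The Tx counters added above (send failures, underflows and congestion
 * window resets) are cleared here alongside the pre-existing ones, so
 * clearing the stats resets everything rxrpc_stats_show() now reports.
 */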
atomic_set(&rxnet->stat_tx_data_send, 0); atomic_set(&rxnet->stat_tx_data_send_frag, 0); + atomic_set(&rxnet->stat_tx_data_send_fail, 0); atomic_set(&rxnet->stat_rx_data, 0); atomic_set(&rxnet->stat_rx_data_reqack, 0); atomic_set(&rxnet->stat_rx_data_jumbo, 0); @@ -491,5 +492,7 @@ int rxrpc_stats_clear(struct file *file, char *buf, size_t size) memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks)); memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack)); + + atomic_set(&rxnet->stat_io_loop, 0); return size; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index efb85f983657..36b25d003cf0 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -36,16 +36,16 @@ void rxrpc_notify_socket(struct rxrpc_call *call) sk = &rx->sk; if (rx && sk->sk_state < RXRPC_CLOSE) { if (call->notify_rx) { - spin_lock_bh(&call->notify_lock); + spin_lock(&call->notify_lock); call->notify_rx(sk, call, call->user_call_ID); - spin_unlock_bh(&call->notify_lock); + spin_unlock(&call->notify_lock); } else { - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { - rxrpc_get_call(call, rxrpc_call_got); + rxrpc_get_call(call, rxrpc_call_get_notify_socket); list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); } - write_unlock_bh(&rx->recvmsg_lock); + write_unlock(&rx->recvmsg_lock); if (!sock_flag(sk, SOCK_DEAD)) { _debug("call %ps", sk->sk_data_ready); @@ -87,9 +87,9 @@ bool rxrpc_set_call_completion(struct rxrpc_call *call, bool ret = false; if (call->state < RXRPC_CALL_COMPLETE) { - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); ret = __rxrpc_set_call_completion(call, compl, abort_code, error); - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); } return ret; } @@ -107,9 +107,9 @@ bool rxrpc_call_completed(struct rxrpc_call *call) bool ret = false; if (call->state < RXRPC_CALL_COMPLETE) { - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); ret = __rxrpc_call_completed(call); - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); } return ret; } @@ -131,9 +131,9 @@ bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, { bool ret; - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); ret = __rxrpc_abort_call(why, call, seq, abort_code, error); - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); return ret; } @@ -193,23 +193,23 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack); - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); switch (call->state) { case RXRPC_CALL_CLIENT_RECV_REPLY: __rxrpc_call_completed(call); - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); break; case RXRPC_CALL_SERVER_RECV_REQUEST: call->state = RXRPC_CALL_SERVER_ACK_REQUEST; call->expect_req_by = jiffies + MAX_JIFFY_OFFSET; - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op); break; default: - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); break; } } @@ -228,9 +228,8 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) _enter("%d", call->debug_id); -further_rotation: skb = skb_dequeue(&call->recvmsg_queue); - rxrpc_see_skb(skb, rxrpc_skb_rotated); + rxrpc_see_skb(skb, rxrpc_skb_see_rotate); sp = rxrpc_skb(skb); tseq = sp->hdr.seq; 
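In the recvmsg.c hunks that follow, the inline idle-ACK transmission (rxrpc_send_ACK() followed by rxrpc_transmit_ack_packets()) is replaced by setting RXRPC_CALL_RX_IS_IDLE and poking the call, so that the idle ACK can be generated from the call's I/O thread instead. A rough user-space model of that flag-and-poke pattern, using C11 atomics; every name below is illustrative and not part of the patch:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the call state touched by the pattern. */
static atomic_uint nr_consumed;	/* units consumed since the last idle ACK */
static atomic_bool rx_is_idle;	/* set once the receive queue has gone idle */

/* Stand-in for rxrpc_poke_call(): hand the work to the I/O thread. */
static void poke_io_thread(void)
{
	printf("poke: I/O thread will send the idle ACK\n");
}

static void note_consumed(unsigned int n)
{
	unsigned int acked = atomic_fetch_add(&nr_consumed, n) + n;

	/* Only the first consumer to cross the threshold while the idle
	 * flag is still clear pokes the I/O thread; later consumers see
	 * the flag already set and do nothing.
	 */
	if (acked > 2 && !atomic_exchange(&rx_is_idle, true))
		poke_io_thread();
}

int main(void)
{
	note_consumed(1);	/* below threshold: no poke */
	note_consumed(2);	/* crosses threshold: pokes exactly once */
	note_consumed(5);	/* flag already set: no second poke */
	return 0;
}

The point of the handoff is that consumers only flip a flag and wake one worker; the worker owns the actual transmit and clears the flag when it has sent the ACK.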
@@ -241,7 +240,7 @@ further_rotation: if (after(tseq, call->rx_consumed)) smp_store_release(&call->rx_consumed, tseq); - rxrpc_free_skb(skb, rxrpc_skb_freed); + rxrpc_free_skb(skb, rxrpc_skb_put_rotate); trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate, serial, call->rx_consumed); @@ -250,26 +249,12 @@ further_rotation: return; } - /* The next packet on the queue might entirely overlap with the one we - * just consumed; if so, rotate that away also. - */ - skb = skb_peek(&call->recvmsg_queue); - if (skb) { - sp = rxrpc_skb(skb); - if (sp->hdr.seq != call->rx_consumed && - after_eq(call->rx_consumed, sp->hdr.seq)) - goto further_rotation; - } - /* Check to see if there's an ACK that needs sending. */ acked = atomic_add_return(call->rx_consumed - old_consumed, &call->ackr_nr_consumed); if (acked > 2 && - !test_and_set_bit(RXRPC_CALL_IDLE_ACK_PENDING, &call->flags)) { - rxrpc_send_ACK(call, RXRPC_ACK_IDLE, serial, - rxrpc_propose_ack_rotate_rx); - rxrpc_transmit_ack_packets(call->peer->local); - } + !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) + rxrpc_poke_call(call, rxrpc_call_poke_idle); } /* @@ -314,15 +299,10 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, */ skb = skb_peek(&call->recvmsg_queue); while (skb) { - rxrpc_see_skb(skb, rxrpc_skb_seen); + rxrpc_see_skb(skb, rxrpc_skb_see_recvmsg); sp = rxrpc_skb(skb); seq = sp->hdr.seq; - if (after_eq(call->rx_consumed, seq)) { - kdebug("obsolete %x %x", call->rx_consumed, seq); - goto skip_obsolete; - } - if (!(flags & MSG_PEEK)) trace_rxrpc_receive(call, rxrpc_receive_front, sp->hdr.serial, seq); @@ -340,7 +320,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, ret = ret2; goto out; } - rxrpc_transmit_ack_packets(call->peer->local); } else { trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq, rx_pkt_offset, rx_pkt_len, 0); @@ -373,7 +352,6 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, break; } - skip_obsolete: /* The whole packet has been transferred. */ if (sp->hdr.flags & RXRPC_LAST_PACKET) ret = 1; @@ -395,7 +373,7 @@ done: trace_rxrpc_recvdata(call, rxrpc_recvmsg_data_return, seq, rx_pkt_offset, rx_pkt_len, ret); if (ret == -EAGAIN) - set_bit(RXRPC_CALL_RX_UNDERRUN, &call->flags); + set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); return ret; } @@ -463,14 +441,14 @@ try_again: /* Find the next call and dequeue it if we're not just peeking. If we * do dequeue it, that comes with a ref that we will need to release. 
*/ - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); l = rx->recvmsg_q.next; call = list_entry(l, struct rxrpc_call, recvmsg_link); if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else - rxrpc_get_call(call, rxrpc_call_got); - write_unlock_bh(&rx->recvmsg_lock); + rxrpc_get_call(call, rxrpc_call_get_recvmsg); + write_unlock(&rx->recvmsg_lock); trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0); @@ -508,11 +486,9 @@ try_again: } if (msg->msg_name && call->peer) { - struct sockaddr_rxrpc *srx = msg->msg_name; - size_t len = sizeof(call->peer->srx); + size_t len = sizeof(call->dest_srx); - memcpy(msg->msg_name, &call->peer->srx, len); - srx->srx_service = call->service_id; + memcpy(msg->msg_name, &call->dest_srx, len); msg->msg_namelen = len; } @@ -525,7 +501,6 @@ try_again: if (ret == -EAGAIN) ret = 0; - rxrpc_transmit_ack_packets(call->peer->local); if (!skb_queue_empty(&call->recvmsg_queue)) rxrpc_notify_socket(call); break; @@ -555,18 +530,18 @@ try_again: error_unlock_call: mutex_unlock(&call->user_mutex); - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret); return ret; error_requeue_call: if (!(flags & MSG_PEEK)) { - write_lock_bh(&rx->recvmsg_lock); + write_lock(&rx->recvmsg_lock); list_add(&call->recvmsg_link, &rx->recvmsg_q); - write_unlock_bh(&rx->recvmsg_lock); + write_unlock(&rx->recvmsg_lock); trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0); } else { - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); } error_no_call: release_sock(&rx->sk); @@ -655,9 +630,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, read_phase_complete: ret = 1; out: - rxrpc_transmit_ack_packets(call->peer->local); if (_service) - *_service = call->service_id; + *_service = call->dest_srx.srx_service; mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, iov_iter_count(iter), *_abort); return ret; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 110a5550c0a6..d1233720e05f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -103,7 +103,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn, struct crypto_sync_skcipher *ci; int ret; - _enter("{%d},{%x}", conn->debug_id, key_serial(conn->params.key)); + _enter("{%d},{%x}", conn->debug_id, key_serial(conn->key)); conn->security_ix = token->security_index; @@ -118,7 +118,7 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn, sizeof(token->kad->session_key)) < 0) BUG(); - switch (conn->params.security_level) { + switch (conn->security_level) { case RXRPC_SECURITY_PLAIN: case RXRPC_SECURITY_AUTH: case RXRPC_SECURITY_ENCRYPT: @@ -150,7 +150,7 @@ static int rxkad_how_much_data(struct rxrpc_call *call, size_t remain, { size_t shdr, buf_size, chunk; - switch (call->conn->params.security_level) { + switch (call->conn->security_level) { default: buf_size = chunk = min_t(size_t, remain, RXRPC_JUMBO_DATALEN); shdr = 0; @@ -192,7 +192,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn, _enter(""); - if (!conn->params.key) + if (!conn->key) return 0; tmpbuf = kmalloc(tmpsize, GFP_KERNEL); @@ -205,7 +205,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn, return -ENOMEM; } - token = conn->params.key->payload.data[0]; + token = conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); tmpbuf[0] = htonl(conn->proto.epoch); @@ -317,7 +317,7 @@ static int 
rxkad_secure_packet_encrypt(const struct rxrpc_call *call, } /* encrypt from the session key */ - token = call->conn->params.key->payload.data[0]; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); sg_init_one(&sg, txb->data, txb->len); @@ -344,13 +344,13 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) int ret; _enter("{%d{%x}},{#%u},%u,", - call->debug_id, key_serial(call->conn->params.key), + call->debug_id, key_serial(call->conn->key), txb->seq, txb->len); if (!call->conn->rxkad.cipher) return 0; - ret = key_validate(call->conn->params.key); + ret = key_validate(call->conn->key); if (ret < 0) return ret; @@ -380,7 +380,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) y = 1; /* zero checksums are not permitted */ txb->wire.cksum = htons(y); - switch (call->conn->params.security_level) { + switch (call->conn->security_level) { case RXRPC_SECURITY_PLAIN: ret = 0; break; @@ -525,7 +525,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, } /* decrypt from the session key */ - token = call->conn->params.key->payload.data[0]; + token = call->conn->key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); skcipher_request_set_sync_tfm(req, call->conn->rxkad.cipher); @@ -596,7 +596,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) u32 x, y; _enter("{%d{%x}},{#%u}", - call->debug_id, key_serial(call->conn->params.key), seq); + call->debug_id, key_serial(call->conn->key), seq); if (!call->conn->rxkad.cipher) return 0; @@ -632,7 +632,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb) goto protocol_error; } - switch (call->conn->params.security_level) { + switch (call->conn->security_level) { case RXRPC_SECURITY_PLAIN: ret = 0; break; @@ -678,8 +678,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) challenge.min_level = htonl(0); challenge.__padding = 0; - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -704,16 +704,15 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx CHALLENGE %%%u", serial); - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_rxkad_challenge); return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_seconds(); + conn->peer->last_tx_at = ktime_get_seconds(); trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_rxkad_challenge); _leave(" = 0"); @@ -737,8 +736,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn, _enter(""); - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &conn->peer->srx.transport; + msg.msg_namelen = conn->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -762,16 +761,15 @@ static int rxkad_send_response(struct rxrpc_connection *conn, serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); - _proto("Tx RESPONSE %%%u", serial); - ret = kernel_sendmsg(conn->params.local->socket, &msg, 
iov, 3, len); + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 3, len); if (ret < 0) { trace_rxrpc_tx_fail(conn->debug_id, serial, ret, rxrpc_tx_point_rxkad_response); return -EAGAIN; } - conn->params.peer->last_tx_at = ktime_get_seconds(); + conn->peer->last_tx_at = ktime_get_seconds(); _leave(" = 0"); return 0; } @@ -834,15 +832,15 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, u32 version, nonce, min_level, abort_code; int ret; - _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key)); + _enter("{%d,%x}", conn->debug_id, key_serial(conn->key)); eproto = tracepoint_string("chall_no_key"); abort_code = RX_PROTOCOL_ERROR; - if (!conn->params.key) + if (!conn->key) goto protocol_error; abort_code = RXKADEXPIRED; - ret = key_validate(conn->params.key); + ret = key_validate(conn->key); if (ret < 0) goto other_error; @@ -856,8 +854,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, nonce = ntohl(challenge.nonce); min_level = ntohl(challenge.min_level); - _proto("Rx CHALLENGE %%%u { v=%u n=%u ml=%u }", - sp->hdr.serial, version, nonce, min_level); + trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level); eproto = tracepoint_string("chall_ver"); abort_code = RXKADINCONSISTENCY; @@ -866,10 +863,10 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, abort_code = RXKADLEVELFAIL; ret = -EACCES; - if (conn->params.security_level < min_level) + if (conn->security_level < min_level) goto other_error; - token = conn->params.key->payload.data[0]; + token = conn->key->payload.data[0]; /* build the response packet */ resp = kzalloc(sizeof(struct rxkad_response), GFP_NOFS); @@ -881,7 +878,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, resp->encrypted.cid = htonl(conn->proto.cid); resp->encrypted.securityIndex = htonl(conn->security_ix); resp->encrypted.inc_nonce = htonl(nonce + 1); - resp->encrypted.level = htonl(conn->params.security_level); + resp->encrypted.level = htonl(conn->security_level); resp->kvno = htonl(token->kad->kvno); resp->ticket_len = htonl(token->kad->ticket_len); resp->encrypted.call_id[0] = htonl(conn->channels[0].call_counter); @@ -1139,8 +1136,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, version = ntohl(response->version); ticket_len = ntohl(response->ticket_len); kvno = ntohl(response->kvno); - _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", - sp->hdr.serial, version, kvno, ticket_len); + + trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len); eproto = tracepoint_string("rxkad_rsp_ver"); abort_code = RXKADINCONSISTENCY; @@ -1229,7 +1226,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, level = ntohl(response->encrypted.level); if (level > RXRPC_SECURITY_ENCRYPT) goto protocol_error_free; - conn->params.security_level = level; + conn->security_level = level; /* create a key to hold the security data and expiration time - after * this the connection security can be handled in exactly the same way diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c new file mode 100644 index 000000000000..66f5eea291ff --- /dev/null +++ b/net/rxrpc/rxperf.c @@ -0,0 +1,619 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* In-kernel rxperf server for testing purposes. + * + * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells ([email protected]) + */ + +#define pr_fmt(fmt) "rxperf: " fmt +#include <linux/module.h> +#include <linux/slab.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> + +MODULE_DESCRIPTION("rxperf test server (afs)"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +#define RXPERF_PORT 7009 +#define RX_PERF_SERVICE 147 +#define RX_PERF_VERSION 3 +#define RX_PERF_SEND 0 +#define RX_PERF_RECV 1 +#define RX_PERF_RPC 3 +#define RX_PERF_FILE 4 +#define RX_PERF_MAGIC_COOKIE 0x4711 + +struct rxperf_proto_params { + __be32 version; + __be32 type; + __be32 rsize; + __be32 wsize; +} __packed; + +static const u8 rxperf_magic_cookie[] = { 0x00, 0x00, 0x47, 0x11 }; +static const u8 secret[8] = { 0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 }; + +enum rxperf_call_state { + RXPERF_CALL_SV_AWAIT_PARAMS, /* Server: Awaiting parameter block */ + RXPERF_CALL_SV_AWAIT_REQUEST, /* Server: Awaiting request data */ + RXPERF_CALL_SV_REPLYING, /* Server: Replying */ + RXPERF_CALL_SV_AWAIT_ACK, /* Server: Awaiting final ACK */ + RXPERF_CALL_COMPLETE, /* Completed or failed */ +}; + +struct rxperf_call { + struct rxrpc_call *rxcall; + struct iov_iter iter; + struct kvec kvec[1]; + struct work_struct work; + const char *type; + size_t iov_len; + size_t req_len; /* Size of request blob */ + size_t reply_len; /* Size of reply blob */ + unsigned int debug_id; + unsigned int operation_id; + struct rxperf_proto_params params; + __be32 tmp[2]; + s32 abort_code; + enum rxperf_call_state state; + short error; + unsigned short unmarshal; + u16 service_id; + int (*deliver)(struct rxperf_call *call); + void (*processor)(struct work_struct *work); +}; + +static struct socket *rxperf_socket; +static struct key *rxperf_sec_keyring; /* Ring of security/crypto keys */ +static struct workqueue_struct *rxperf_workqueue; + +static void rxperf_deliver_to_call(struct work_struct *work); +static int rxperf_deliver_param_block(struct rxperf_call *call); +static int rxperf_deliver_request(struct rxperf_call *call); +static int rxperf_process_call(struct rxperf_call *call); +static void rxperf_charge_preallocation(struct work_struct *work); + +static DECLARE_WORK(rxperf_charge_preallocation_work, + rxperf_charge_preallocation); + +static inline void rxperf_set_call_state(struct rxperf_call *call, + enum rxperf_call_state to) +{ + call->state = to; +} + +static inline void rxperf_set_call_complete(struct rxperf_call *call, + int error, s32 remote_abort) +{ + if (call->state != RXPERF_CALL_COMPLETE) { + call->abort_code = remote_abort; + call->error = error; + call->state = RXPERF_CALL_COMPLETE; + } +} + +static void rxperf_rx_discard_new_call(struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + kfree((struct rxperf_call *)user_call_ID); +} + +static void rxperf_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + queue_work(rxperf_workqueue, &rxperf_charge_preallocation_work); +} + +static void rxperf_queue_call_work(struct rxperf_call *call) +{ + queue_work(rxperf_workqueue, &call->work); +} + +static void rxperf_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + struct rxperf_call *call = (struct rxperf_call *)call_user_ID; + + if (call->state != RXPERF_CALL_COMPLETE) + rxperf_queue_call_work(call); +} + +static void rxperf_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) +{ + struct rxperf_call *call = (struct rxperf_call *)user_call_ID; + + call->rxcall = rxcall; +} + +static void 
rxperf_notify_end_reply_tx(struct sock *sock, + struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ + rxperf_set_call_state((struct rxperf_call *)call_user_ID, + RXPERF_CALL_SV_AWAIT_ACK); +} + +/* + * Charge the incoming call preallocation. + */ +static void rxperf_charge_preallocation(struct work_struct *work) +{ + struct rxperf_call *call; + + for (;;) { + call = kzalloc(sizeof(*call), GFP_KERNEL); + if (!call) + break; + + call->type = "unset"; + call->debug_id = atomic_inc_return(&rxrpc_debug_id); + call->deliver = rxperf_deliver_param_block; + call->state = RXPERF_CALL_SV_AWAIT_PARAMS; + call->service_id = RX_PERF_SERVICE; + call->iov_len = sizeof(call->params); + call->kvec[0].iov_len = sizeof(call->params); + call->kvec[0].iov_base = &call->params; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len); + INIT_WORK(&call->work, rxperf_deliver_to_call); + + if (rxrpc_kernel_charge_accept(rxperf_socket, + rxperf_notify_rx, + rxperf_rx_attach, + (unsigned long)call, + GFP_KERNEL, + call->debug_id) < 0) + break; + call = NULL; + } + + kfree(call); +} + +/* + * Open an rxrpc socket and bind it to be a server for callback notifications + * - the socket is left in blocking mode and non-blocking ops use MSG_DONTWAIT + */ +static int rxperf_open_socket(void) +{ + struct sockaddr_rxrpc srx; + struct socket *socket; + int ret; + + ret = sock_create_kern(&init_net, AF_RXRPC, SOCK_DGRAM, PF_INET6, + &socket); + if (ret < 0) + goto error_1; + + socket->sk->sk_allocation = GFP_NOFS; + + /* bind the callback manager's address to make this a server socket */ + memset(&srx, 0, sizeof(srx)); + srx.srx_family = AF_RXRPC; + srx.srx_service = RX_PERF_SERVICE; + srx.transport_type = SOCK_DGRAM; + srx.transport_len = sizeof(srx.transport.sin6); + srx.transport.sin6.sin6_family = AF_INET6; + srx.transport.sin6.sin6_port = htons(RXPERF_PORT); + + ret = rxrpc_sock_set_min_security_level(socket->sk, + RXRPC_SECURITY_ENCRYPT); + if (ret < 0) + goto error_2; + + ret = rxrpc_sock_set_security_keyring(socket->sk, rxperf_sec_keyring); + + ret = kernel_bind(socket, (struct sockaddr *)&srx, sizeof(srx)); + if (ret < 0) + goto error_2; + + rxrpc_kernel_new_call_notification(socket, rxperf_rx_new_call, + rxperf_rx_discard_new_call); + + ret = kernel_listen(socket, INT_MAX); + if (ret < 0) + goto error_2; + + rxperf_socket = socket; + rxperf_charge_preallocation(&rxperf_charge_preallocation_work); + return 0; + +error_2: + sock_release(socket); +error_1: + pr_err("Can't set up rxperf socket: %d\n", ret); + return ret; +} + +/* + * close the rxrpc socket rxperf was using + */ +static void rxperf_close_socket(void) +{ + kernel_listen(rxperf_socket, 0); + kernel_sock_shutdown(rxperf_socket, SHUT_RDWR); + flush_workqueue(rxperf_workqueue); + sock_release(rxperf_socket); +} + +/* + * Log remote abort codes that indicate that we have a protocol disagreement + * with the server. 
+ */ +static void rxperf_log_error(struct rxperf_call *call, s32 remote_abort) +{ + static int max = 0; + const char *msg; + int m; + + switch (remote_abort) { + case RX_EOF: msg = "unexpected EOF"; break; + case RXGEN_CC_MARSHAL: msg = "client marshalling"; break; + case RXGEN_CC_UNMARSHAL: msg = "client unmarshalling"; break; + case RXGEN_SS_MARSHAL: msg = "server marshalling"; break; + case RXGEN_SS_UNMARSHAL: msg = "server unmarshalling"; break; + case RXGEN_DECODE: msg = "opcode decode"; break; + case RXGEN_SS_XDRFREE: msg = "server XDR cleanup"; break; + case RXGEN_CC_XDRFREE: msg = "client XDR cleanup"; break; + case -32: msg = "insufficient data"; break; + default: + return; + } + + m = max; + if (m < 3) { + max = m + 1; + pr_info("Peer reported %s failure on %s\n", msg, call->type); + } +} + +/* + * deliver messages to a call + */ +static void rxperf_deliver_to_call(struct work_struct *work) +{ + struct rxperf_call *call = container_of(work, struct rxperf_call, work); + enum rxperf_call_state state; + u32 abort_code, remote_abort = 0; + int ret; + + if (call->state == RXPERF_CALL_COMPLETE) + return; + + while (state = call->state, + state == RXPERF_CALL_SV_AWAIT_PARAMS || + state == RXPERF_CALL_SV_AWAIT_REQUEST || + state == RXPERF_CALL_SV_AWAIT_ACK + ) { + if (state == RXPERF_CALL_SV_AWAIT_ACK) { + if (!rxrpc_kernel_check_life(rxperf_socket, call->rxcall)) + goto call_complete; + return; + } + + ret = call->deliver(call); + if (ret == 0) + ret = rxperf_process_call(call); + + switch (ret) { + case 0: + continue; + case -EINPROGRESS: + case -EAGAIN: + return; + case -ECONNABORTED: + rxperf_log_error(call, call->abort_code); + goto call_complete; + case -EOPNOTSUPP: + abort_code = RXGEN_OPCODE; + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + abort_code, ret, "GOP"); + goto call_complete; + case -ENOTSUPP: + abort_code = RX_USER_ABORT; + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + abort_code, ret, "GUA"); + goto call_complete; + case -EIO: + pr_err("Call %u in bad state %u\n", + call->debug_id, call->state); + fallthrough; + case -ENODATA: + case -EBADMSG: + case -EMSGSIZE: + case -ENOMEM: + case -EFAULT: + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + RXGEN_SS_UNMARSHAL, ret, "GUM"); + goto call_complete; + default: + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + RX_CALL_DEAD, ret, "GER"); + goto call_complete; + } + } + +call_complete: + rxperf_set_call_complete(call, ret, remote_abort); + /* The call may have been requeued */ + rxrpc_kernel_end_call(rxperf_socket, call->rxcall); + cancel_work(&call->work); + kfree(call); +} + +/* + * Extract a piece of data from the received data socket buffers. + */ +static int rxperf_extract_data(struct rxperf_call *call, bool want_more) +{ + u32 remote_abort = 0; + int ret; + + ret = rxrpc_kernel_recv_data(rxperf_socket, call->rxcall, &call->iter, + &call->iov_len, want_more, &remote_abort, + &call->service_id); + pr_debug("Extract i=%zu l=%zu m=%u ret=%d\n", + iov_iter_count(&call->iter), call->iov_len, want_more, ret); + if (ret == 0 || ret == -EAGAIN) + return ret; + + if (ret == 1) { + switch (call->state) { + case RXPERF_CALL_SV_AWAIT_REQUEST: + rxperf_set_call_state(call, RXPERF_CALL_SV_REPLYING); + break; + case RXPERF_CALL_COMPLETE: + pr_debug("premature completion %d", call->error); + return call->error; + default: + break; + } + return 0; + } + + rxperf_set_call_complete(call, ret, remote_abort); + return ret; +} + +/* + * Grab the operation ID from an incoming manager call. 
+ */ +static int rxperf_deliver_param_block(struct rxperf_call *call) +{ + u32 version; + int ret; + + /* Extract the parameter block */ + ret = rxperf_extract_data(call, true); + if (ret < 0) + return ret; + + version = ntohl(call->params.version); + call->operation_id = ntohl(call->params.type); + call->deliver = rxperf_deliver_request; + + if (version != RX_PERF_VERSION) { + pr_info("Version mismatch %x\n", version); + return -ENOTSUPP; + } + + switch (call->operation_id) { + case RX_PERF_SEND: + call->type = "send"; + call->reply_len = 0; + call->iov_len = 4; /* Expect req size */ + break; + case RX_PERF_RECV: + call->type = "recv"; + call->req_len = 0; + call->iov_len = 4; /* Expect reply size */ + break; + case RX_PERF_RPC: + call->type = "rpc"; + call->iov_len = 8; /* Expect req size and reply size */ + break; + case RX_PERF_FILE: + call->type = "file"; + fallthrough; + default: + return -EOPNOTSUPP; + } + + rxperf_set_call_state(call, RXPERF_CALL_SV_AWAIT_REQUEST); + return call->deliver(call); +} + +/* + * Deliver the request data. + */ +static int rxperf_deliver_request(struct rxperf_call *call) +{ + int ret; + + switch (call->unmarshal) { + case 0: + call->kvec[0].iov_len = call->iov_len; + call->kvec[0].iov_base = call->tmp; + iov_iter_kvec(&call->iter, READ, call->kvec, 1, call->iov_len); + call->unmarshal++; + fallthrough; + case 1: + ret = rxperf_extract_data(call, true); + if (ret < 0) + return ret; + + switch (call->operation_id) { + case RX_PERF_SEND: + call->type = "send"; + call->req_len = ntohl(call->tmp[0]); + call->reply_len = 0; + break; + case RX_PERF_RECV: + call->type = "recv"; + call->req_len = 0; + call->reply_len = ntohl(call->tmp[0]); + break; + case RX_PERF_RPC: + call->type = "rpc"; + call->req_len = ntohl(call->tmp[0]); + call->reply_len = ntohl(call->tmp[1]); + break; + default: + pr_info("Can't parse extra params\n"); + return -EIO; + } + + pr_debug("CALL op=%s rq=%zx rp=%zx\n", + call->type, call->req_len, call->reply_len); + + call->iov_len = call->req_len; + iov_iter_discard(&call->iter, READ, call->req_len); + call->unmarshal++; + fallthrough; + case 2: + ret = rxperf_extract_data(call, false); + if (ret < 0) + return ret; + call->unmarshal++; + fallthrough; + default: + return 0; + } +} + +/* + * Process a call for which we've received the request. + */ +static int rxperf_process_call(struct rxperf_call *call) +{ + struct msghdr msg = {}; + struct bio_vec bv[1]; + struct kvec iov[1]; + ssize_t n; + size_t reply_len = call->reply_len, len; + + rxrpc_kernel_set_tx_length(rxperf_socket, call->rxcall, + reply_len + sizeof(rxperf_magic_cookie)); + + while (reply_len > 0) { + len = min_t(size_t, reply_len, PAGE_SIZE); + bv[0].bv_page = ZERO_PAGE(0); + bv[0].bv_offset = 0; + bv[0].bv_len = len; + iov_iter_bvec(&msg.msg_iter, WRITE, bv, 1, len); + msg.msg_flags = MSG_MORE; + n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, + len, rxperf_notify_end_reply_tx); + if (n < 0) + return n; + if (n == 0) + return -EIO; + reply_len -= n; + } + + len = sizeof(rxperf_magic_cookie); + iov[0].iov_base = (void *)rxperf_magic_cookie; + iov[0].iov_len = len; + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); + msg.msg_flags = 0; + n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg, len, + rxperf_notify_end_reply_tx); + if (n >= 0) + return 0; /* Success */ + + if (n == -ENOMEM) + rxrpc_kernel_abort_call(rxperf_socket, call->rxcall, + RXGEN_SS_MARSHAL, -ENOMEM, "GOM"); + return n; +} + +/* + * Add a key to the security keyring. 
+ */ +static int rxperf_add_key(struct key *keyring) +{ + key_ref_t kref; + int ret; + + kref = key_create_or_update(make_key_ref(keyring, true), + "rxrpc_s", + __stringify(RX_PERF_SERVICE) ":2", + secret, + sizeof(secret), + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH + | KEY_USR_VIEW, + KEY_ALLOC_NOT_IN_QUOTA); + + if (IS_ERR(kref)) { + pr_err("Can't allocate rxperf server key: %ld\n", PTR_ERR(kref)); + return PTR_ERR(kref); + } + + ret = key_link(keyring, key_ref_to_ptr(kref)); + if (ret < 0) + pr_err("Can't link rxperf server key: %d\n", ret); + key_ref_put(kref); + return ret; +} + +/* + * Initialise the rxperf server. + */ +static int __init rxperf_init(void) +{ + struct key *keyring; + int ret = -ENOMEM; + + pr_info("Server registering\n"); + + rxperf_workqueue = alloc_workqueue("rxperf", 0, 0); + if (!rxperf_workqueue) + goto error_workqueue; + + keyring = keyring_alloc("rxperf_server", + GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), + KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | + KEY_POS_WRITE | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH | + KEY_USR_WRITE | + KEY_OTH_VIEW | KEY_OTH_READ | KEY_OTH_SEARCH, + KEY_ALLOC_NOT_IN_QUOTA, + NULL, NULL); + if (IS_ERR(keyring)) { + pr_err("Can't allocate rxperf server keyring: %ld\n", + PTR_ERR(keyring)); + goto error_keyring; + } + rxperf_sec_keyring = keyring; + ret = rxperf_add_key(keyring); + if (ret < 0) + goto error_key; + + ret = rxperf_open_socket(); + if (ret < 0) + goto error_socket; + return 0; + +error_socket: +error_key: + key_put(rxperf_sec_keyring); +error_keyring: + destroy_workqueue(rxperf_workqueue); + rcu_barrier(); +error_workqueue: + pr_err("Failed to register: %d\n", ret); + return ret; +} +late_initcall(rxperf_init); /* Must be called after net/ to create socket */ + +static void __exit rxperf_exit(void) +{ + pr_info("Server unregistering.\n"); + + rxperf_close_socket(); + key_put(rxperf_sec_keyring); + destroy_workqueue(rxperf_workqueue); + rcu_barrier(); +} +module_exit(rxperf_exit); + diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 50cb5f1ee0c0..209f2c25a0da 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -63,13 +63,43 @@ const struct rxrpc_security *rxrpc_security_lookup(u8 security_index) } /* + * Initialise the security on a client call. 
+ */ +int rxrpc_init_client_call_security(struct rxrpc_call *call) +{ + const struct rxrpc_security *sec; + struct rxrpc_key_token *token; + struct key *key = call->key; + int ret; + + if (!key) + return 0; + + ret = key_validate(key); + if (ret < 0) + return ret; + + for (token = key->payload.data[0]; token; token = token->next) { + sec = rxrpc_security_lookup(token->security_index); + if (sec) + goto found; + } + return -EKEYREJECTED; + +found: + call->security = sec; + _leave(" = 0"); + return 0; +} + +/* * initialise the security on a client connection */ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn) { const struct rxrpc_security *sec; struct rxrpc_key_token *token; - struct key *key = conn->params.key; + struct key *key = conn->key; int ret; _enter("{%d},{%x}", conn->debug_id, key_serial(key)); @@ -163,7 +193,7 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn, rcu_read_lock(); - rx = rcu_dereference(conn->params.local->service); + rx = rcu_dereference(conn->local->service); if (!rx) goto out; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index e5fd8a95bf71..9fa7e37f7155 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -22,30 +22,9 @@ */ static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win) { - unsigned int win_size; - rxrpc_seq_t tx_win = smp_load_acquire(&call->acks_hard_ack); - - /* If we haven't transmitted anything for >1RTT, we should reset the - * congestion management state. - */ - if (ktime_before(ktime_add_us(call->tx_last_sent, - call->peer->srtt_us >> 3), - ktime_get_real())) { - if (RXRPC_TX_SMSS > 2190) - win_size = 2; - else if (RXRPC_TX_SMSS > 1095) - win_size = 3; - else - win_size = 4; - win_size += call->cong_extra; - } else { - win_size = min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra); - } - if (_tx_win) - *_tx_win = tx_win; - return call->tx_top - tx_win < win_size; + *_tx_win = call->tx_bottom; + return call->tx_prepared - call->tx_bottom < 256; } /* @@ -66,11 +45,6 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, if (signal_pending(current)) return sock_intr_errno(*timeo); - if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) { - rxrpc_shrink_call_tx_buffer(call); - continue; - } - trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } @@ -107,11 +81,6 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, tx_win == tx_start && signal_pending(current)) return -EINTR; - if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) { - rxrpc_shrink_call_tx_buffer(call); - continue; - } - if (tx_win != tx_start) { timeout = rtt; tx_start = tx_win; @@ -137,11 +106,6 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, if (call->state >= RXRPC_CALL_COMPLETE) return call->error; - if (READ_ONCE(call->acks_hard_ack) != call->tx_bottom) { - rxrpc_shrink_call_tx_buffer(call); - continue; - } - trace_rxrpc_txqueue(call, rxrpc_txqueue_wait); *timeo = schedule_timeout(*timeo); } @@ -206,33 +170,32 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, { unsigned long now; rxrpc_seq_t seq = txb->seq; - bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags); - int ret; + bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke; rxrpc_inc_stat(call->rxnet, stat_tx_data); - ASSERTCMP(seq, ==, call->tx_top + 1); + ASSERTCMP(txb->seq, ==, call->tx_prepared + 1); /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as 
soon as we queue it. */ txb->last_sent = ktime_get_real(); - /* Add the packet to the call's output buffer */ - rxrpc_get_txbuf(txb, rxrpc_txbuf_get_buffer); - spin_lock(&call->tx_lock); - list_add_tail(&txb->call_link, &call->tx_buffer); - call->tx_top = seq; - spin_unlock(&call->tx_lock); - if (last) trace_rxrpc_txqueue(call, rxrpc_txqueue_queue_last); else trace_rxrpc_txqueue(call, rxrpc_txqueue_queue); + /* Add the packet to the call's output buffer */ + spin_lock(&call->tx_lock); + poke = list_empty(&call->tx_sendmsg); + list_add_tail(&txb->call_link, &call->tx_sendmsg); + call->tx_prepared = seq; + spin_unlock(&call->tx_lock); + if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { _debug("________awaiting reply/ACK__________"); - write_lock_bh(&call->state_lock); + write_lock(&call->state_lock); switch (call->state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; @@ -255,33 +218,11 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, default: break; } - write_unlock_bh(&call->state_lock); + write_unlock(&call->state_lock); } - if (seq == 1 && rxrpc_is_client_call(call)) - rxrpc_expose_client_call(call); - - ret = rxrpc_send_data_packet(call, txb); - if (ret < 0) { - switch (ret) { - case -ENETUNREACH: - case -EHOSTUNREACH: - case -ECONNREFUSED: - rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - 0, ret); - goto out; - } - } else { - unsigned long now = jiffies; - unsigned long resend_at = now + call->peer->rto_j; - - WRITE_ONCE(call->resend_at, resend_at); - rxrpc_reduce_call_timer(call, resend_at, now, - rxrpc_timer_set_for_send); - } - -out: - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_trans); + if (poke) + rxrpc_poke_call(call, rxrpc_call_poke_start); } /* @@ -335,8 +276,6 @@ reload: rxrpc_see_txbuf(txb, rxrpc_txbuf_see_send_more); do { - rxrpc_transmit_ack_packets(call->peer->local); - if (!txb) { size_t remain, bufsize, chunk, offset; @@ -416,10 +355,10 @@ reload: success: ret = copied; if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) { - read_lock_bh(&call->state_lock); + read_lock(&call->state_lock); if (call->error < 0) ret = call->error; - read_unlock_bh(&call->state_lock); + read_unlock(&call->state_lock); } out: call->tx_pending = txb; @@ -604,7 +543,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, atomic_inc_return(&rxrpc_debug_id)); /* The socket is now unlocked */ - rxrpc_put_peer(cp.peer); + rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp); _leave(" = %p\n", call); return call; } @@ -667,7 +606,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) case RXRPC_CALL_CLIENT_AWAIT_CONN: case RXRPC_CALL_SERVER_PREALLOC: case RXRPC_CALL_SERVER_SECURING: - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_sendmsg); ret = -EBUSY; goto error_release_sock; default: @@ -737,7 +676,7 @@ out_put_unlock: if (!dropped_lock) mutex_unlock(&call->user_mutex); error_put: - rxrpc_put_call(call, rxrpc_call_put); + rxrpc_put_call(call, rxrpc_call_put_sendmsg); _leave(" = %d", ret); return ret; @@ -784,9 +723,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, notify_end_tx, &dropped_lock); break; case RXRPC_CALL_COMPLETE: - read_lock_bh(&call->state_lock); + read_lock(&call->state_lock); ret = call->error; - read_unlock_bh(&call->state_lock); + read_unlock(&call->state_lock); break; default: /* Request phase complete for this client call */ diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c 
index ee269e0e6ee8..e51940589ee5 100644 --- a/net/rxrpc/server_key.c +++ b/net/rxrpc/server_key.c @@ -144,3 +144,28 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, sockptr_t optval, int optlen) _leave(" = 0 [key %x]", key->serial); return 0; } + +/** + * rxrpc_sock_set_security_keyring - Set the security keyring for a kernel service + * @sk: The socket to set the keyring on + * @keyring: The keyring to set + * + * Set the server security keyring on an rxrpc socket. This is used to provide + * the encryption keys for a kernel service. + */ +int rxrpc_sock_set_security_keyring(struct sock *sk, struct key *keyring) +{ + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + lock_sock(sk); + if (rx->securities) + ret = -EINVAL; + else if (rx->sk.sk_state != RXRPC_UNBOUND) + ret = -EISCONN; + else + rx->securities = key_get(keyring); + release_sock(sk); + return ret; +} +EXPORT_SYMBOL(rxrpc_sock_set_security_keyring); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 0c827d5bb2b8..ebe0c75e7b07 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-or-later -/* ar-skbuff.c: socket buffer destruction handling +/* Socket buffer accounting * * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. * Written by David Howells ([email protected]) @@ -19,56 +19,50 @@ /* * Note the allocation or reception of a socket buffer. */ -void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); } /* * Note the re-emergence of a socket buffer from a queue or buffer. */ -void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); if (skb) { int n = atomic_read(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); } } /* * Note the addition of a ref on a socket buffer. */ -void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); int n = atomic_inc_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); skb_get(skb); } /* * Note the dropping of a ref on a socket buffer by the core. */ -void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_eaten_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); int n = atomic_inc_return(&rxrpc_n_rx_skbs); - trace_rxrpc_skb(skb, op, 0, n, here); + trace_rxrpc_skb(skb, 0, n, why); } /* * Note the destruction of a socket buffer. 
*/ -void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) { - const void *here = __builtin_return_address(0); if (skb) { - int n; - n = atomic_dec_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, op, refcount_read(&skb->users), n, here); + int n = atomic_dec_return(select_skb_count(skb)); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); kfree_skb(skb); } } @@ -78,12 +72,12 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) */ void rxrpc_purge_queue(struct sk_buff_head *list) { - const void *here = __builtin_return_address(0); struct sk_buff *skb; + while ((skb = skb_dequeue((list))) != NULL) { int n = atomic_dec_return(select_skb_count(skb)); - trace_rxrpc_skb(skb, rxrpc_skb_purged, - refcount_read(&skb->users), n, here); + trace_rxrpc_skb(skb, refcount_read(&skb->users), n, + rxrpc_skb_put_purge); kfree_skb(skb); } } diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index 96bfee89927b..d2cf2aac3adb 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -26,7 +26,6 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, INIT_LIST_HEAD(&txb->call_link); INIT_LIST_HEAD(&txb->tx_link); refcount_set(&txb->ref, 1); - txb->call = call; txb->call_debug_id = call->debug_id; txb->debug_id = atomic_inc_return(&rxrpc_txbuf_debug_ids); txb->space = sizeof(txb->data); @@ -34,7 +33,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->offset = 0; txb->flags = 0; txb->ack_why = 0; - txb->seq = call->tx_top + 1; + txb->seq = call->tx_prepared + 1; txb->wire.epoch = htonl(call->conn->proto.epoch); txb->wire.cid = htonl(call->cid); txb->wire.callNumber = htonl(call->call_id); @@ -44,7 +43,7 @@ struct rxrpc_txbuf *rxrpc_alloc_txbuf(struct rxrpc_call *call, u8 packet_type, txb->wire.userStatus = 0; txb->wire.securityIndex = call->security_ix; txb->wire._rsvd = 0; - txb->wire.serviceId = htons(call->service_id); + txb->wire.serviceId = htons(call->dest_srx.srx_service); trace_rxrpc_txbuf(txb->debug_id, txb->call_debug_id, txb->seq, 1, @@ -107,6 +106,7 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) { struct rxrpc_txbuf *txb; rxrpc_seq_t hard_ack = smp_load_acquire(&call->acks_hard_ack); + bool wake = false; _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top); @@ -120,8 +120,10 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) if (before(hard_ack, txb->seq)) break; + if (txb->seq != call->tx_bottom + 1) + rxrpc_see_txbuf(txb, rxrpc_txbuf_see_out_of_step); ASSERTCMP(txb->seq, ==, call->tx_bottom + 1); - call->tx_bottom++; + smp_store_release(&call->tx_bottom, call->tx_bottom + 1); list_del_rcu(&txb->call_link); trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); @@ -129,7 +131,12 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) spin_unlock(&call->tx_lock); rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated); + if (after(call->acks_hard_ack, call->tx_bottom + 128)) + wake = true; } spin_unlock(&call->tx_lock); + + if (wake) + wake_up(&call->waitq); } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9b31a10cc639..5b3c0ac495be 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -23,6 +23,7 @@ #include <net/act_api.h> #include <net/netlink.h> #include <net/flow_offload.h> +#include <net/tc_wrapper.h> #ifdef CONFIG_INET DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); @@ -1080,7 +1081,7 @@ restart_act_graph: repeat_ttl = 32; repeat: - ret = a->ops->act(skb, a, res); + ret = 
tc_act(skb, a, res); if (unlikely(ret == TC_ACT_REPEAT)) { if (--repeat_ttl != 0) goto repeat; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index b79eee44e24e..b0455fda7d0b 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -18,6 +18,7 @@ #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> +#include <net/tc_wrapper.h> #define ACT_BPF_NAME_LEN 256 @@ -31,8 +32,9 @@ struct tcf_bpf_cfg { static struct tc_action_ops act_bpf_ops; -static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_bpf_act(struct sk_buff *skb, + const struct tc_action *act, + struct tcf_result *res) { bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index d41002e4613f..7e63ff7e3ed7 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -20,6 +20,7 @@ #include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_connmark.h> #include <net/tc_act/tc_connmark.h> +#include <net/tc_wrapper.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -27,8 +28,9 @@ static struct tc_action_ops act_connmark_ops; -static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash; struct nf_conntrack_tuple tuple; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 1366adf9b909..95e9304024b7 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -32,6 +32,7 @@ #include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> +#include <net/tc_wrapper.h> static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, @@ -563,8 +564,9 @@ fail: return 0; } -static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_csum_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); bool orig_vlan_tag_present = false; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index dd5ae7551956..f6df0168c91f 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -24,6 +24,7 @@ #include <net/ipv6_frag.h> #include <uapi/linux/tc_act/tc_ct.h> #include <net/tc_act/tc_ct.h> +#include <net/tc_wrapper.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> @@ -1038,8 +1039,8 @@ static int tcf_ct_act_nat(struct sk_buff *skb, #endif } -static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct net *net = dev_net(skb->dev); enum ip_conntrack_info ctinfo; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index eaa02f098d1c..4b1b59da5c0b 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -18,6 +18,7 @@ #include <net/pkt_cls.h> #include <uapi/linux/tc_act/tc_ctinfo.h> #include <net/tc_act/tc_ctinfo.h> +#include <net/tc_wrapper.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> @@ -75,8 +76,9 @@ static void tcf_ctinfo_cpmark_set(struct nf_conn *ct, struct tcf_ctinfo *ca, skb->mark = READ_ONCE(ct->mark) & cp->cpmarkmask; } -static int tcf_ctinfo_act(struct sk_buff *skb, const struct 
tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { const struct nf_conntrack_tuple_hash *thash = NULL; struct tcf_ctinfo *ca = to_ctinfo(a); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 62d682b96b88..54f1b13b2360 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -18,6 +18,7 @@ #include <net/pkt_cls.h> #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> +#include <net/tc_wrapper.h> static struct tc_action_ops act_gact_ops; @@ -145,8 +146,9 @@ release_idr: return err; } -static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_gact_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_gact *gact = to_gact(a); int action = READ_ONCE(gact->tcf_action); diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index 3049878e7315..9b8def0be41e 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -14,6 +14,7 @@ #include <net/netlink.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_gate.h> +#include <net/tc_wrapper.h> static struct tc_action_ops act_gate_ops; @@ -113,8 +114,9 @@ static enum hrtimer_restart gate_timer_func(struct hrtimer *timer) return HRTIMER_RESTART; } -static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_gate *gact = to_gate(a); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 41d63b33461d..bc7611b0744c 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -29,6 +29,7 @@ #include <net/tc_act/tc_ife.h> #include <linux/etherdevice.h> #include <net/ife.h> +#include <net/tc_wrapper.h> static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -861,8 +862,9 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, return action; } -static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_ife_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); struct tcf_ife_params *p; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 1625e1037416..5d96ffebd40f 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -20,6 +20,7 @@ #include <net/pkt_sched.h> #include <linux/tc_act/tc_ipt.h> #include <net/tc_act/tc_ipt.h> +#include <net/tc_wrapper.h> #include <linux/netfilter_ipv4/ip_tables.h> @@ -216,8 +217,9 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla, a, &act_xt_ops, tp, flags); } -static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_ipt_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { int ret = 0, result = 0; struct tcf_ipt *ipt = to_ipt(a); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index b8ad6ae282c0..7284bcea7b0b 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -24,6 +24,7 @@ #include <net/pkt_cls.h> #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> +#include <net/tc_wrapper.h> static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -217,8 +218,9 @@ static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb) return err; } -static int 
tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_mirred *m = to_mirred(a); struct sk_buff *skb2 = skb; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index 8ad25cc8ccd5..ff47ce4d3968 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -14,6 +14,7 @@ #include <net/pkt_sched.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_mpls.h> +#include <net/tc_wrapper.h> static struct tc_action_ops act_mpls_ops; @@ -49,8 +50,9 @@ static __be32 tcf_mpls_get_lse(struct mpls_shim_hdr *lse, return cpu_to_be32(new_lse); } -static int tcf_mpls_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_mpls_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_mpls *m = to_mpls(a); struct tcf_mpls_params *p; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 9265145f1040..74c74be33048 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -24,7 +24,7 @@ #include <net/tc_act/tc_nat.h> #include <net/tcp.h> #include <net/udp.h> - +#include <net/tc_wrapper.h> static struct tc_action_ops act_nat_ops; @@ -98,8 +98,9 @@ release_idr: return err; } -static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_nat *p = to_tcf_nat(a); struct iphdr *iph; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 94ed5857ce67..a0378e9f0121 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -20,6 +20,7 @@ #include <net/tc_act/tc_pedit.h> #include <uapi/linux/tc_act/tc_pedit.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> static struct tc_action_ops act_pedit_ops; @@ -319,8 +320,9 @@ static int pedit_skb_hdr_offset(struct sk_buff *skb, return ret; } -static int tcf_pedit_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_pedit *p = to_pedit(a); u32 max_offset; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 0adb26e366a7..227cba58ce9f 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -19,6 +19,7 @@ #include <net/netlink.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_police.h> +#include <net/tc_wrapper.h> /* Each policer is serialized by its individual spinlock */ @@ -242,8 +243,9 @@ static bool tcf_police_mtu_check(struct sk_buff *skb, u32 limit) return len <= limit; } -static int tcf_police_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_police_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_police *police = to_police(a); s64 now, toks, ppstoks = 0, ptoks = 0; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 7a25477f5d99..98dea08c1764 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -20,6 +20,7 @@ #include <net/tc_act/tc_sample.h> #include <net/psample.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #include <linux/if_arp.h> @@ -153,8 +154,9 @@ static bool tcf_sample_dev_ok_push(struct net_device *dev) } } -static int tcf_sample_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) 
+TC_INDIRECT_SCOPE int tcf_sample_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_sample *s = to_sample(a); struct psample_group *psample_group; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 18d376135461..4b84514534f3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -14,6 +14,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> @@ -21,8 +22,9 @@ static struct tc_action_ops act_simp_ops; #define SIMP_MAX_DATA 32 -static int tcf_simp_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_simp_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_defact *d = to_defact(a); diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 1710780c908a..ce7008cf291c 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -16,6 +16,7 @@ #include <net/ipv6.h> #include <net/dsfield.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> @@ -36,8 +37,9 @@ static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params, return netdev_cap_txqueue(skb->dev, queue_mapping); } -static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_skbedit_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_skbedit *d = to_skbedit(a); struct tcf_skbedit_params *params; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index d98758a63934..dffa990a9629 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -15,14 +15,16 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> static struct tc_action_ops act_skbmod_ops; -static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcf_skbmod_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_skbmod *d = to_skbmod(a); int action, max_edit_len, err; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 2691a3d8e451..2d12d2626415 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -16,14 +16,16 @@ #include <net/pkt_sched.h> #include <net/dst.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> static struct tc_action_ops act_tunnel_key_ops; -static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tunnel_key_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_tunnel_key *t = to_tunnel_key(a); struct tcf_tunnel_key_params *params; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 7b24e898a3e6..0251442f5f29 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -12,14 +12,16 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> static struct tc_action_ops act_vlan_ops; -static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) 
+TC_INDIRECT_SCOPE int tcf_vlan_act(struct sk_buff *skb, + const struct tc_action *a, + struct tcf_result *res) { struct tcf_vlan *v = to_vlan(a); struct tcf_vlan_params *p; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 23d1cfa4f58c..668130f08903 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -40,6 +40,7 @@ #include <net/tc_act/tc_mpls.h> #include <net/tc_act/tc_gate.h> #include <net/flow_offload.h> +#include <net/tc_wrapper.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -1564,7 +1565,7 @@ reclassify: tp->protocol != htons(ETH_P_ALL)) continue; - err = tp->classify(skb, tp, res); + err = tc_classify(skb, tp, res); #ifdef CONFIG_NET_CLS_ACT if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) { first_tp = orig_tp; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index d229ce99e554..1b92c33b5f81 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -18,6 +18,7 @@ #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> struct basic_head { struct list_head flist; @@ -36,8 +37,9 @@ struct basic_filter { struct rcu_work rwork; }; -static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int basic_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { int r; struct basic_head *head = rcu_dereference_bh(tp->root); diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index bc317b3eac12..466c26df853a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -19,6 +19,7 @@ #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> +#include <net/tc_wrapper.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Daniel Borkmann <[email protected]>"); @@ -77,8 +78,9 @@ static int cls_bpf_exec_opcode(int code) } } -static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int cls_bpf_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct cls_bpf_head *head = rcu_dereference_bh(tp->root); bool at_ingress = skb_at_tc_ingress(skb); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index ed00001b528a..bd9322d71910 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -13,6 +13,7 @@ #include <net/pkt_cls.h> #include <net/sock.h> #include <net/cls_cgroup.h> +#include <net/tc_wrapper.h> struct cls_cgroup_head { u32 handle; @@ -22,8 +23,9 @@ struct cls_cgroup_head { struct rcu_work rwork; }; -static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int cls_cgroup_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid = task_get_classid(skb); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 014cd3de7b5d..535668e1f748 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -24,6 +24,7 @@ #include <net/ip.h> #include <net/route.h> #include <net/flow_dissector.h> +#include <net/tc_wrapper.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> @@ -292,8 +293,9 @@ static u32 flow_key_get(struct sk_buff *skb, int key, struct flow_keys *flow) (1 << FLOW_KEY_NFCT_PROTO_SRC) | \ (1 << FLOW_KEY_NFCT_PROTO_DST)) -static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int flow_classify(struct 
sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct flow_head *head = rcu_dereference_bh(tp->root); struct flow_filter *f; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 25bc57ee6ea1..0b15698b3531 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -27,6 +27,7 @@ #include <net/vxlan.h> #include <net/erspan.h> #include <net/gtp.h> +#include <net/tc_wrapper.h> #include <net/dst.h> #include <net/dst_metadata.h> @@ -305,8 +306,9 @@ static u16 fl_ct_info_to_flower_map[] = { TCA_FLOWER_KEY_CT_FLAGS_NEW, }; -static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int fl_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct cls_fl_head *head = rcu_dereference_bh(tp->root); bool post_ct = tc_skb_cb(skb)->post_ct; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index a32351da968c..ae9439a6c56c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -21,6 +21,7 @@ #include <net/act_api.h> #include <net/pkt_cls.h> #include <net/sch_generic.h> +#include <net/tc_wrapper.h> #define HTSIZE 256 @@ -47,8 +48,9 @@ static u32 fw_hash(u32 handle) return handle % HTSIZE; } -static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 39a5d9c170de..705f63da2c21 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -12,6 +12,7 @@ #include <net/sch_generic.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> struct cls_mall_head { struct tcf_exts exts; @@ -24,8 +25,9 @@ struct cls_mall_head { bool deleting; }; -static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int mall_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct cls_mall_head *head = rcu_dereference_bh(tp->root); diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 9e43b929d4ca..d0c53724d3e8 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -17,6 +17,7 @@ #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> /* * 1. For now we assume that route tags < 256. 
@@ -121,8 +122,9 @@ static inline int route4_hash_wild(void) return 0; \ } -static int route4_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int route4_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct route4_head *head = rcu_dereference_bh(tp->root); struct dst_entry *dst; diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c index de1c1d4da597..03d8619bd9c6 100644 --- a/net/sched/cls_rsvp.c +++ b/net/sched/cls_rsvp.c @@ -15,10 +15,12 @@ #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #define RSVP_DST_LEN 1 #define RSVP_ID "rsvp" #define RSVP_OPS cls_rsvp_ops +#define RSVP_CLS rsvp_classify #include "cls_rsvp.h" MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index b00a7dbd0587..869efba9f834 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -124,8 +124,8 @@ static inline unsigned int hash_src(__be32 *src) return r; \ } -static int rsvp_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int RSVP_CLS(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) { struct rsvp_head *head = rcu_dereference_bh(tp->root); struct rsvp_session *s; @@ -738,7 +738,7 @@ static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, - .classify = rsvp_classify, + .classify = RSVP_CLS, .init = rsvp_init, .destroy = rsvp_destroy, .get = rsvp_get, diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c index 64078846000e..e627cc32d633 100644 --- a/net/sched/cls_rsvp6.c +++ b/net/sched/cls_rsvp6.c @@ -15,10 +15,12 @@ #include <net/act_api.h> #include <net/pkt_cls.h> #include <net/netlink.h> +#include <net/tc_wrapper.h> #define RSVP_DST_LEN 4 #define RSVP_ID "rsvp6" #define RSVP_OPS cls_rsvp6_ops +#define RSVP_CLS rsvp6_classify #include "cls_rsvp.h" MODULE_LICENSE("GPL"); diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 1c9eeb98d826..eb0e9458e722 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -16,6 +16,7 @@ #include <net/netlink.h> #include <net/pkt_cls.h> #include <net/sch_generic.h> +#include <net/tc_wrapper.h> /* * Passing parameters to the root seems to be done more awkwardly than really @@ -98,9 +99,9 @@ static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, return NULL; } - -static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int tcindex_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct tcindex_data *p = rcu_dereference_bh(tp->root); struct tcindex_filter_result *f; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 34d25f7a0687..4e2e269f121f 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -39,6 +39,7 @@ #include <net/act_api.h> #include <net/pkt_cls.h> #include <linux/idr.h> +#include <net/tc_wrapper.h> struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -100,8 +101,9 @@ static inline unsigned int u32_hash_fold(__be32 key, return h; } -static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) +TC_INDIRECT_SCOPE int u32_classify(struct sk_buff *skb, + const struct tcf_proto *tp, + struct tcf_result *res) { struct { struct tc_u_knode *knode; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 
4a27dfb1ba0f..2317db02c764 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -31,6 +31,7 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> +#include <net/tc_wrapper.h> #include <trace/events/qdisc.h> @@ -2273,6 +2274,8 @@ static struct pernet_operations psched_net_ops = { .exit = psched_net_exit, }; +DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); + static int __init pktsched_init(void) { int err; @@ -2300,6 +2303,8 @@ static int __init pktsched_init(void) rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, 0); + tc_wrapper_init(); + return 0; } diff --git a/net/tipc/link.c b/net/tipc/link.c index e260c0d557f5..b3ce24823f50 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2224,7 +2224,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); - skb_linearize(skb); + if (skb_linearize(skb)) + goto exit; + hdr = buf_msg(skb); data = msg_data(hdr); diff --git a/net/tipc/node.c b/net/tipc/node.c index b48d97cbbe29..49ddc484c4fe 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1689,6 +1689,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, struct tipc_node *n; struct sk_buff_head xmitq; bool node_up = false; + struct net *peer_net; int bearer_id; int rc; @@ -1705,18 +1706,23 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, return -EHOSTUNREACH; } + rcu_read_lock(); tipc_node_read_lock(n); node_up = node_is_up(n); - if (node_up && n->peer_net && check_net(n->peer_net)) { + peer_net = n->peer_net; + tipc_node_read_unlock(n); + if (node_up && peer_net && check_net(peer_net)) { /* xmit inner linux container */ - tipc_lxc_xmit(n->peer_net, list); + tipc_lxc_xmit(peer_net, list); if (likely(skb_queue_empty(list))) { - tipc_node_read_unlock(n); + rcu_read_unlock(); tipc_node_put(n); return 0; } } + rcu_read_unlock(); + tipc_node_read_lock(n); bearer_id = n->active_links[selector & 1]; if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); diff --git a/net/unix/diag.c b/net/unix/diag.c index 105f522a89fe..616b55c5b890 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -114,14 +114,16 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) return nla_put(nlskb, UNIX_DIAG_RQLEN, sizeof(rql), &rql); } -static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb) +static int sk_diag_dump_uid(struct sock *sk, struct sk_buff *nlskb, + struct user_namespace *user_ns) { - uid_t uid = from_kuid_munged(sk_user_ns(nlskb->sk), sock_i_uid(sk)); + uid_t uid = from_kuid_munged(user_ns, sock_i_uid(sk)); return nla_put(nlskb, UNIX_DIAG_UID, sizeof(uid_t), &uid); } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags, int sk_ino) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; @@ -167,7 +169,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_UID) && - sk_diag_dump_uid(sk, skb)) + sk_diag_dump_uid(sk, skb, user_ns)) goto out_nlmsg_trim; nlmsg_end(skb, nlh); @@ -179,7 +181,8 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 portid, u32 seq, u32 flags) + struct user_namespace *user_ns, + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -190,7 +193,7 @@ static int 
sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, user_ns, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -214,7 +217,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (!(req->udiag_states & (1 << sk->sk_state))) goto next; - if (sk_diag_dump(sk, skb, req, + if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) { @@ -282,7 +285,8 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, + err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index f99e00083141..4c677c8546c7 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -59,7 +59,7 @@ int snd_dice_stream_get_rate_mode(struct snd_dice *dice, unsigned int rate, static int select_clock(struct snd_dice *dice, unsigned int rate) { - __be32 reg; + __be32 reg, new; u32 data; int i; int err; @@ -83,15 +83,17 @@ static int select_clock(struct snd_dice *dice, unsigned int rate) if (completion_done(&dice->clock_accepted)) reinit_completion(&dice->clock_accepted); - reg = cpu_to_be32(data); + new = cpu_to_be32(data); err = snd_dice_transaction_write_global(dice, GLOBAL_CLOCK_SELECT, - &reg, sizeof(reg)); + &new, sizeof(new)); if (err < 0) return err; if (wait_for_completion_timeout(&dice->clock_accepted, - msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) - return -ETIMEDOUT; + msecs_to_jiffies(NOTIFICATION_TIMEOUT_MS)) == 0) { + if (reg != new) + return -ETIMEDOUT; + } return 0; } diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c index 51721edd8f53..e88d9ff95cdf 100644 --- a/sound/soc/codecs/cs42l51.c +++ b/sound/soc/codecs/cs42l51.c @@ -143,7 +143,7 @@ static const struct snd_kcontrol_new cs42l51_snd_controls[] = { 0, 0xA0, 96, adc_att_tlv), SOC_DOUBLE_R_SX_TLV("PGA Volume", CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL, - 0, 0x19, 30, pga_tlv), + 0, 0x1A, 30, pga_tlv), SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0), SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0), SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0), diff --git a/sound/soc/codecs/tlv320adc3xxx.c b/sound/soc/codecs/tlv320adc3xxx.c index a969547708d4..52bb55724724 100644 --- a/sound/soc/codecs/tlv320adc3xxx.c +++ b/sound/soc/codecs/tlv320adc3xxx.c @@ -14,6 +14,7 @@ #include <dt-bindings/sound/tlv320adc3xxx.h> #include <linux/clk.h> +#include <linux/gpio/consumer.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/io.h> @@ -1025,7 +1026,9 @@ static const struct gpio_chip adc3xxx_gpio_chip = { static void adc3xxx_free_gpio(struct adc3xxx *adc3xxx) { +#ifdef CONFIG_GPIOLIB gpiochip_remove(&adc3xxx->gpio_chip); +#endif } static void adc3xxx_init_gpio(struct adc3xxx *adc3xxx) diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 79ef4e269bc9..4b86ef82fd93 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -194,6 +194,25 @@ static int fsl_micfil_reset(struct device *dev) if (ret) return ret; + /* + * SRES is self-cleared bit, but REG_MICFIL_CTRL1 is defined + * as non-volatile register, so SRES still
remain in regmap + * cache after set, that every update of REG_MICFIL_CTRL1, + * software reset happens. so clear it explicitly. + */ + ret = regmap_clear_bits(micfil->regmap, REG_MICFIL_CTRL1, + MICFIL_CTRL1_SRES); + if (ret) + return ret; + + /* + * Set SRES should clear CHnF flags, But even add delay here + * the CHnF may not be cleared sometimes, so clear CHnF explicitly. + */ + ret = regmap_write_bits(micfil->regmap, REG_MICFIL_STAT, 0xFF, 0xFF); + if (ret) + return ret; + return 0; } diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index bd88de056358..55b009d3c681 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -452,7 +452,7 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, val = ucontrol->value.integer.value[0]; if (mc->platform_max && val > mc->platform_max) return -EINVAL; - if (val > max - min) + if (val > max) return -EINVAL; val_mask = mask << shift; val = (val + min) & mask; @@ -464,10 +464,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, ret = err; if (snd_soc_volsw_is_stereo(mc)) { - unsigned int val2; + unsigned int val2 = ucontrol->value.integer.value[1]; + + if (mc->platform_max && val2 > mc->platform_max) + return -EINVAL; + if (val2 > max) + return -EINVAL; val_mask = mask << rshift; - val2 = (ucontrol->value.integer.value[1] + min) & mask; + val2 = (val2 + min) & mask; val2 = val2 << rshift; err = snd_soc_component_update_bits(component, reg2, val_mask, diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh index f62ce479c266..878125041fc3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh @@ -31,7 +31,7 @@ # | 2001:db8:10::2/64 | # +-------------------------+ -lib_dir=$(dirname $0)/../../../../net/forwarding +lib_dir=$(dirname $0)/../../../net/forwarding ALL_TESTS=" decap_error_test diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ee38ca888244..9cc84114741d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,6 +2,7 @@ bind_bhash csum cmsg_sender +diag_uid fin_ack_lat gro hwtstamp_config diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 969620ae9928..1e4b397cece6 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,3 +1,3 @@ -TEST_GEN_PROGS := test_unix_oob unix_connect +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c new file mode 100644 index 000000000000..5b88f7129fea --- /dev/null +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. 
*/ + +#define _GNU_SOURCE +#include <sched.h> + +#include <unistd.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/sock_diag.h> +#include <linux/unix_diag.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/un.h> + +#include "../../kselftest_harness.h" + +FIXTURE(diag_uid) +{ + int netlink_fd; + int unix_fd; + __u32 inode; + __u64 cookie; +}; + +FIXTURE_VARIANT(diag_uid) +{ + int unshare; + int udiag_show; +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid) +{ + .unshare = 0, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_VARIANT_ADD(diag_uid, uid_unshare) +{ + .unshare = CLONE_NEWUSER, + .udiag_show = UDIAG_SHOW_UID +}; + +FIXTURE_SETUP(diag_uid) +{ + struct stat file_stat; + socklen_t optlen; + int ret; + + if (variant->unshare) + ASSERT_EQ(unshare(variant->unshare), 0); + + self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + ASSERT_NE(self->netlink_fd, -1); + + self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0); + ASSERT_NE(self->unix_fd, -1); + + ret = fstat(self->unix_fd, &file_stat); + ASSERT_EQ(ret, 0); + + self->inode = file_stat.st_ino; + + optlen = sizeof(self->cookie); + ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(diag_uid) +{ + close(self->netlink_fd); + close(self->unix_fd); +} + +int send_request(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self, + const FIXTURE_VARIANT(diag_uid) *variant) +{ + struct { + struct nlmsghdr nlh; + struct unix_diag_req udr; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .udr = { + .sdiag_family = AF_UNIX, + .udiag_ino = self->inode, + .udiag_cookie = { + (__u32)self->cookie, + (__u32)(self->cookie >> 32) + }, + .udiag_show = variant->udiag_show + } + }; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = &req, + .iov_len = sizeof(req) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + + return sendmsg(self->netlink_fd, &msg, 0); +} + +void render_response(struct __test_metadata *_metadata, + struct unix_diag_req *udr, __u32 len) +{ + unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr)); + struct rtattr *attr; + uid_t uid; + + ASSERT_GT(len, sizeof(*udr)); + ASSERT_EQ(udr->sdiag_family, AF_UNIX); + + attr = (struct rtattr *)(udr + 1); + ASSERT_NE(RTA_OK(attr, rta_len), 0); + ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID); + + uid = *(uid_t *)RTA_DATA(attr); + ASSERT_EQ(uid, getuid()); +} + +void receive_response(struct __test_metadata *_metadata, + FIXTURE_DATA(diag_uid) *self) +{ + long buf[8192 / sizeof(long)]; + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec iov = { + .iov_base = buf, + .iov_len = sizeof(buf) + }; + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1 + }; + struct unix_diag_req *udr; + struct nlmsghdr *nlh; + int ret; + + ret = recvmsg(self->netlink_fd, &msg, 0); + ASSERT_GT(ret, 0); + + nlh = (struct nlmsghdr *)buf; + ASSERT_NE(NLMSG_OK(nlh, ret), 0); + ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY); + + render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len); + + nlh = NLMSG_NEXT(nlh, ret); + ASSERT_EQ(NLMSG_OK(nlh, ret), 0); +} + +TEST_F(diag_uid, 1) +{ + int ret; + + ret = send_request(_metadata, self, variant); + ASSERT_GT(ret, 0); + + receive_response(_metadata, 
self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bpf/Makefile b/tools/testing/selftests/net/bpf/Makefile index a26cb94354f6..4abaf16d2077 100644 --- a/tools/testing/selftests/net/bpf/Makefile +++ b/tools/testing/selftests/net/bpf/Makefile @@ -12,7 +12,7 @@ CCINCLUDE += -I$(SCRATCH_DIR)/include BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a -MAKE_DIRS := $(BUILD_DIR)/libbpf +MAKE_DIRS := $(BUILD_DIR)/libbpf $(OUTPUT)/bpf $(MAKE_DIRS): mkdir -p $@ @@ -37,8 +37,8 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) -$(TEST_CUSTOM_PROGS): $(BPFOBJ) - $(CLANG) -O2 -target bpf -c $(@:.o=.c) $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ +$(TEST_CUSTOM_PROGS): $(OUTPUT)/%.o: %.c $(BPFOBJ) | $(MAKE_DIRS) + $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index ead7963b9bf0..bd89198cd817 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,5 +43,5 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4=y +CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 2271a8727f62..5637b5dadabd 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -1711,13 +1711,21 @@ ipv4_del_addr_test() $IP addr add dev dummy1 172.16.104.1/24 $IP addr add dev dummy1 172.16.104.11/24 + $IP addr add dev dummy1 172.16.104.12/24 + $IP addr add dev dummy1 172.16.104.13/24 $IP addr add dev dummy2 172.16.104.1/24 $IP addr add dev dummy2 172.16.104.11/24 + $IP addr add dev dummy2 172.16.104.12/24 $IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add 172.16.106.0/24 dev lo src 172.16.104.12 + $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13 $IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 set +e # removing address from device in vrf should only remove route from vrf table + echo " Regular FIB info" + $IP addr del dev dummy2 172.16.104.11/24 $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 1 "Route removed from VRF when source address deleted" @@ -1735,6 +1743,35 @@ ipv4_del_addr_test() $IP ro ls vrf red | grep -q 172.16.105.0/24 log_test $? 0 "Route in VRF is not removed by address delete" + # removing address from device in vrf should only remove route from vrf + # table even when the associated fib info only differs in table ID + echo " Identical FIB info with different table ID" + + $IP addr del dev dummy2 172.16.104.12/24 + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed from VRF when source address deleted" + + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 0 "Route in default VRF not removed" + + $IP addr add dev dummy2 172.16.104.12/24 + $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12 + + $IP addr del dev dummy1 172.16.104.12/24 + $IP ro ls | grep -q 172.16.106.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + + $IP ro ls vrf red | grep -q 172.16.106.0/24 + log_test $? 
0 "Route in VRF is not removed by address delete" + + # removing address from device in default vrf should remove route from + # the default vrf even when route was inserted with a table ID of 0. + echo " Table ID 0" + + $IP addr del dev dummy1 172.16.104.13/24 + $IP ro ls | grep -q 172.16.107.0/24 + log_test $? 1 "Route removed in default VRF when source address deleted" + $IP li del dummy1 $IP li del dummy2 cleanup diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 0900c5438fbb..275491be3da2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -782,7 +782,7 @@ kci_test_ipsec_offload() tmpl proto esp src $srcip dst $dstip spi 9 \ mode transport reqid 42 check_err $? - ip x p add dir out src $dstip/24 dst $srcip/24 \ + ip x p add dir in src $dstip/24 dst $srcip/24 \ tmpl proto esp src $dstip dst $srcip spi 9 \ mode transport reqid 42 check_err $? diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh index 0a49907cd4fe..da5bfd834eff 100755 --- a/tools/testing/selftests/net/toeplitz.sh +++ b/tools/testing/selftests/net/toeplitz.sh @@ -32,7 +32,7 @@ DEV="eth0" # This is determined by reading the RSS indirection table using ethtool. get_rss_cfg_num_rxqs() { echo $(ethtool -x "${DEV}" | - egrep [[:space:]]+[0-9]+:[[:space:]]+ | + grep -E [[:space:]]+[0-9]+:[[:space:]]+ | cut -d: -f2- | awk '{$1=$1};1' | tr ' ' '\n' | diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh index 26e193ffd2a2..873a892147e5 100644 --- a/tools/vm/slabinfo-gnuplot.sh +++ b/tools/vm/slabinfo-gnuplot.sh @@ -150,7 +150,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-loss" `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" @@ -159,7 +159,7 @@ do_preprocess() let lines=3 out=`basename "$in"`"-slabs-by-size" `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\ - egrep -iv '\-\-|Name|Slabs'\ + grep -E -iv '\-\-|Name|Slabs'\ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"` if [ $? -eq 0 ]; then do_slabs_plotting "$out" |